# In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
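"""
Created on Wed Jan 10 15:07:04 2024

Builds a 2x2 dashboard of smoking patterns (by ethnicity, nationality,
gender and age) from 'smoking.csv'.
"""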

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Use one base font size for every text element of the plots
plt.rcParams['font.size'] = 16

# Read the input data (adjust the path if the CSV lives elsewhere)
df = pd.read_csv('smoking.csv')

# 2x2 grid of axes on a large canvas; the top row is taller than the bottom
grid_options = dict(height_ratios=[1, 0.7], hspace=0.3, wspace=0.3)
fig, axes = plt.subplots(2, 2, figsize=(26, 26), gridspec_kw=grid_options)

# Overall heading of the dashboard
fig.suptitle('Socio-Economic Profiles and Smoking Patterns', fontsize=30)


# Percentage of smokers within each ethnicity.
# value_counts(normalize=True) gives, per ethnicity, the share of each
# 'smoke' answer; unstack() pivots those answers into columns.
# fill_value=0 makes an ethnicity with no 'Yes' answers count as 0 %
# instead of NaN (NaN would be drawn as a missing bar and sorted last).
smoke_shares = (
    df.groupby('ethnicity')['smoke']
    .value_counts(normalize=True)
    .unstack(fill_value=0)
)
smoking_rates = smoke_shares['Yes'] * 100  # fraction -> percent

# Sort ascending so the bars are ordered by smoking rate
smoking_rates_sorted = smoking_rates.sort_values()

# Horizontal bar chart with one distinct Spectral colour per ethnicity
colors = plt.cm.Spectral(np.linspace(0, 1, len(smoking_rates_sorted)))
smoking_rates_sorted.plot(kind='barh', color=colors, ax=axes[0, 0])
axes[0, 0].set_title('Percentage of Smokers by Ethnicity')
axes[0, 0].set_xlabel('Percentage of Smokers')

# Pie chart of the three nationalities with the most rows where smoke == 'Yes'
is_smoker = df['smoke'] == 'Yes'
top_nationalities = df.loc[is_smoker, 'nationality'].value_counts().head(3)
axes[0, 1].pie(
    top_nationalities,
    labels=top_nationalities.index,
    autopct='%1.1f%%',  # print each wedge's share with one decimal place
    startangle=140,
    textprops={'fontsize': 20},
)
axes[0, 1].set_title('Top 3 Nationalities by Smoking Prevalence')


# Grouped (non-stacked) bars: row counts of each 'smoke' value per gender
gender_ax = axes[1, 0]
smoking_gender = df.groupby('gender')['smoke'].value_counts().unstack()
smoking_gender.plot.bar(stacked=False, ax=gender_ax)
gender_ax.set_title('Smoking Status by Gender')
gender_ax.set_ylabel('Count')

# Line chart: mean weekday and weekend cigarette amounts for each age value
age_ax = axes[1, 1]
cigarette_columns = ['amt_weekdays', 'amt_weekends']
avg_cigarettes = df.groupby('age')[cigarette_columns].mean()
avg_cigarettes.plot.line(ax=age_ax)
age_ax.set_title('Average Number of Cigarettes Smoked')
age_ax.set_xlabel('Age')
age_ax.set_ylabel('Average Number of Cigarettes')

# Fit the axes into the region between the bottom caption area and the
# suptitle, then re-apply the fixed gaps between the subplots
fig.tight_layout(rect=[0, 0.1, 1, 0.93])
fig.subplots_adjust(hspace=0.3, wspace=0.3)

# Caption summarising the four charts, centred along the bottom of the figure.
# NOTE(review): the figures quoted below are hard-coded, not computed from df;
# re-check them against the charts whenever the input data changes.
descriptive_text = (
    "Amongst the known ethnicities, Mixed has the highest percentage of smokers.\n"
    "Among the top three nationalities of smokers, English account for 53.7%, followed by British at 34.3% and Scottish at 12.0%.\n"
    "More males smoke than females, with approximately 650 males smoking compared to 600 females.\n"
    "The average number of cigarettes smoked daily fluctuates, with peaks observed in the age groups 20-30 and 60-70."
)
# Anchor the top edge of the caption at y=0.06 (figure coordinates),
# horizontally centred at x=0.5
plt.figtext(0.5, 0.06, descriptive_text, ha='center', va='top', fontsize=16,
            wrap=True)

# Author credit, right-aligned in the bottom-right corner of the figure
author_credit = 'Student ID: 22081224\nStudent Name: Prem Kumar Surya'
fig.text(0.95, 0.02, author_credit,
         horizontalalignment='right', verticalalignment='top', fontsize=12)

# Saving is currently disabled; uncomment to write the dashboard to disk
# plt.savefig('22081224.png', dpi=300)

# Display the dashboard
plt.show()

# Release the figure's resources once it has been shown
plt.close(fig)
