In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
# Read the cleaned survey export into the notebook-wide frame `df`, then
# preview the first five rows as a quick check of the columns.
df = pd.read_csv(filepath_or_buffer="Cleaned Final.csv")
df.head(n=5)

In [None]:
# Count table: rows = HadHeartAttack, columns = Sex.
pd.crosstab(index=df["HadHeartAttack"], columns=df["Sex"])

In [None]:
# Annotated heat map of the raw heart-attack-by-sex counts.
ct = pd.crosstab(index=df["HadHeartAttack"], columns=df["Sex"])

heatmap_options = {"annot": True, "fmt": "d", "cmap": "Reds"}  # integer cell labels
plt.figure(figsize=(8, 4))
sns.heatmap(ct, **heatmap_options)
plt.title('Heart Attacks by Sex')
plt.show()

In [None]:
# Count table reused by the bar chart in the following cell:
# rows = HadHeartAttack, columns = Sex.
heartattack_gender = pd.crosstab(index=df["HadHeartAttack"], columns=df["Sex"])

# Heading and table are printed on separate lines.
print("Heart Attack by Gender Crosstab:", heartattack_gender, sep="\n")

In [None]:
# Bar chart - count comparison: one bar group per heart-attack status, one bar
# per gender column of `heartattack_gender`.
#
# DataFrame.plot opens a brand-new figure unless it is handed an Axes, so a
# preceding plt.figure(figsize=...) is left behind as an empty figure and its
# size is never applied to the chart. Create the Axes explicitly and pass it in.
fig, ax = plt.subplots(figsize=(10, 6))
heartattack_gender.plot(
    kind='bar',
    color=['lightblue', 'lightpink'],  # one colour per column, in column order
    rot=0,                             # keep the x tick labels horizontal
    ax=ax,
)
ax.set_title('Heart Attack Prevalence by Gender - Count Comparison')
ax.set_xlabel('Had Heart Attack (0=No, 1=Yes)')
ax.set_ylabel('Number of People')
ax.legend(title='Gender')
ax.grid(axis='y', alpha=0.3)
fig.tight_layout()
plt.show()

In [None]:
# Count table: rows = HadHeartAttack, columns = SleepHours.
pd.crosstab(index=df["HadHeartAttack"], columns=df["SleepHours"])

In [None]:
# Counts of heart-attack status (columns) for every SleepHours value (rows).
sleep_heart = pd.crosstab(df['SleepHours'], df['HadHeartAttack'])

# Row-normalise so each SleepHours value sums to 100 %, which makes sleep
# durations with very different sample sizes comparable.
sleep_heart_pct = sleep_heart.div(sleep_heart.sum(axis=1), axis=0) * 100

# A single-panel figure: the previous 2x1 subplot grid only ever filled its
# first slot, leaving the lower half of a 14x8 canvas blank. The height is
# halved so the heat map keeps the size it had before.
fig, ax = plt.subplots(figsize=(14, 4))
sns.heatmap(sleep_heart_pct.T,          # transpose: sleep hours run along the x axis
            annot=True,
            fmt='.1f',
            cmap='RdYlGn_r',
            cbar_kws={'label': 'Heart Attack Rate (%)'},
            linewidths=0.5,
            ax=ax)
ax.set_title('Heart Attack Rate by Sleep Hours (Percentage Heatmap)', fontsize=14, fontweight='bold')
ax.set_ylabel('Had Heart Attack\n(0=No, 1=Yes)')
ax.set_xlabel('Sleep Hours per Night')
plt.setp(ax.get_yticklabels(), rotation=0)

# Finish the figure explicitly so the cell does not rely on the inline
# backend's implicit display and labels are not clipped.
fig.tight_layout()
plt.show()

 

In [None]:
# Author's recorded finding: 44.4% of people who have angina had heart attacks.
# Count table: rows = HadHeartAttack, columns = HadAngina.
pd.crosstab(index=df["HadHeartAttack"], columns=df["HadAngina"])

In [None]:
# Heart-attack status (rows) against angina status (columns), as counts.
ct = pd.crosstab(index=df["HadHeartAttack"], columns=df["HadAngina"])

# Collapse the heart-attack rows: what remains is the number of respondents
# in each angina category.
total_counts = ct.sum(axis=0)

plt.figure(figsize=(8, 8))
total_counts.plot(kind='pie', autopct='%1.1f%%', colors=['lightcoral', 'lightblue'])
plt.title('Overall Angina Distribution')
plt.ylabel('')  # pandas would otherwise print the series name beside the pie
plt.show()

In [None]:
# Author's recorded finding: depressive disorder has almost no effect on the
# heart attack rate.
# Count table: rows = HadHeartAttack, columns = HadDepressiveDisorder.
pd.crosstab(index=df["HadHeartAttack"], columns=df["HadDepressiveDisorder"])

In [None]:
# One pie per heart-attack status showing the split of depressive-disorder
# answers within that group.
#
# The table is built here on purpose: the shared `ct` variable still holds the
# heart-attack x angina counts from an earlier cell at this point, so reading
# it would draw angina data under depressive-disorder titles.
depression_by_attack = pd.crosstab(df['HadHeartAttack'], df['HadDepressiveDisorder'])

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Walk the crosstab's own (sorted) row labels instead of df[...].unique(),
# whose order depends on which value happens to appear first in the data.
# zip stops after two statuses, matching the two panels.
for ax, status in zip((ax1, ax2), depression_by_attack.index):
    depression_by_attack.loc[status].plot.pie(
        ax=ax, autopct='%1.1f%%', colors=['lightcoral', 'lightblue'])
    ax.set_title(f'Depressive Disorder - {status} Heart Attack')

plt.tight_layout()
plt.show()

In [None]:
# Author's recorded finding: people who have diabetes have a 336% higher
# chance of having a heart attack.
# Count table: rows = HadHeartAttack, columns = HadDiabetes.
pd.crosstab(index=df["HadHeartAttack"], columns=df["HadDiabetes"])

In [None]:
# Interactive grouped bar chart of heart-attack status against diabetes status.
# plotly.express (`px`) is already imported in the notebook's first cell, so
# the imports are not repeated here; keeping every import in one place makes
# the notebook's dependencies visible at a glance.

# Counts: rows = HadHeartAttack, columns = HadDiabetes. Kept in `ct` because
# the heat-map cell that follows reads it.
ct = pd.crosstab(df.HadHeartAttack, df.HadDiabetes)

# Wide-form input: the index becomes the x axis, each column its own bar series.
fig = px.bar(ct, barmode='group', title='Heart Attack vs Diabetes')
fig.update_layout(
    xaxis_title='Had Heart Attack',
    yaxis_title='Count',
    legend_title='Had Diabetes'
)
fig.show()

In [None]:
# Heat map of heart-attack status (rows) against diabetes status (columns).
# The table is rebuilt here instead of reading the shared `ct` variable: `ct`
# is reassigned by many cells, so this cell would silently plot a different
# table if cells were executed out of order.
diabetes_by_attack = pd.crosstab(df.HadHeartAttack, df.HadDiabetes)

fig = px.imshow(diabetes_by_attack,
                text_auto=True,  # print each cell's count on the tile
                color_continuous_scale='Blues',
                title='Heart Attack vs Diabetes - Heatmap')
fig.update_xaxes(title='Had Diabetes')
fig.update_yaxes(title='Had Heart Attack')
fig.show()

In [None]:
# Count table: rows = HighRiskLastYear, columns = HadDiabetes.
pd.crosstab(index=df["HighRiskLastYear"], columns=df["HadDiabetes"])

In [None]:
 # Create crosstab
ct = pd.crosstab(index=df["HighRiskLastYear"], columns=df["HadDiabetes"])

# Interactive grouped bars: x = HighRiskLastYear, one series per HadDiabetes value.
axis_labels = {'value': 'Count', 'HighRiskLastYear': 'High Risk Last Year'}
fig = px.bar(
    ct,
    barmode='group',
    title='High Risk Last Year vs Diabetes',
    labels=axis_labels,
)

layout_titles = {
    'xaxis_title': 'High Risk Last Year',
    'yaxis_title': 'Count',
    'legend_title': 'Had Diabetes',
}
fig.update_layout(**layout_titles)
fig.show()

In [None]:
# Count table: rows = HighRiskLastYear, columns = TetanusLast10Tdap.
pd.crosstab(index=df["HighRiskLastYear"], columns=df["TetanusLast10Tdap"])

In [None]:
# Stacked percentage bars: share of each tetanus-vaccination answer within
# every HighRiskLastYear group.
#
# Build the tetanus table first. Without this line `ct` still holds the
# HighRiskLastYear x HadDiabetes counts from the earlier diabetes cell, and the
# chart would show diabetes shares under a tetanus title.
ct = pd.crosstab(df.HighRiskLastYear, df.TetanusLast10Tdap)

# Convert to percentages: divide every row by its own total.
ct_pct = ct.div(ct.sum(axis=1), axis=0) * 100

fig = px.bar(ct_pct, barmode='stack',
             title='High Risk Last Year vs Tetanus Vaccination (%)',
             labels={'value': 'Percentage', 'HighRiskLastYear': 'High Risk Last Year'})
fig.update_layout(
    xaxis_title='High Risk Last Year',
    yaxis_title='Percentage',
    legend_title='Tetanus Vaccination'
)
fig.show()

In [None]:
# Two donut charts: tetanus-vaccination breakdown for each HighRiskLastYear
# group.
#
# The table is computed locally so the cell does not depend on whatever the
# shared `ct` variable currently contains (previously the diabetes crosstab).
tetanus_by_risk = pd.crosstab(df.HighRiskLastYear, df.TetanusLast10Tdap)

# Take panel titles from the crosstab's row labels. Hard-coded titles assumed
# the first row was the high-risk group, but crosstab sorts its index, so the
# captions could end up attached to the wrong pie.
risk_levels = list(tetanus_by_risk.index[:2])

fig = make_subplots(rows=1, cols=2,
                    specs=[[{'type': 'domain'}, {'type': 'domain'}]],  # 'domain' cells are required for pies
                    subplot_titles=[f'High Risk Last Year = {level}' for level in risk_levels])

for col, level in enumerate(risk_levels, start=1):
    fig.add_trace(
        go.Pie(labels=tetanus_by_risk.columns,
               values=tetanus_by_risk.loc[level],
               hole=0.4),
        1, col)

fig.update_traces(hoverinfo='label+percent+value')
fig.update_layout(title_text="Tetanus Vaccination by High Risk Status")
fig.show()

In [None]:
# Count table: rows = HighRiskLastYear, columns = FluVaxLast12.
pd.crosstab(index=df["HighRiskLastYear"], columns=df["FluVaxLast12"])

In [None]:
# Count table: rows = HighRiskLastYear, columns = HIVTesting.
pd.crosstab(index=df["HighRiskLastYear"], columns=df["HIVTesting"])

In [None]:
# Count table: rows = PhysicalActivities, columns = HadHeartAttack.
pd.crosstab(index=df["PhysicalActivities"], columns=df["HadHeartAttack"])

In [None]:
# Number of respondents per diabetes status, split by heart-attack history.
plt.figure(figsize=(12, 8))
ax = sns.countplot(x='HadDiabetes', hue="HadHeartAttack", data=df, palette=['#1CA53B', "red"])

# Relabel the legend through the hue handles registered on the axes. Passing
# only a list of labels to plt.legend pairs them with artists by implicit
# order, which matplotlib's documentation discourages as easy to mix up.
# NOTE(review): the label order assumes the "no heart attack" level comes
# first in the hue order - confirm against the column's encoding.
handles, _ = ax.get_legend_handles_labels()
ax.legend(handles, ["didn't have heart attack", "Had heart attack"])

plt.title("Heart attack frequency for people who had diabetes")
plt.xlabel("had diabetes")
plt.ylabel("number of people")  # a countplot's y axis is a count of rows
plt.show()

In [None]:
# Author's recorded finding: people of older age face a higher risk of heart
# attacks.
# Number of respondents per age category, split by heart-attack history.
plt.figure(figsize=(12, 8))
ax = sns.countplot(x='AgeCategory', hue="HadHeartAttack", data=df, palette=['#1CA53B', "red"])

# Attach the custom labels to the hue handles explicitly; a labels-only
# plt.legend call relies on the implicit order of artists on the axes.
# NOTE(review): assumes the "no heart attack" level is first in the hue
# order - confirm against the column's encoding.
handles, _ = ax.get_legend_handles_labels()
ax.legend(handles, ["didn't have heart attack", "Had heart attack"])

plt.title("Heart attack frequency for people of different age groups")
plt.xlabel("AGE")
plt.ylabel("number of people")  # a countplot's y axis is a count of rows
plt.show()

In [None]:
# Number of respondents per age category, split by stroke history.
# (The earlier comment on this cell spoke of heart attacks; the chart below
# is about strokes.)
plt.figure(figsize=(12, 8))
ax = sns.countplot(x='AgeCategory', hue="HadStroke", data=df, palette=['#1CA53B', "red"])

# Attach the custom labels to the hue handles explicitly; a labels-only
# plt.legend call relies on the implicit order of artists on the axes.
# NOTE(review): assumes the "no stroke" level is first in the hue order -
# confirm against the column's encoding.
handles, _ = ax.get_legend_handles_labels()
ax.legend(handles, ["didn't have stroke", "Had stroke"])

# The bars are raw counts, not probabilities, so title and axis say so.
plt.title("stroke frequency for people of different age groups")
plt.xlabel("AGE")
plt.ylabel("number of people")
plt.show()

In [None]:
# Author's recorded finding: higher probability of heart attack for smokers.
# Count table: rows = SmokerStatus, columns = HadHeartAttack.
pd.crosstab(index=df["SmokerStatus"], columns=df["HadHeartAttack"])

In [None]:
# Annotated heat map of smoker status (rows) against heart-attack status
# (columns), using raw counts.
crosstab_data = pd.crosstab(index=df["SmokerStatus"], columns=df["HadHeartAttack"])

plt.figure(figsize=(8, 6))
sns.heatmap(
    crosstab_data,
    annot=True,
    fmt='d',                        # counts are integers
    cmap='Reds',
    cbar_kws={'label': 'Count'},
)
plt.title('Smoking Status vs Heart Attacks - Heatmap', fontsize=16, fontweight='bold')
plt.ylabel('Smoking Status')
plt.xlabel('Had Heart Attack (0=No, 1=Yes)')
plt.tight_layout()
plt.show()

In [None]:
# Percentage of respondents in each GeneralHealth category.
df['GeneralHealth'].value_counts(normalize=True).mul(100)


In [None]:
# Pie chart of the GeneralHealth shares (percent of respondents per category).
health_share = df['GeneralHealth'].value_counts(normalize=True).mul(100)

plt.figure(figsize=(6, 6))
health_share.plot(
    kind='pie',
    autopct='%.1f%%',
    startangle=90,
    colors=sns.color_palette('Set1'),
)
plt.title('General Health Distribution', fontsize=16, fontweight='bold')
plt.ylabel('')  # hide the series name pandas places beside the pie
plt.show()


In [None]:
# Percentage of respondents reporting each condition.
diseases = ['HadDiabetes', 'HadHeartAttack', 'HadStroke', 'HadDepressiveDisorder']

# Accept both encodings of a positive answer. Other cells in this notebook
# label HadHeartAttack / HadStroke as 0/1, in which case comparing to the
# string 'Yes' alone would report 0 % for those columns.
# NOTE(review): confirm the actual encoding in "Cleaned Final.csv".
df[diseases].isin(['Yes', 1]).mean() * 100

In [None]:
# Mean SleepHours per GeneralHealth category, longest sleepers first.
(
    df.groupby('GeneralHealth')['SleepHours']
    .mean()
    .sort_values(ascending=False)
)


In [None]:
# Interactive bar chart of mean sleep hours per GeneralHealth category.
sleep_df = df.groupby('GeneralHealth')['SleepHours'].mean().reset_index()
sleep_df.columns = ['GeneralHealth', 'AverageSleepHours']

# The palette is a green-to-red gradient, so it must be paired with the
# categories in best-to-worst order. groupby returns them alphabetically,
# and zipping the colours against that order scrambles the gradient.
# NOTE(review): the labels below assume the usual five-level health scale;
# any category not listed keeps its original relative position at the end,
# so with entirely different labels the chart is unchanged.
health_order = ['Excellent', 'Very good', 'Good', 'Fair', 'Poor']
colors = ['green', 'lightgreen', 'gold', 'orange', 'red']

observed_levels = list(sleep_df['GeneralHealth'])
ordered_levels = [level for level in health_order if level in observed_levels]
ordered_levels += [level for level in observed_levels if level not in ordered_levels]

fig = px.bar(
    sleep_df,
    x='GeneralHealth',
    y='AverageSleepHours',
    title='Average Sleep Hours by General Health Status',
    labels={'AverageSleepHours': 'Average Sleep Hours', 'GeneralHealth': 'General Health'},
    color='GeneralHealth',
    category_orders={'GeneralHealth': ordered_levels},  # x axis follows the same order as the colours
    color_discrete_map=dict(zip(ordered_levels, colors))
)

fig.update_layout(
    showlegend=False,  # the x axis already names every bar
    xaxis=dict(tickangle=-45),
    title=dict(font=dict(size=14, weight='bold')),
    xaxis_title=dict(font=dict(size=12)),
    yaxis_title=dict(font=dict(size=14)),
    width=800,
    height=600
)

fig.show()


In [None]:
# Mean BMI per GeneralHealth category, highest first.
(
    df.groupby('GeneralHealth')['BMI']
    .mean()
    .sort_values(ascending=False)
)


In [None]:
# Bar chart of mean BMI per GeneralHealth category (highest BMI first), with
# reference lines at the BMI cut-offs.
bmi_by_health = df.groupby('GeneralHealth')['BMI'].mean().sort_values(ascending=False)

# Colour each bar by its health category rather than by its position. The
# bars are sorted by BMI, so a positional green-to-red palette paints whichever
# category has the highest BMI green, regardless of what it is.
# NOTE(review): the keys assume the usual five-level health scale; unknown
# labels fall back to the former positional colour.
health_colors = {
    'Excellent': 'green',
    'Very good': 'lightgreen',
    'Good': 'gold',
    'Fair': 'orange',
    'Poor': 'red',
}
fallback_colors = ['green', 'lightgreen', 'gold', 'orange', 'red']
colors = [
    health_colors.get(level, fallback_colors[position % len(fallback_colors)])
    for position, level in enumerate(bmi_by_health.index)
]

plt.figure(figsize=(10, 6))
bars = plt.bar(bmi_by_health.index, bmi_by_health.values, color=colors, edgecolor='black')
plt.title('Average BMI by General Health Status', fontsize=14, fontweight='bold')
plt.xlabel('General Health', fontsize=12)
plt.ylabel('Average BMI', fontsize=12)
plt.xticks(rotation=45)

# Print each mean just above its bar.
for bar in bars:
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2., height,
             f'{height:.1f}', ha='center', va='bottom', fontweight='bold')

# Reference lines sit on the bin edges used for BMI categories elsewhere in
# this notebook (18.5 / 25 / 30), so each label names the boundary it marks
# instead of a category on one side of it.
plt.axhline(y=18.5, color='blue', linestyle='--', alpha=0.9, label='Underweight below 18.5')
plt.axhline(y=25, color='green', linestyle='--', alpha=0.9, label='Overweight above 25')
plt.axhline(y=30, color='orange', linestyle='--', alpha=0.9, label='Obese above 30')

plt.legend()
plt.grid(axis='y', alpha=0.2)
plt.tight_layout()
plt.show()

In [None]:
# Mean MentalHealthDays for each PhysicalActivities group.
df.groupby(by='PhysicalActivities')['MentalHealthDays'].mean()


In [None]:
# Bar chart of the mean MentalHealthDays per PhysicalActivities group.
mental_by_activity = df.groupby(by='PhysicalActivities')['MentalHealthDays'].mean()

plt.figure(figsize=(8, 6))
bar_style = {'color': ['red', 'blue'], 'alpha': 0.7, 'edgecolor': 'black'}
bars = plt.bar(mental_by_activity.index, mental_by_activity.values, **bar_style)

plt.title('Average Mental Health Days by Physical Activity', fontsize=14, fontweight='bold')
plt.xlabel('Physical Activity (0=No, 1=Yes)', fontsize=12)
plt.ylabel('Average Mental Health Days', fontsize=12)

# Annotate every bar with its value, centred just above the top edge.
label_style = {'ha': 'center', 'va': 'bottom', 'fontweight': 'bold', 'fontsize': 11}
for rect in bars:
    top = rect.get_height()
    centre = rect.get_x() + rect.get_width()/2.
    plt.text(centre, top, f'{top:.1f} days', **label_style)

plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# Percentage of each SmokerStatus within every Sex group (rows sum to 100);
# combinations that never occur are shown as 0.
(
    df.groupby('Sex')['SmokerStatus']
    .value_counts(normalize=True)
    .unstack()
    .fillna(0)
    .mul(100)
)

In [None]:
# Smoker-status shares (%) within each Sex group, one row per sex.
ct = (
    df.groupby('Sex')['SmokerStatus']
    .value_counts(normalize=True)
    .unstack()
    .fillna(0)          # combinations that never occur count as 0 %
    .mul(100)
)

# Grouped bars: x = Sex, one bar series per SmokerStatus column.
fig = px.bar(
    ct,
    barmode='group',
    title='Smoking Status by Gender (%)',
    labels={'value': 'Percentage', 'Sex': 'Gender'},
)
fig.update_layout(
    legend_title='Smoking Status',
    yaxis_title='Percentage (%)',
    xaxis_title='Gender',
)
fig.show()

In [None]:
# Mean BMI for each AgeCategory.
df.groupby(by='AgeCategory')['BMI'].mean()


In [None]:
# Line chart of the mean BMI per AgeCategory.
bmi_by_age = df.groupby(by='AgeCategory')['BMI'].mean().reset_index()

fig = px.line(
    bmi_by_age,
    x='AgeCategory',
    y='BMI',
    title='Average BMI by Age Category',
    markers=True,
    line_shape='linear',
)

# Pin the x axis to the sorted category labels.
age_axis = {'categoryorder': 'array', 'categoryarray': sorted(bmi_by_age['AgeCategory'])}
fig.update_layout(
    xaxis_title='Age Category',
    yaxis_title='Average BMI',
    xaxis=age_axis,
)
fig.show()

In [None]:
# Correlation matrix of SleepHours and MentalHealthDays (DataFrame.corr default method).
df.loc[:, ['SleepHours', 'MentalHealthDays']].corr()


In [None]:
# Scatter of MentalHealthDays against SleepHours with an OLS trend line;
# points are coloured by SleepHours.
scatter_labels = {'SleepHours': 'Sleep Hours', 'MentalHealthDays': 'Mental Health Days'}
fig = px.scatter(
    df,
    x='SleepHours',
    y='MentalHealthDays',
    color='SleepHours',
    trendline='ols',
    title='Relationship Between Sleep Hours and Mental Health Days',
    labels=scatter_labels,
)

# Compact hover text; <extra></extra> removes the secondary hover box.
hover_text = 'Sleep: %{x} hrs<br>Mental Days: %{y}<extra></extra>'
fig.update_traces(hovertemplate=hover_text)
fig.show()


In [None]:
# Correlation matrix of BMI and PhysicalHealthDays (DataFrame.corr default method).
df.loc[:, ['BMI', 'PhysicalHealthDays']].corr()


In [None]:
# Bucket BMI into four labelled ranges and store the result on `df` as the
# new column BMI_Category (edges 0 / 18.5 / 25 / 30 / 100).
bmi_edges = [0, 18.5, 25, 30, 100]
bmi_names = ['Underweight', 'Normal', 'Overweight', 'Obese']
df['BMI_Category'] = pd.cut(df['BMI'], bins=bmi_edges, labels=bmi_names)

# Box plot of PhysicalHealthDays for each BMI bucket.
box_labels = {'BMI_Category': 'BMI Category', 'PhysicalHealthDays': 'Physical Health Days'}
fig = px.box(
    df,
    x='BMI_Category',
    y='PhysicalHealthDays',
    color='BMI_Category',
    title='Physical Health Days by BMI Category',
    labels=box_labels,
)
fig.show()


In [None]:
# Percentage of each AgeCategory whose HadDiabetes answer is 'Yes':
# rows answering 'Yes' divided by all rows with a HadDiabetes value.
diabetic_per_age = (
    df.loc[df['HadDiabetes'] == 'Yes']
    .groupby('AgeCategory')['HadDiabetes']
    .count()
)
answered_per_age = df.groupby('AgeCategory')['HadDiabetes'].count()

diabetic_per_age.div(answered_per_age).mul(100)


In [None]:
# Mean BMI of respondents whose GeneralHealth is 'Poor'. A bare expression is
# only displayed when it is the last line of a cell, so the value was being
# computed and thrown away; keep it in a variable and print it. `.loc` with a
# mask and column replaces the chained df[...][...] lookup.
poor_health_mean_bmi = df.loc[df['GeneralHealth'] == 'Poor', 'BMI'].mean()
print(f"Mean BMI for respondents in poor general health: {poor_health_mean_bmi:.2f}")

# Bar chart of the mean BMI for every GeneralHealth category.
plt.figure(figsize=(8, 5))
sns.barplot(data=df, x='GeneralHealth', y='BMI', hue='GeneralHealth',
            errorbar=None,   # no error bars, bar height only
            palette='Set2',
            legend=False)    # hue repeats x, so a legend would be redundant
plt.title('Average BMI by General Health')
plt.xlabel('General Health')
plt.ylabel('Average BMI')
plt.xticks(rotation=30)
plt.tight_layout()
plt.show()


In [None]:
# Share (%) of each HadHeartAttack value within every HadDiabetes group;
# each column sums to 100.
pd.crosstab(index=df['HadHeartAttack'], columns=df['HadDiabetes'], normalize='columns').mul(100)


In [None]:
# Column-normalised table: % of each heart-attack status inside every
# diabetes group.
ct = pd.crosstab(index=df['HadHeartAttack'], columns=df['HadDiabetes'], normalize='columns').mul(100)

# Long format: one row per (HadHeartAttack, HadDiabetes) pair.
ct_df = ct.reset_index().melt(
    id_vars='HadHeartAttack',
    var_name='HadDiabetes',
    value_name='Percentage',
)

fig = px.bar(
    ct_df,
    x='HadHeartAttack',
    y='Percentage',
    color='HadDiabetes',
    barmode='group',
    title='Heart Attack vs Diabetes',
)
fig.update_layout(
    legend_title='Had Diabetes',
    yaxis_title='Percentage',
    xaxis_title='Had Heart Attack',
)
fig.show()

In [None]:
# Share (%) of each HadHeartAttack value within every Sex group;
# each column sums to 100.
pd.crosstab(index=df['HadHeartAttack'], columns=df['Sex'], normalize='columns').mul(100)


In [None]:
# Column-normalised table: % of each heart-attack status inside every sex.
ct = pd.crosstab(index=df['HadHeartAttack'], columns=df['Sex'], normalize='columns').mul(100)

# Long format: one row per (HadHeartAttack, Sex) pair.
ct_df = ct.reset_index().melt(
    id_vars='HadHeartAttack',
    var_name='Sex',
    value_name='Percentage',
)

# Readable x-axis labels for the 0/1 codes.
status_names = {0: 'No', 1: 'Yes'}
ct_df['HeartAttack_Label'] = ct_df['HadHeartAttack'].map(status_names)

fig = px.bar(
    ct_df,
    x='HeartAttack_Label',
    y='Percentage',
    color='Sex',
    barmode='group',
    title='Heart Attack vs Gender',
)
fig.update_layout(
    legend_title='Gender',
    yaxis_title='Percentage',
    xaxis_title='Had Heart Attack',
)
fig.show()

In [None]:
# Share (%) of each HadStroke value within every HadHeartAttack group;
# each column sums to 100.
pd.crosstab(index=df['HadStroke'], columns=df['HadHeartAttack'], normalize='columns').mul(100)


In [None]:
# Column-normalised table: % of each stroke status inside every heart-attack
# group.
ct = pd.crosstab(index=df['HadStroke'], columns=df['HadHeartAttack'], normalize='columns').mul(100)

# Long format: one row per (HadStroke, HadHeartAttack) pair.
ct_df = ct.reset_index().melt(
    id_vars='HadStroke',
    var_name='HadHeartAttack',
    value_name='Percentage',
)

# Readable labels for the 0/1 codes on both variables.
yes_no = {0: 'No', 1: 'Yes'}
ct_df['Stroke_Label'] = ct_df['HadStroke'].map(yes_no)
ct_df['HeartAttack_Label'] = ct_df['HadHeartAttack'].map(yes_no)

# Bar captions: the percentage with one decimal place.
bar_text = ct_df['Percentage'].map('{:.1f}%'.format)

fig = px.bar(
    ct_df,
    x='HeartAttack_Label',
    y='Percentage',
    color='Stroke_Label',
    barmode='group',
    title='Stroke Prevalence by Heart Attack Status (%)',
    text=bar_text,
)
fig.update_layout(
    legend_title='Had Stroke',
    yaxis_title='Percentage (%)',
    xaxis_title='Had Heart Attack',
)
fig.show()

In [None]:
# Share (%) of each HadAsthma value within every SmokerStatus group;
# each column sums to 100.
pd.crosstab(index=df['HadAsthma'], columns=df['SmokerStatus'], normalize='columns').mul(100)


In [None]:
# Column-normalised table: % of each asthma status inside every smoker group.
ct = pd.crosstab(index=df['HadAsthma'], columns=df['SmokerStatus'], normalize='columns').mul(100)

# Long format: one row per (HadAsthma, SmokerStatus) pair.
ct_df = ct.reset_index().melt(
    id_vars='HadAsthma',
    var_name='SmokerStatus',
    value_name='Percentage',
)

# Horizontal grouped bars: smoker status on the y axis, percentage along x.
fig = px.bar(
    ct_df,
    x='Percentage',
    y='SmokerStatus',
    color='HadAsthma',
    orientation='h',
    barmode='group',
    title='Asthma vs Smoking Status',
)
fig.update_layout(
    xaxis_title='Percentage',
    yaxis_title='Smoker Status',
)
fig.show()

In [None]:
# Share (%) of each HadDepressiveDisorder value within every
# PhysicalActivities group; each column sums to 100.
pd.crosstab(index=df['HadDepressiveDisorder'], columns=df['PhysicalActivities'], normalize='columns').mul(100)


In [None]:
# Column-normalised table: % of each depressive-disorder status inside every
# physical-activity group.
cross_tab = pd.crosstab(
    index=df.HadDepressiveDisorder,
    columns=df.PhysicalActivities,
    normalize='columns',
).mul(100)

# Long format: one row per (HadDepressiveDisorder, PhysicalActivities) pair.
cross_tab_reset = cross_tab.reset_index()
cross_tab_reset = cross_tab_reset.melt(
    id_vars='HadDepressiveDisorder',
    var_name='PhysicalActivities',
    value_name='Percentage',
)

bar_options = dict(
    x='PhysicalActivities',
    y='Percentage',
    color='HadDepressiveDisorder',
    barmode='group',
    title='Relation between Physical Activities and Depressive Disorder (%)',
    text='Percentage',
    color_discrete_sequence=px.colors.qualitative.Set2,
)
fig = px.bar(cross_tab_reset, **bar_options)

# One-decimal percentage captions drawn above the bars.
fig.update_traces(textposition='outside', texttemplate='%{text:.1f}%')
fig.update_layout(
    xaxis_title='Physical Activities',
    yaxis_title='Percentage (%)',
    plot_bgcolor='white',
    font=dict(size=14),
)
fig.show()

In [None]:
# Share (%) of each HadCOPD value within every SmokerStatus group;
# each column sums to 100.
pd.crosstab(index=df['HadCOPD'], columns=df['SmokerStatus'], normalize='columns').mul(100)


In [None]:
# Column-normalised table: % of each COPD status inside every smoker group.
cross_tab = pd.crosstab(
    index=df.HadCOPD,
    columns=df.SmokerStatus,
    normalize='columns',
).mul(100)

# Long format: one row per (HadCOPD, SmokerStatus) pair.
cross_tab_reset = cross_tab.reset_index()
cross_tab_reset = cross_tab_reset.melt(
    id_vars='HadCOPD',
    var_name='SmokerStatus',
    value_name='Percentage',
)

bar_options = dict(
    x='SmokerStatus',
    y='Percentage',
    color='HadCOPD',
    barmode='group',
    title='Relation between Smoker Status and COPD (%)',
    text='Percentage',
    color_discrete_sequence=px.colors.qualitative.Pastel1,
)
fig = px.bar(cross_tab_reset, **bar_options)

# One-decimal percentage captions centred inside the bars.
caption_style = dict(
    texttemplate='%{text:.1f}%',
    textposition='inside',
    insidetextanchor='middle',
)
fig.update_traces(**caption_style)

# Titles, fonts, colours and spacing.
layout_style = dict(
    title_x=0.5,  # centre the chart title
    title_font=dict(size=20, family='Arial', color='#333'),
    font=dict(size=14, color='#333'),
    xaxis_title='Smoker Status',
    yaxis_title='Percentage (%)',
    legend_title_text='Had COPD',
    showlegend=True,
    plot_bgcolor='#f9f9f9',
    paper_bgcolor='white',
    bargap=0.25,
)
fig.update_layout(**layout_style)

fig.show()


In [None]:
# Share (%) of each BMI bucket within every HadDiabetes group; each column
# sums to 100. Buckets use the edges 0 / 18.5 / 25 / 30 / 35 / 100.
pd.crosstab(
    index=pd.cut(
        df['BMI'],
        bins=[0, 18.5, 25, 30, 35, 100],
        labels=['Underweight', 'Normal', 'Overweight', 'Obese', 'Severe Obese'],
    ),
    columns=df['HadDiabetes'],
    normalize='columns',
).mul(100)


In [None]:
# Bucket BMI into five labelled ranges (edges 0 / 18.5 / 25 / 30 / 35 / 100).
bmi_edges = [0, 18.5, 25, 30, 35, 100]
bmi_names = ['Underweight', 'Normal', 'Overweight', 'Obese', 'Severe Obese']
bmi_bins = pd.cut(df.BMI, bins=bmi_edges, labels=bmi_names)

# Column-normalised table: % of each BMI bucket inside every diabetes group.
cross_tab = pd.crosstab(index=bmi_bins, columns=df.HadDiabetes, normalize='columns').mul(100)

# Long format: one row per (BMI bucket, HadDiabetes) pair. The index keeps
# the name 'BMI' from the binned series, hence id_vars='BMI'.
cross_tab_reset = cross_tab.reset_index()
cross_tab_reset = cross_tab_reset.melt(
    id_vars='BMI',
    var_name='HadDiabetes',
    value_name='Percentage',
)

bar_options = dict(
    x='BMI',
    y='Percentage',
    color='HadDiabetes',
    barmode='stack',
    title='Relation between BMI Category and Diabetes (%)',
    text='Percentage',
    color_discrete_sequence=['#2E86AB', '#F4B400'],
)
fig = px.bar(cross_tab_reset, **bar_options)

# One-decimal percentage captions inside each bar segment.
fig.update_traces(textposition='inside', texttemplate='%{text:.1f}%')

layout_style = dict(
    xaxis_title='BMI Category',
    yaxis_title='Percentage (%)',
    legend_title_text='Had Diabetes',
    xaxis_tickangle=0,
    plot_bgcolor='white',
    font=dict(size=14),
)
fig.update_layout(**layout_style)

fig.show()

In [None]:
# Share (%) of each GeneralHealth value within every PhysicalActivities
# group; each row sums to 100.
pd.crosstab(index=df['PhysicalActivities'], columns=df['GeneralHealth'], normalize='index').mul(100)


In [None]:
# Row-normalised table: % of each general-health level inside every
# physical-activity group.
cross_tab = pd.crosstab(
    index=df.PhysicalActivities,
    columns=df.GeneralHealth,
    normalize='index',
).mul(100)

# Long format: one row per (PhysicalActivities, GeneralHealth) pair.
cross_tab_reset = cross_tab.reset_index()
cross_tab_reset = cross_tab_reset.melt(
    id_vars='PhysicalActivities',
    var_name='GeneralHealth',
    value_name='Percentage',
)

bar_options = dict(
    x='PhysicalActivities',
    y='Percentage',
    color='GeneralHealth',
    barmode='stack',
    title='General Health Distribution by Physical Activity (%)',
    text='Percentage',
    color_discrete_sequence=px.colors.qualitative.Set2,
)
fig = px.bar(cross_tab_reset, **bar_options)

# One-decimal percentage captions inside each bar segment.
fig.update_traces(textposition='inside', texttemplate='%{text:.1f}%')

layout_style = dict(
    xaxis_title='Physical Activities',
    yaxis_title='Percentage (%)',
    legend_title_text='General Health',
    xaxis_tickangle=0,
    plot_bgcolor='white',
    font=dict(size=14),
)
fig.update_layout(**layout_style)

fig.show()

In [None]:
# Mean SleepHours for each GeneralHealth category.
df.groupby(by='GeneralHealth')['SleepHours'].mean()


In [None]:
# Bar chart of the mean SleepHours per GeneralHealth category.
mean_sleep = df.groupby(by='GeneralHealth')['SleepHours'].mean().reset_index()

plt.figure(figsize=(8, 5))
sns.barplot(
    data=mean_sleep,
    x='GeneralHealth',
    y='SleepHours',
    hue='GeneralHealth',
    palette='Set2',
    legend=False,  # hue repeats x, so a legend would add nothing
)

plt.title('Average Sleep Hours by General Health')
plt.ylabel('Average Sleep Hours')
plt.xlabel('General Health')
plt.xticks(rotation=0)
plt.tight_layout()
plt.show()

In [None]:
# Share (%) of each HadStroke value within every AgeCategory;
# each column sums to 100.
pd.crosstab(index=df['HadStroke'], columns=df['AgeCategory'], normalize='columns').mul(100)


In [None]:
# Column-normalised table: % of each stroke status inside every age category.
cross_tab = pd.crosstab(
    index=df.HadStroke,
    columns=df.AgeCategory,
    normalize='columns',
).mul(100)

# Long format: one row per (HadStroke, AgeCategory) pair.
cross_tab_reset = cross_tab.reset_index()
cross_tab_reset = cross_tab_reset.melt(
    id_vars='HadStroke',
    var_name='AgeCategory',
    value_name='Percentage',
)

bar_options = dict(
    x='AgeCategory',
    y='Percentage',
    color='HadStroke',
    barmode='stack',
    title='Stroke Percentage by Age Category',
    text='Percentage',
    color_discrete_sequence=['#2E86AB', '#F4B400'],
)
fig = px.bar(cross_tab_reset, **bar_options)

# One-decimal percentage captions inside each bar segment.
fig.update_traces(textposition='inside', texttemplate='%{text:.1f}%')

layout_style = dict(
    xaxis_title='Age Category',
    yaxis_title='Percentage (%)',
    legend_title_text='Had Stroke',
    xaxis_tickangle=45,
    plot_bgcolor='white',
    font=dict(size=14),
)
fig.update_layout(**layout_style)

fig.show()

In [None]:
# Share (%) of each HadHeartAttack value within every AgeCategory;
# each column sums to 100.
pd.crosstab(index=df['HadHeartAttack'], columns=df['AgeCategory'], normalize='columns').mul(100)


In [None]:
# Column-normalised table: % of each heart-attack status inside every age
# category.
cross_tab = pd.crosstab(
    index=df.HadHeartAttack,
    columns=df.AgeCategory,
    normalize='columns',
).mul(100)

# Long format: one row per (HadHeartAttack, AgeCategory) pair.
cross_tab_reset = cross_tab.reset_index()
cross_tab_reset = cross_tab_reset.melt(
    id_vars='HadHeartAttack',
    var_name='AgeCategory',
    value_name='Percentage',
)

bar_options = dict(
    x='AgeCategory',
    y='Percentage',
    color='HadHeartAttack',
    barmode='stack',
    title='Heart Attack Percentage by Age Category',
    text='Percentage',
    color_discrete_sequence=['#2E86AB', '#F4B400'],
)
fig = px.bar(cross_tab_reset, **bar_options)

# One-decimal percentage captions inside each bar segment.
fig.update_traces(textposition='inside', texttemplate='%{text:.1f}%')

layout_style = dict(
    xaxis_title='Age Category',
    yaxis_title='Percentage (%)',
    legend_title_text='Had Heart Attack',
    xaxis_tickangle=45,
    plot_bgcolor='white',
    font=dict(size=14),
)
fig.update_layout(**layout_style)

fig.show()


In [None]:
# Share (%) of each HadDiabetes value within every PhysicalActivities group;
# each column sums to 100.
pd.crosstab(index=df['HadDiabetes'], columns=df['PhysicalActivities'], normalize='columns').mul(100)


In [None]:
# Column-normalised table: % of each diabetes status inside every
# physical-activity group.
cross_tab = pd.crosstab(
    index=df.HadDiabetes,
    columns=df.PhysicalActivities,
    normalize='columns',
).mul(100)

# Long format: one row per (HadDiabetes, PhysicalActivities) pair.
cross_tab_reset = cross_tab.reset_index()
cross_tab_reset = cross_tab_reset.melt(
    id_vars='HadDiabetes',
    var_name='PhysicalActivities',
    value_name='Percentage',
)

bar_options = dict(
    x='PhysicalActivities',
    y='Percentage',
    color='HadDiabetes',
    barmode='stack',
    title='Diabetes Percentage by Physical Activity (%)',
    text='Percentage',
    color_discrete_sequence=['#2E86AB', '#F4B400'],
)
fig = px.bar(cross_tab_reset, **bar_options)

# One-decimal percentage captions inside each bar segment.
fig.update_traces(textposition='inside', texttemplate='%{text:.1f}%')

layout_style = dict(
    xaxis_title='Physical Activities',
    yaxis_title='Percentage (%)',
    legend_title_text='Had Diabetes',
    xaxis_tickangle=0,
    plot_bgcolor='white',
    font=dict(size=14),
)
fig.update_layout(**layout_style)

fig.show()


In [None]:
# Mean MentalHealthDays for each HadDepressiveDisorder group.
df.groupby(by='HadDepressiveDisorder')['MentalHealthDays'].mean()


In [None]:
# Bar chart of the mean MentalHealthDays per HadDepressiveDisorder group.
mean_mental = df.groupby(by='HadDepressiveDisorder')['MentalHealthDays'].mean().reset_index()

plt.figure(figsize=(6, 5))
sns.barplot(
    data=mean_mental,
    x='HadDepressiveDisorder',
    y='MentalHealthDays',
    hue='HadDepressiveDisorder',
    palette='Set2',
    legend=False,  # hue repeats x, so a legend would add nothing
)

plt.title('Average Mental Health Days by Depressive Disorder')
plt.ylabel('Average Mental Health Days')
plt.xlabel('Had Depressive Disorder')

# Write each mean slightly (0.3) above its bar; bars sit at x = 0, 1, ...
for position, average in enumerate(mean_mental['MentalHealthDays']):
    plt.text(position, average + 0.3, f'{average:.1f}', ha='center', fontsize=11)

plt.tight_layout()
plt.show()


In [None]:
# Share (%) of each HadKidneyDisease value within every HadDiabetes group;
# each column sums to 100.
pd.crosstab(index=df['HadKidneyDisease'], columns=df['HadDiabetes'], normalize='columns').mul(100)


In [None]:
# Column-normalised table: % of each kidney-disease status inside every
# diabetes group.
cross_tab = pd.crosstab(
    index=df.HadKidneyDisease,
    columns=df.HadDiabetes,
    normalize='columns',
).mul(100)

# Long format: one row per (HadKidneyDisease, HadDiabetes) pair.
cross_tab_reset = cross_tab.reset_index()
cross_tab_reset = cross_tab_reset.melt(
    id_vars='HadKidneyDisease',
    var_name='HadDiabetes',
    value_name='Percentage',
)

bar_options = dict(
    x='HadDiabetes',
    y='Percentage',
    color='HadKidneyDisease',
    barmode='stack',
    title='Kidney Disease Percentage by Diabetes Status (%)',
    text='Percentage',
    color_discrete_sequence=['#2E86AB', '#F4B400'],
)
fig = px.bar(cross_tab_reset, **bar_options)

# One-decimal percentage captions inside each bar segment.
fig.update_traces(textposition='inside', texttemplate='%{text:.1f}%')

layout_style = dict(
    xaxis_title='Had Diabetes',
    yaxis_title='Percentage (%)',
    legend_title_text='Had Kidney Disease',
    xaxis_tickangle=0,
    plot_bgcolor='white',
    font=dict(size=14),
)
fig.update_layout(**layout_style)

fig.show()


In [None]:
# Mean PhysicalHealthDays for each HadArthritis group.
df.groupby(by='HadArthritis')['PhysicalHealthDays'].mean()


In [None]:
# Bar chart of the mean PhysicalHealthDays per HadArthritis group.
mean_physical = df.groupby(by='HadArthritis')['PhysicalHealthDays'].mean().reset_index()

plt.figure(figsize=(6, 5))
sns.barplot(
    data=mean_physical,
    x='HadArthritis',
    y='PhysicalHealthDays',
    hue='HadArthritis',
    palette='Set2',
    legend=False,  # hue repeats x, so a legend would add nothing
)

plt.title('Average Physical Health Days by Arthritis Status')
plt.ylabel('Average Physical Health Days')
plt.xlabel('Had Arthritis')

# Write each mean slightly (0.3) above its bar; bars sit at x = 0, 1, ...
for position, average in enumerate(mean_physical['PhysicalHealthDays']):
    plt.text(position, average + 0.3, f'{average:.1f}', ha='center', fontsize=11)

plt.tight_layout()
plt.show()
