In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the clinical records CSV into `df`, the DataFrame every later cell works on.
df = pd.read_csv('heart_failure_clinical_records_dataset.csv')

# Question 1: Distribution of Age Among Heart Failure Patients
# Histogram of the `age` column (20 bins) with a kernel density estimate overlaid.
age_fig, age_ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=df, x='age', bins=20, kde=True, ax=age_ax)
age_ax.set_title('Distribution of Age Among Heart Failure Patients')
age_ax.set_xlabel('Age')
age_ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Question 2: Death Rate Variation with Age
# Bucket patients into decade-wide age groups. The resulting `age_group` column
# is stored on `df` because later cells group and plot by it.
#
# pd.cut builds right-closed intervals by default, so with a first edge of 40 a
# patient aged exactly 40 falls outside (40, 50] and becomes NaN unless
# include_lowest=True is set. The open-ended last bin likewise keeps patients
# older than 90 from being dropped silently.
# NOTE(review): the published dataset description reports ages from 40 to 95 —
# confirm with df['age'].min() / df['age'].max().
age_bin_edges = [40, 50, 60, 70, 80, 90, float('inf')]
age_bin_labels = ['40-50', '50-60', '60-70', '70-80', '80-90', '90+']
df['age_group'] = pd.cut(
    df['age'],
    bins=age_bin_edges,
    labels=age_bin_labels,
    include_lowest=True,
)

# Mean of DEATH_EVENT per age group, scaled to a percentage (assumes DEATH_EVENT
# is a 0/1 flag). observed=False is spelled out so every age group keeps its
# slot on the x-axis and the result does not depend on the pandas default.
age_death_rate = df.groupby('age_group', observed=False)['DEATH_EVENT'].mean() * 100

plt.figure(figsize=(10, 6))
age_death_rate.plot(kind='bar', color='salmon')
plt.title('Death Rate by Age Group')
plt.xlabel('Age Group')
plt.ylabel('Death Rate (%)')
plt.show()

In [None]:
# Question 3: Percentage of Male and Female Patients
# value_counts() orders its result by frequency, so attaching a fixed
# ['Male', 'Female'] label list by position is only correct when males happen to
# be the majority. Map the numeric codes to names first so each slice is
# labelled from the data itself.
# NOTE(review): assumes the dataset's documented encoding sex 0 = female,
# 1 = male — confirm against the data dictionary.
sex_code_to_label = {0: 'Female', 1: 'Male'}
gender_distribution = (
    df['sex']
    .map(sex_code_to_label)
    .value_counts(normalize=True)
    # Fixed slice order keeps the colours stable: Male lightblue, Female lightgreen.
    .reindex(['Male', 'Female'], fill_value=0)
    * 100
)
gender_distribution.plot(kind='pie', autopct='%1.1f%%', colors=['lightblue', 'lightgreen'])
plt.title('Percentage of Male and Female Patients')
# Clear the y-axis label that pandas derives from the Series name.
plt.ylabel('')
plt.show()

In [None]:
# Question 4: Platelet Count Variation Among Different Age Groups
# One box per `age_group` category; that column is created by the Question 2
# cell, which must have been run first.
platelet_fig, platelet_ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x='age_group', y='platelets', palette='viridis', ax=platelet_ax)
platelet_ax.set_title('Platelet Count Variation Among Different Age Groups')
platelet_ax.set_xlabel('Age Group')
platelet_ax.set_ylabel('Platelet Count')
plt.show()

In [None]:
# Question 5: Correlation Between Creatinine and Sodium Levels
# Series.corr with its default method gives the Pearson coefficient of the two columns.
creatinine_levels = df['serum_creatinine']
sodium_levels = df['serum_sodium']
correlation = creatinine_levels.corr(sodium_levels)
print(f'Correlation between Serum Creatinine and Serum Sodium: {correlation:.2f}')

# Scatter plot of the same two columns to show the relationship visually.
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=df, x='serum_creatinine', y='serum_sodium', ax=scatter_ax)
scatter_ax.set_title('Correlation Between Serum Creatinine and Serum Sodium')
scatter_ax.set_xlabel('Serum Creatinine')
scatter_ax.set_ylabel('Serum Sodium')
plt.show()

In [None]:
# Question 6: High Blood Pressure Prevalence Between Male and Female Patients
# Mean of high_blood_pressure per sex code, scaled to a percentage (assumes a
# 0/1 flag).
hbp_by_gender = df.groupby('sex')['high_blood_pressure'].mean() * 100

# groupby sorts its keys, so the bars come out in code order 0, 1. Labelling
# them positionally as ['Male', 'Female'] put "Male" on the sex == 0 bar.
# Renaming the index from the codes lets the tick labels follow the data.
# NOTE(review): assumes the dataset's documented encoding sex 0 = female,
# 1 = male — confirm against the data dictionary.
hbp_by_gender = hbp_by_gender.rename(index={0: 'Female', 1: 'Male'})

hbp_by_gender.plot(kind='bar', color=['blue', 'green'])
plt.title('High Blood Pressure Prevalence by Gender')
plt.xlabel('Gender')
plt.ylabel('Percentage with High Blood Pressure')
# Tick text now comes from the renamed index; only reset the rotation.
plt.xticks(rotation=0)
plt.show()

In [None]:
# Question 7: Relationship Between Smoking Habits and Heart Failure
# Mean of DEATH_EVENT for each value of `smoking`, expressed as a percentage.
deaths_grouped_by_smoking = df.groupby('smoking')['DEATH_EVENT']
smoking_death_rate = deaths_grouped_by_smoking.mean() * 100

smoking_ax = smoking_death_rate.plot(kind='bar', color='orange')
smoking_ax.set_title('Death Rate by Smoking Status')
smoking_ax.set_xlabel('Smoking')
smoking_ax.set_ylabel('Death Rate (%)')
# groupby sorts its keys, so bar 0 is smoking == 0 and bar 1 is smoking == 1.
plt.xticks([0, 1], ['Non-Smoker', 'Smoker'], rotation=0)
plt.show()

In [None]:
# Question 8: Patterns in Death Events Across Different Age Groups
# Mean of DEATH_EVENT per `age_group`, scaled to a percentage. `age_group` is the
# categorical column created by the Question 2 cell, which must run first.
# observed=False is passed explicitly: grouping on a categorical column without
# it relies on a pandas default that is deprecated and scheduled to change, and
# keeping unobserved groups preserves one bar slot per age group.
death_by_age_group = df.groupby('age_group', observed=False)['DEATH_EVENT'].mean() * 100
death_by_age_group.plot(kind='bar', color='red')
plt.title('Death Events Across Age Groups')
plt.xlabel('Age Group')
plt.ylabel('Death Event Rate (%)')
plt.show()

In [None]:
# Question 9: Difference in Ejection Fraction Between Patients with and without Diabetes
# One box of `ejection_fraction` values per value of the `diabetes` column.
ef_fig, ef_ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x='diabetes', y='ejection_fraction', palette='coolwarm', ax=ef_ax)
ef_ax.set_title('Ejection Fraction for Diabetic vs Non-Diabetic Patients')
ef_ax.set_xlabel('Diabetes')
ef_ax.set_ylabel('Ejection Fraction')
# Replace the 0 / 1 tick text with readable names.
ef_ax.set_xticks([0, 1])
ef_ax.set_xticklabels(['No Diabetes', 'Diabetes'])
plt.show()

In [None]:
# Question 10: Serum Creatinine Level Variation Between Survivors and Non-Survivors
# One box of `serum_creatinine` values per value of the DEATH_EVENT column.
plt.figure(figsize=(10, 6))
sns.boxplot(x='DEATH_EVENT', y='serum_creatinine', data=df, palette='magma')
plt.title('Serum Creatinine Level by Survival Status')
# The axis spans both outcomes, so it is labelled with the dimension name rather
# than 'Survived', which is only one of its two categories.
plt.xlabel('Survival Status')
plt.ylabel('Serum Creatinine')
# Tick 0 is DEATH_EVENT == 0 and tick 1 is DEATH_EVENT == 1.
plt.xticks([0, 1], ['Survived', 'Did Not Survive'])
plt.show()