In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the semicolon-delimited cardio training CSV into a DataFrame
df = pd.read_csv('cardio_train.csv', delimiter=';')

In [None]:
# Derive an age-in-years column by dividing `age` by 365
# (assumes `age` is recorded in days — TODO confirm against the data source)
df['age_years'] = df['age'].div(365)

In [None]:
# Print a concise structural summary of the DataFrame (columns, non-null
# counts, dtypes) as a quick sanity check after loading
df.info()

In [None]:
# Show summary statistics for the DataFrame's columns; kept as the cell's
# last expression so the notebook renders the resulting table
df.describe()

# Visualize Gender vs Cardio Counts

This section visualizes the relationship between gender and cardio outcome. The grouped bar chart shows, for each gender (1 = Female, 2 = Male), the number of records with each cardio outcome (0 = no disease, 1 = disease).

In [None]:
# Count rows for every (gender, cardio) combination: rows are gender codes,
# columns are cardio outcome codes, and absent combinations are filled with 0.
gender_cardio_counts = df.groupby(['gender', 'cardio']).size().unstack(fill_value=0)

# Attach human-readable labels by code VALUE rather than by position, so a
# missing or unexpected category cannot silently shift the tick/legend labels.
# The raw-coded table above is left untouched for any later use.
gender_labels = {1: 'Female', 2: 'Male'}
cardio_labels = {0: 'No Disease (0)', 1: 'Disease (1)'}
gender_cardio_labeled = gender_cardio_counts.rename(
    index=gender_labels, columns=cardio_labels
)

# Plot grouped (side-by-side) bars on an explicit Axes
fig, ax = plt.subplots()
ax = gender_cardio_labeled.plot(kind='bar', stacked=False, rot=0, ax=ax)
ax.set_title('Counts of Cardio Outcomes by Gender')
ax.set_xlabel('Gender (1 = Female, 2 = Male)')
ax.set_ylabel('Count')
# Legend entries come from the renamed column labels, so only the title is set
ax.legend(title='Cardio Outcome')
fig.tight_layout()
plt.show()

In [None]:
# Calculate, per gender, the number of records, the number with cardio
# disease (cardio == 1), and the share of records with disease in percent.
# Variable names are kept as-is in case other cells refer to them.
gender_counts = df['gender'].value_counts().sort_index()

# Use .loc instead of chained indexing, and reindex onto every gender present
# in the data so a gender with no cardio == 1 rows reports 0 (and 0.0 %)
# rather than being dropped from the counts and turning into NaN below.
cardio_infection_counts = (
    df.loc[df['cardio'] == 1, 'gender']
    .value_counts()
    .reindex(gender_counts.index, fill_value=0)
)
infection_percentage = (cardio_infection_counts / gender_counts * 100).round(2)

# Output wording says "disease" to match the 'Disease (1)' labelling used for
# the cardio column in the bar chart.
print("Gender counts:")
print(gender_counts)
print("\nCardio disease counts (cardio=1):")
print(cardio_infection_counts)
print("\nPercentage with cardio disease by gender:")
print(infection_percentage)

In [None]:
# Distribution of systolic blood pressure (ap_hi), one box per gender
fig_hi, ax_hi = plt.subplots(figsize=(8, 6))
sns.boxplot(data=df, x='gender', y='ap_hi', ax=ax_hi)
ax_hi.set_title('Box Plot of Systolic Blood Pressure by Gender')
ax_hi.set_xlabel('Gender (1 = Female, 2 = Male)')
ax_hi.set_ylabel('Systolic Blood Pressure (ap_hi)')
# Replace the numeric gender codes on the x-axis with readable names
ax_hi.set_xticks([0, 1])
ax_hi.set_xticklabels(['Female', 'Male'])
fig_hi.tight_layout()
plt.show()

In [None]:
# Box plot of diastolic blood pressure (ap_lo) by gender.
# The labels previously said "Systolic", copied from the ap_hi cell; this
# cell plots ap_lo, so the comment, y-label and title now say "Diastolic".
fig_lo, ax_lo = plt.subplots(figsize=(8, 6))
sns.boxplot(x='gender', y='ap_lo', data=df, ax=ax_lo)
ax_lo.set_xlabel('Gender (1 = Female, 2 = Male)')
ax_lo.set_ylabel('Diastolic Blood Pressure (ap_lo)')
ax_lo.set_title('Box Plot of Diastolic Blood Pressure by Gender')
# Replace the numeric gender codes on the x-axis with readable names
ax_lo.set_xticks([0, 1])
ax_lo.set_xticklabels(['Female', 'Male'])
fig_lo.tight_layout()
plt.show()