In [5]:
import pandas as pd

# Read the CSV file from disk into a DataFrame
df = pd.read_csv(filepath_or_buffer='D:/studies girl/certificate/data.csv')

# Show the column labels as a quick sanity check of what was loaded
print(df.columns)


Index(['Feature', 'Variable Type', 'Variable', 'Value Type'], dtype='object')


In [6]:
# Trim surrounding whitespace from every column label
df.columns = df.columns.map(str.strip)


In [7]:
# Preview the first five rows to check the loaded data
print(df.head(n=5))


                   Feature         Variable Type Variable        Value Type
0                      Age    Objective\nFeature      age        int (days)
1                   Height    Objective\nFeature   height          int (cm)
2                   Weight    Objective\nFeature   weight        float (kg)
3                   Gender    Objective\nFeature   gender  categorical code
4  Systolic blood pressure  Examination\nFeature    ap_hi               int


In [8]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Read the CSV file from disk into a DataFrame.
# NOTE(review): the output recorded for this cell lists only the columns
# 'Feature', 'Variable Type', 'Variable' and 'Value Type'. That looks like a
# feature-description table rather than the measurement columns the analysis
# below reads -- confirm that this path points at the intended file.
df = pd.read_csv(filepath_or_buffer='D:/studies girl/certificate/data.csv')

# Show the raw column labels (useful for spotting stray whitespace)
print(df.columns)

# Trim surrounding whitespace from every column label
df.columns = df.columns.map(str.strip)

# Preview the first five rows to understand the frame's structure
print(df.head(n=5))

# Every column the analysis below reads. Checking all of them up front (not
# just 'height') avoids a KeyError part-way through when the loaded file has a
# different set of columns than expected.
required_columns = ['height', 'weight', 'cholesterol', 'gluc', 'smoke',
                    'alco', 'active', 'cardio', 'ap_lo', 'ap_hi']
missing_columns = [name for name in required_columns if name not in df.columns]

if not missing_columns:
    # 1. Add an overweight column: 1 when BMI = weight / height_in_meters**2
    #    is above 25, otherwise 0. BMI lives in a local Series, so no
    #    temporary columns have to be added to (and dropped from) df.
    bmi = df['weight'] / (df['height'] / 100) ** 2  # height / 100 converts to meters
    df['overweight'] = (bmi > 25).astype(int)

    # 2. Normalize the data: a value of 1 becomes 0, any other value becomes 1.
    #    NOTE: this step is not idempotent (a second pass would flip the 0/1
    #    values), so it relies on df being freshly loaded before it runs.
    for column in ('cholesterol', 'gluc'):
        df[column] = (df[column] != 1).astype(int)

    # 3. Draw the Categorical Plot
    def draw_cat_plot():
        """Show one bar chart per 'cardio' value with the row count of each
        feature value.

        Reads the module-level ``df``; displays the figure and returns None.
        """
        # Convert data to long format: one row per (cardio, variable, value)
        df_cat = pd.melt(
            df,
            id_vars=['cardio'],
            value_vars=['cholesterol', 'gluc', 'smoke', 'alco', 'active', 'overweight'],
        )

        # Count the rows in each (cardio, feature, value) group
        df_cat = (
            df_cat.groupby(['cardio', 'variable', 'value'])
            .size()
            .reset_index(name='total')
            .rename(columns={'variable': 'feature'})
        )

        # The counts are already aggregated in 'total', so plot them as bar
        # heights. kind='count' would instead count the rows of this summary
        # table, ignoring 'total' entirely.
        fig = sns.catplot(x='feature', y='total', hue='value', col='cardio',
                          data=df_cat, kind='bar')
        fig.set_axis_labels('Feature', 'Count')
        fig.set_titles('Cardio: {col_name}')

        plt.show()

    draw_cat_plot()

    # 4. Clean the data: keep rows where ap_lo does not exceed ap_hi and where
    #    height and weight both lie within their 2.5th-97.5th percentile range
    #    (bounds inclusive).
    df_heat = df[
        (df['ap_lo'] <= df['ap_hi']) &
        df['height'].between(df['height'].quantile(0.025), df['height'].quantile(0.975)) &
        df['weight'].between(df['weight'].quantile(0.025), df['weight'].quantile(0.975))
    ]

    # 5. Draw the Heat Map
    def draw_heat_map():
        """Show a heat map of the pairwise column correlations of ``df_heat``.

        Reads the module-level ``df_heat``; displays the figure and returns None.
        """
        # Calculate correlation matrix
        corr = df_heat.corr()

        # Mask the upper triangle (diagonal included) so each pair is drawn once
        mask = np.triu(np.ones_like(corr, dtype=bool))

        # Set up the matplotlib figure and draw onto its axes explicitly
        fig, ax = plt.subplots(figsize=(12, 10))
        sns.heatmap(corr, mask=mask, annot=True, fmt='.1f', cmap='coolwarm', center=0,
                    square=True, linewidths=0.5, cbar_kws={"shrink": .5}, ax=ax)

        plt.show()

    draw_heat_map()
else:
    # Report every missing column so a wrong or partial input file is obvious
    for column in missing_columns:
        print(f"Column '{column}' not found in the dataset.")


Index(['Feature', 'Variable Type', 'Variable', 'Value Type'], dtype='object')
                   Feature         Variable Type Variable        Value Type
0                      Age    Objective\nFeature      age        int (days)
1                   Height    Objective\nFeature   height          int (cm)
2                   Weight    Objective\nFeature   weight        float (kg)
3                   Gender    Objective\nFeature   gender  categorical code
4  Systolic blood pressure  Examination\nFeature    ap_hi               int
Column 'height' not found in the dataset.
