# In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# 1. Load the examination records from medical_examination.csv into df.
df = pd.read_csv('medical_examination.csv')

# 2. Flag overweight patients in a new 'overweight' column.
# BMI = weight (kg) / height (m) squared; height is stored in cm, hence the
# division by 100. A BMI above 25 is marked 1, anything else 0.
df['overweight'] = (
    df['weight'] / (df['height'] / 100) ** 2
).gt(25).astype('int64')

# 3. Normalize cholesterol and gluc so that 0 is always good and 1 always bad:
# a value of exactly 1 becomes 0, every other value becomes 1.
df['cholesterol'] = df['cholesterol'].ne(1).astype('int64')
df['gluc'] = df['gluc'].ne(1).astype('int64')


def draw_cat_plot():
    """Draw the categorical count plot, split by ``cardio``.

    Reads the module-level ``df``, counts the values of the categorical
    features for each ``cardio`` group, plots them as bar charts with
    ``sns.catplot``, saves the figure to ``catplot.png`` and returns it.
    """
    # 4. Reshape to long format: one row per (cardio, variable, value)
    # observation for the six categorical features.
    features = ['cholesterol', 'gluc', 'smoke', 'alco', 'active', 'overweight']
    long_form = pd.melt(df, id_vars=['cardio'], value_vars=features)

    # 5. Count the rows in every (cardio, variable, value) group. The count
    # series is renamed to 'total' so it does not collide with the 'value'
    # index level when the index is turned back into columns.
    group_keys = ['cardio', 'variable', 'value']
    totals = long_form.groupby(group_keys)['value'].count()
    df_cat = totals.rename('total').reset_index()

    # 6. One bar chart per cardio value: features on the x axis, counts on
    # the y axis, bars coloured by the feature value.
    grid = sns.catplot(
        data=df_cat,
        kind='bar',
        x='variable',
        y='total',
        hue='value',
        col='cardio',
    )

    # 7. Get the figure for the output and store it in the fig variable.
    fig = grid.fig

    # 8. Do not modify the next two lines
    fig.savefig('catplot.png')
    return fig


def draw_heat_map():
    """Draw the lower-triangle correlation heat map of the cleaned data.

    Reads the module-level ``df``, filters out the rows regarded as
    incorrect, plots the correlation matrix of the remaining rows with
    ``sns.heatmap``, saves the figure to ``heatmap.png`` and returns it.
    """
    # 9. Clean the data in the df_heat variable by filtering out the following
    # patient segments that represent incorrect data:
    #   - diastolic pressure is higher than systolic
    #   - height is less than the 2.5th percentile
    #   - height is more than the 97.5th percentile
    #   - weight is less than the 2.5th percentile
    #   - weight is more than the 97.5th percentile
    # The percentile bounds are computed once on the unfiltered df;
    # Series.between is inclusive on both ends, i.e. `>= low` and `<= high`.
    height_low, height_high = df['height'].quantile([0.025, 0.975])
    weight_low, weight_high = df['weight'].quantile([0.025, 0.975])
    df_heat = df[(df['ap_lo'] <= df['ap_hi']) &
                 df['height'].between(height_low, height_high) &
                 df['weight'].between(weight_low, weight_high)]

    # 10. Calculate the correlation matrix and store it in the corr variable.
    corr = df_heat.corr()

    # 11. Generate a mask for the upper triangle (diagonal included) and store
    # it in the mask variable. The mask is built from the matrix shape, not
    # from the correlation values: np.triu(corr) would leave any upper-triangle
    # cell whose coefficient is exactly 0 unmasked, because 0 converts to False.
    mask = np.triu(np.ones(corr.shape, dtype=bool))

    # 12. Set up the matplotlib figure.
    fig, ax = plt.subplots(figsize=(12, 10))

    # 13. Plot the correlation matrix using the method provided by the seaborn
    # library import: sns.heatmap(). Cells where mask is True are not drawn.
    sns.heatmap(
        corr,
        mask=mask,
        annot=True,
        fmt='.1f',
        linewidths=.5,
        ax=ax,
        center=0,
        square=True,
        cbar_kws={"shrink": .8}
    )

    # 14. Do not modify the next two lines.
    fig.savefig('heatmap.png')
    return fig
