## Be Heart Smart

In [None]:
# Import our dependencies
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Select matplotlib's "notebook" backend for the figures drawn below.
# NOTE(review): this backend may not be available in every Jupyter front end — confirm.
%matplotlib notebook

Features:

1. Age | Objective Feature | age | int (days) |
2. Height | Objective Feature | height | int (cm) |
3. Weight | Objective Feature | weight | float (kg) |
4. Gender | Objective Feature | gender | categorical code (1 = women, 2 = men) |
5. Systolic blood pressure | Examination Feature | ap_hi | int |
6. Diastolic blood pressure | Examination Feature | ap_lo | int |
7. Cholesterol | Examination Feature | cholesterol | 1: Normal (<200), 2: Moderate (200 - 239), 3: High (>=240) |
8. Glucose | Examination Feature | gluc | 1: Normal (<100), 2: Moderate (100 - 125), 3: High (>=126) |
9. Smoking | Subjective Feature | smoke | binary |
10. Alcohol intake | Subjective Feature | alco | binary |
11. Physical activity | Subjective Feature | active | binary |
12. Presence or absence of cardiovascular disease | Target Variable | cardio | binary |


In [None]:
# Read the cardio dataset from the Resources folder (path is relative to
# the notebook's working directory) and preview the first rows.
path = "../Resources/Cardio_data.csv"
cardio_df = pd.read_csv(filepath_or_buffer=path)
cardio_df.head()

In [None]:
# Rename abbreviated columns to descriptive names.
# The feature list documents the blood pressure columns as `ap_hi` / `ap_lo`,
# while every later cell refers to `systole` / `diastole`; map them here so the
# notebook works with either spelling. `rename` ignores labels that are not
# present, so this is a no-op when the CSV already uses the long names.
cardio_df = cardio_df.rename(
    columns={
        'gluc': 'glucose',
        'alco': 'alcohol_intake',
        'ap_hi': 'systole',
        'ap_lo': 'diastole',
    }
)
cardio_df.head()

In [None]:
# Display the column labels currently present in cardio_df
cardio_df.columns

In [None]:
# Check the data type of every column in cardio_df
cardio_df.dtypes

In [None]:
# Checking for null values: number of missing entries in each column.
# (`count()` would report non-null counts, which only reveals nulls after
# comparing against the row count.)
cardio_df.isna().sum()

In [None]:
# Convert age from days to whole years (365 days per year, rounded to the
# nearest year) and store it in a new column.
age_years = cardio_df["age"] / 365
cardio_df["age_in_years"] = age_years.round(0)
cardio_df.head()

In [None]:
# Dimensions of cardio_df as (rows, columns)
cardio_df.shape

In [None]:
# Remove the original day-based age column; age_in_years replaces it.
cardio_df = cardio_df.drop(columns=["age"])
cardio_df.head()

In [None]:
# Keep only the columns used by the analysis, in a reader-friendly order.
cardio_df = cardio_df[
    [
        'id',
        'age_in_years',
        'gender',
        'height',
        'weight',
        'systole',
        'diastole',
        'cholesterol',
        'glucose',
        'smoke',
        'alcohol_intake',
        'active',
        'cardio',
    ]
]
cardio_df.head()


## Cleaning up the dataset

In [None]:
# Notched box plot of the unfiltered systole column.
# showfliers=False suppresses the individual outlier markers, so only the
# box and whiskers are drawn.
fig1, ax1 = plt.subplots(figsize=(8, 8))
data = [cardio_df["systole"]]
ax1.boxplot(data, showfliers=False, notch=True)
ax1.set_title('Systole outliers')
plt.show()

In [None]:
# Remove rows whose systole is greater than 200 or less than 80.
# Rows with a missing systole fail both comparisons and are therefore kept.
systole_out_of_range = (cardio_df["systole"] > 200) | (cardio_df["systole"] < 80)
new_df = cardio_df[~systole_out_of_range]
print(new_df.shape)
new_df.head()

In [None]:
# Notched box plot of the unfiltered diastole column (read from cardio_df,
# not from the systole-filtered frame). Outlier markers are hidden by
# showfliers=False.
fig1, ax1 = plt.subplots(figsize=(8, 8))
data = [cardio_df["diastole"]]
ax1.boxplot(data, showfliers=False, notch=True)
ax1.set_title('Diastole outliers')
plt.show()

In [None]:
# Remove rows whose diastole is greater than 200 or less than 50.
# Rows with a missing diastole fail both comparisons and are therefore kept.
diastole_out_of_range = (new_df["diastole"] > 200) | (new_df["diastole"] < 50)
diastole_df = new_df[~diastole_out_of_range]
print(diastole_df.shape)
diastole_df.head()

In [None]:
# Notched box plot of the unfiltered weight column from cardio_df.
# Outlier markers are hidden by showfliers=False.
fig1, ax1 = plt.subplots(figsize=(8, 8))
data = [cardio_df["weight"]]
ax1.boxplot(data, showfliers=False, notch=True)
ax1.set_title('Weight outliers')
plt.show()

In [None]:
# Remove rows whose weight is below 60 kg; rows with a missing weight are kept.
# NOTE(review): 60 kg is a high cut-off for adult weight — confirm this is intended.
under_weight_limit = diastole_df["weight"] < 60
weight_df = diastole_df[~under_weight_limit]
print(weight_df.shape)
weight_df.head()

In [None]:
# Notched box plot of the unfiltered height column from cardio_df.
# Outlier markers are hidden by showfliers=False.
fig1, ax1 = plt.subplots(figsize=(8, 8))
data = [cardio_df["height"]]
ax1.boxplot(data, showfliers=False, notch=True)
ax1.set_title('Height outliers')
plt.show()

In [None]:
# Remove rows whose height is less than 140 cm or greater than 200 cm.
# Rows with a missing height fail both comparisons and are therefore kept.
height_out_of_range = (weight_df["height"] < 140) | (weight_df["height"] > 200)
height_df = weight_df[~height_out_of_range]
print(height_df.shape)
height_df.head()

In [None]:
# Mean of the binary cardio flag within each cholesterol category, i.e. the
# share of cleaned records with cardio == 1 per category. The result is a Series.
cholesterol_cardio_df = height_df.groupby("cholesterol")["cardio"].mean()
cholesterol_cardio_df.head()

In [None]:
# Bar chart of the cardio rate per cholesterol category.
# Draw on a dedicated figure/axes so the bars are not added to whichever
# figure is still current from an earlier cell.
fig, ax = plt.subplots()
cholesterol_cardio_df.plot.bar(ax=ax, color='r', alpha=0.5, align="center")
# Create labels for the x and y axes.
ax.set_xlabel("Cholesterol levels")
# The plotted values are group means of the binary cardio flag (a share, not a count).
ax.set_ylabel("Proportion with cardiovascular disease")
# Create a title.
ax.set_title("Cardiac disease based on Cholesterol Levels")
# Add the legend.
ax.legend()
plt.show()


In [None]:
# Summary of the cholesterol chart (plain literal; there is nothing to interpolate).
print(" People with very high Cholesterol level have a greater chance of developing heart disease.")

In [None]:
# Mean of the binary cardio flag within each glucose category, i.e. the
# share of cleaned records with cardio == 1 per category. The result is a Series.
glucose_cardio_df = height_df.groupby("glucose")["cardio"].mean()
glucose_cardio_df.head()

In [None]:
# Bar chart of the cardio rate per glucose category.
# Draw on a dedicated figure/axes so the bars are not added to whichever
# figure is still current from an earlier cell.
fig, ax = plt.subplots()
glucose_cardio_df.plot.bar(ax=ax, color='green', alpha=0.5, align="center")
# Create labels for the x and y axes.
ax.set_xlabel("Glucose levels")
# The plotted values are group means of the binary cardio flag (a share, not a count).
ax.set_ylabel("Proportion with cardiovascular disease")
# Create a title.
ax.set_title("Cardiac disease based on Glucose Levels")
# Add the legend.
ax.legend()
plt.show()

In [None]:
# Summary of the glucose chart (plain literal; there is nothing to interpolate).
print(" People with very high Glucose level have a greater chance of developing heart disease.")

In [None]:
# Mean of the binary cardio flag for each gender code, i.e. the share of
# cleaned records with cardio == 1 per code. The result is a Series.
gender_cardio_df = height_df.groupby("gender")["cardio"].mean()
gender_cardio_df.head()

In [None]:
# Bar chart of the cardio rate per gender code.
# Draw on a dedicated figure/axes so the bars are not added to whichever
# figure is still current from an earlier cell.
fig, ax = plt.subplots()
gender_cardio_df.plot.bar(ax=ax, color='blue', alpha=0.5, align="center")
# Create labels for the x and y axes.
ax.set_xlabel("gender")
# The plotted values are group means of the binary cardio flag (a share, not a count).
ax.set_ylabel("Proportion with cardiovascular disease")
# Create a title.
ax.set_title("Cardiac disease based on Gender")
# Add the legend.
ax.legend()
plt.show()

In [None]:
# Mean of the binary cardio flag for each alcohol_intake value, i.e. the share
# of cleaned records with cardio == 1 per value. The result is a Series.
alcohol_cardio_df = height_df.groupby("alcohol_intake")["cardio"].mean()
alcohol_cardio_df.head()

In [None]:
# Bar chart of the cardio rate per alcohol-intake value.
# Draw on a dedicated figure/axes so the bars are not added to whichever
# figure is still current from an earlier cell.
fig, ax = plt.subplots()
alcohol_cardio_df.plot.bar(ax=ax, color='cyan', alpha=0.5, align="center")
# Create labels for the x and y axes.
ax.set_xlabel("Alcohol Consumption")
# The plotted values are group means of the binary cardio flag (a share, not a count).
ax.set_ylabel("Proportion with cardiovascular disease")
# Create a title.
ax.set_title("Cardiac disease based on Alcohol Consumption")
# Add the legend.
ax.legend()
plt.show()

In [None]:
# Summary of the alcohol chart. Fixes the "doesnot Contribute" typo in the
# emitted text and drops the f-prefix, since nothing is interpolated.
print(" Alcohol consumption alone does not contribute to developing heart disease.")