# EE0005 Group Project

In [1]:
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
import math
sb.set()


    Age | Objective Feature | age | int (days)
    Height | Objective Feature | height | int (cm) |
    Weight | Objective Feature | weight | float (kg) |
    Gender | Objective Feature | gender | categorical code |
    Systolic blood pressure | Examination Feature | ap_hi | int |
    Diastolic blood pressure | Examination Feature | ap_lo | int |
    Cholesterol | Examination Feature | cholesterol | 1: normal, 2: above normal, 3: well above normal |
    Glucose | Examination Feature | gluc | 1: normal, 2: above normal, 3: well above normal |
    Smoking | Subjective Feature | smoke | binary |
    Alcohol intake | Subjective Feature | alco | binary |
    Physical activity | Subjective Feature | active | binary |
    Presence or absence of cardiovascular disease | Target Variable | cardio | binary |


In [2]:
heartdata = pd.read_csv('cardio_train.csv')
heartdata.head(n = 10000)

Unnamed: 0,id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
0,0,18393,2,168,62.0,110,80,1,1,0,0,1,0
1,1,20228,1,156,85.0,140,90,3,1,0,0,1,1
2,2,18857,1,165,64.0,130,70,3,1,0,0,0,1
3,3,17623,2,169,82.0,150,100,1,1,0,0,1,1
4,4,17474,1,156,56.0,100,60,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,14249,20427,1,166,65.0,130,80,1,1,0,0,0,0
9996,14251,18214,1,160,93.0,140,80,2,1,0,0,1,1
9997,14252,14665,1,158,66.0,130,90,2,2,0,0,1,0
9998,14253,18328,1,168,70.0,160,80,3,1,0,0,1,1


In [3]:
heartdata.shape

(70000, 13)

In [4]:
heartdata.dtypes

id               int64
age              int64
gender           int64
height           int64
weight         float64
ap_hi            int64
ap_lo            int64
cholesterol      int64
gluc             int64
smoke            int64
alco             int64
active           int64
cardio           int64
dtype: object

In [5]:
heartdata['years'] = (heartdata['age']/365).round().astype('int')
years = pd.DataFrame(heartdata['years'])
years.head()

Unnamed: 0,years
0,50
1,55
2,52
3,48
4,48


In [6]:
height = pd.DataFrame(heartdata['height'])
height.head()

Unnamed: 0,height
0,168
1,156
2,165
3,169
4,156


In [7]:
weight = pd.DataFrame(heartdata['weight'])
weight.head()

Unnamed: 0,weight
0,62.0
1,85.0
2,64.0
3,82.0
4,56.0


In [8]:
heartdata['bmi'] = heartdata['weight'] / (heartdata['height']/100)**2
bmi = pd.DataFrame(heartdata['bmi'])
bmi.head()

Unnamed: 0,bmi
0,21.96712
1,34.927679
2,23.507805
3,28.710479
4,23.011177


MAP is Mean Arterial Pressure

In [9]:
heartdata['MAP'] = ((2 * heartdata['ap_lo'] + heartdata['ap_hi'])/3)
MAP = pd.DataFrame(heartdata['MAP'])
MAP.head()

Unnamed: 0,MAP
0,90.0
1,106.666667
2,90.0
3,116.666667
4,73.333333


In [10]:
newheartdata = heartdata.drop(columns = ['id', 'age'])

In [92]:
newheartdata.head()

Unnamed: 0,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio,years,bmi,MAP
0,2,168,62.0,110,80,1,1,0,0,1,0,50,21.96712,90.0
1,1,156,85.0,140,90,3,1,0,0,1,1,55,34.927679,106.666667
2,1,165,64.0,130,70,3,1,0,0,0,1,52,23.507805,90.0
3,2,169,82.0,150,100,1,1,0,0,1,1,48,28.710479,116.666667
4,1,156,56.0,100,60,1,1,0,0,0,0,48,23.011177,73.333333


In [30]:
male_df = pd.DataFrame(newheartdata[newheartdata['gender'] == 1])
male_df

Unnamed: 0,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio,years,bmi,MAP
1,1,156,85.0,140,90,3,1,0,0,1,1,55,34.927679,106.666667
2,1,165,64.0,130,70,3,1,0,0,0,1,52,23.507805,90.000000
4,1,156,56.0,100,60,1,1,0,0,0,0,48,23.011177,73.333333
5,1,151,67.0,120,80,2,2,0,0,0,0,60,29.384676,93.333333
6,1,157,93.0,130,80,3,1,0,0,1,0,61,37.729725,96.666667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69993,1,172,70.0,130,90,1,1,0,0,1,1,54,23.661439,103.333333
69994,1,165,80.0,150,80,1,1,0,0,1,1,58,29.384757,103.333333
69996,1,158,126.0,140,90,2,2,0,0,1,1,62,50.472681,106.666667
69998,1,163,72.0,135,80,1,2,0,0,0,1,61,27.099251,98.333333


In [31]:
female_df = pd.DataFrame(newheartdata[newheartdata['gender'] == 2])
female_df

Unnamed: 0,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio,years,bmi,MAP
0,2,168,62.0,110,80,1,1,0,0,1,0,50,21.967120,90.000000
3,2,169,82.0,150,100,1,1,0,0,1,1,48,28.710479,116.666667
7,2,178,95.0,130,90,3,3,0,0,1,1,62,29.983588,103.333333
11,2,173,60.0,120,80,1,1,0,0,1,0,52,20.047446,93.333333
12,2,165,60.0,120,80,1,1,0,0,0,0,41,22.038567,93.333333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69981,2,182,110.0,130,90,2,2,0,0,1,1,48,33.208550,103.333333
69984,2,168,80.0,120,80,1,1,0,0,1,1,49,28.344671,93.333333
69986,2,180,78.0,120,80,1,1,0,0,1,0,50,24.074074,93.333333
69995,2,168,76.0,120,80,1,1,1,0,1,0,53,26.927438,93.333333


---

## Problem 1 : Predicting MAP using BMI

Extract the required variables from the dataset, as mentioned in the problem.   

In [32]:
maleBMI = pd.DataFrame(male_df['bmi'])
maleMAP = pd.DataFrame(male_df['MAP'])

In [None]:
trainDF = pd.concat([ho, houseSalePrice], axis = 1).reindex(houseGrLivArea.index)
sb.jointplot(data=trainDF, x='GrLivArea', y='SalePrice', height = 12)