# Pandas & Numpy

In [1]:
import pandas as pd
import numpy as np

# Loading Dataset

In [2]:
# Load the heart-disease CSV into a DataFrame.
# NA detection is left at the pandas default (enabled) so that blank cells are
# parsed as NaN; with na_filter=False they would be read as empty strings and
# no later fillna()/isna() step could see them.
data = pd.read_csv("../assets/heart-disease.csv")
# Preview the first rows, then report how many distinct column dtypes were inferred.
print(data.head())
print(data.dtypes.nunique())


   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  
2


In [3]:
# One-hot encoding preview.
# The result is only displayed; `data` itself is not reassigned by this cell.
# No `columns=` argument is given, and the output below has the same columns
# as the loaded frame.
pd.get_dummies(data=data)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3,0
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3,0
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3,0


# Handle missing values

In [4]:
# Handle missing values
# Fill each missing entry with the mean of its own column.
# numeric_only=True restricts the mean to numeric columns, so the computation
# does not raise if a non-numeric column is present; such columns are simply
# left unfilled.
column_means = data.mean(numeric_only=True)
data = data.fillna(column_means)
data

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3,0
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3,0
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3,0


# Data Cleaning

In [5]:
# Data Cleaning
# Drop rows that repeat an earlier row across all columns, keeping the first
# occurrence. Surviving rows keep their original index labels.
data = data.drop_duplicates(subset=None, keep="first", ignore_index=False)
data

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3,0
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3,0
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3,0


# Data Transformation

In [6]:
# Data Transformation
# Replace the "age" column with one indicator column per distinct age value
# (age_65, age_66, ... in the output below).
# NOTE(review): "age" looks like a numeric quantity; one-hot encoding it
# produces many sparse columns. Confirm this is intended rather than encoding
# the low-cardinality code columns.
data = pd.get_dummies(
    data,
    columns=["age"],
    prefix="age",
    prefix_sep="_",
)
data

Unnamed: 0,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,...,age_65,age_66,age_67,age_68,age_69,age_70,age_71,age_74,age_76,age_77
0,1,3,145,233,1,0,150,0,2.3,0,...,0,0,0,0,0,0,0,0,0,0
1,1,2,130,250,0,1,187,0,3.5,0,...,0,0,0,0,0,0,0,0,0,0
2,0,1,130,204,0,0,172,0,1.4,2,...,0,0,0,0,0,0,0,0,0,0
3,1,1,120,236,0,1,178,0,0.8,2,...,0,0,0,0,0,0,0,0,0,0
4,0,0,120,354,0,1,163,1,0.6,2,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,0,0,140,241,0,1,123,1,0.2,1,...,0,0,0,0,0,0,0,0,0,0
299,1,3,110,264,0,1,132,0,1.2,1,...,0,0,0,0,0,0,0,0,0,0
300,1,0,144,193,1,1,141,0,3.4,1,...,0,0,0,1,0,0,0,0,0,0
301,1,0,130,131,0,1,115,1,1.2,1,...,0,0,0,0,0,0,0,0,0,0


# Normalization


In [7]:
# Normalization
# Standardize every feature column to zero mean and unit standard deviation.
# The label column "target" is excluded: scaling it would turn the 0/1 class
# labels into arbitrary floats.
# NOTE(review): the statistics are computed over all rows, including rows that
# the later split cell assigns to the test set; consider fitting them on the
# training rows only.
feature_columns = [column for column in data.columns if column != "target"]
features = data[feature_columns].astype(float)
feature_mean = features.mean()
# A constant column has std == 0; divide by 1 instead so it becomes all zeros
# rather than NaN.
feature_std = features.std().replace(0, 1)

data = data.copy()
data[feature_columns] = (features - feature_mean) / feature_std
data

Unnamed: 0,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,...,age_65,age_66,age_67,age_68,age_69,age_70,age_71,age_74,age_76,age_77
0,0.681525,1.973195,0.762800,-0.260852,2.385833,-1.000880,0.018795,-0.697187,1.082226,-2.267418,...,-0.164684,-0.153786,-0.174971,-0.115665,-0.100001,-0.115665,-0.100001,-0.057544,-0.057544,-0.057544
1,0.681525,1.004244,-0.091249,0.067628,-0.417753,0.900163,1.634266,-0.697187,2.115415,-2.267418,...,-0.164684,-0.153786,-0.174971,-0.115665,-0.100001,-0.115665,-0.100001,-0.057544,-0.057544,-0.057544
2,-1.462439,0.035293,-0.091249,-0.821201,-0.417753,-1.000880,0.979345,-0.697187,0.307334,0.977891,...,-0.164684,-0.153786,-0.174971,-0.115665,-0.100001,-0.115665,-0.100001,-0.057544,-0.057544,-0.057544
3,0.681525,0.035293,-0.660615,-0.202885,-0.417753,0.900163,1.241314,-0.697187,-0.209261,0.977891,...,-0.164684,-0.153786,-0.174971,-0.115665,-0.100001,-0.115665,-0.100001,-0.057544,-0.057544,-0.057544
4,-1.462439,-0.933658,-0.660615,2.077155,-0.417753,0.900163,0.586393,1.429586,-0.381459,0.977891,...,-0.164684,-0.153786,-0.174971,-0.115665,-0.100001,-0.115665,-0.100001,-0.057544,-0.057544,-0.057544
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,-1.462439,-0.933658,0.478117,-0.106273,-0.417753,0.900163,-1.160063,1.429586,-0.725855,-0.644764,...,-0.164684,-0.153786,-0.174971,-0.115665,-0.100001,-0.115665,-0.100001,-0.057544,-0.057544,-0.057544
299,0.681525,1.973195,-1.229981,0.338141,-0.417753,0.900163,-0.767111,-0.697187,0.135136,-0.644764,...,-0.164684,-0.153786,-0.174971,-0.115665,-0.100001,-0.115665,-0.100001,-0.057544,-0.057544,-0.057544
300,0.681525,-0.933658,0.705863,-1.033747,2.385833,0.900163,-0.374158,-0.697187,2.029316,-0.644764,...,-0.164684,-0.153786,-0.174971,8.617036,-0.100001,-0.115665,-0.100001,-0.057544,-0.057544,-0.057544
301,0.681525,-0.933658,-0.091249,-2.231734,-0.417753,0.900163,-1.509354,1.429586,0.135136,-0.644764,...,-0.164684,-0.153786,-0.174971,-0.115665,-0.100001,-0.115665,-0.100001,-0.057544,-0.057544,-0.057544


# Data Splitting


In [8]:
# Data Splitting
# 80/20 train/test split.
# Rows are shuffled (with a fixed seed, for reproducibility) before slicing:
# the displayed rows suggest the file is ordered by "target" (the first rows
# shown are all 1, the last all 0), so a purely positional cut risks a test
# set dominated by a single class.
TRAIN_FRACTION = 0.8
SPLIT_SEED = 42

shuffled = data.sample(frac=1, random_state=SPLIT_SEED)
split_at = int(len(shuffled) * TRAIN_FRACTION)
train_data = shuffled.iloc[:split_at]
test_data = shuffled.iloc[split_at:]

# Every row must land in exactly one of the two partitions.
assert len(train_data) + len(test_data) == len(data)

data

Unnamed: 0,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,...,age_65,age_66,age_67,age_68,age_69,age_70,age_71,age_74,age_76,age_77
0,0.681525,1.973195,0.762800,-0.260852,2.385833,-1.000880,0.018795,-0.697187,1.082226,-2.267418,...,-0.164684,-0.153786,-0.174971,-0.115665,-0.100001,-0.115665,-0.100001,-0.057544,-0.057544,-0.057544
1,0.681525,1.004244,-0.091249,0.067628,-0.417753,0.900163,1.634266,-0.697187,2.115415,-2.267418,...,-0.164684,-0.153786,-0.174971,-0.115665,-0.100001,-0.115665,-0.100001,-0.057544,-0.057544,-0.057544
2,-1.462439,0.035293,-0.091249,-0.821201,-0.417753,-1.000880,0.979345,-0.697187,0.307334,0.977891,...,-0.164684,-0.153786,-0.174971,-0.115665,-0.100001,-0.115665,-0.100001,-0.057544,-0.057544,-0.057544
3,0.681525,0.035293,-0.660615,-0.202885,-0.417753,0.900163,1.241314,-0.697187,-0.209261,0.977891,...,-0.164684,-0.153786,-0.174971,-0.115665,-0.100001,-0.115665,-0.100001,-0.057544,-0.057544,-0.057544
4,-1.462439,-0.933658,-0.660615,2.077155,-0.417753,0.900163,0.586393,1.429586,-0.381459,0.977891,...,-0.164684,-0.153786,-0.174971,-0.115665,-0.100001,-0.115665,-0.100001,-0.057544,-0.057544,-0.057544
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,-1.462439,-0.933658,0.478117,-0.106273,-0.417753,0.900163,-1.160063,1.429586,-0.725855,-0.644764,...,-0.164684,-0.153786,-0.174971,-0.115665,-0.100001,-0.115665,-0.100001,-0.057544,-0.057544,-0.057544
299,0.681525,1.973195,-1.229981,0.338141,-0.417753,0.900163,-0.767111,-0.697187,0.135136,-0.644764,...,-0.164684,-0.153786,-0.174971,-0.115665,-0.100001,-0.115665,-0.100001,-0.057544,-0.057544,-0.057544
300,0.681525,-0.933658,0.705863,-1.033747,2.385833,0.900163,-0.374158,-0.697187,2.029316,-0.644764,...,-0.164684,-0.153786,-0.174971,8.617036,-0.100001,-0.115665,-0.100001,-0.057544,-0.057544,-0.057544
301,0.681525,-0.933658,-0.091249,-2.231734,-0.417753,0.900163,-1.509354,1.429586,0.135136,-0.644764,...,-0.164684,-0.153786,-0.174971,-0.115665,-0.100001,-0.115665,-0.100001,-0.057544,-0.057544,-0.057544
