## Import Libraries and Load the Data

In [155]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import recall_score

import warnings
warnings.filterwarnings('ignore')

In [156]:
# Load the dataset from the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='heart.csv')

## Understand The Data

In [157]:
# Row count first, then column count.
n_rows, n_cols = df.shape
print(n_rows)
print(n_cols)

1025
14


In [158]:
# Number of missing values in each column.
df.isna().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [159]:
# Storage dtype of each column.
df.dtypes

age           int64
sex           int64
cp            int64
trestbps      int64
chol          int64
fbs           int64
restecg       int64
thalach       int64
exang         int64
oldpeak     float64
slope         int64
ca            int64
thal          int64
target        int64
dtype: object

In [160]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
count,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0
mean,54.434146,0.69561,0.942439,131.611707,246.0,0.149268,0.529756,149.114146,0.336585,1.071512,1.385366,0.754146,2.323902,0.513171
std,9.07229,0.460373,1.029641,17.516718,51.59251,0.356527,0.527878,23.005724,0.472772,1.175053,0.617755,1.030798,0.62066,0.50007
min,29.0,0.0,0.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,48.0,0.0,0.0,120.0,211.0,0.0,0.0,132.0,0.0,0.0,1.0,0.0,2.0,0.0
50%,56.0,1.0,1.0,130.0,240.0,0.0,1.0,152.0,0.0,0.8,1.0,0.0,2.0,1.0
75%,61.0,1.0,2.0,140.0,275.0,0.0,1.0,166.0,1.0,1.8,2.0,1.0,3.0,1.0
max,77.0,1.0,3.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,2.0,4.0,3.0,1.0


## Train and Evaluate the Models

In [161]:
# Features are every column except the label; the label is `target`.
x = df.drop(columns='target')
y = df['target']

# Hold out 40% of the rows for testing; the fixed seed makes the shuffle repeatable.
# NOTE(review): the split is not stratified on `y`, and it has not been checked
# whether `df` contains duplicated rows (df.duplicated().sum()). Duplicates that
# land on both sides of the split would inflate the test scores -- TODO confirm.
xTrain, xTest, yTrain, yTest = train_test_split(
    x,
    y,
    test_size=0.4,
    random_state=9,
)

In [162]:
# Random forest on the raw (unscaled) training features, seeded for repeatability.
forest = RandomForestClassifier(random_state=9)
forest.fit(X=xTrain, y=yTrain)

In [163]:
# Gaussian naive Bayes on the raw (unscaled) training features.
nb_clf = GaussianNB()
nb_clf.fit(X=xTrain, y=yTrain)

In [164]:
# Gradient boosting on the raw (unscaled) training features.
# random_state pins the estimator's internal randomness so that a fresh
# Restart & Run All yields the same fitted model; 9 is the seed already used
# for the train/test split and the random forest.
gb_clf = GradientBoostingClassifier(random_state=9)

gb_clf.fit(xTrain, yTrain)

In [165]:
# Learn the standardisation statistics from the training rows only, then apply
# that same fitted transform to both splits so no test-set statistics are used.
scaler = StandardScaler().fit(xTrain)

xTrainScaled = scaler.transform(xTrain)
xTestScaled = scaler.transform(xTest)

In [166]:
# k-nearest neighbours with default settings, fitted on the standardised features.
knn = KNeighborsClassifier()
knn.fit(X=xTrainScaled, y=yTrain)

In [167]:
# Logistic regression with default settings, fitted on the standardised features.
log = LogisticRegression()
log.fit(X=xTrainScaled, y=yTrain)

In [168]:
# Support vector classifier with default settings, fitted on the standardised features.
svc = SVC()
svc.fit(X=xTrainScaled, y=yTrain)

In [169]:
# Test-set accuracy of the random forest (raw features).
forest.score(X=xTest, y=yTest)

0.9853658536585366

In [170]:
# Test-set accuracy of Gaussian naive Bayes (raw features).
nb_clf.score(X=xTest, y=yTest)

0.8463414634146341

In [171]:
# Test-set accuracy of gradient boosting (raw features).
gb_clf.score(X=xTest, y=yTest)

0.9707317073170731

In [172]:
# Test-set accuracy of k-nearest neighbours (standardised features).
knn.score(X=xTestScaled, y=yTest)

0.8536585365853658

In [173]:
# Test-set accuracy of logistic regression (standardised features).
log.score(X=xTestScaled, y=yTest)

0.8731707317073171

In [174]:
# Test-set accuracy of the support vector classifier (standardised features).
svc.score(X=xTestScaled, y=yTest)

0.9341463414634147

In [175]:
# Recall on the held-out split for every fitted model. Each model is paired
# with the test matrix matching what it was trained on: raw features for the
# forest, naive Bayes and gradient boosting; standardised features for KNN,
# logistic regression and the SVC.
evaluations = [
    ('FOREST: ', forest, xTest),
    ('NB: ', nb_clf, xTest),
    ('GB: ', gb_clf, xTest),
    ('KNN: ', knn, xTestScaled),
    ('LOG: ', log, xTestScaled),
    ('SVC: ', svc, xTestScaled),
]

for label, model, features in evaluations:
    yPreds = model.predict(features)
    print(label, recall_score(yTest, yPreds))

FOREST:  0.9859154929577465
NB:  0.9014084507042254
GB:  0.9859154929577465
KNN:  0.8732394366197183
LOG:  0.92018779342723
SVC:  0.9530516431924883
