# Heart Model

In [1]:
import pandas as pd
import numpy as np

In [4]:
df = pd.read_csv("Heart_disease_dataset.csv")
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [5]:
df.shape

(303, 14)

In [6]:
df.isnull().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

### Feature Scaling

In [8]:
X = df.drop("target", axis="columns")
Y = df["target"]

In [10]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [22]:
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


### Logistic regression

In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score 
lr_score = cross_val_score(LogisticRegression(), X, Y, cv=10)
lr_score.mean()

0.8116129032258066

### Random Forest

In [14]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
rf_clf = GridSearchCV(RandomForestClassifier(), {'n_estimators':[ 10, 20, 30, 35, 40, 45]}, cv=10, return_train_score=False)
rf_clf.fit(X, Y)
rf_clf.cv_results_

{'mean_fit_time': array([0.02200005, 0.03600006, 0.05440032, 0.0636014 , 0.07300186,
        0.07830038]),
 'std_fit_time': array([0.00400002, 0.00489891, 0.00549942, 0.00540834, 0.00627744,
        0.00464902]),
 'mean_score_time': array([0.        , 0.00399995, 0.00540006, 0.00620012, 0.0049001 ,
        0.00649998]),
 'std_score_time': array([0.        , 0.00489892, 0.00473706, 0.00404468, 0.00388458,
        0.00449995]),
 'param_n_estimators': masked_array(data=[10, 20, 30, 35, 40, 45],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'n_estimators': 10},
  {'n_estimators': 20},
  {'n_estimators': 30},
  {'n_estimators': 35},
  {'n_estimators': 40},
  {'n_estimators': 45}],
 'split0_test_score': array([0.90322581, 0.87096774, 0.90322581, 0.90322581, 0.90322581,
        0.90322581]),
 'split1_test_score': array([0.80645161, 0.77419355, 0.77419355, 0.87096774, 0.80645161,
        0.87096774]),
 'split2_te

In [15]:
rf_result_df = pd.DataFrame(rf_clf.cv_results_)
rf_result_df[['param_n_estimators', 'mean_test_score']]

Unnamed: 0,param_n_estimators,mean_test_score
0,10,0.831505
1,20,0.80172
2,30,0.821828
3,35,0.851075
4,40,0.828065
5,45,0.828172


### SVM

In [16]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
svm_clf = GridSearchCV(SVC(), {'C': [1,10,20], 'kernel': ['rbf','linear']}, cv=10, return_train_score=False)
svm_clf.fit(X, Y)
svm_clf.cv_results_

{'mean_fit_time': array([0.00440032, 0.00380018, 0.00410025, 0.02380047, 0.00600002,
        0.04600008]),
 'std_fit_time': array([0.00066336, 0.00060015, 0.00053855, 0.01100767, 0.00489899,
        0.02154069]),
 'mean_score_time': array([0.00099998, 0.00090005, 0.00080001, 0.00020001, 0.        ,
        0.001     ]),
 'std_score_time': array([9.53674316e-08, 5.38588145e-04, 4.00006784e-04, 4.00018696e-04,
        0.00000000e+00, 2.99999714e-03]),
 'param_C': masked_array(data=[1, 1, 10, 10, 20, 20],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 1, 'kernel': 'rbf'},
  {'C': 1, 'kernel': 'linear'},
  {'C': 10, 'kernel': 'rbf'},
  {'C': 10, 'kernel': 'linear'},
  {'C': 20, 'kernel': 'rbf'},
  {'C': 20

In [17]:
svm_result_df = pd.DataFrame(svm_clf.cv_results_)
svm_result_df[['param_C', 'param_kernel', 'mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.82828
1,1,linear,0.828172
2,10,rbf,0.785484
3,10,linear,0.831505
4,20,rbf,0.788602
5,20,linear,0.831505


### Create Model and save it

In [19]:
svm_clf = SVC(C=10,kernel='linear')
svm_clf.fit(X,Y)

SVC(C=10, kernel='linear')

In [20]:
import pickle
pickle.dump(svm_clf, open('heart.pkl', 'wb'))