In [1]:
import pandas as pd
import numpy as np

import seaborn as sns

from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split

In [2]:
# Dataset from kaggle
# https://www.kaggle.com/dileep070/heart-disease-prediction-using-logistic-regression
dataset = pd.read_csv('framingham.csv')
dataset.head()

Unnamed: 0,male,age,education,currentSmoker,cigsPerDay,BPMeds,prevalentStroke,prevalentHyp,diabetes,totChol,sysBP,diaBP,BMI,heartRate,glucose,TenYearCHD
0,1,39,4.0,0,0.0,0.0,0,0,0,195.0,106.0,70.0,26.97,80.0,77.0,0
1,0,46,2.0,0,0.0,0.0,0,0,0,250.0,121.0,81.0,28.73,95.0,76.0,0
2,1,48,1.0,1,20.0,0.0,0,0,0,245.0,127.5,80.0,25.34,75.0,70.0,0
3,0,61,3.0,1,30.0,0.0,0,1,0,225.0,150.0,95.0,28.58,65.0,103.0,1
4,0,46,3.0,1,23.0,0.0,0,0,0,285.0,130.0,84.0,23.1,85.0,85.0,0


In [3]:
# Drop all the NAs
dataset.dropna(inplace = True)

In [4]:
# Identify the dependent and Independent Variables
X = dataset.iloc[:,:-1].values
Y = dataset.iloc[:,-1].values

In [5]:
# Split into training and test dataset
X_train , X_test , Y_train , Y_test= train_test_split(X, Y, test_size = 0.2)

In [6]:
clf = SVC(gamma="auto")

In [7]:
clf.fit(X_train , Y_train)

SVC(gamma='auto')

In [8]:
Y_pred1 = clf.predict(X_test)

In [9]:
# Confusion Matrix
cm_score = confusion_matrix(Y_pred1, Y_test)
print("Confusion Matrix")
print(cm_score)

# Accuracy Score
acc_score = accuracy_score(Y_pred1, Y_test)
print("Accuracy Score")
print(acc_score)

Confusion Matrix
[[617 115]
 [  0   0]]
Accuracy Score
0.842896174863388


#### Using Grid Search to get the best parameters

In [10]:
from sklearn.model_selection import GridSearchCV
#grid search for optimum parameters
C = [0.001, 0.01, 0.1, 1, 10]
gamma = [0.001, 0.01, 0.1, 1]
param_grid = {'C': C, 'gamma' : gamma}
best_clf = GridSearchCV(SVC(kernel='rbf', probability=True), param_grid, cv=10)

In [11]:
# train the model
best_clf.fit(X_train,Y_train)
best_clf.best_params_

{'C': 1, 'gamma': 0.01}

In [None]:
best_predict = best_clf.predict(X_test)

In [None]:
# Confusion Matrix
cm_score = confusion_matrix(best_predict, Y_test)
print("Confusion Matrix")
print(cm_score)

# Accuracy Score
acc_score = accuracy_score(best_predict, Y_test)
print("Accuracy Score")
print(acc_score)

#### Reference Script
https://www.kaggle.com/amayomordecai/heart-disease-risk-prediction-machine-learning