# Heart Disease Prediction: SVM Approach

### Importing Libraries

In [15]:
import numpy as np
import pandas as pd

### Reading Dataset

In [16]:
# Load the heart-disease records; the relative path is resolved against the
# current working directory.
DATA_PATH = "heart.csv"
df = pd.read_csv(DATA_PATH)

# Preview the first rows to confirm the file parsed as expected.
df.head()

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,output
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


### Checking Dataset for irregular data

In [17]:
# Inspect the dtype pandas inferred for each column.
df.dtypes

age           int64
sex           int64
cp            int64
trtbps        int64
chol          int64
fbs           int64
restecg       int64
thalachh      int64
exng          int64
oldpeak     float64
slp           int64
caa           int64
thall         int64
output        int64
dtype: object

In [18]:
# List the column labels available in the frame.
df.columns

Index(['age', 'sex', 'cp', 'trtbps', 'chol', 'fbs', 'restecg', 'thalachh',
       'exng', 'oldpeak', 'slp', 'caa', 'thall', 'output'],
      dtype='object')

In [19]:
# Feature columns: everything listed here is fed to the model; the target
# column 'output' is deliberately left out.
feature_columns = [
    'age', 'sex', 'cp', 'trtbps', 'chol', 'fbs', 'restecg',
    'thalachh', 'exng', 'oldpeak', 'slp', 'caa', 'thall',
]

# Convert the selected columns to a plain NumPy array for scikit-learn.
x = np.asarray(df[feature_columns])
x[:3]

array([[ 63. ,   1. ,   3. , 145. , 233. ,   1. ,   0. , 150. ,   0. ,
          2.3,   0. ,   0. ,   1. ],
       [ 37. ,   1. ,   2. , 130. , 250. ,   0. ,   1. , 187. ,   0. ,
          3.5,   0. ,   0. ,   2. ],
       [ 41. ,   0. ,   1. , 130. , 204. ,   0. ,   0. , 172. ,   0. ,
          1.4,   2. ,   0. ,   2. ]])

In [20]:
# Target vector: the 'output' column as a NumPy array.
y = np.asarray(df['output'])
y[:5]

array([1, 1, 1, 1, 1], dtype=int64)

### Splitting Dataset into **Train** and **Test** parts

In [21]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
TEST_FRACTION = 0.2
SPLIT_SEED = 4

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

## Creating and Training SVM Model with different Kernels

For reference, the full `SVC` signature (only `kernel` is varied below; every other argument keeps its default):

`class sklearn.svm.SVC(*, C=1.0, kernel='rbf', degree=3, gamma='scale', coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, max_iter=-1, decision_function_shape='ovr', break_ties=False, random_state=None)`

In [36]:
from sklearn import svm
from sklearn.metrics import accuracy_score, f1_score, jaccard_score

# Kernels to compare; each one gets a fresh SVC with otherwise default settings.
kernels = ["linear", "poly", "rbf", "sigmoid"]
results = []

# NOTE(review): the features are passed to SVC unscaled. scikit-learn's SVM
# guide recommends standardising inputs -- consider a StandardScaler pipeline
# and re-running the comparison.
for kernel in kernels:
    # Train on the training split, then predict the held-out split.
    clf = svm.SVC(kernel=kernel).fit(x_train, y_train)
    yhat = clf.predict(x_test)

    # Row layout is [kernel, F1, Jaccard, accuracy]; the order must match the
    # column headers used when these rows are turned into a DataFrame.
    # F1 is computed with average='weighted'; jaccard_score uses its defaults.
    results.append([
        kernel,
        f1_score(y_test, yhat, average='weighted'),
        jaccard_score(y_test, yhat),
        accuracy_score(y_test, yhat),
    ])

### Sorting and displaying results in a human-readable pandas DataFrame

In [37]:
# Turn the per-kernel rows into a table ranked by accuracy, best first.
#
# The frame is built *before* sorting (rather than calling list.sort on
# `results`) so that this cell can be re-run safely: once `results` has been
# rebound to a DataFrame it no longer has a list-style .sort() method, whereas
# pd.DataFrame(...) accepts either the original list of rows or the frame
# produced by a previous run.
results = (
    pd.DataFrame(results, columns=["Kernel", "F1 Score", "Jaccard Score", "Accuracy"])
    # Stable sort: kernels with equal accuracy keep their incoming order,
    # matching what list.sort(reverse=True) did.
    .sort_values("Accuracy", ascending=False, kind="stable")
    # Renumber rows 0..n-1 so the displayed index reflects the ranking.
    .reset_index(drop=True)
)
results

Unnamed: 0,Kernel,F1 Score,Jaccard Score,Accuracy
0,linear,0.917758,0.871795,0.918033
1,poly,0.714877,0.638298,0.721311
2,rbf,0.65114,0.607843,0.672131
3,sigmoid,0.43806,0.590164,0.590164


### Getting the most accurate result(s)

In [38]:
# Highest accuracy achieved by any kernel.
max_score = results["Accuracy"].max()

# Keep every row that reaches that accuracy, so ties are all reported.
is_best = results["Accuracy"] == max_score
max_result = results.loc[is_best]
max_result

Unnamed: 0,Kernel,F1 Score,Jaccard Score,Accuracy
0,linear,0.917758,0.871795,0.918033
