## Heart Failure Prediction using SVM (Support Vector Machine)

Athul Mathew Konoor - 20016  M-Tech AI and DS 19AI613 Machine Learning Lab Evaluation- Hyper Parameter Tuning.

In [None]:
import pandas as pd

import matplotlib.pyplot as plt

import scikitplot as skplt

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report 
from sklearn.linear_model import SGDClassifier

from sklearn.svm import SVC

## Feature Selection

In [None]:
# Load the clinical records CSV into a dataframe (path is relative to the notebook).
df = pd.read_csv(
    '../input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv'
)

# Target column first, then the hand-picked subset of predictor columns.
Y = df['DEATH_EVENT']
X = df[
    [
        'age',
        'ejection_fraction',
        'serum_creatinine',
        'time',
    ]
]

# Last expression of the cell: display the first rows of the feature frame.
X.head()

In [None]:
import seaborn as sns

# Heatmap to investigate pairwise correlation between the dataframe's columns.
sns.set()
fig, ax = plt.subplots(figsize=(9, 6))
sns.heatmap(
    df.corr(),
    ax=ax,
    cmap='Blues',
    linewidths=.5,
)

plt.show()

In [None]:
# Stratified 80/20 split with a fixed seed so the split is repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    stratify=Y,
    random_state=52,
)

# Report the shape of every split, in the same order as the unpacking above.
split_captions = {
    'Shape of X_train:': X_train,
    'Shape of X_test:': X_test,
    'Shape of Y_train:': Y_train,
    'Shape of Y_test:': Y_test,
}
for caption, split in split_captions.items():
    print(caption, split.shape)

## Try different kernel tuning mechanisms

In [None]:
# Kernel functions to compare, in the order the models are trained and plotted.
kernels = [
    "linear",
    "poly",
    "rbf",
    "sigmoid",
]

In [None]:
# One classification report (as a dict) per kernel, in `kernels` order.
report_listt = []

for model_no, kernel_name in enumerate(kernels, start=1):
    print("Model", model_no, "with Kernel =", kernel_name)

    # Same small C for every kernel so only the kernel function varies.
    model = SVC(C=.01, kernel=kernel_name)
    model.fit(X_train, Y_train)
    Y_predict = model.predict(X_test)

    # Dict form is stored for the plots below; text form is printed for reading.
    report = classification_report(Y_test, Y_predict, output_dict=True)
    report_listt.append(report)

    print(classification_report(Y_test, Y_predict))

In [None]:
# Macro-averaged precision and recall of each kernel's model, in `kernels` order.
# (matplotlib is already imported as `plt` in the notebook's first cell.)
y_prec = []
y_rec = []

for _, kernel_report in zip(kernels, report_listt):
    y_prec.append(kernel_report['macro avg']['precision'])
    y_rec.append(kernel_report['macro avg']['recall'])

# The value shown is macro-average precision, so label it as precision.
for kernel_name, precision in zip(kernels, y_prec):
    print("Kernel -", kernel_name, " :: Avg Precision -", precision)

# Line plot of macro-average precision per kernel.
plt.plot(kernels, y_prec)
plt.title("Kernels vs Average Precision")
plt.xlabel("Kernels")
plt.ylabel("Average Precision")
plt.savefig("kernels_svm1.png")
plt.show()

Only the linear kernel provides good accuracy. We now have the regularisation (C) and gamma parameters to tune.

## Hyperparameter Tuning of C (Regularization parameter)

In [None]:
# Sweep the regularization parameter C with a linear kernel to find the best model.

clistt = [10, 1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0.01, 0.001]

# One classification report (as a dict) per C value, in `clistt` order.
report_listt = []

for model_no, c_value in enumerate(clistt, start=1):
    print("Model", model_no, "with C =", c_value)

    model = SVC(C=c_value, kernel='linear')
    model.fit(X_train, Y_train)
    Y_predict = model.predict(X_test)

    # Dict form is stored for the plot below; text form is printed for reading.
    report = classification_report(Y_test, Y_predict, output_dict=True)
    report_listt.append(report)

    print(classification_report(Y_test, Y_predict))
    

## Plot - C(Regularization parameter) vs Average Precision.

In [None]:
# Macro-averaged precision of each model from the C sweep, in `clistt` order.
# (matplotlib is already imported as `plt` in the notebook's first cell.)
y_prec = [c_report['macro avg']['precision'] for c_report in report_listt]

# The value shown is macro-average precision, so label it as precision.
for c_value, precision in zip(clistt, y_prec):
    print("C(Regularization parameter) -", c_value, " :: Avg Precision -", precision)


In [None]:
# Line plot of macro-average precision against C.
# C ranges from 0.001 to 10, so a log-scaled x-axis keeps the small values from
# collapsing onto the left edge; markers show the C values that were sampled.
plt.plot(clistt, y_prec, marker="o")
plt.xscale("log")
plt.title("C(Regularization parameter) vs Average Precision")
plt.xlabel("C(Regularization parameter)")
plt.ylabel("Average Precision")
plt.savefig("c_svm.png")
plt.show()

## Hyperparameter Tuning of Gamma (Kernel coefficient)

In [None]:
# Sweep gamma (kernel coefficient) to look for the best model.
# NOTE(review): scikit-learn documents `gamma` as the coefficient for the 'rbf',
# 'poly' and 'sigmoid' kernels only, so with kernel='linear' every model in this
# sweep should be identical - confirm whether a different kernel was intended.

glistt = [1, 0.1, 0.01, 0.001, 0.0001]

# One classification report (as a dict) per gamma value, in `glistt` order.
report_listt = []

for model_no, gamma_value in enumerate(glistt, start=1):
    print("Model", model_no, "with gamma =", gamma_value)

    model = SVC(gamma=gamma_value, kernel='linear')
    model.fit(X_train, Y_train)
    Y_predict = model.predict(X_test)

    # Dict form is stored for the plot below; text form is printed for reading.
    report = classification_report(Y_test, Y_predict, output_dict=True)
    report_listt.append(report)

    print(classification_report(Y_test, Y_predict))

## Plot - Gamma (Kernel coefficient) vs Average Precision.

In [None]:
import matplotlib.pyplot as plt

# Macro-averaged precision of each model from the gamma sweep, in `glistt` order.
y_prec = [gamma_report['macro avg']['precision'] for gamma_report in report_listt]

# The value shown is macro-average precision, so label it as precision.
for gamma_value, precision in zip(glistt, y_prec):
    print("Gamma (Kernel coefficient) -", gamma_value, " :: Avg Precision -", precision)

In [None]:
# Line plot of macro-average precision against gamma.
# gamma ranges from 0.0001 to 1, so a log-scaled x-axis spreads the sampled
# values evenly instead of bunching them near zero; markers show each sample.
plt.plot(glistt, y_prec, marker="o")
plt.xscale("log")
plt.title("Gamma (Kernel coefficient) vs Average Precision.")
plt.xlabel("Gamma (Kernel coefficient)")
plt.ylabel("Average Precision")
plt.savefig("gamma_svm.png")
plt.show()

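All gamma values give the same precision, as shown in the line chart above. This is expected: the models above use `kernel='linear'`, and scikit-learn's `SVC` only uses `gamma` with the 'rbf', 'poly' and 'sigmoid' kernels.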

## Confusion Matrix and Classification Report of current best Model

In [None]:
# Best model is for C = 0.2: refit it and evaluate on the held-out split.
from sklearn.metrics import accuracy_score

model = SVC(C=.2, kernel='linear')
model.fit(X_train, Y_train)
Y_predict = model.predict(X_test)

# Normalized confusion matrix of the test-set predictions.
skplt.metrics.plot_confusion_matrix(
    Y_test,
    Y_predict,
    title='Confusion Matrix: SVM',
    normalize=True,
    figsize=(4, 4),
    cmap='Blues',
)

# Test-set accuracy; left as the last expression so the notebook displays it.
s1 = accuracy_score(Y_test, Y_predict)
s1

In [None]:
# Text classification report for the predictions made in the previous cell.
print(classification_report(y_true=Y_test, y_pred=Y_predict))

## Apply the grid search feature in sklearn to find the best hyperparameters (C, gamma).

Applying grid search (`GridSearchCV`) to find the best model.

In [None]:
# NOTE(review): this grid search is disabled - every line in the cell is
# commented out, so `grid` is never defined when the notebook runs top to bottom.
# The grid below covers 8 C values x 5 gamma values x 4 kernels.
# from sklearn.model_selection import GridSearchCV 
  
# # defining parameter range 
# param_grid = {'C': [0.01, 0.1, 0.2, 0.5, 1, 10, 100, 1000],  
#               'gamma': [1, 0.1, 0.01, 0.001, 0.0001], 
#               'kernel': ['rbf', 'linear', 'poly', 'sigmoid']}  
  
# grid = GridSearchCV(SVC(), param_grid, refit = True, verbose = 3) 
  
# # fitting the model for grid search 
# grid.fit(X_train, Y_train)

In [None]:
# NOTE(review): disabled along with the grid search cell; it would fail if
# uncommented on its own because `grid` is only created by that (commented) cell.
# Presumably the hard-coded kernel/C/gamma used in the following section were
# copied from this output - TODO confirm.
# # print best parameter after tuning 
# print(grid.best_params_) 
  
# # print how our model looks after hyper-parameter tuning 
# print(grid.best_estimator_)

## Best SVM using Grid Search

In [None]:
# Fit the polynomial-kernel SVM with fixed C and gamma, then predict the test split.
model = SVC(C=0.001, gamma=0.01, kernel='poly')
model.fit(X_train, Y_train)
Y_predict = model.predict(X_test)

# Test-set accuracy, displayed as the cell's last expression below.
s1 = accuracy_score(Y_test, Y_predict)

# Normalized confusion matrix of the test-set predictions.
skplt.metrics.plot_confusion_matrix(
    Y_test,
    Y_predict,
    title='Confusion Matrix: SVM',
    normalize=True,
    figsize=(6, 6),
    cmap='Blues',
)

plt.show()
s1

In [None]:
# Text classification report for the current predictions, kept in `report` and printed.
report = classification_report(y_true=Y_test, y_pred=Y_predict)
print(report)

## Final Model Confusion matrix and Classification Report

In [None]:
# Final model: polynomial-kernel SVM with C = 0.001 and gamma = 0.01,
# trained on the training split and evaluated on the test split.
model = SVC(C=0.001, gamma=0.01, kernel='poly')
model.fit(X_train, Y_train)
Y_predict = model.predict(X_test)

# Test-set accuracy, displayed as the cell's last expression below.
s1 = accuracy_score(Y_test, Y_predict)

# Normalized confusion matrix of the test-set predictions.
skplt.metrics.plot_confusion_matrix(
    Y_test,
    Y_predict,
    title='Confusion Matrix: SVM',
    normalize=True,
    figsize=(6, 6),
    cmap='Blues',
)

plt.show()
s1