## Introduction
- Using SVM
- The slack (error-cost) parameter `C` can be changed (higher C --> less tolerance for misclassification; a tighter fit to the training data, but less generalization)
- Kernels can be changed: linear or nonlinear kernels, etc.
- Used to classify patients as diabetic or not diabetic
- The effect of varying these parameters is displayed with confusion matrices

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, ConfusionMatrixDisplay
from IPython import display
import math


In [None]:
# Load the dataset: every column except the last is an input feature,
# and the last column is the output label.
df = pd.read_csv('diabetes.csv')
x = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy()

# Min-max scale each feature column to [0, 1].
# A constant column has max == min; dividing by that zero range would produce
# NaN, so use a range of 1 there, which maps the whole column to 0.
# NOTE: the min/max are computed over the full dataset before the split, so
# the test rows influence the scaling.
x_min = x.min(axis=0)
x_range = x.max(axis=0) - x_min
x_range = np.where(x_range == 0, 1, x_range)
X_norm = (x - x_min) / x_range

# Hold out 20% of the normalized samples for testing.
train_x, test_x, train_y, test_y = train_test_split(X_norm, y, test_size=0.20)

# Display names for the two classes (used in the confusion matrices later).
class_names = ['Not Diabetic', 'Diabetic']

# Parameter grid for the SVM models: slack/error cost `C` and `kernel`.
C_values = [0.01, 1, 10]
kernel_values = ['linear', 'poly', 'sigmoid', 'rbf']

# Filled in by the training loop, one entry per (kernel, C) pair.
cm_storage = []
classification_storage = []

# Train one SVM per (kernel, C) pair, score it on the test split, and store
# its confusion matrix and classification report in loop order.
for kernel in kernel_values:
    for C in C_values:
        # probability=True is needed so that predict_proba() is available.
        svm_ = SVC(gamma='auto', C=C, kernel=kernel, probability=True)
        svm_.fit(train_x, train_y)

        # Choose the most probable class for every test sample. argmax gives
        # a column index into the probability matrix, whose columns follow
        # svm_.classes_, so map the index back to the real label rather than
        # assuming the labels are exactly 0..n_classes-1.
        y_proba = svm_.predict_proba(test_x)
        y_svm = svm_.classes_[np.argmax(y_proba, axis=1)]

        acc_svm = accuracy_score(test_y, y_svm)
        print("Slack: {}".format(C))
        print("Kernel: {}".format(kernel))
        print("SVM Accuracy: {}%".format(acc_svm*100))

        # Confusion matrix and per-class precision/recall report.
        cm_svm = confusion_matrix(test_y, y_svm)
        report = classification_report(test_y, y_svm, target_names=class_names)
        cm_storage.append(cm_svm)
        classification_storage.append(report)
        print(cm_svm)
        print(report)
        print("*********************")

In [None]:
# Print each stored classification report and plot its confusion matrix.
# Walking the two lists in lockstep avoids a hard-coded result count, so this
# keeps working when the number of stored results changes.
for report, cm in zip(classification_storage, cm_storage):
    print(report)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
    disp.plot()
    # Render this figure now instead of deferring every plot to the end.
    plt.show()