## Support Vector Machine classifier 

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 


In [2]:
# Load the dataset from 'diabetes.csv' (path is relative to the working directory).
df = pd.read_csv('diabetes.csv')

In [4]:
# Preview the first rows to check the column names and typical values.
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [7]:
# Features are every column except the last one; the last column is the label.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

In [9]:
# Display the feature matrix (the rendered output is truncated by pandas).
X

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33
...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63
764,2,122,70,27,0,36.8,0.340,27
765,5,121,72,23,112,26.2,0.245,30
766,1,126,60,0,0,30.1,0.349,47


In [10]:
from sklearn.model_selection import train_test_split


In [11]:
# Hold out 30% of the rows as a test set; the fixed random_state keeps the
# split reproducible between runs.
# NOTE(review): the split is not stratified on y - consider stratify=y if the
# class balance of the two splits matters.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

In [12]:
# let's scale the data
from sklearn.preprocessing import StandardScaler

In [16]:
# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits so no test-set statistics are used in training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [17]:
from sklearn.svm import SVC

In [18]:
# Baseline model: SVC with the library defaults (no arguments are overridden).
svc_classifier = SVC()

In [19]:
# Train the baseline classifier on the standardized training features.
svc_classifier.fit(X_train_scaled, y_train)

SVC()

In [20]:
# Predict labels for the standardized test features with the baseline model.
y_pred = svc_classifier.predict(X_test_scaled)

In [21]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [22]:
# Baseline evaluation on the held-out test set: confusion matrix, overall
# accuracy, then the per-class precision/recall/F1 report.
print(confusion_matrix(y_test, y_pred))
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[134  12]
 [ 36  49]]
0.7922077922077922
              precision    recall  f1-score   support

           0       0.79      0.92      0.85       146
           1       0.80      0.58      0.67        85

    accuracy                           0.79       231
   macro avg       0.80      0.75      0.76       231
weighted avg       0.79      0.79      0.78       231



## Hyper-parameter tuning

In [47]:
# Search space for the grid search: every listed value of the SVC parameter C
# is combined with every listed kernel (5 x 4 = 20 candidate settings).
parameters = dict(
    C=[0.1, 0.3, 0.5, 0.75, 1],
    kernel=['linear', 'poly', 'rbf', 'sigmoid'],
)

In [48]:
from sklearn.model_selection import GridSearchCV

In [51]:
# Exhaustive search over every combination in `parameters`, scored with
# 5-fold cross-validation, using the SVC instance as the base estimator.
classifier_cv = GridSearchCV(estimator=svc_classifier, param_grid=parameters, cv= 5)

In [52]:
# Run the grid search on the standardized training data.
# NOTE(review): X_train_scaled was standardized with statistics from the whole
# training split, so each validation fold has already influenced the scaling;
# wrapping the scaler and SVC in a Pipeline would keep the folds independent.
classifier_cv.fit(X_train_scaled, y_train)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid={'C': [0.1, 0.3, 0.5, 0.75, 1],
                         'kernel': ['linear', 'poly', 'rbf', 'sigmoid']})

In [53]:
# Show the parameter combination the grid search selected as best.
classifier_cv.best_params_

{'C': 1, 'kernel': 'linear'}

In [54]:
# Predict test labels through the search object. No `refit` argument is passed
# when it is built, so this relies on GridSearchCV's default refit behaviour
# to have a fitted best estimator available.
y_pred_cv = classifier_cv.predict(X_test_scaled)

In [55]:
# Test-set accuracy of the tuned model, for comparison with the baseline SVC.
print(accuracy_score(y_test, y_pred_cv))

0.7922077922077922
