# Support Vector Machines with Python

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
%matplotlib inline

## Get the Data

In [None]:
# Read the dataset, take 'Outcome' as the target and keep only the two
# predictors used for the 2-D decision-surface plots below.
df = pd.read_csv('diabetes.csv')
y = df['Outcome']
X = df[['BMI', 'Glucose']]
X

## Train Test Split and Standardize

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. The split is stratified on y and
# seeded so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=5, stratify=y)

# Fit the scaler on the training split only, then apply that same fitted
# transformation to the test split. Calling fit_transform on X_test would
# re-fit the scaler on the test data, so train and test would be standardised
# with different statistics and test information would leak into
# preprocessing.
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## Train the Support Vector Classifier

In [None]:
from sklearn.svm import SVC

In [None]:
# Linear-kernel support vector classifier with C=1, fitted on the
# training split.
model = SVC(kernel='linear', C=1)
model.fit(X_train, y_train)

## Predictions and Evaluations

Now let's predict using the trained model.

In [None]:
# Predict labels for the held-out samples, then show the model's score
# on that same split.
predictions = model.predict(X_test)
model.score(X=X_test, y=y_test)

In [None]:
from sklearn.metrics import confusion_matrix

# Tabulate true labels against the predicted labels for the test split.
print(confusion_matrix(y_true=y_test, y_pred=predictions))

In [None]:
#Functions to plot decision boundaries
def make_meshgrid(x, y, h=.02):
    """Build a 2-D coordinate grid that covers the given points.

    Parameters
    ----------
    x, y : array-like exposing ``.min()`` and ``.max()``
        Values along the first and second plot axis.
    h : float, optional
        Spacing between neighbouring grid points (default 0.02).

    Returns
    -------
    tuple
        ``(xx, yy)`` coordinate matrices produced by ``np.meshgrid``; each
        axis spans from ``min - 1`` up to (but excluding) ``max + 1``.
    """
    # Pad the data range by one unit on every side so the outermost points
    # do not sit on the border of the plot.
    x_axis = np.arange(x.min() - 1, x.max() + 1, h)
    y_axis = np.arange(y.min() - 1, y.max() + 1, h)
    grid_x, grid_y = np.meshgrid(x_axis, y_axis)
    return grid_x, grid_y

def plot_contours(ax, clf, xx, yy, **params):
    """Draw a classifier's predictions over a grid as filled contours.

    Parameters
    ----------
    ax : object providing ``contourf``
        Axes to draw on.
    clf : object providing ``predict``
        Called once with an ``(n_points, 2)`` array of grid coordinates.
    xx, yy : ndarray
        Coordinate matrices of identical shape (e.g. from ``make_meshgrid``).
    **params
        Forwarded unchanged to ``ax.contourf``.

    Returns
    -------
    Whatever ``ax.contourf`` returns.
    """
    # Flatten both grids and pair them up so that every row is one (x, y)
    # point to classify.
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    # Fold the flat predictions back into the grid's shape for contouring.
    labels = clf.predict(grid_points).reshape(xx.shape)
    return ax.contourf(xx, yy, labels, **params)

In [None]:
# Decision regions of the current model drawn behind the training points.
fig, ax = plt.subplots()
title = ('Decision surface of SVC ')

# The grid spans both training features: column 0 goes on the x-axis,
# column 1 on the y-axis.
X0 = X_train[:, 0]
X1 = X_train[:, 1]
xx, yy = make_meshgrid(X0, X1)

plot_contours(ax, model, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8)
ax.scatter(X0, X1, c=y_train, cmap=plt.cm.coolwarm, s=20, edgecolors='k')

# Axis labels, empty tick lists and the title are applied in one call.
ax.set(ylabel='Glucose', xlabel='BMI', xticks=(), yticks=(), title=title)
plt.show()

In [None]:
# Polynomial-kernel classifier (C=1), fitted on the training split.
model = SVC(kernel='poly', C=1).fit(X_train, y_train)

# Decision regions of that model drawn behind the training points.
fig, ax = plt.subplots()
title = ('Decision surface of SVC ')

# Column 0 goes on the x-axis, column 1 on the y-axis.
X0 = X_train[:, 0]
X1 = X_train[:, 1]
xx, yy = make_meshgrid(X0, X1)

plot_contours(ax, model, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8)
ax.scatter(X0, X1, c=y_train, cmap=plt.cm.coolwarm, s=20, edgecolors='k')
ax.set(ylabel='Glucose', xlabel='BMI', xticks=(), yticks=(), title=title)
plt.show()

In [None]:
# Predict the test split with the polynomial-kernel model and show its score.
predictions = model.predict(X_test)
model.score(X=X_test, y=y_test)

In [None]:
# RBF-kernel classifier with C=0.1, fitted on the training split.
model = SVC(kernel='rbf', C=0.1).fit(X_train, y_train)

# Decision regions of that model drawn behind the training points.
fig, ax = plt.subplots()
title = ('Decision surface of SVC ')

# Column 0 goes on the x-axis, column 1 on the y-axis.
X0 = X_train[:, 0]
X1 = X_train[:, 1]
xx, yy = make_meshgrid(X0, X1)

plot_contours(ax, model, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8)
ax.scatter(X0, X1, c=y_train, cmap=plt.cm.coolwarm, s=20, edgecolors='k')
ax.set(ylabel='Glucose', xlabel='BMI', xticks=(), yticks=(), title=title)
plt.show()

In [None]:
# Predict the test split with the C=0.1 RBF model and show its score.
predictions = model.predict(X_test)
model.score(X=X_test, y=y_test)

In [None]:
# RBF-kernel classifier with C=1, fitted on the training split.
model = SVC(kernel='rbf', C=1).fit(X_train, y_train)

# Decision regions of that model drawn behind the training points.
fig, ax = plt.subplots()
title = ('Decision surface of SVC ')

# Column 0 goes on the x-axis, column 1 on the y-axis.
X0 = X_train[:, 0]
X1 = X_train[:, 1]
xx, yy = make_meshgrid(X0, X1)

plot_contours(ax, model, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8)
ax.scatter(X0, X1, c=y_train, cmap=plt.cm.coolwarm, s=20, edgecolors='k')
ax.set(ylabel='Glucose', xlabel='BMI', xticks=(), yticks=(), title=title)
plt.show()

In [None]:
# Predict the test split with the C=1 RBF model and show its score.
predictions = model.predict(X_test)
model.score(X=X_test, y=y_test)

In [None]:
# RBF-kernel classifier with C=10, fitted on the training split.
model = SVC(kernel='rbf', C=10).fit(X_train, y_train)

# Decision regions of that model drawn behind the training points.
fig, ax = plt.subplots()
title = ('Decision surface of SVC ')

# Column 0 goes on the x-axis, column 1 on the y-axis.
X0 = X_train[:, 0]
X1 = X_train[:, 1]
xx, yy = make_meshgrid(X0, X1)

plot_contours(ax, model, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8)
ax.scatter(X0, X1, c=y_train, cmap=plt.cm.coolwarm, s=20, edgecolors='k')
ax.set(ylabel='Glucose', xlabel='BMI', xticks=(), yticks=(), title=title)
plt.show()

In [None]:
# Predict the test split with the C=10 RBF model and show its score.
predictions = model.predict(X_test)
model.score(X=X_test, y=y_test)

In [None]:
# RBF-kernel classifier with C=100, fitted on the training split.
model = SVC(kernel='rbf', C=100).fit(X_train, y_train)

# Decision regions of that model drawn behind the training points.
fig, ax = plt.subplots()
title = ('Decision surface of SVC ')

# Column 0 goes on the x-axis, column 1 on the y-axis.
X0 = X_train[:, 0]
X1 = X_train[:, 1]
xx, yy = make_meshgrid(X0, X1)

plot_contours(ax, model, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8)
ax.scatter(X0, X1, c=y_train, cmap=plt.cm.coolwarm, s=20, edgecolors='k')
ax.set(ylabel='Glucose', xlabel='BMI', xticks=(), yticks=(), title=title)
plt.show()

In [None]:
# Predict the test split with the C=100 RBF model and show its score.
predictions = model.predict(X_test)
model.score(X=X_test, y=y_test)

# With all variables

In [None]:
# Reload the dataset; this time every column except the target is a feature.
df = pd.read_csv('diabetes.csv')
X = df.drop(columns='Outcome')
y = df['Outcome']

In [None]:
# Hold out 20% of the rows for testing. The split is stratified on y and
# seeded so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=35, stratify=y)

# Fit the scaler on the training split only, then apply that same fitted
# transformation to the test split. Calling fit_transform on X_test would
# re-fit the scaler on the test data, so train and test would be standardised
# with different statistics and test information would leak into
# preprocessing.
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Linear-kernel support vector classifier with C=1, fitted on the
# full-feature training split.
model = SVC(kernel='linear', C=1)
model.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out samples, then show the model's score
# on that same split.
predictions = model.predict(X_test)
model.score(X=X_test, y=y_test)

In [None]:
# Tabulate true labels against the predicted labels for the test split.
print(confusion_matrix(y_true=y_test, y_pred=predictions))

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate settings, one sub-grid per kernel. Only the RBF sub-grid also
# varies gamma; all three try the same C values.
tuned_parameters = [
    {
        'kernel': ['rbf'],
        'gamma': [1, 0.1, 0.01, 0.001],
        'C': [1, 10, 100, 1000],
    },
    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]},
    {'kernel': ['poly'], 'C': [1, 10, 100, 1000]},
]

# Candidates are compared by accuracy; verbose=2 prints progress while
# the search runs.
grid = GridSearchCV(
    estimator=SVC(),
    param_grid=tuned_parameters,
    scoring='accuracy',
    refit=True,
    verbose=2,
)

# Fitting runs the whole search, so this may take a while.
grid.fit(X_train, y_train)

In [None]:
# Show the hyper-parameter combination selected by the grid search.
print(grid.best_params_)

In [None]:
# Refit an RBF classifier with fixed hyper-parameters and predict the test
# split.
# NOTE(review): C and gamma are hard-coded here, presumably copied from the
# grid.best_params_ printout -- confirm they still match after re-running
# the search.
model = SVC(kernel='rbf', gamma=0.001, C=10).fit(X_train, y_train)
predictions = model.predict(X_test)

In [None]:
# Score of the hard-coded RBF model on the held-out split.
model.score(X=X_test, y=y_test)

In [None]:
# Tabulate true labels against the predicted labels for the test split.
print(confusion_matrix(y_true=y_test, y_pred=predictions))