Colab Link: https://colab.research.google.com/drive/1ZGpldf5pkelyL6P-d9jFFClKMTKxbGfb

In [50]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## K-Fold Cross Validation

### Data Prep

#### Read

In [85]:
# Load the classification dataset and preview its first two rows.
csv_path = 'data/ClassificationData1.csv'
data = pd.read_csv(csv_path)
data.head(2)

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0


In [86]:
# Positional split: every column except the last is a feature,
# the last column is the target.
feature_frame = data.iloc[:, :-1]
target_column = data.iloc[:, -1]
X = feature_frame.values
y = target_column.values

### Train Test Split

In [87]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state is fixed so the split
# is the same on every run.
split_parts = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split_parts

### Feature Scaling

In [88]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training features only, then replace X_train with its
# standardized version. X_test is left unscaled here; later cells run it
# through sc.transform() at prediction time.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)

### Build Model

In [89]:
from sklearn.svm import SVC

# Linear-kernel support vector classifier, trained on the scaled training set.
svc_options = {'kernel': 'linear', 'random_state': 0}
c_svc = SVC(**svc_options)
c_svc.fit(X_train, y_train)

### Apply K-Fold Cross Validation

In [90]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation of the linear SVC on the training set: one score
# per fold.
# NOTE(review): X_train was standardized with a scaler fitted on the whole
# training set, so every validation fold already influenced the scaling.
# Wrapping scaler + SVC in a Pipeline and cross-validating the unscaled data
# would avoid that.
accuracies = cross_val_score(c_svc, X_train, y_train, cv=10)
accuracies

array([0.78125, 0.78125, 0.90625, 0.6875 , 0.875  , 0.625  , 0.78125,
       0.90625, 0.96875, 0.875  ])

In [91]:
# Summarize the fold scores as percentages: mean and standard deviation
# (NumPy's default, i.e. ddof=0).
mean_pct = accuracies.mean() * 100
std_pct = accuracies.std() * 100
print("Accuracy: {:.2f} %".format(mean_pct))
print("Standard Deviation: {:.2f} %".format(std_pct))

Accuracy: 81.88 %
Standard Deviation: 10.16 %


### Predict

In [92]:
# Scale the held-out features with the scaler fitted on the training data,
# then predict with the linear SVC.
X_test_scaled = sc.transform(X_test)
y_pred_svc = c_svc.predict(X_test_scaled)

### Confusion Matrix

In [93]:
from sklearn.metrics import confusion_matrix

# Rows are true classes, columns are predicted classes.
cm_svc = confusion_matrix(y_test, y_pred_svc)
print(cm_svc)

[[57  1]
 [ 6 16]]


### Evaluate

In [94]:
from sklearn.metrics import accuracy_score

# Fraction of held-out samples the linear SVC classified correctly.
accuracy_score(y_true=y_test, y_pred=y_pred_svc)

0.9125

## Grid Search

### Data Prep

#### Read

In [95]:
# Reload the same CSV for the grid-search section and preview two rows.
csv_path = 'data/ClassificationData1.csv'
data = pd.read_csv(csv_path)
data.head(2)

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0


In [96]:
# All columns but the last are features; the last column is the target.
feature_frame = data.iloc[:, :-1]
target_column = data.iloc[:, -1]
X = feature_frame.values
y = target_column.values

### Train Test Split

In [97]:
from sklearn.model_selection import train_test_split

# 80/20 train/test split with a fixed random_state, so the split is the same
# on every run.
split_parts = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split_parts

### Feature Scaling

In [98]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training features only and overwrite
# X_train with the standardized values. X_test stays raw until prediction,
# where it is passed through sc.transform().
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)

### Build Model

In [99]:
from sklearn.svm import SVC

# Base estimator handed to the grid search below.
# NOTE(review): GridSearchCV fits clones of its estimator, so this fit does not
# appear to be needed by the search itself — confirm nothing else relies on a
# fitted c_svc before dropping it.
svc_options = {'kernel': 'linear', 'random_state': 0}
c_svc = SVC(**svc_options)
c_svc.fit(X_train, y_train)

### Apply Grid Search

In [100]:
from sklearn.model_selection import GridSearchCV

In [101]:
# Candidate hyperparameters: a linear-kernel grid over C, and an RBF-kernel
# grid over every (C, gamma) combination.
c_grid = [0.2, 0.4, 0.6, 0.8, 1]
gamma_grid = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
parameters = [
    {'C': c_grid, 'kernel': ['linear']},
    {'C': c_grid, 'kernel': ['rbf'], 'gamma': gamma_grid},
]

# Exhaustive search scored by accuracy under 10-fold cross-validation;
# n_jobs=-1 requests all available processors.
grid_search = GridSearchCV(
    estimator=c_svc,
    param_grid=parameters,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Report the best mean cross-validated score and the combination that produced it.
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:", best_parameters)

Best Accuracy: 90.62 %
Best Parameters: {'C': 0.2, 'gamma': 0.9, 'kernel': 'rbf'}


### Rebuild Model with best parameters and Predict

In [105]:
# Rebuild the classifier from the hyperparameters the grid search selected.
# Reading them from grid_search.best_params_ (rather than hand-copying the
# printed values) keeps this cell in sync if the data or the parameter grid
# changes; random_state matches the base estimator used in the search.
c_svc_mod = SVC(random_state=0, **grid_search.best_params_)
c_svc_mod.fit(X_train, y_train)

In [106]:
# Apply the training-set scaling to the held-out features, then predict with
# the tuned model.
X_test_scaled = sc.transform(X_test)
y_pred_svc_mod = c_svc_mod.predict(X_test_scaled)

### Confusion Matrix

In [107]:
from sklearn.metrics import confusion_matrix

# Rows are true classes, columns are predicted classes (tuned model).
cm_svc_mod = confusion_matrix(y_test, y_pred_svc_mod)
print(cm_svc_mod)

[[55  3]
 [ 1 21]]


### Evaluate

In [108]:
from sklearn.metrics import accuracy_score

# Fraction of held-out samples the tuned model classified correctly.
accuracy_score(y_true=y_test, y_pred=y_pred_svc_mod)

0.95