# Exercise - SVM for classification of 10 classes

1. Try to split your training data (again using **train_test_split**) to obtain a validation set. Try to optimize the performance of your model on the validation data, by trying different kernels (linear, poly, and rbf), different values of C, different decision function (ovr or ovo), and perhaps even other stuff (you can find a full list of options to tune at https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html}.

**See slides for more details!**

In [1]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import svm
import pandas as pd
import numpy as np

# Load the digits dataset
X, y = load_digits(return_X_y=True)

# We use `train_test_split` to split our data into a train and a test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# Now split the train data to also obtain validation data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)
print(X_train.shape, X_val.shape, X_test.shape, y_train.shape, y_val.shape, y_test.shape)

(1437, 64) (360, 64) (1437,) (360,)
(1257, 64) (540, 64) (360, 64) (1257,) (540,) (360,)


In [8]:
kernels = ['poly','rbf','linear'] # input values seperated by ",".
Cs = [0.01,10,50,100] # input values seperated by ",".
decision_functions = ['ovr','ovo'] # input values seperated by ",".

results = []

for kernel in kernels:
    for C in Cs:
        for decision_function in decision_functions:
            svm_current = svm.SVC(kernel=kernel, C=C, decision_function_shape=decision_function)
            svm_current.fit(X_train, y_train)
            y_val_hat = svm_current.predict(X_val)
            accuracy = accuracy_score(y_val, y_val_hat)

            results.append([accuracy, kernel, C, decision_function])

results = pd.DataFrame(results)
results.columns = ['Accuracy', 'Kernel', 'C', 'Decision function']
print(results)

    Accuracy  Kernel       C Decision function
0   0.907407    poly    0.01               ovr
1   0.907407    poly    0.01               ovo
2   0.988889    poly   10.00               ovr
3   0.988889    poly   10.00               ovo
4   0.988889    poly   50.00               ovr
5   0.988889    poly   50.00               ovo
6   0.988889    poly  100.00               ovr
7   0.988889    poly  100.00               ovo
8   0.092593     rbf    0.01               ovr
9   0.092593     rbf    0.01               ovo
10  0.988889     rbf   10.00               ovr
11  0.988889     rbf   10.00               ovo
12  0.988889     rbf   50.00               ovr
13  0.988889     rbf   50.00               ovo
14  0.988889     rbf  100.00               ovr
15  0.988889     rbf  100.00               ovo
16  0.979630  linear    0.01               ovr
17  0.979630  linear    0.01               ovo
18  0.979630  linear   10.00               ovr
19  0.979630  linear   10.00               ovo
20  0.979630 

In [9]:
# Extract best parameters.
results[results['Accuracy'] == results['Accuracy'].max()]

Unnamed: 0,Accuracy,Kernel,C,Decision function
2,0.988889,poly,10.0,ovr
3,0.988889,poly,10.0,ovo
4,0.988889,poly,50.0,ovr
5,0.988889,poly,50.0,ovo
6,0.988889,poly,100.0,ovr
7,0.988889,poly,100.0,ovo
10,0.988889,rbf,10.0,ovr
11,0.988889,rbf,10.0,ovo
12,0.988889,rbf,50.0,ovr
13,0.988889,rbf,50.0,ovo


In [10]:
# Initialize your final model
svm_optimized = svm.SVC(kernel='poly', degree=5, C = 1000)

# Use both training and validation data to fit it (np.concatenate "stacks" the array like rbind in R)
svm_optimized.fit(np.concatenate([X_train, X_val]), np.concatenate([y_train, y_val]))

# Predict on test data
y_val_hat_optimized = svm_optimized.predict(X_test)

# Obtain and check accuracy on test data
accuracy_optimized = accuracy_score(y_val_hat_optimized, y_test)
print(f'Optimized SVM achieved {round(accuracy_optimized * 100, 1)}% accuracy.')

Optimized SVM achieved 100.0% accuracy.
