In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math, time

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
from tensorflow.keras.datasets import mnist
# Load the MNIST train/test splits; each split is an (images, labels) pair.
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# Flatten every image into a single feature row (one row per sample).
# A single vectorized reshape replaces the per-image Python loop; the label
# arrays are already 1-D and need no processing.
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)

print('mnist dataset shape:')
print('X_train: ' + str(X_train.shape))
print('y_train: ' + str(y_train.shape))
print('X_test: ' + str(X_test.shape))
print('y_test: ' + str(y_test.shape))

mnist dataset shape:
X_train: (60000, 784)
y_train: (60000,)
X_test: (10000, 784)
y_test: (10000,)


In [None]:
# Fixed seed so the subsample (and every score computed from it) is
# reproducible after Restart Kernel -> Run All.
SAMPLE_SEED = 42
TRAIN_PER_CLASS = 1200
TEST_PER_CLASS = 240


def stratified_subsample(features, labels, n_per_class, seed):
    """Draw `n_per_class` rows per label and return them in shuffled order.

    Parameters
    ----------
    features : 2-D array-like, one row per sample.
    labels : 1-D array-like with one class label per row of `features`.
    n_per_class : int, number of rows to draw (without replacement) per label.
    seed : int, random seed used for both the draw and the final shuffle.

    Returns
    -------
    pandas.DataFrame with the feature columns plus a 'label' column.
    """
    frame = pd.DataFrame(features)
    frame['label'] = labels
    # GroupBy.sample keeps the 'label' column in the result; the previous
    # groupby(...).apply(lambda x: x.sample(...)) relied on the grouping
    # column being passed into apply, which newer pandas versions deprecate.
    per_class = frame.groupby('label').sample(n=n_per_class, random_state=seed)
    # Shuffle so the rows are not ordered by class.
    return per_class.sample(frac=1, random_state=seed)


df_train = stratified_subsample(X_train, y_train, TRAIN_PER_CLASS, SAMPLE_SEED)
df_test = stratified_subsample(X_test, y_test, TEST_PER_CLASS, SAMPLE_SEED)

print('Sampled Dataset shape:')
print('Train: ' + str(df_train.shape))
print('Test: ' + str(df_test.shape))

Sampled Dataset shape:
Train: (12000, 785)
Test: (2400, 785)


In [None]:
# Manual grid search over the RBF-SVM hyperparameters, scored by 3-fold CV.
# The scaler lives inside the pipeline, so it is re-fitted on the training
# part of every fold.
C = [0.1, 1, 3]
gamma = [0.001,0.01, 0.1,1]

# Loop-invariant: build the feature frame / target once instead of copying
# the whole training frame on every (C, gamma) combination.
cv_features = df_train.drop(['label'], axis=1)
cv_labels = df_train['label']

for c in C:
    for g in gamma:
        # perf_counter is monotonic, so the elapsed time cannot be skewed by
        # wall-clock adjustments during these multi-minute fits.
        start_time = time.perf_counter()
        pipe = Pipeline([('scaler', MinMaxScaler(feature_range=(-1, 1))), ('svc', SVC(kernel= 'rbf', C = c, gamma = g))])
        score = cross_val_score(pipe, cv_features, cv_labels, n_jobs=-1, cv=3)
        accuracy = score.mean()
        print("CrossVal Score for c = ",c ," and gamma = ",g," :")
        print(accuracy)
        print("--- %s seconds ---" % (time.perf_counter() - start_time))

CrossVal Score for c =  0.1  and gamma =  0.001  :
0.8911666666666666
--- 219.65801620483398 seconds ---
CrossVal Score for c =  0.1  and gamma =  0.01  :
0.9165
--- 271.4805717468262 seconds ---
CrossVal Score for c =  0.1  and gamma =  0.1  :
0.12408333333333334
--- 443.52981543540955 seconds ---
CrossVal Score for c =  0.1  and gamma =  1  :
0.45058333333333334
--- 443.32353949546814 seconds ---
CrossVal Score for c =  1  and gamma =  0.001  :
0.9320833333333334
--- 99.39767146110535 seconds ---
CrossVal Score for c =  1  and gamma =  0.01  :
0.96175
--- 198.49789762496948 seconds ---
CrossVal Score for c =  1  and gamma =  0.1  :
0.38949999999999996
--- 443.9265673160553 seconds ---
CrossVal Score for c =  1  and gamma =  1  :
0.11258333333333333
--- 442.77299332618713 seconds ---
CrossVal Score for c =  3  and gamma =  0.001  :
0.9420833333333333
--- 76.08125567436218 seconds ---
CrossVal Score for c =  3  and gamma =  0.01  :
0.9635000000000001
--- 201.41177701950073 seconds ---


In [None]:
# Refit on the full training subsample with C = 3, gamma = 0.01, then score
# the model on both the training and the held-out test subsample.
train_features = df_train.drop(['label'], axis=1)
test_features = df_test.drop(['label'], axis=1)

steps = [
    ('scaler', MinMaxScaler(feature_range=(-1, 1))),
    ('svc', SVC(kernel='rbf', C=3, gamma=0.01)),
]
pipe = Pipeline(steps)
pipe.fit(train_features, df_train['label'])

pred_train = pipe.predict(train_features)
pred_test = pipe.predict(test_features)

print('Training Accuracy : ')
print(accuracy_score(df_train['label'], pred_train))
print('Test Accuracy : ')
print(accuracy_score(df_test['label'], pred_test))

Training Accuracy : 
1.0
Test Accuracy : 
0.9708333333333333


I have used a subset of the MNIST dataset: 12,000 training samples, drawn as 1,200 samples per label using stratified sampling. Each 28x28 image is flattened into a 1D array of 784 features.

Cross-validation is used for hyperparameter tuning through a grid search over different values of C and gamma.
The best hyperparameters are C = 3 and gamma = 0.01, with a 3-fold cross-validation score of 0.9635.

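Using these hyperparameters, an SVM with the 'rbf' kernel is fitted with the default decision function shape, one-vs-rest ('ovr'). Note that scikit-learn's `SVC` always trains one-vs-one classifiers internally; this setting only changes the shape of the decision function output.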
The training and test accuracy are :

Training Accuracy : 
1.0

Test Accuracy : 
0.9708333333333333

## **One Vs All SVM**

In [None]:
# Train one binary "this digit vs. the rest" RBF-SVM per class.
# Labels are sorted so that classifiers[i] is always the detector for
# labels[i]; a bare set() gives no ordering guarantee.
labels = sorted(set(df_train['label']))
classifiers = []

# Loop-invariant: the feature frame is identical for every binary problem.
ova_features = df_train.drop(['label'], axis=1)

for label in labels:
    # Binary target: 1 for the current digit, 0 for every other digit.
    y_binary = np.where(df_train['label'] != label, 0, 1)
    # class_weight='balanced' compensates for the 1-vs-9 class imbalance.
    # probability=True fits the probability estimates with an internal,
    # randomised cross-validation, so random_state is fixed for reproducibility.
    clf = Pipeline([('scaler', MinMaxScaler(feature_range=(-1, 1))), ('svc', SVC(kernel= 'rbf', C = 3, gamma = 0.01, class_weight= 'balanced', probability=True, random_state=42))])
    clf.fit(ova_features, y_binary)
    print("Done for label ", label)
    classifiers.append(clf)

Done for label  0
Done for label  1
Done for label  2
Done for label  3
Done for label  4
Done for label  5
Done for label  6
Done for label  7
Done for label  8
Done for label  9


In [None]:
def predict_one_vs_all(classifiers, labels, features):
    """Combine the binary one-vs-all classifiers into multi-class predictions.

    Parameters
    ----------
    classifiers : sequence of fitted binary classifiers exposing predict_proba,
        where classifiers[i] was trained to detect labels[i].
    labels : sequence of class labels, aligned with `classifiers`.
    features : 2-D feature matrix to predict on.

    Returns
    -------
    1-D integer numpy array holding, for each row, the label whose classifier
    reported the highest positive-class probability (first one wins on ties).
    """
    # Column j holds P(sample is labels[j]) according to the j-th classifier.
    positive_proba = np.column_stack(
        [clf.predict_proba(features)[:, 1] for clf in classifiers]
    )
    # Map the winning column back through `labels` rather than assuming that
    # the classifier position equals the class label.
    return np.asarray(labels)[positive_proba.argmax(axis=1)].astype(int)


y_pred_train = predict_one_vs_all(classifiers, labels, df_train.drop(['label'], axis=1))
y_pred_test = predict_one_vs_all(classifiers, labels, df_test.drop(['label'], axis=1))

In [None]:
# Score the hand-coded one-vs-all predictions on both subsamples.
for heading, y_true, y_pred in (
    ('Training Accuracy : ', df_train['label'], y_pred_train),
    ('Test Accuracy : ', df_test['label'], y_pred_test),
):
    print(heading)
    print(accuracy_score(y_true, y_pred))

print('Confusion Matrix for test predictions')
print(confusion_matrix(df_test['label'], y_pred_test))

Training Accuracy : 
1.0
Test Accuracy : 
0.9758333333333333
Confusion Matrix for test predictions
[[238   0   0   0   0   0   1   0   1   0]
 [  0 238   1   0   0   1   0   0   0   0]
 [  1   0 234   1   0   0   0   3   1   0]
 [  0   0   2 231   0   0   0   3   3   1]
 [  0   0   1   0 235   0   0   0   0   4]
 [  0   0   1   6   0 230   1   0   1   1]
 [  1   0   0   0   0   2 236   0   1   0]
 [  0   0   4   0   1   0   0 235   0   0]
 [  0   0   1   0   1   1   0   2 234   1]
 [  0   1   1   2   4   0   0   1   0 231]]


In [None]:
# Built-in multi-class SVC with the one-vs-one decision function shape.
# NOTE: per the scikit-learn SVC documentation, the model is always trained
# one-vs-one internally and decision_function_shape only changes the shape of
# decision_function's output, so the predictions are expected to match the
# default-shape pipeline fitted earlier (the recorded test accuracy is the same).
train_features = df_train.drop(['label'], axis=1)
test_features = df_test.drop(['label'], axis=1)

steps = [
    ('scaler', MinMaxScaler(feature_range=(-1, 1))),
    ('svc', SVC(kernel='rbf', C=3, gamma=0.01, decision_function_shape='ovo')),
]
pipe = Pipeline(steps)
pipe.fit(train_features, df_train['label'])

pred_train = pipe.predict(train_features)
pred_test = pipe.predict(test_features)

print('Training Accuracy : ')
print(accuracy_score(df_train['label'], pred_train))
print('Test Accuracy : ')
print(accuracy_score(df_test['label'], pred_test))

print('Confusion Matrix for test predictions')
print(confusion_matrix(df_test['label'], pred_test))

Training Accuracy : 
1.0
Test Accuracy : 
0.9708333333333333
Confusion Matrix for test predictions
[[239   0   0   0   0   1   0   0   0   0]
 [  0 238   1   0   0   1   0   0   0   0]
 [  1   0 233   1   0   0   1   2   2   0]
 [  0   0   3 230   0   0   0   3   4   0]
 [  0   0   1   0 235   0   0   0   0   4]
 [  0   0   2   5   0 230   1   0   2   0]
 [  1   0   0   0   0   3 235   0   1   0]
 [  0   0   4   0   3   0   0 233   0   0]
 [  1   0   1   1   1   2   0   2 231   1]
 [  0   4   1   2   5   0   0   1   1 226]]


In this part of the exercise, one-vs-one inbuilt function is compared with the one-vs-all coded version of SVM. 


---


For the hand-coded version, 10 binary SVM classifiers (one per digit) are trained with the same best hyperparameters. Training and test predictions are then made by selecting, for each sample, the class label with the highest predicted probability. The resulting accuracies are reported below:

Training Accuracy : 
1.0

Test Accuracy : 
0.9758333333333333

The training accuracy is the same as in exercise one (1.0), while the test accuracy is slightly higher than the 0.9708 reported there.


---


Later, the inbuilt 'ovo' version is used to fit and predict with the same chosen hyperparameters. The accuracies are as listed below:

Training Accuracy : 
1.0

Test Accuracy : 
0.9708333333333333


---

Both classifiers performed nearly equally well, with around 97% test accuracy. The difference was only 0.5 percentage points, with the hand-coded one-vs-all ('ovr') classifier performing better than 'ovo'.

Observing the confusion matrices, the misclassifications are similar, but there are slightly more in 'ovo' (typically 1 to 3 more per class; 70 versus 58 test errors in total).
The class pairs that were most often confused are (1 and 9, 3 and 5, 4 and 9, 7 and 2, 3 and 8).

