In [1]:
import urllib.request
import gzip
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.multiclass import OneVsOneClassifier
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

In [2]:
# downloading the dataset from http://yann.lecun.com/exdb/mnist/

url = 'http://yann.lecun.com/exdb/mnist/'
files = ['train-images-idx3-ubyte.gz', 
         'train-labels-idx1-ubyte.gz', 
         't10k-images-idx3-ubyte.gz', 
         't10k-labels-idx1-ubyte.gz']

with gzip.open(urllib.request.urlopen(url+files[0]), 'rb') as f:
    X_train = np.frombuffer(f.read(), dtype=np.uint8, offset=16).reshape(-1, 784)

with gzip.open(urllib.request.urlopen(url+files[1]), 'rb') as f:
    y_train = np.frombuffer(f.read(), dtype=np.uint8, offset=8)

with gzip.open(urllib.request.urlopen(url+files[2]), 'rb') as f:
    X_test = np.frombuffer(f.read(), dtype=np.uint8, offset=16).reshape(-1, 784)

with gzip.open(urllib.request.urlopen(url+files[3]), 'rb') as f:
    y_test = np.frombuffer(f.read(), dtype=np.uint8, offset=8)

In [3]:
X_train = X_train.reshape(X_train.shape[0], 784) / 255.0
X_test = X_test.reshape(X_test.shape[0], 784) / 255.0

In [4]:
indice_train = np.random.choice(X_train.shape[0], 5000, replace = False)
X_train = X_train[indice_train]
y_train = y_train[indice_train]

indice_test = np.random.choice(X_test.shape[0], 10000, replace = False)
X_test = X_test[indice_test]
y_test = y_test[indice_test]

In [5]:
# Define the grid of hyperparameters to search over
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'gamma': [0.001, 0.01, 0.1, 0.5, 1]
}

In [6]:
svc_rbf = SVC(kernel='rbf')

# performing the grid search using cross-validation
rbf_grid = GridSearchCV(svc_rbf, param_grid, cv=3)
rbf_grid.fit(X_train, y_train) # using an smaller dataset

In [7]:
print('Best RBF SVC hyperparameters: {}'.format(rbf_grid.best_params_))

Best RBF hyperparameters: {'C': 100, 'gamma': 0.01}


In [8]:
best_svc = SVC(kernel='rbf', C=rbf_grid.best_params_['C'], gamma=rbf_grid.best_params_['gamma'])
best_svc.fit(X_train, y_train)
y_pred = best_svc.predict(X_test)

test_score = sum(y_pred == y_test) / len(y_test) * 100

print('Test accuracy: {:.2f}%'.format(test_score))

Test accuracy: 96.19%


In [9]:
# part 2 creating my own one-vs-all and comparing with the built-in one-vs-one
class OneVsAllSVM:
    def __init__(self, C, gamma):
        self.C = C
        self.gamma = gamma
        self.svms = []

    def fit(self, X, y):
        for i in range(10):
            # create a binary classifier for digit i
            y_binary = (y_train == i).astype(int)
            
            # train an SVM model with RBF kernel on the binary labels
            svm = SVC(kernel='rbf', C=self.C, gamma=self.gamma)
            svm.fit(X, y_binary)
            self.svms.append(svm)


    def predict(self, X):
        scores = np.zeros((X.shape[0], 10))
        for i, svm in enumerate(self.svms):
            # Compute the confidence score for digit i
            scores[:, i] = svm.decision_function(X)
        return np.argmax(scores, axis=1)

In [12]:
ova = OneVsAllSVM(C=rbf_grid.best_params_['C'], gamma=rbf_grid.best_params_['gamma'])
ova.fit(X_train, y_train)
y_pred_ova = ova.predict(X_test)

test_score_ova = sum(y_pred_ova == y_test) / len(y_test) * 100
print('Test accuracy with one-vs-all: {:.2f}%'.format(test_score_ova))

print(confusion_matrix(y_test, y_pred_ova))

Test accuracy with one-vs-all: 96.34%
[[ 970    0    0    0    1    2    3    2    2    0]
 [   0 1123    4    1    1    1    3    0    2    0]
 [   7    1  985    9    6    0    4   11    8    1]
 [   1    0    5  973    0    9    0    9    7    6]
 [   2    1    2    0  944    0    7    3    2   21]
 [   8    0    1   12    4  848   10    2    5    2]
 [   5    4    2    0    5    6  934    1    1    0]
 [   1   10   12    3    0    0    0  983    1   18]
 [   6    2    5    9    5   11    3    5  926    2]
 [   3    7    1   11   18    6    2   10    3  948]]


In [11]:
ovo = SVC(kernel='rbf', C=rbf_grid.best_params_['C'], gamma=rbf_grid.best_params_['gamma'])

ovo.fit(X_train, y_train)
y_pred_ovo = ovo.predict(X_test)

test_score_ovo = sum(y_pred_ovo == y_test) / len(y_test) * 100
print('Test accuracy with built-in one-vs-one: {:.2f}%'.format(test_score_ovo))

print(confusion_matrix(y_test, y_pred_ova))

Test accuracy with built-in one-vs-one: 96.19%
[[ 970    0    0    0    1    2    3    2    2    0]
 [   0 1123    4    1    1    1    3    0    2    0]
 [   7    1  985    9    6    0    4   11    8    1]
 [   1    0    5  973    0    9    0    9    7    6]
 [   2    1    2    0  944    0    7    3    2   21]
 [   8    0    1   12    4  848   10    2    5    2]
 [   5    4    2    0    5    6  934    1    1    0]
 [   1   10   12    3    0    0    0  983    1   18]
 [   6    2    5    9    5   11    3    5  926    2]
 [   3    7    1   11   18    6    2   10    3  948]]


The one-vs-all approach achieves a higher accuracy compared to the one-vs-one approach. However, this difference in accuracy is quite small and may not be significant.<br>
Comparing the confusion matrices, we see that the one-vs-all classifier has slightly fewer misclassifications that the one-vs-one. However, the difference in misclassifications is also small and may not be significant.<br>
Therefore, we can conclude the both one-vs-one and one-vs-all classifiers perform well on the MNIST dataset with SVM and RBF kernel, with the one-vs-all being slightly better.