In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelBinarizer, StandardScaler

Decision function: $f(x) = b + x c^T$, where $c$ is the weight vector and $b$ is the bias (intercept).

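Objective minimized by gradient descent (squared-norm regularizer on the $n$ weights plus the $\lambda$-weighted hinge loss over the $M$ training samples):

\begin{align*}
\sum_{j=1}^n c_j^2 + \lambda \sum_{i=1}^M \max\left(0,\, 1-y^{(i)}\left(b+x^{(i)}c^T\right)\right)
\end{align*}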

In [2]:
def SVM(X, y, epochs, lr, lam):
    """Fit a linear SVM with full-batch gradient descent.

    The quantity being minimised is
    ``sum(c**2) + lam * sum(max(0, 1 - y * (b + X.dot(c))))``.

    Parameters
    ----------
    X : ndarray with at least two dimensions, one row per sample.
    y : ndarray of targets, one per row of ``X`` (the hinge loss is
        written for -1 / +1 targets; expected to be 1-D).
    epochs : number of gradient steps to take.
    lr : step size applied to both gradients.
    lam : weight of the hinge-loss term relative to the regulariser.

    Returns
    -------
    (b, c) : the bias and the weight vector (one weight per column of ``X``).
    """
    weights = np.ones(X.shape[1])
    bias = 1.
    for _ in range(epochs):
        slack = 1. - y * (np.dot(X, weights) + bias)
        # Samples whose slack is at most 1e-5 count as satisfying the margin
        # and contribute nothing to the hinge-loss gradient.
        violating = ~(slack <= 1e-5)
        grad_w = 2. * weights - lam * np.dot(y[violating], X[violating])
        grad_b = -lam * np.sum(y[violating])
        weights = weights - lr * grad_w
        bias = bias - lr * grad_b
    return bias, weights

def accuracy(ypred, yreal):
    """Return the fraction of entries of ``ypred`` that equal ``yreal``.

    The elementwise matches are counted and divided by ``len(yreal)``.
    """
    n_matches = np.sum(ypred == yreal)
    n_total = float(len(yreal))
    return n_matches / n_total

In [3]:
# Read the four MNIST CSV files, in this order, as NumPy arrays.
Xtrain, ytrain, Xtest, ytest = (
    pd.read_csv(csv_path).values
    for csv_path in (
        "MNIST_X_train.csv",
        "MNIST_Y_train.csv",
        "MNIST_X_test.csv",
        "MNIST_Y_test.csv",
    )
)

print("The shape of Xtrain is {}".format(Xtrain.shape))
print("The shape of ytrain is {}".format(ytrain.shape))
print("The shape of Xtest is {}".format(Xtest.shape))
print("The shape of ytest is {}".format(ytest.shape))

# The label arrays are 2-D as loaded (see the printed shapes); collapse them to 1-D.
ytrain = ytrain.flatten()
ytest = ytest.flatten()

The shape of Xtrain is (2000, 784)
The shape of ytrain is (2000, 1)
The shape of Xtest is (500, 784)
The shape of ytest is (500, 1)


In [7]:
# One indicator column per class, with -1 (instead of 0) marking "not this class"
# so that each column can be used directly as an SVM target.
lb = LabelBinarizer(neg_label=-1).fit(ytrain)
ytrain_ohe = lb.transform(ytrain)
ytest_ohe = lb.transform(ytest)

In [8]:
# Feature scaling: the statistics are estimated on the training set only and
# reused unchanged for the test set.
scaler = StandardScaler().fit(Xtrain)
scaled_Xtrain = scaler.transform(Xtrain)
scaled_Xtest = scaler.transform(Xtest)

epochs = 100
lr = 0.03
lam = 1. / 200  # lambda; float literal so integer division can never truncate it to 0

# Take the classes from the fitted binarizer instead of hard-coding 10.
classes = lb.classes_
n_classes = ytrain_ohe.shape[1]
assert n_classes == len(classes), "one-vs-all expects one indicator column per class"

# preds[s, k] holds the decision value of the "class k vs rest" model on test sample s
preds = np.zeros((scaled_Xtest.shape[0], n_classes))
# one vs all approach
for i in range(n_classes):
    # Train class i vs rest
    b, c = SVM(scaled_Xtrain, ytrain_ohe[:, i], epochs, lr, lam)
    preds[:, i] = np.dot(scaled_Xtest, c) + b  # raw decision values, compared across classes below

    # Labels predicted on the training data: non-negative decision value -> +1, otherwise -1
    pred_labels = np.where(np.dot(scaled_Xtrain, c) + b >= 0., 1, -1)
    # compute training accuracy (predictions first, to match accuracy(ypred, yreal))
    score = accuracy(pred_labels, ytrain_ohe[:, i])
    print("Training class {} vs all is complete. The training accuracy is {:.2f}%".format(classes[i], score*100))

# The winning column index is mapped back to the actual class label.
ypred = classes[np.argmax(preds, axis=1)]

score = accuracy(ypred, ytest)
print("The accuracy of multiclass classification is {:.2f}%".format(score*100))

Training class 0 vs all is complete. The training accuracy is 98.65%
Training class 1 vs all is complete. The training accuracy is 99.10%
Training class 2 vs all is complete. The training accuracy is 98.35%
Training class 3 vs all is complete. The training accuracy is 97.25%
Training class 4 vs all is complete. The training accuracy is 98.90%
Training class 5 vs all is complete. The training accuracy is 97.30%
Training class 6 vs all is complete. The training accuracy is 98.45%
Training class 7 vs all is complete. The training accuracy is 98.25%
Training class 8 vs all is complete. The training accuracy is 96.25%
Training class 9 vs all is complete. The training accuracy is 93.85%
The accuracy of multiclass classification is 88.40%


In [3]:
# Scratch check: np.dot of a 1-D vector with a 2-D matrix is a vector-matrix
# product, i.e. a weighted sum of the matrix rows.
a = np.arange(2)
B = np.arange(4).reshape(2, 2)
np.dot(a, B)

array([2, 3])

In [29]:
# A more concise version of one vs one classification
# Feature scaling: fit once on the training set; the test set reuses the same statistics.
scaler = StandardScaler().fit(Xtrain)
scaled_Xtrain = scaler.transform(Xtrain)
scaled_Xtest = scaler.transform(Xtest)

epochs = 100
lr = 0.03
lam = 1. / 200  # lambda; float literal so integer division can never truncate it to 0

# Take the classes from the fitted binarizer instead of hard-coding 9 / 10.
classes = lb.classes_
n_classes = ytrain_ohe.shape[1]
assert n_classes == len(classes), "one-vs-one expects one indicator column per class"

# labels[s, k] counts the pairwise votes that test sample s received for class column k
labels = np.zeros((scaled_Xtest.shape[0], n_classes))
# one vs one approach: visit every unordered pair (i, j) with i < j exactly once
for i in range(n_classes - 1):
    for j in range(i + 1, n_classes):
        # Keep only the training rows that belong to class i or class j.
        in_pair = (ytrain_ohe[:, i] == 1) | (ytrain_ohe[:, j] == 1)
        data = scaled_Xtrain[in_pair]
        target = ytrain_ohe[in_pair, i]  # +1 for class i, -1 for class j
        # Train class i vs class j
        b, c = SVM(data, target, epochs, lr, lam)

        # Labels predicted on the training subset (same 1e-5 threshold as the votes below)
        labels_training_sets = np.where(np.dot(data, c) + b >= 1e-5, 1, -1)
        # compute training accuracy (predictions first, to match accuracy(ypred, yreal))
        score = accuracy(labels_training_sets, target)
        print("Training class {} vs class {} is complete. The training accuracy is {:.2f}%".format(classes[i], classes[j], score*100))

        # Each test sample votes for class i or for class j.
        pred = np.dot(scaled_Xtest, c) + b
        labels[pred >= 1e-5, i] += 1
        labels[pred < 1e-5, j] += 1

# The column with the most votes wins; map its index back to the class label.
ypred = classes[np.argmax(labels, axis=1)]

score = accuracy(ypred, ytest)
print("The accuracy of multiclass classification is {:.2f}%".format(score*100))

Training class 0 vs class 1 is complete. The training accuracy is 100.00%
Training class 0 vs class 2 is complete. The training accuracy is 99.05%
Training class 0 vs class 3 is complete. The training accuracy is 98.05%
Training class 0 vs class 4 is complete. The training accuracy is 99.75%
Training class 0 vs class 5 is complete. The training accuracy is 99.23%
Training class 0 vs class 6 is complete. The training accuracy is 100.00%
Training class 0 vs class 7 is complete. The training accuracy is 99.76%
Training class 0 vs class 8 is complete. The training accuracy is 98.68%
Training class 0 vs class 9 is complete. The training accuracy is 99.49%
Training class 1 vs class 2 is complete. The training accuracy is 98.00%
Training class 1 vs class 3 is complete. The training accuracy is 97.05%
Training class 1 vs class 4 is complete. The training accuracy is 99.54%
Training class 1 vs class 5 is complete. The training accuracy is 98.80%
Training class 1 vs class 6 is complete. The trai

In [32]:
# Scratch check: len() of a boolean mask is the mask's total length,
# not the number of True entries. Uses whatever value `i` currently holds.
class_i_mask = ytrain_ohe[:, i] == 1
len(class_i_mask)

2000

In [31]:
# Scratch check: adding two float arrays combines them element by element.
np.add(np.ones(2), np.zeros(2))

array([1., 1.])