In [126]:
#Importing all the necessary libraries
import numpy as np
import pandas as pd
import time
from cvxopt import solvers
from cvxopt import matrix
from scipy.spatial.distance import pdist, cdist, squareform
from sklearn.svm import SVC
from multiprocessing import Process

In [27]:
# Hyper-parameters shared by the classifiers below: C is the upper bound
# placed on the dual variables and gamma the Gaussian kernel coefficient.
C, gamma = 1, 0.05

#Importing data: read the training split into a plain ndarray and time the read
s = time.time()
data = pd.read_csv('../fashion_mnist/train.csv', header=None).to_numpy()
print("Data imported in: ", time.time()-s, "s")

Data imported in:  3.7777106761932373 s


In [28]:
#Function to extract subset of data, for two desired classes
def extract_data(d, d_prime, dataset=None):
    """Gather the feature rows belonging to two classes, first class first.

    Parameters
    ----------
    d, d_prime : scalar
        Label values to select; each is compared against the last column.
    dataset : 2-D ndarray, optional
        Array whose last column holds the label and whose remaining columns
        hold the features. When omitted, the notebook-level ``data`` array is
        used, which keeps existing two-argument calls working unchanged.

    Returns
    -------
    (count1, count2, features)
        Number of rows labelled ``d``, number of rows labelled ``d_prime``,
        and a new array with the ``d`` rows stacked above the ``d_prime``
        rows (label column removed). The array is a copy, so callers may
        scale it in place without touching ``dataset``.
    """
    if dataset is None:
        dataset = data

    labels = dataset[:, -1]
    rows_d = np.flatnonzero(labels == d)
    rows_dp = np.flatnonzero(labels == d_prime)

    features = np.concatenate((dataset[rows_d, :-1], dataset[rows_dp, :-1]), axis=0)
    return rows_d.shape[0], rows_dp.shape[0], features

In [55]:
#Function to train a binary classifier for two desired classes
#Returns the learnt value of alphas and intercept b
def binary_classifier(d, dp):
    """Fit a Gaussian-kernel SVM separating class ``d`` from class ``dp``.

    The dual problem is passed to CVXOPT's QP solver with the dual variables
    boxed to [0, C]. Rows of class ``d`` are labelled -1 and rows of class
    ``dp`` are labelled +1. Training rows come from ``extract_data`` and are
    divided by 255; ``gamma`` and ``C`` are read from the notebook globals.

    Returns
    -------
    (alphas, b)
        ``alphas`` is the solver's solution as a column array, ordered with
        the ``d`` rows first; ``b`` is the scalar intercept.
    """
    n_neg, n_pos, features = extract_data(d, dp)
    features /= 255
    m = n_neg + n_pos

    # Column vector of targets: -1 for class d, then +1 for class dp.
    y = np.concatenate(
        (np.full(n_neg, -1).reshape(-1, 1), np.full(n_pos, 1).reshape(-1, 1)),
        axis=0,
    )

    s = time.time()
    # kernel[i, j] = exp(-gamma * ||x_i - x_j||^2)
    kernel = squareform(pdist(features))
    kernel = np.exp(-gamma*np.square(kernel))
    P_gaussian = matrix((y @ y.T) * kernel)
    print("Kernel Computed for ", d," & ", dp, " in ", time.time()-s,"s")

    # QP pieces: minimise 1/2 a'Pa + q'a  subject to  Ga <= h  and  Aa = b_solver.
    q = matrix(-np.ones((m, 1)))
    G = matrix(np.vstack((np.eye(m)*-1, np.eye(m))))      # -a <= 0 and a <= C
    h = matrix(np.hstack((np.zeros(m), np.ones(m) * C)))
    A = matrix(y.reshape(1, -1), tc='d')                  # sum_i y_i a_i = 0
    b_solver = matrix(np.zeros(1))

    s = time.time()
    solution_gaussian = solvers.qp(P_gaussian, q, G, h, A, b_solver)
    print("Gaussian Solved for ", d," & ", dp, " in ", time.time()-s,"s")
    alphas_gaussian = np.array(solution_gaussian['x'])

    # Intercept: midpoint between the largest kernel response over the -1
    # rows and the smallest response over the +1 rows, negated.
    weighted = y * alphas_gaussian
    highest_neg = np.max(np.matmul(kernel[:n_neg,:], weighted))
    lowest_pos = np.min(np.matmul(kernel[n_neg:,:], weighted))
    b = (-highest_neg-lowest_pos)/2

    return (alphas_gaussian, b)

In [66]:
#Enumerating the desired models of binary classifiers
n = 10
# Every class pair (i, j) with i < j, in the order the nested loops would visit them.
ids = [(i, j) for i in range(n) for j in range(i + 1, n)]

# Containers filled by the training cell: alphas collects (alpha, d, dp)
# tuples and b[d, dp] stores the intercept of the matching classifier.
alphas = []
b = np.zeros((n, n))

In [68]:
batchsize = 1
#Training one binary classifier per class pair listed in ids
#Tried doing it with help of multiprocessing but was of no use, since the CVXOPT solver itself uses 4 threads.
s1 = time.time()

# Start from an empty model list so re-running this cell does not append a
# second copy of every classifier (b is simply overwritten entry by entry).
alphas.clear()

# Ceiling division: a trailing partial batch is still trained when batchsize
# does not divide the number of pairs.
n_batches = -(-len(ids) // batchsize)
for i in range(n_batches):

    print("Going for batch ", i)

    s = time.time()
    for d, dp in ids[i*batchsize:(i+1)*batchsize]:

        alphas_g, b_g = binary_classifier(d,dp)

        alphas.append((alphas_g, d, dp))
        b[d, dp] = b_g
    print("Done for batch ", i, " in ", time.time()-s)
print("One vs One Done in ", time.time()-s1, "s")

Going for batch  0
Kernel Computed for  0  &  1  in  6.877500772476196 s
     pcost       dcost       gap    pres   dres
 0: -1.6492e+02 -6.6070e+03  3e+04  2e+00  5e-15
 1: -1.1407e+02 -3.0686e+03  5e+03  2e-01  4e-15
 2: -1.0459e+02 -7.6952e+02  9e+02  3e-02  5e-15
 3: -1.2684e+02 -3.0203e+02  2e+02  6e-03  4e-15
 4: -1.4133e+02 -2.0101e+02  6e+01  1e-03  3e-15
 5: -1.4945e+02 -1.6809e+02  2e+01  2e-04  3e-15
 6: -1.5144e+02 -1.6280e+02  1e+01  3e-05  3e-15
 7: -1.5364e+02 -1.5728e+02  4e+00  7e-06  3e-15
 8: -1.5443e+02 -1.5550e+02  1e+00  2e-13  3e-15
 9: -1.5478e+02 -1.5492e+02  1e-01  2e-13  3e-15
10: -1.5483e+02 -1.5484e+02  4e-03  1e-13  3e-15
11: -1.5483e+02 -1.5483e+02  6e-05  2e-13  3e-15
Optimal solution found.
Gaussian Solved for  0  &  1  in  25.952956438064575 s
Done for batch  0  in  33.34030222892761
Going for batch  1
Kernel Computed for  0  &  2  in  7.200037002563477 s
     pcost       dcost       gap    pres   dres
 0: -3.7366e+02 -8.4330e+03  4e+04  2e+00  6e-15
 

 3: -1.7295e+02 -4.3323e+02  3e+02  7e-03  5e-15
 4: -1.9647e+02 -2.7544e+02  8e+01  1e-03  5e-15
 5: -2.0746e+02 -2.3515e+02  3e+01  1e-04  5e-15
 6: -2.1226e+02 -2.2191e+02  1e+01  2e-05  5e-15
 7: -2.1376e+02 -2.1815e+02  4e+00  2e-13  6e-15
 8: -2.1495e+02 -2.1581e+02  9e-01  7e-13  5e-15
 9: -2.1517e+02 -2.1539e+02  2e-01  7e-13  6e-15
10: -2.1525e+02 -2.1526e+02  7e-03  9e-14  5e-15
11: -2.1526e+02 -2.1526e+02  1e-04  2e-13  5e-15
Optimal solution found.
Gaussian Solved for  1  &  3  in  27.41170310974121 s
Done for batch  10  in  34.97573447227478
Going for batch  11
Kernel Computed for  1  &  4  in  6.967263221740723 s
     pcost       dcost       gap    pres   dres
 0: -1.2784e+02 -6.4239e+03  3e+04  2e+00  5e-15
 1: -8.0058e+01 -2.9152e+03  4e+03  2e-01  3e-15
 2: -6.9558e+01 -6.4681e+02  7e+02  3e-02  3e-15
 3: -9.4376e+01 -2.5073e+02  2e+02  5e-03  3e-15
 4: -1.0877e+02 -1.5571e+02  5e+01  7e-04  2e-15
 5: -1.1554e+02 -1.3286e+02  2e+01  1e-04  2e-15
 6: -1.1887e+02 -1.2384

 1: -7.7941e+02 -6.5268e+03  8e+03  2e-01  2e-14
 2: -8.3083e+02 -1.8673e+03  1e+03  2e-02  2e-14
 3: -9.4685e+02 -1.2637e+03  3e+02  5e-03  2e-14
 4: -9.9989e+02 -1.0807e+03  8e+01  7e-04  2e-14
 5: -1.0159e+03 -1.0374e+03  2e+01  1e-04  2e-14
 6: -1.0214e+03 -1.0238e+03  2e+00  8e-06  2e-14
 7: -1.0221e+03 -1.0223e+03  2e-01  6e-07  2e-14
 8: -1.0222e+03 -1.0222e+03  7e-03  1e-08  2e-14
 9: -1.0222e+03 -1.0222e+03  2e-04  2e-10  2e-14
Optimal solution found.
Gaussian Solved for  2  &  6  in  21.66515326499939 s
Done for batch  20  in  28.550618410110474
Going for batch  21
Kernel Computed for  2  &  7  in  6.303195953369141 s
     pcost       dcost       gap    pres   dres
 0: -1.0352e+02 -7.4392e+03  4e+04  2e+00  5e-15
 1: -5.2259e+01 -3.7516e+03  6e+03  3e-01  3e-15
 2: -1.0077e+01 -8.8825e+02  1e+03  4e-02  8e-15
 3: -3.5635e+01 -1.9440e+02  2e+02  5e-03  4e-15
 4: -5.4188e+01 -1.0235e+02  5e+01  9e-04  3e-15
 5: -6.2019e+01 -7.9857e+01  2e+01  1e-04  2e-15
 6: -6.5729e+01 -7.112

 4: -1.2171e+02 -1.8069e+02  6e+01  1e-03  2e-15
 5: -1.2850e+02 -1.5673e+02  3e+01  7e-14  2e-15
 6: -1.3314e+02 -1.4204e+02  9e+00  2e-13  2e-15
 7: -1.3446e+02 -1.3849e+02  4e+00  7e-14  2e-15
 8: -1.3538e+02 -1.3651e+02  1e+00  2e-13  2e-15
 9: -1.3573e+02 -1.3581e+02  8e-02  9e-14  2e-15
10: -1.3576e+02 -1.3576e+02  2e-03  2e-13  2e-15
11: -1.3576e+02 -1.3576e+02  6e-05  2e-13  2e-15
Optimal solution found.
Gaussian Solved for  4  &  5  in  26.078190803527832 s
Done for batch  30  in  32.89657497406006
Going for batch  31
Kernel Computed for  4  &  6  in  6.313436508178711 s
     pcost       dcost       gap    pres   dres
 0: -8.9772e+02 -9.6565e+03  4e+04  2e+00  1e-14
 1: -7.1427e+02 -5.9961e+03  8e+03  2e-01  2e-14
 2: -7.5707e+02 -1.7361e+03  1e+03  3e-02  2e-14
 3: -8.6971e+02 -1.1390e+03  3e+02  5e-03  2e-14
 4: -9.1628e+02 -1.0034e+03  9e+01  7e-04  2e-14
 5: -9.3428e+02 -9.5281e+02  2e+01  8e-05  2e-14
 6: -9.3898e+02 -9.4171e+02  3e+00  8e-06  2e-14
 7: -9.3981e+02 -9.399

 8: -3.2708e+02 -3.2710e+02  2e-02  1e-08  3e-15
 9: -3.2708e+02 -3.2709e+02  5e-04  2e-10  3e-15
10: -3.2708e+02 -3.2708e+02  1e-05  3e-12  3e-15
Optimal solution found.
Gaussian Solved for  6  &  8  in  23.83267307281494 s
Done for batch  40  in  30.645123720169067
Going for batch  41
Kernel Computed for  6  &  9  in  6.320991277694702 s
     pcost       dcost       gap    pres   dres
 0: -1.3972e+02 -6.9017e+03  3e+04  2e+00  5e-15
 1: -7.4093e+01 -3.3717e+03  5e+03  2e-01  2e-15
 2: -4.9322e+01 -5.8506e+02  6e+02  2e-02  5e-15
 3: -9.5462e+01 -2.0704e+02  1e+02  3e-03  3e-15
 4: -1.1120e+02 -1.4547e+02  4e+01  6e-04  2e-15
 5: -1.1785e+02 -1.2868e+02  1e+01  6e-05  2e-15
 6: -1.2042e+02 -1.2319e+02  3e+00  8e-06  2e-15
 7: -1.2125e+02 -1.2164e+02  4e-01  4e-07  2e-15
 8: -1.2138e+02 -1.2141e+02  2e-02  7e-09  2e-15
 9: -1.2139e+02 -1.2139e+02  7e-04  2e-10  2e-15
10: -1.2139e+02 -1.2139e+02  1e-05  3e-12  2e-15
Optimal solution found.
Gaussian Solved for  6  &  9  in  23.8401901721

In [109]:
#Saving the learnt values of alphas for all the trained classifiers for future use
# One CSV row per classifier: d, dp, intercept b[d][dp], then the flattened alphas.
# The with-block closes the file on exit, so every row is flushed to disk even
# if a later cell fails.
with open("alpha_b_ovo.csv", "w") as f:
    for alpha, d, dp in alphas:
        alpha_csv = ",".join(alpha.flatten().astype("str"))
        row = ",".join((str(d), str(dp), str(b[d][dp]), alpha_csv))
        f.write(row+"\n")

In [111]:
#Importing Validation and testing data
s = time.time()

data_val = pd.read_csv('../fashion_mnist/val.csv', header=None).to_numpy()
data_test = pd.read_csv('../fashion_mnist/test.csv', header=None).to_numpy()

# Split each array: the last column becomes the label vector, the remaining
# columns the feature matrix.
y_val, data_val = data_val[:,-1], data_val[:,:-1]
y_test, data_test = data_test[:,-1], data_test[:,:-1]

# Same in-place /255 scaling that is applied to the training rows.
data_val /= 255
data_test /= 255
print("Validation and Test Data imported in: ", time.time()-s, "s")

Validation and Test Data imported in:  3.3152167797088623 s


In [183]:
#Function to predict classes of test set, using One vs One Classifier technique
def prediction(alphas, b, test, n_classes=10):
    """Predict a class for every row of ``test`` by one-vs-one voting.

    Parameters
    ----------
    alphas : iterable of (alpha, d, dp)
        One entry per trained pairwise classifier; ``alpha`` is the column
        array of dual variables, ordered like the rows that
        ``extract_data(d, dp)`` returns.
    b : indexable
        ``b[d][dp]`` is the intercept of the classifier for the pair (d, dp).
    test : 2-D ndarray
        Rows to classify, scaled the same way as the training rows.
    n_classes : int, optional
        Number of class columns in the vote/score tables (default 10).

    Returns
    -------
    list
        One predicted class index per test row. The class with the most
        votes wins; ties are broken by the largest accumulated |decision
        value| among the tied classes only.

    Notes
    -----
    Training rows are re-read through ``extract_data`` and ``gamma`` is taken
    from the notebook globals, so both must match what was used in training.
    """
    n_test = test.shape[0]
    scores = np.zeros((n_test, n_classes))
    votes = np.zeros((n_test, n_classes))

    for alpha, d, dp in alphas:
        s = time.time()

        count_d, count_dp, data_d_dp = extract_data(d, dp)
        data_d_dp /= 255

        # Targets in training order: -1 for class d, +1 for class dp.
        y = np.concatenate((np.full(count_d, -1), np.full(count_dp, 1))).reshape(-1, 1)
        alpha_y = (y * alpha)

        # Decision value per test row: sum_i alpha_i y_i K(x_i, x) + b.
        pred = cdist(data_d_dp, test)
        pred = np.exp(-gamma*np.square(pred))
        pred = np.sum(np.multiply(pred, alpha_y), axis = 0)  + b[d][dp]

        index_d = np.argwhere(pred<0).flatten()
        index_dp = np.argwhere(pred>=0).flatten()

        votes[index_d,d] += 1
        votes[index_dp,dp] += 1

        scores[index_d,d] += abs(pred[index_d])
        scores[index_dp,dp] += abs(pred[index_dp])
        print("Done for ",d,"&",dp,"in :", time.time()-s)

    # Restrict the score comparison to the top-voted classes: everything else
    # is masked to -inf so a class outside the tie can never be returned, even
    # if its score happens to equal the best tied score.
    top_voted = votes == votes.max(axis=1, keepdims=True)
    class_pred = np.where(top_voted, scores, -np.inf).argmax(axis=1)

    return list(class_pred)

In [178]:
#Accuracy over a given prediction set and set of actual labels
def accuracy(pred, y):
    """Score predictions against the true labels.

    Returns ``(percent_correct, confusion)`` where ``confusion`` is a 10x10
    float array with ``confusion[p, t]`` counting the samples predicted as
    class ``p`` whose true class is ``t``.
    """
    confusion = np.zeros((10, 10))
    hits = 0
    for idx, actual in enumerate(y):
        guess = pred[idx]
        confusion[int(guess), int(actual)] += 1
        hits += 1 if guess == actual else 0
    return hits*100/(len(y)), confusion

In [180]:
# Evaluate our own one-vs-one implementation on the validation split,
# timing the prediction step separately from the scoring step.
s = time.time()
pred_val = prediction(alphas=alphas, b=b, test=data_val)
print("Time taken to predict Validation set is: ", time.time() - s)
acc_val, confusion_val = accuracy(pred=pred_val, y=y_val)
print(
    "Accuarcy over validation set in multi class classification is: ",
    acc_val,
    "%",
)

In [185]:
# Evaluate our own one-vs-one implementation on the test split,
# timing the prediction step separately from the scoring step.
s = time.time()
pred_test = prediction(alphas=alphas, b=b, test=data_test)
print("Time taken to predict test set is: ", time.time() - s)
acc_test, confusion_test = accuracy(pred=pred_test, y=y_test)
print(
    "Accuarcy over test set in multi class classification is: ",
    acc_test,
    "%",
)

Done for  0 & 1 in : 15.68754768371582
Done for  0 & 2 in : 15.397441148757935
Done for  0 & 3 in : 15.536777973175049
Done for  0 & 4 in : 15.56071400642395
Done for  0 & 5 in : 15.597939014434814
Done for  0 & 6 in : 15.628666162490845
Done for  0 & 7 in : 15.50343918800354
Done for  0 & 8 in : 15.507837533950806
Done for  0 & 9 in : 15.481278896331787
Done for  1 & 2 in : 15.854492425918579
Done for  1 & 3 in : 15.53148078918457
Done for  1 & 4 in : 15.532623052597046
Done for  1 & 5 in : 15.86173677444458
Done for  1 & 6 in : 15.804723262786865
Done for  1 & 7 in : 15.762629508972168
Done for  1 & 8 in : 15.569353580474854
Done for  1 & 9 in : 15.818818092346191
Done for  2 & 3 in : 15.585557222366333
Done for  2 & 4 in : 15.48154616355896
Done for  2 & 5 in : 15.728162288665771
Done for  2 & 6 in : 15.538205862045288
Done for  2 & 7 in : 15.805870532989502
Done for  2 & 8 in : 15.765530347824097
Done for  2 & 9 in : 15.246947050094604
Done for  3 & 4 in : 15.53453278541565
Done fo

In [190]:
#Confusion matrix for test and validation set in our implementation
# accuracy() increments confusion[pred, y], so rows index the predicted class
# and columns index the true class.
print("The confusion matrix for validation set is:\n", confusion_val,"\n")
print("The confusion matrix for test set is:\n", confusion_test)

The confusion matrix for validation set is:
 [[201.   0.   2.  11.   0.   0.  19.   0.   0.   0.]
 [  2. 240.   0.   7.   2.   0.   0.   0.   0.   0.]
 [  1.   2. 208.   0.  30.   0.  27.   0.   1.   0.]
 [  4.   2.   1. 197.   5.   1.   1.   0.   0.   0.]
 [  0.   0.  13.   6. 185.   0.  11.   0.   0.   0.]
 [  0.   0.   0.   0.   0. 226.   0.  24.   0.   2.]
 [ 38.   3.  14.  20.  19.   0. 184.   0.   2.   0.]
 [  0.   0.   0.   0.   0.   1.   0. 199.   2.   4.]
 [  4.   3.  12.   9.   9.  16.   8.   6. 245.   5.]
 [  0.   0.   0.   0.   0.   6.   0.  21.   0. 239.]] 

The confusion matrix for test set is:
 [[404.   0.   0.  17.   0.   1.  52.   0.   1.   0.]
 [  0. 484.   0.  10.   1.   0.   1.   0.   0.   0.]
 [  7.   6. 414.   2.  55.   0.  52.   0.   1.   0.]
 [  7.   2.   3. 410.  12.   0.   5.   0.   0.   0.]
 [  0.   0.  26.   6. 366.   0.  20.   0.   0.   0.]
 [  0.   0.   0.   0.   0. 432.   0.  48.   0.   5.]
 [ 65.   6.  44.  39.  51.   0. 351.   0.   3.   0.]
 [  0.   0. 

In [198]:
#Enumerating the desired models of binary classifiers
n=10
ids = [(i, j) for i in range(n) for j in range(i+1, n)]

#One vs One SVM Sklearn
#Training one binary classifier per class pair using Sklearn library
batchsize = 1
clfs = []
s1 = time.time()

# Ceiling division: a trailing partial batch is still trained when batchsize
# does not divide the number of pairs.
n_batches = -(-len(ids) // batchsize)
for i in range(n_batches):

    print("Going for batch ", i)

    s = time.time()
    for d, dp in ids[i*batchsize:(i+1)*batchsize]:

        count_d, count_dp, data_d_dp = extract_data(d, dp)
        data_d_dp /= 255

        # Same labelling as the CVXOPT models: -1 for class d, +1 for class dp.
        y = np.concatenate((np.full(count_d, -1), np.full(count_dp, 1)))

        # Take C and gamma from the notebook's hyper-parameters so this
        # baseline stays comparable with the CVXOPT models when they are tuned.
        clf = SVC(kernel='rbf', C=C, gamma=gamma)
        clf.fit(data_d_dp, y)

        clfs.append((clf,d,dp))
    print("Done for batch ", i, " in ", time.time()-s)
print("One vs One SKlearn Done in ", time.time()-s1, "s")


Going for batch  0
Done for batch  0  in  4.757843494415283
Going for batch  1
Done for batch  1  in  7.285531520843506
Going for batch  2
Done for batch  2  in  8.868611097335815
Going for batch  3
Done for batch  3  in  6.176299333572388
Going for batch  4
Done for batch  4  in  5.466998338699341
Going for batch  5
Done for batch  5  in  12.802596092224121
Going for batch  6
Done for batch  6  in  3.223029851913452
Going for batch  7
Done for batch  7  in  7.0697338581085205
Going for batch  8
Done for batch  8  in  4.445237159729004
Going for batch  9
Done for batch  9  in  4.551787614822388
Going for batch  10
Done for batch  10  in  5.24731183052063
Going for batch  11
Done for batch  11  in  4.1707375049591064
Going for batch  12
Done for batch  12  in  4.582836627960205
Going for batch  13
Done for batch  13  in  5.614081859588623
Going for batch  14
Done for batch  14  in  2.219125270843506
Going for batch  15
Done for batch  15  in  5.326971769332886
Going for batch  16
Done f

In [203]:
#Function to predict classes of test set, using One vs One Classifier technique in all the pairwise classifiers
def prediction_sklearn(clfs, test, n_classes=10):
    """Predict a class for every row of ``test`` by one-vs-one voting.

    Parameters
    ----------
    clfs : iterable of (clf, d, dp)
        ``clf`` must expose ``decision_function(test)`` returning one value
        per row; a negative value is a vote for class ``d``, a non-negative
        value a vote for class ``dp``.
    test : 2-D ndarray
        Rows to classify.
    n_classes : int, optional
        Number of class columns in the vote/score tables (default 10).

    Returns
    -------
    list
        One predicted class index per test row. The class with the most
        votes wins; ties are broken by the largest accumulated |decision
        value| among the tied classes only.
    """
    n_test = test.shape[0]
    scores = np.zeros((n_test, n_classes))
    votes = np.zeros((n_test, n_classes))

    for clf, d, dp in clfs:
        s = time.time()

        pred = clf.decision_function(test)
        index_d = np.argwhere(pred<0).flatten()
        index_dp = np.argwhere(pred>=0).flatten()

        votes[index_d,d] += 1
        votes[index_dp,dp] += 1

        scores[index_d,d] += abs(pred[index_d])
        scores[index_dp,dp] += abs(pred[index_dp])
        print("Done for ",d,"&",dp,"in :", time.time()-s)

    # Restrict the score comparison to the top-voted classes: everything else
    # is masked to -inf so a class outside the tie can never be returned, even
    # if its score happens to equal the best tied score.
    top_voted = votes == votes.max(axis=1, keepdims=True)
    class_pred = np.where(top_voted, scores, -np.inf).argmax(axis=1)

    return list(class_pred)

In [204]:
# Evaluate the scikit-learn one-vs-one models on the validation split,
# timing the prediction step separately from the scoring step.
s = time.time()
pred_val_sklearn = prediction_sklearn(clfs=clfs, test=data_val)
print("Time taken to predict Validation set is: ", time.time() - s)
acc_val_sklearn, confusion_val_sklearn = accuracy(pred=pred_val_sklearn, y=y_val)
print(
    "Accuarcy over validation set in multi class classification is: ",
    acc_val_sklearn,
    "%",
)

Done for  0 & 1 in : 2.050034284591675
Done for  0 & 2 in : 3.76025390625
Done for  0 & 3 in : 4.47006893157959
Done for  0 & 4 in : 3.561756134033203
Done for  0 & 5 in : 3.1371910572052
Done for  0 & 6 in : 7.312145948410034
Done for  0 & 7 in : 1.5327484607696533
Done for  0 & 8 in : 3.8217577934265137
Done for  0 & 9 in : 2.220465898513794
Done for  1 & 2 in : 2.0789496898651123
Done for  1 & 3 in : 2.452998161315918
Done for  1 & 4 in : 1.8658175468444824
Done for  1 & 5 in : 1.9528255462646484
Done for  1 & 6 in : 2.4247355461120605
Done for  1 & 7 in : 0.9813377857208252
Done for  1 & 8 in : 2.6858839988708496
Done for  1 & 9 in : 1.6202025413513184
Done for  2 & 3 in : 3.161393880844116
Done for  2 & 4 in : 6.843564987182617
Done for  2 & 5 in : 2.85433030128479
Done for  2 & 6 in : 7.379079818725586
Done for  2 & 7 in : 1.548896074295044
Done for  2 & 8 in : 4.248725175857544
Done for  2 & 9 in : 2.5789008140563965
Done for  3 & 4 in : 4.2903454303741455
Done for  3 & 5 in : 2

In [205]:
#Accuracy over test set in Sklearn implementation
s = time.time()
pred_test_sklearn = prediction_sklearn(clfs, data_test)
print("Time taken to predict test set is: ", time.time()-s)
acc_test_sklearn, confusion_test_sklearn = accuracy(pred_test_sklearn, y_test)
# This figure is computed on the test split, so the label must say "test set".
print("Accuarcy over test set in multi class classification is: ", acc_test_sklearn, "%")

Done for  0 & 1 in : 4.20386266708374
Done for  0 & 2 in : 7.626614809036255
Done for  0 & 3 in : 8.920718908309937
Done for  0 & 4 in : 5.93738579750061
Done for  0 & 5 in : 5.492050886154175
Done for  0 & 6 in : 13.585835695266724
Done for  0 & 7 in : 2.956488609313965
Done for  0 & 8 in : 7.559519052505493
Done for  0 & 9 in : 4.095263242721558
Done for  1 & 2 in : 4.0502769947052
Done for  1 & 3 in : 5.003556966781616
Done for  1 & 4 in : 3.7408220767974854
Done for  1 & 5 in : 3.896029472351074
Done for  1 & 6 in : 4.800410747528076
Done for  1 & 7 in : 1.9849026203155518
Done for  1 & 8 in : 4.938264846801758
Done for  1 & 9 in : 3.0346572399139404
Done for  2 & 3 in : 6.11798357963562
Done for  2 & 4 in : 13.485262870788574
Done for  2 & 5 in : 5.732669115066528
Done for  2 & 6 in : 14.648459911346436
Done for  2 & 7 in : 3.0574026107788086
Done for  2 & 8 in : 8.11376667022705
Done for  2 & 9 in : 4.340226650238037
Done for  3 & 4 in : 7.398317575454712
Done for  3 & 5 in : 4.8

In [206]:
#Confusion matrix for test and validation set in Sklearn's implementation
# accuracy() increments confusion[pred, y], so rows index the predicted class
# and columns index the true class. The first matrix is the validation one,
# so its label says "validation set".
print("The confusion matrix for validation set in sklearn is:\n", confusion_val_sklearn,"\n")
print("The confusion matrix for test set in sklearn is:\n", confusion_test_sklearn)

The confusion matrix for validation set in sklearn is:
 [[212.   0.   5.   6.   1.   0.  33.   0.   0.   0.]
 [  0. 237.   0.   0.   1.   0.   0.   0.   0.   0.]
 [  1.   3. 205.   0.  24.   0.  28.   0.   1.   0.]
 [  8.   7.   3. 228.   8.   1.   4.   0.   1.   0.]
 [  0.   0.  19.   6. 200.   0.  19.   0.   1.   0.]
 [  0.   0.   0.   0.   0. 241.   0.   8.   0.   5.]
 [ 26.   2.  13.   9.  15.   0. 165.   0.   1.   0.]
 [  0.   0.   0.   0.   0.   2.   0. 230.   2.   8.]
 [  3.   1.   5.   1.   1.   1.   1.   1. 244.   2.]
 [  0.   0.   0.   0.   0.   5.   0.  11.   0. 235.]] 

The confusion matrix for test set in sklearn is:
 [[432.   1.   5.  12.   2.   0.  80.   0.   1.   0.]
 [  0. 482.   0.   0.   1.   0.   0.   0.   0.   0.]
 [  5.   4. 410.   3.  39.   0.  53.   0.   1.   0.]
 [ 12.   9.   7. 457.  14.   0.   9.   0.   1.   0.]
 [  3.   0.  37.   9. 399.   0.  34.   0.   2.   0.]
 [  0.   0.   0.   0.   0. 473.   0.  14.   2.  11.]
 [ 38.   4.  33.  14.  39.   0. 317.   0.  