In [1]:
#Importing all the necessary libraries
import numpy as np
import pandas as pd
import time
from cvxopt import solvers
from cvxopt import matrix
from scipy.spatial.distance import pdist, cdist, squareform
from sklearn.svm import SVC

In [2]:
#Function to extract subset of data, for two desired classes
def extract_data(data, d, d_prime):
    
    index_d = np.argwhere(data[:,-1]==d)[:,0]
    index_dp = np.argwhere(data[:,-1]==d_prime)[:,0]
    
    count1 = index_d.shape[0]
    count2 = index_dp.shape[0]
    
    return count1, count2, np.concatenate( (data[index_d, :-1], data[index_dp, :-1]), axis = 0)

In [3]:
#Importing data
s = time.time()
data = pd.read_csv('../fashion_mnist/train.csv', header=None)
data = data.to_numpy()
print("Data imported in: ", time.time()-s, "s")

count_5, count_6, data_5_6 = extract_data(data, 5, 6)
data_5_6 /= 255

y_5 = np.full(count_5, -1).reshape(-1,1)
y_6 = np.full(count_6, 1).reshape(-1,1)

y = np.concatenate((y_5, y_6), axis = 0)
y_5 = []
y_6 = []

Data imported in:  10.239705562591553 s


In [4]:
#Value of C
C = 1
#Formulating the parameters of CVXOPT Quadratic Problem solver for Linear Kernel
X = np.multiply(y, data_5_6)
P = matrix(np.matmul(X, X.T))
q = matrix(-np.ones((count_5+count_6, 1)))
G = matrix(np.concatenate((np.eye(count_5+count_6)*-1,np.eye(count_5+count_6)), axis = 0))
h = matrix(np.concatenate((np.zeros(count_5+count_6), np.ones(count_5+count_6) * C), axis = None))
b_solver = matrix(np.zeros(1))
A = matrix(y.reshape(1, -1), tc='d')

In [5]:
#Solving for alphas in Linear Kernel
s = time.time()
solution = solvers.qp(P, q, G, h, A, b_solver)
print("Solved in ", time.time()-s,"s")
alphas = np.array(solution['x'])

     pcost       dcost       gap    pres   dres
 0: -2.8076e+02 -8.3088e+03  4e+04  3e+00  2e-12
 1: -1.5849e+02 -4.4639e+03  9e+03  4e-01  1e-12
 2: -5.1865e+01 -1.2686e+03  2e+03  9e-02  8e-13
 3: -2.1440e+01 -7.0693e+02  1e+03  4e-02  4e-13
 4: -3.8988e+00 -1.9875e+02  3e+02  8e-03  2e-13
 5: -1.3115e+00 -1.0143e+02  2e+02  4e-03  8e-14
 6: -4.5724e-01 -4.1303e+01  6e+01  1e-03  5e-14
 7: -2.6273e-02 -2.0037e+01  3e+01  5e-04  3e-14
 8: -3.8091e-01 -7.1544e+00  8e+00  1e-04  3e-14
 9: -6.0363e-01 -3.4105e+00  3e+00  2e-16  3e-14
10: -9.8611e-01 -2.2986e+00  1e+00  4e-16  3e-14
11: -1.1165e+00 -2.0484e+00  9e-01  3e-15  2e-14
12: -1.2839e+00 -1.6996e+00  4e-01  1e-15  2e-14
13: -1.3432e+00 -1.5701e+00  2e-01  3e-15  3e-14
14: -1.4108e+00 -1.4693e+00  6e-02  3e-15  3e-14
15: -1.4351e+00 -1.4385e+00  3e-03  4e-16  3e-14
16: -1.4367e+00 -1.4367e+00  5e-05  3e-15  3e-14
17: -1.4367e+00 -1.4367e+00  6e-07  1e-15  3e-14
Optimal solution found.
Solved in  47.44748497009277 s


In [33]:
#Support Vectors in Linear Kernel
np.argwhere(alphas>0.0000001).shape

(86, 2)

In [26]:
#Getting value of w and b from alpha
w = (np.matmul((y * alphas).T , data_5_6)).reshape(-1,1)
b_0 = np.max(np.matmul(data_5_6[:count_5,:], w))
b_1 = np.min(np.matmul(data_5_6[count_5:,:], w))
b_intercept = (-b_0-b_1)/2

In [27]:
#Function to calculate accuracy of a given prediction
def accuracy(pred, y):
    count = 0
    for i in range(pred.shape[0]):
        if(pred[i][0]==y[i][0]):
            count += 1
    
    return count*100/(pred.shape[0])

#Function to predict classes for a given test set x in Linear Kernel, w and b are passed as parameters
def prediction(w, b, x):
    theta = np.matmul(x,w) + b
    theta[theta<0] = -1
    theta[theta>=0] = 1
    return theta

In [28]:
#Importing Validation and testing data
s = time.time()

data_val = pd.read_csv('../fashion_mnist/val.csv', header=None)
data_val = data_val.to_numpy()

data_test = pd.read_csv('../fashion_mnist/test.csv', header=None)
data_test = data_test.to_numpy()

count_val5, count_val6, data_val = extract_data(data_val, 5, 6)
count_test5, count_test6, data_test = extract_data(data_test, 5, 6)

data_val /= 255
data_test /= 255
print("Validation and Test Data imported in: ", time.time()-s, "s")

Validation and Test Data imported in:  3.483920097351074 s


In [29]:
#Accuracy of Validation set in Linear Kernel

y_val5 = np.full(count_val5, -1).reshape(-1,1)
y_val6 = np.full(count_val6, 1).reshape(-1,1)

y_val = np.concatenate((y_val5, y_val6), axis = 0)
y_val5 = []
y_val6 = []

pred_val = prediction(w, b_intercept, data_val)
print("Accuarcy over validation set is: ", accuracy(pred_val, y_val), "%")

Accuarcy over validation set is:  99.8 %


In [30]:
#Accuracy of Test set in Linear kernel

y_test5 = np.full(count_test5, -1).reshape(-1,1)
y_test6 = np.full(count_test6, 1).reshape(-1,1)

y_test = np.concatenate((y_test5, y_test6), axis = 0)

pred_test = prediction(w, b_intercept, data_test)
print("Accuarcy over test set is: ", accuracy(pred_test, y_test), "%")

Accuarcy over test set is:  99.6 %


In [49]:
#The value of W and intercept b in our implementation of Linear Kernel
w, b_intercept

(array([[ 0.00000000e+00],
        [ 1.51784338e-12],
        [ 8.14385678e-04],
        [ 4.07192889e-04],
        [ 2.75384125e-03],
        [ 7.38448807e-03],
        [ 6.31474886e-03],
        [ 1.77914858e-02],
        [ 4.82141893e-02],
        [ 2.55315885e-02],
        [ 3.82036852e-02],
        [ 1.32396403e-01],
        [ 7.42205800e-02],
        [ 1.31544154e-01],
        [ 1.54509824e-01],
        [ 1.34075845e-01],
        [ 7.69645445e-02],
        [ 3.40328231e-02],
        [ 1.21710465e-02],
        [-1.23804941e-02],
        [ 3.23233322e-02],
        [-2.55740299e-02],
        [-1.57690090e-02],
        [ 1.13719636e-02],
        [-2.79385228e-05],
        [-1.96411542e-10],
        [-1.14020363e-10],
        [-5.73762131e-12],
        [ 6.81762155e-13],
        [ 2.81781258e-12],
        [ 4.96657896e-11],
        [ 1.96995863e-10],
        [ 3.17245043e-03],
        [ 3.39435914e-02],
        [ 5.32311392e-02],
        [ 7.15217545e-02],
        [ 6.80748419e-02],
 

In [31]:
#Formulating the parameters of CVXOPT Quadratic Problem solver for Gaussian Kernel
s = time.time()
P_g = squareform(pdist(data_5_6))
P_g = np.exp(-0.05*np.square(P_g))
P_gaussian = matrix(np.multiply(np.matmul(y, y.T), P_g))
print("Kernel Computed in ", time.time()-s,"s")

Kernel Computed in  9.625320196151733 s


In [34]:
#Solving for alphas in Gaussian Kernel
s = time.time()
solution_gaussian = solvers.qp(P_gaussian, q, G, h, A, b_solver)
print("Gaussian Solved in ", time.time()-s,"s")
alphas_gaussian = np.array(solution_gaussian['x'])

     pcost       dcost       gap    pres   dres
 0: -2.0533e+02 -7.5506e+03  3e+04  2e+00  5e-15
 1: -1.2813e+02 -3.9857e+03  6e+03  2e-01  3e-15
 2: -9.5987e+01 -7.8644e+02  8e+02  2e-02  6e-15
 3: -1.4760e+02 -3.3431e+02  2e+02  5e-03  3e-15
 4: -1.6699e+02 -2.3327e+02  7e+01  1e-03  2e-15
 5: -1.7565e+02 -2.0037e+02  3e+01  2e-04  2e-15
 6: -1.7993e+02 -1.8778e+02  8e+00  3e-05  2e-15
 7: -1.8146e+02 -1.8393e+02  2e+00  2e-13  2e-15
 8: -1.8213e+02 -1.8255e+02  4e-01  2e-13  2e-15
 9: -1.8227e+02 -1.8229e+02  3e-02  4e-14  2e-15
10: -1.8228e+02 -1.8228e+02  4e-04  7e-14  2e-15
11: -1.8228e+02 -1.8228e+02  7e-06  4e-14  2e-15
Optimal solution found.
Gaussian Solved in  29.208707094192505 s


In [43]:
#Support Vectors of Gaussian Kernel
np.argwhere(alphas_gaussian>0.0000001).shape

(1161, 2)

In [44]:
#Solving for alphas in Gaussian Kernel
alpha_y = (y * alphas_gaussian)
b_0 = np.max(np.matmul(P_g[:count_5,:], alpha_y))
b_1 = np.min(np.matmul(P_g[count_5:,:], alpha_y))
b_intercept_g = (-b_0-b_1)/2

In [45]:
#Function to predict classes in Gaussian Kernel, alpha, b and training data set is passed as parameters 
def prediction_g(alpha_y, data, test, b, gamma):
    pred = cdist(data, test)
    pred = np.exp(-gamma*np.square(pred))
    pred = np.sum(np.multiply(pred, alpha_y), axis = 0)  + b
    pred[pred>=0] = 1
    pred[pred<0] = -1
    return pred

def accuracy_g(pred, y):
    count = 0
    for i in range(len(pred)):
        if(pred[i]==y[i][0]):
            count += 1
    return count*100/len(pred)

In [67]:
#Value of alpha_y and intercept b in our implementation of Gaussian Kernel
alpha_y, b_intercept_g

(array([[-8.19495925e-02],
        [-1.66174610e-01],
        [-1.32478437e-09],
        ...,
        [ 2.62774633e-09],
        [ 3.61982875e-01],
        [ 5.17831648e-10]]), 0.08654618272366693)

In [52]:
#Accuracy of Validation set in Gaussian Kernel
pred_val_g = prediction_g(alpha_y, data_5_6, data_val, b_intercept_g, 0.05)
print("Accuarcy over validation set in gaussian is: ", accuracy_g(pred_val_g, y_val), "%")

Accuarcy over validation set in gaussian is:  99.6 %


In [53]:
#Accuracy of Test set in Gaussian Kernel
pred_test_g = prediction_g(alpha_y, data_5_6, data_test, b_intercept_g, 0.05)
print("Accuarcy over test set in gaussian is: ", accuracy_g(pred_test_g, y_test), "%")

Accuarcy over test set in gaussian is:  99.8 %


In [54]:
#Training two different models of Sklearn's SVM using both linear and gaussian kernel
clf_l = SVC(kernel='linear')
clf_g = SVC(kernel='rbf',gamma=0.05)

s = time.time()
clf_l.fit(data_5_6, y.flatten())
print("Linear model trained in ", time.time()-s, "s")

s = time.time()
clf_g.fit(data_5_6, y.flatten())
print("Gaussian model trained in ", time.time()-s, "s")

val_sklearn_linear = clf_l.score(data_val, y_val.flatten())
test_sklearn_linear = clf_l.score(data_test, y_test.flatten())

val_sklearn_gaussian = clf_g.score(data_val, y_val.flatten())
test_sklearn_gaussian = clf_g.score(data_test, y_test.flatten())

Linear model trained in  0.5143616199493408 s
Gaussian model trained in  5.68053412437439 s


In [59]:
#The value of W and intercept b in Sklearn's SVM implementation using Linear Kernel
clf_l.coef_[0], clf_l.intercept_

(array([ 0.00000000e+00,  0.00000000e+00,  8.14655870e-04,  4.07327935e-04,
         2.75429824e-03,  7.39204991e-03,  6.31396000e-03,  1.77809297e-02,
         4.82158760e-02,  2.55387815e-02,  3.81927421e-02,  1.32365117e-01,
         7.42228073e-02,  1.31566402e-01,  1.54549375e-01,  1.34006424e-01,
         7.69225545e-02,  3.40269244e-02,  1.21603267e-02, -1.23953066e-02,
         3.23079776e-02, -2.56100248e-02, -1.57920201e-02,  1.13826318e-02,
        -1.73996638e-05,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         3.17386205e-03,  3.39477723e-02,  5.32374582e-02,  7.15180426e-02,
         6.80603343e-02,  9.84392131e-02,  7.78393153e-02,  1.23828573e-01,
         2.05341902e-01,  2.05785517e-01,  1.71152287e-01,  2.97258176e-01,
         5.78978019e-02, -2.03615457e-02, -6.88319822e-02, -2.97549603e-02,
        -4.82639496e-02, -9.40746212e-02, -3.60782900e-02,  1.47226748e-02,
         9.9

In [66]:
#Value of alpha_y and intercept b in Sklearn's SVM implementation using Gaussian Kernel
clf_g.dual_coef_, clf_g.intercept_

(array([[-0.08191024, -0.16602487, -0.55559743, ...,  0.94064027,
          0.31722276,  0.36251044]]), array([-0.12264118]))

In [70]:
#Support Vectors in Sklearn's SVM implementations
(clf_l.support_vectors_).shape, (clf_g.support_vectors_).shape 

((85, 784), (1123, 784))

In [62]:
#Accuracy over Validation and test sets in Sklearn's implmentations of linear and gaussian kernels

print("Accuarcy over validation set in sklearn_linear is: ", 100*val_sklearn_linear, "%")
print("Accuarcy over test set in sklearn_linear is: ", 100*test_sklearn_linear, "%")
print("Accuarcy over validation set in sklearn_gaussian is: ", 100*val_sklearn_gaussian, "%")
print("Accuarcy over test set in sklearn_gaussian is: ", 100*test_sklearn_gaussian, "%")

Accuarcy over validation set in sklearn_linear is:  99.8 %
Accuarcy over test set in sklearn_linear is:  99.6 %
Accuarcy over validation set in sklearn_gaussian is:  99.6 %
Accuarcy over test set in sklearn_gaussian is:  100.0 %
