In [1]:
import matplotlib.pyplot as plt
from matplotlib import gridspec
from cvxopt import matrix, solvers
from sklearn.svm import SVC
import numpy as np

# Shared scratch dictionary; loadData('train', ...) stores the training
# features and labels in it under the keys 'X' and 'y'.
parameters = dict()

# Integer tags selecting the kernel used when a Gram matrix is built.
KERNEL_LINEAR, KERNEL_RBF = 1, 2

# Integer tags naming a dataset flavour (passed to generate_data).
DATASET_LINEARLY_SEPARABLE, DATASET_CIRCULAR = 1, 2

In [2]:
# Folder holding the Fashion-MNIST CSV splits. The trailing slash matters:
# file paths are built by plain string concatenation (folder + file name).
dest_folder = './fashion_mnist/'
train_filename, test_filename, val_filename = 'train.csv', 'test.csv', 'val.csv'

# Presumably the number of rows in each CSV split -- TODO confirm; these
# values are not referenced by the visible cells.
train_size, test_size, val_size = 22500, 5000, 2501

# Each image is image_size x image_size pixels flattened into one CSV row;
# `label` is the index of the column that follows the last pixel.
image_size = 28
label = image_size * image_size

In [13]:
def _read_binary_split(path, firstclass, secondclass):
    """Read one CSV split and keep only the rows of the two requested classes.

    Every row is expected to hold image_size * image_size pixel values
    followed by the class label in column `label`.

    Returns:
        (features, targets): `features` is a float64 array of shape
        (n_kept_rows, image_size * image_size) with pixels scaled to [0, 1];
        `targets` is a float64 vector holding -1 for `secondclass` rows and
        +1 for `firstclass` rows.
    """
    n_pixels = image_size * image_size
    pixel_rows = []
    raw_labels = []

    # The context manager guarantees the file is closed, even on a parse error.
    with open(path) as csv_file:
        for line in csv_file:
            line = line.strip()
            if not line:  # tolerate blank lines (e.g. a trailing newline)
                continue
            fields = line.split(',')
            # Parse the whole label field so multi-digit labels are compared
            # correctly, not just by their first character.
            row_label = float(fields[label])
            if row_label != firstclass and row_label != secondclass:
                continue
            pixel_rows.append(np.asarray(fields[:n_pixels], dtype=np.float32))
            raw_labels.append(row_label)

    if pixel_rows:
        # Parse as float32, then widen to float64 before scaling to [0, 1].
        features = np.vstack(pixel_rows).astype(np.float64) / 255
    else:
        features = np.zeros((0, n_pixels))
    targets = np.where(np.asarray(raw_labels) == secondclass, -1.0, 1.0)
    return features, targets


def loadData(catagory, firstclass, secondclass):
    """Load a two-class subset of one dataset split for SVM training/evaluation.

    Args:
        catagory: which split to read: 'train', 'test' or 'val'.
        firstclass: class label mapped to +1.
        secondclass: class label mapped to -1.

    Returns:
        (X, y): X has one row per kept sample with pixels scaled to [0, 1];
        y is a 1-D vector of +1 / -1 labels. The arrays are sized to the
        number of matching rows actually found in the file, so no padding
        rows appear and no row limit applies.

    Raises:
        ValueError: if `catagory` is not one of the known splits.

    Side effects:
        For the 'train' split the result is also stored in
        parameters['X'] and parameters['y'].
    """
    split_files = {
        'train': train_filename,
        'test': test_filename,
        'val': val_filename,
    }
    if catagory not in split_files:
        raise ValueError(
            "unknown split %r; expected one of 'train', 'test', 'val'" % (catagory,)
        )

    features, targets = _read_binary_split(
        dest_folder + split_files[catagory], firstclass, secondclass
    )

    if catagory == 'train':
        parameters['X'] = features
        parameters['y'] = targets

    return features, targets

In [14]:
# Bind the module-level X and y (read by train_svm and get_parameters) to the
# class-3 vs class-4 subset of the chosen split.
# NOTE(review): this loads the 'val' split -- confirm 'train' was not intended.
X, y = loadData(catagory='val', firstclass=3, secondclass=4)

In [7]:
# NOTE(review): generate_data is not defined in any visible cell -- confirm it
# exists before running on a fresh kernel. This rebinds the module-level X and
# y that train_svm and get_parameters read.
X, y = generate_data(DATASET_LINEARLY_SEPARABLE)

In [8]:
# Show the dimensions of the current X as a quick sanity check.
X.shape

(4500, 784)

In [10]:
def gram_matrix(X, Y, kernel_type, gamma=0.5):
    """Compute the kernel (Gram) matrix between the rows of X and the rows of Y.

    Args:
        X: 2-D array, one sample per row.
        Y: 2-D array, one sample per row, with the same number of columns as X.
        kernel_type: KERNEL_LINEAR for K[i, j] = <x_i, y_j>, or KERNEL_RBF for
            K[i, j] = exp(-gamma * ||x_i - y_j||^2).
        gamma: width parameter of the RBF kernel (ignored by the linear one).

    Returns:
        float64 array of shape (X.shape[0], Y.shape[0]).

    Raises:
        ValueError: if `kernel_type` is not a known kernel (the previous
            behaviour of silently returning an all-zero matrix hid mistakes).
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)

    # One matrix product replaces the Python-level double loop over sample pairs.
    inner = X @ Y.T

    if kernel_type == KERNEL_LINEAR:
        return inner

    if kernel_type == KERNEL_RBF:
        # ||x - y||^2 = ||x||^2 + ||y||^2 - 2 <x, y>
        sq_norm_x = np.sum(X * X, axis=1)[:, None]
        sq_norm_y = np.sum(Y * Y, axis=1)[None, :]
        # Rounding can push a true zero slightly negative; clamp it.
        sq_dist = np.maximum(sq_norm_x + sq_norm_y - 2.0 * inner, 0.0)
        return np.exp(-gamma * sq_dist)

    raise ValueError("unknown kernel_type: %r" % (kernel_type,))
def train_svm(kernel, C=1, gamma=0.5):
    """Solve the soft-margin SVM dual on the module-level X and y with cvxopt.

    The quadratic program handed to the solver is
        minimize    1/2 a^T H a - 1^T a
        subject to  0 <= a_i <= C   and   y^T a = 0
    where H[i, j] = y_i * y_j * K(x_i, x_j).

    Args:
        kernel: KERNEL_LINEAR or KERNEL_RBF, forwarded to gram_matrix.
        C: upper bound on every multiplier (the box constraint); must be > 0.
        gamma: RBF width forwarded to gram_matrix.

    Returns:
        The result dictionary of cvxopt.solvers.qp; the multipliers are
        stored under the key 'x'.

    Raises:
        ValueError: if C is not strictly positive.

    Side effects:
        Tightens the global cvxopt tolerances and disables its progress output.
    """
    if C <= 0:
        raise ValueError("C must be positive, got %r" % (C,))

    n = X.shape[0]
    # cvxopt only accepts double-precision data, so coerce the labels first.
    label_row = np.asarray(y, dtype=np.double).reshape(1, -1)

    H = np.outer(label_row, label_row) * gram_matrix(X, X, kernel, gamma)
    P = matrix(H)
    q = matrix(-np.ones((n, 1)))
    # Stack -a <= 0 on top of a <= C to express the box constraint as G a <= h.
    G = matrix(np.vstack((-np.eye(n), np.eye(n))))
    h = matrix(np.vstack((np.zeros((n, 1)), np.ones((n, 1)) * C)))
    A = matrix(label_row)
    b = matrix(np.zeros(1))

    solvers.options['abstol'] = 1e-10
    solvers.options['reltol'] = 1e-10
    solvers.options['feastol'] = 1e-10
    solvers.options['show_progress'] = False

    return solvers.qp(P, q, G, h, A, b)
def get_parameters(alphas):
    """Recover the linear SVM's weights and bias from its dual multipliers.

    Reads the module-level training data X and labels y.

    Args:
        alphas: array of dual multipliers, one per row of X.

    Returns:
        (w, b, S): the weight vector X^T (alphas * y), the bias averaged over
        the support vectors, and the boolean mask selecting those vectors.
    """
    # Multipliers at or below this tolerance are treated as numerically zero.
    tolerance = 1e-5
    support_mask = np.ravel(alphas > tolerance)

    weights = X.T @ (alphas * y)

    # Each support vector yields an estimate y_i - <x_i, w>; average them.
    bias_estimates = y[support_mask] - X[support_mask] @ weights
    return weights, np.mean(bias_estimates), support_mask

In [None]:
# Fit the linear-kernel SVM; the solver result keeps the optimal dual
# multipliers as a column under the key 'x', flattened here to 1-D.
svm_parameters = train_svm(kernel=KERNEL_LINEAR)
alphas = np.array(svm_parameters['x'])[:, 0]

# Turn the multipliers into the weights, bias and support-vector mask.
w, b, S = get_parameters(alphas=alphas)

print('Number of Support Vectors: %s' % len(alphas[S]))
print('Alphas:', alphas[S])
print('w and b', w, b)

In [11]:
def _linear_accuracy(features, labels, weights, bias):
    """Return the fraction of rows whose sign(<x, w> + b) matches the +1/-1 label."""
    labels = np.asarray(labels)
    n_samples = labels.shape[0]
    if n_samples == 0:
        # Accuracy is undefined on an empty split; avoid a ZeroDivisionError.
        return float('nan')
    scores = np.asarray(features) @ np.ravel(weights) + bias
    # A score of exactly zero counts as a miss for either label.
    hits = ((scores > 0) & (labels == 1)) | ((scores < 0) & (labels == -1))
    return int(np.count_nonzero(hits)) / n_samples


def valandtest(X_val=None, y_val=None, X_test=None, y_test=None,
               weights=None, bias=None):
    """Print the linear classifier's accuracy on the validation and test splits.

    Any argument left as None falls back to the notebook-level variable the
    function used to read unconditionally: X3 / y3 (validation), X2 / y2
    (test), w (weights) and b (bias). Those names must exist at notebook
    scope in that case; passing the data explicitly avoids the dependency.

    Args:
        X_val, y_val: validation features (one row per sample) and +1/-1 labels.
        X_test, y_test: test features and +1/-1 labels.
        weights: weight vector of the linear decision function.
        bias: scalar offset of the linear decision function.

    Returns:
        None. The two accuracies are printed.
    """
    X_val = X3 if X_val is None else X_val
    y_val = y3 if y_val is None else y_val
    X_test = X2 if X_test is None else X_test
    y_test = y2 if y_test is None else y_test
    weights = w if weights is None else weights
    bias = b if bias is None else bias

    print('Validation accuracy: ', _linear_accuracy(X_val, y_val, weights, bias))
    print('Test accuracy: ', _linear_accuracy(X_test, y_test, weights, bias))