In [2]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import math
import random
import sklearn
import copy
from itertools import combinations
from sklearn.metrics import confusion_matrix
import time
import pickle

In [11]:
def SMO(X, y, C, tol=math.pow(10, -5), max_passes=5, degree=3):
    """Train a soft-margin SVM with the simplified SMO algorithm.

    The kernel is the polynomial kernel ``(u . v + 1) ** degree``, the same
    expression evaluated by ``f_x``.

    Parameters
    ----------
    X : ndarray, shape (m, n_features)
        Training samples, one per row.
    y : sequence of length m
        Class labels. Presumably +1 / -1 (the update rules multiply by them);
        confirm against the code that builds the training pickles.
    C : float
        Upper bound applied to every multiplier (box constraint).
    tol : float
        Tolerance used both for the KKT violation check and as the minimum
        change in ``a[j]`` that counts as an update.
    max_passes : int
        Number of consecutive sweeps over the data without any update that
        must occur before training stops.
    degree : int
        Degree of the polynomial kernel.

    Returns
    -------
    a : ndarray, shape (m, 1)
        Lagrange multipliers, one per training row.
    b : int or ndarray
        Bias term: the scalar 0 if no multiplier was ever updated, otherwise
        a one-element array (it inherits the shape of the ``E`` entries).

    Notes
    -----
    The partner index ``j`` is drawn with ``random.randrange``, so results
    differ between runs unless ``random.seed`` is called beforehand.
    With fewer than two samples no (i, j) pair exists; the zero-initialised
    multipliers and bias are returned immediately. Without this guard a
    single sample made the partner search loop forever.
    """
    m = X.shape[0]  # number of training samples
    a = np.zeros((m, 1))
    b = 0
    if m < 2:
        # No distinct partner j can be drawn, so there is nothing to optimise.
        return a, b

    passes = 0
    E = np.zeros((m, 1))
    # Independent copy: a_old keeps the pre-update multipliers of the current pair.
    a_old = a.copy()
    kernel = lambda xi, yi: math.pow((np.dot(xi.T, yi) + 1), degree)

    while passes < max_passes:
        num_changed_alphas = 0
        for i in range(m):
            E[i] = f_x(X, y, a, b, X[i, :], degree) - y[i]
            # Only optimise samples that violate the KKT conditions by more than tol.
            if (y[i]*E[i] < -tol and a[i] < C) or (y[i]*E[i] > tol and a[i] > 0):
                # Pick a random partner j != i.
                j = random.randrange(m)
                while j == i:
                    j = random.randrange(m)
                E[j] = f_x(X, y, a, b, X[j, :], degree) - y[j]
                a_old[i] = a[i]
                a_old[j] = a[j]
                # Bounds L <= a_j <= H that keep both multipliers inside [0, C].
                if y[i] != y[j]:
                    L = max(0, a[j] - a[i])
                    H = min(C, C + a[j] - a[i])
                else:
                    L = max(0, a[i] + a[j] - C)
                    H = min(C, a[i] + a[j])
                if L == H:
                    continue
                # Kernel values for the pair, computed once and reused below.
                k_ij = kernel(X[i, :], X[j, :])
                k_ii = kernel(X[i, :], X[i, :])
                k_jj = kernel(X[j, :], X[j, :])
                # eta must be negative for the update step below to be applied.
                n = 2*k_ij - k_ii - k_jj
                if n >= 0:
                    continue
                # Unconstrained update of a_j, then clip to [L, H].
                a[j] = a[j] - ((y[j] * (E[i] - E[j])) / n)
                if a[j] > H:
                    a[j] = H
                elif a[j] < L:
                    a[j] = L
                if abs(a[j] - a_old[j]) < tol:
                    continue
                # Move a_i by the same amount in the direction given by y_i * y_j.
                a[i] = a[i] + y[i]*y[j]*(a_old[j] - a[j])
                # Candidate bias values derived from sample i and sample j.
                b1 = b - E[i] - (y[i] * (a[i] - a_old[i]) * k_ii) - \
                     (y[j] * (a[j] - a_old[j]) * k_ij)
                b2 = b - E[j] - (y[i] * (a[i] - a_old[i]) * k_ij) - \
                     (y[j] * k_jj * (a[j] - a_old[j]))
                # Prefer the candidate whose multiplier lies strictly inside (0, C).
                if a[i] > 0 and a[i] < C:
                    b = b1
                elif a[j] > 0 and a[j] < C:
                    b = b2
                else:
                    b = (b1 + b2) / 2
                num_changed_alphas += 1
        # A sweep with no updates counts towards termination; any update resets the count.
        if num_changed_alphas == 0:
            passes += 1
        else:
            passes = 0
    return a, b

In [3]:
def f_x(X, y, a, b, x, degree):
    """Evaluate the SVM decision function at a single point.

    Sums ``a[k] * y[k] * (X[k] . x + 1) ** degree`` over every row ``k`` of
    ``X`` (polynomial kernel) and adds the bias ``b``.
    """
    total = 0.0
    n_rows = X.shape[0]
    for idx in range(n_rows):
        # Polynomial kernel between training row idx and the query point.
        kernel_value = (X[idx, :].T @ x + 1) ** degree
        total += a[idx] * y[idx] * kernel_value
    return total + b

In [3]:
# Load the sample matrix and label vector for pairwise sub-problem 9.
# NOTE(review): 'test9.pkl' is read into y_subclass and later passed to SMO as the
# labels, so it presumably stores labels rather than test data; confirm against
# the cell that wrote it.
# Context managers close the files even if unpickling raises.
with open('train9.pkl', 'rb') as infile:
    X_subclass = pickle.load(infile)
with open('test9.pkl', 'rb') as infile:
    y_subclass = pickle.load(infile)

In [179]:
# Train the pairwise classifier on the loaded sub-problem with box constraint C = 0.1.
alpha, bias = SMO(X_subclass, y_subclass, C=0.1)

In [180]:
# Check the length of the label vector that was passed to SMO.
y_subclass.shape

(6655,)

In [181]:
# SMO allocates one multiplier per training row as an (m, 1) column.
alpha.shape

(6655, 1)

In [182]:
# Persist the multipliers learned for sub-problem 9.
# The context manager flushes and closes the file even if pickling raises.
filename = 'alpha9.pkl'
with open(filename, 'wb') as outfile:
    pickle.dump(alpha, outfile)

In [183]:
# Persist the bias term learned for sub-problem 9.
# The context manager flushes and closes the file even if pickling raises.
filename = 'bias9.pkl'
with open(filename, 'wb') as outfile:
    pickle.dump(bias, outfile)

In [10]:
# One-vs-one prediction: every pairwise classifier casts one vote per test sample.
# predicted_values[row, col] counts the votes that sample `row` received for
# class CLASS_NAMES[col].
CLASS_NAMES = ['not_recom', 'recommend', 'very_recom', 'priority', 'spec_prior']
predicted_values = np.zeros((y_test.shape[0], len(CLASS_NAMES)))
OvO = list(combinations(CLASS_NAMES, 2))
start = time.time()


def _load_pickle(path):
    """Unpickle one local artifact, closing the file even if loading fails."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


for k, pair in enumerate(OvO):
    # The file index k follows the position of the class pair in OvO.
    alpha = _load_pickle('alpha' + str(k) + '.pkl')
    bias = _load_pickle('bias' + str(k) + '.pkl')
    X_subclass = _load_pickle('train' + str(k) + '.pkl')
    # Passed to f_x as the label vector, despite the 'test' file name.
    y_subclass = _load_pickle('test' + str(k) + '.pkl')

    # Sign of the decision function (polynomial degree 3) for every test row.
    y_predict = np.zeros((y_test.shape[0], 1))
    for l in range(X_test.shape[0]):
        score = f_x(X_subclass, y_subclass, alpha, bias, X_test[l, :], 3)
        y_predict[l] = 1.0 if score >= 0 else -1.0

    # +1 is a vote for the first class of the pair, -1 a vote for the second.
    positive_col = CLASS_NAMES.index(pair[0])
    negative_col = CLASS_NAMES.index(pair[1])
    for j in range(y_predict.shape[0]):
        if y_predict[j] == 1:
            predicted_values[j][positive_col] += 1
        elif y_predict[j] == -1:
            predicted_values[j][negative_col] += 1
print(accuracy_check(predicted_values))
end = time.time()
# Elapsed wall-clock seconds, including the accuracy computation above.
print(end - start)

955
0.6319845857418112
695.763986825943


In [4]:
# Load the test feature matrix.
# A context manager replaces the bare `infile.close` (no parentheses), which
# only referenced the method and never closed the file.
filename = 'X_test.pkl'
with open(filename, 'rb') as infile:
    X_test = pickle.load(infile)

<function BufferedReader.close>

In [5]:
# Load the test labels.
# A context manager replaces the bare `infile.close` (no parentheses), which
# only referenced the method and never closed the file.
filename = 'y_test.pkl'
with open(filename, 'rb') as infile:
    y_test = pickle.load(infile)

<function BufferedReader.close>

In [7]:
# Peek at the feature vector of the first test sample.
X_test[0]

array([0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0., 0., 1.,
       0., 0., 1., 0., 0., 1., 0., 1., 0., 0.])

In [9]:
# Decision value of the currently loaded pairwise classifier for the first test row
# (polynomial degree 3). Needs X_subclass, y_subclass, alpha and bias in the kernel
# namespace, so the cells that load or train them must be run first.
f_x(X_subclass, y_subclass, alpha, bias, X_test[0, :], 3)

NameError: name 'X_subclass' is not defined

In [8]:
# Display the vote matrix; it only exists after the one-vs-one prediction cell has run.
predicted_values

NameError: name 'predicted_values' is not defined

In [7]:
def accuracy_check(predicted, y_true=None):
    """Return the fraction of samples whose most-voted class matches the true label.

    Parameters
    ----------
    predicted : ndarray, shape (n_samples, n_classes)
        Vote counts per sample. Column order is not_recom, recommend,
        very_recom, priority, spec_prior.
    y_true : sequence of str, optional
        True class name for each sample. Defaults to the notebook-level
        ``y_test`` so existing single-argument calls keep working.

    Returns
    -------
    float
        ``1 - errors / n_samples``.

    Raises
    ------
    ZeroDivisionError
        If ``predicted`` has no rows.

    Notes
    -----
    Ties go to the lowest column index. The number of misclassified samples
    is printed before returning.
    """
    class_names = ("not_recom", "recommend", "very_recom", "priority", "spec_prior")
    if y_true is None:
        y_true = y_test
    error = 0
    for i in range(predicted.shape[0]):
        # First column holding the row maximum.
        check = int(np.argmax(predicted[i]))
        # Columns beyond the five known classes are never counted as errors.
        if check < len(class_names) and y_true[i] != class_names[check]:
            error += 1
    print(error)
    return (1-((error/predicted.shape[0])))

In [193]:
# Keep only the first five test samples and their labels for quick experiments.
# NOTE: this overwrites X_test / y_test; reload the pickles to get the full test set back.
X_test, y_test = X_test[:5], y_test[:5]