## This is SVM Implementation using Dual formulation with SMO

###### It was taking too long (3.5 hours as 12000 rows in multiclass and categorical dataset) to run the code so we have used a OvO technique and trained the model using each class in different browsers manually which reduced my time to 35 mins and stored the model using pickle library.
###### I have included a trained model with this python notebook 

In [1]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import math
import random
import sklearn
import copy
from itertools import combinations
from sklearn.metrics import confusion_matrix
import time
import pickle

In [4]:
# Prediction method on the basis of voting in OvO multiclass SVM
def predict(predicted):
    y_predicted = np.zeros((predicted.shape[0]))
    for i in range(predicted.shape[0]):
        y_predicted[i] = np.where(predicted[i] == max(predicted[i]))[0][0]
    return y_predicted

In [5]:
# label Encoding the prediction class
def label_encoding(y):
    y_encoded = np.zeros((y.shape[0]))
    for i in range(y.shape[0]):
        if y[i] == "not_recom":                
            y_encoded[i] = 0
        elif y[i] == "recommend":                
            y_encoded[i] = 1
        elif y[i] == "very_recom":                
            y_encoded[i] = 2
        elif y[i] == "priority":                
            y_encoded[i] = 3
        elif y[i] == "spec_prior":                
            y_encoded[i] = 4
    return y_encoded

In [6]:
def accuracy_check(predicted):
    error = 0
    for i in range(predicted.shape[0]):
        check = np.where(predicted[i] == max(predicted[i]))[0][0]
        if check == 0:
            if y_test[i] != "not_recom":                
                error += 1
        elif check == 1:
            if y_test[i] != "recommend":                
                error += 1
        elif check == 2:
            if y_test[i] != "very_recom":
                error += 1
        elif check == 3:
            if y_test[i] != "priority":
                error += 1
        elif check == 4:
            if y_test[i] != "spec_prior":
                error += 1
    # print(error)
    return (1-((error/predicted.shape[0])))

In [8]:
# Implementation of SMO algorithm for calculating value of alpha and bias 
def SMO(X, y, C, tol=math.pow(10, -3), max_passes=5, degree=3):
    # size of input vector in feature space
    m = X.shape[0]
    a = np.zeros((m, 1))
    b = 0
    passes = 0
    E = np.zeros((m, 1))
    a_old = copy.deepcopy(a)
    # Polynomial Kernel
    kernel = lambda xi, yi: math.pow((np.dot(xi.T, yi) + 1), degree)
    while passes < max_passes:
        num_changed_alphas = 0
        for i in range(m):
            E[i] = f_x(X, y, a, b, X[i, :], degree) - y[i]
            if (y[i]*E[i] < -tol and a[i] < C) or (y[i]*E[i] > tol and a[i] > 0):
                j = random.randrange(m)
                while j == i:
                    j = random.randrange(m)
                E[j] = f_x(X, y, a, b, X[j, :], degree) - y[j]
                # print(E[j])
                a_old[i] = a[i]
                a_old[j] = a[j]
                if y[i] != y[j]:
                    L = max(0, a[j] - a[i])
                    H = min(C, C + a[j] - a[i])
                else:
                    L = max(0, a[i] + a[j] - C)
                    H = min(C, a[i] + a[j])
                if L == H:
                    continue
                # calculating the value of eeta
                n = 2*kernel(X[i, :], X[j, :]) - kernel(X[i, :], X[i, :]) - kernel(X[j, :], X[j, :])
                if n >= 0:
                    continue
                # updating the value of alpha j
                a[j] = a[j] - ((y[j] * (E[i] - E[j])) / n)
                if a[j] > H:
                    a[j] = H
                elif a[j] < L:
                    a[j] = L
                # print(a[j])
                if abs(a[j] - a_old[j]) < tol:
                    continue
                # updating the value of alpha i
                a[i] = a[i] + y[i]*y[j]*(a_old[j] - a[j])
                # print(a[i])
                # now need to calculate the bias term
                b1 = b - E[i] - (y[i] * (a[i] - a_old[i]) * kernel(X[i, :], X[i, :])) - \
                     (y[j] * (a[j] - a_old[j]) * kernel(X[i, :], X[j, :]))
                b2 = b - E[j] - (y[i] * (a[i] - a_old[i]) * kernel(X[i, :], X[j, :])) - \
                     (y[j] * kernel(X[j, :], X[j, :]) * (a[j] - a_old[j]))
                if a[i] > 0 and a[i] < C:
                    b = b1
                elif a[j] > 0 and a[j] < C:
                    b = b2
                else:
                    b = (b1 + b2) / 2
                num_changed_alphas += 1
            # End if
        # End for
        if num_changed_alphas == 0:
            passes += 1
        else:
            passes = 0
    # end while
    return a, b

In [14]:
def f_x(X, y, a, b, x, degree):
    predicted_value = 0.0
    # using polynomial kernel
    for k in range(X.shape[0]):
        predicted_value += (a[k]*y[k]*((X[k, :].T@x + 1)**degree))
    return predicted_value + b

In [9]:
# dividind dataset wrt to each class e.g classes = ['not_recom', 'recommend'] for all 10 classes pair
def combination(X_train, y_train, classes):
    X_subclass = np.array([])
    y_subclass = np.array([])
    for j in range(X_train.shape[0]):
        if classes[0] == y_train[j]:
            X_subclass = np.append(X_subclass, np.array(X_train[j]), axis=0)
            y_subclass = np.append(y_subclass, 1)
        elif classes[1] == y_train[j]:
            X_subclass = np.append(X_subclass, np.array(X_train[j]), axis=0)
            y_subclass = np.append(y_subclass, -1)
    return X_subclass, y_subclass

In [10]:
folder = 'Data_with_-5_20_test'

In [11]:
# Reading the data and storing it in file system
dataset = pd.read_csv('Nursery_edited.csv', delimiter=',', skiprows=0)
y = dataset.iloc[:, -1:].values
X = dataset.iloc[:, :-1].values
# Performing
ohe = OneHotEncoder(categories='auto')
X_transformed = ohe.fit_transform(X).toarray()
# Spliting the data in 80-20 split
X_train, X_test, y_train, y_test = train_test_split(X_transformed, y, test_size=0.20, random_state=42)

outfile1 = open(folder+'\X_train.pkl','wb')
pickle.dump(X_train,outfile1)
outfile1.close()
outfile1 = open(folder+'\y_train.pkl','wb')
pickle.dump(y_train,outfile1)
outfile1.close()
outfile1 = open(folder+'\X_test.pkl','wb')
pickle.dump(X_test,outfile1)
outfile1.close()
outfile1 = open(folder+'\y_test.pkl','wb')
pickle.dump(y_test,outfile1)
outfile1.close()

In [39]:
# Storing the validation data wrt each class
OvO = list(combinations(['not_recom', 'recommend', 'very_recom', 'priority', 'spec_prior'], 2))
k = 0
for i in OvO:
    X_subclass, y_subclass = combination(X_train, y_train, i)
    X_subclass = X_subclass.reshape((np.int(X_subclass.shape[0] / 27), 27))    
    y_subclass = y_subclass.astype(np.int32)      
    
    outfile1 = open(folder+'/train'+str(k)+'.pkl','wb')
    pickle.dump(X_subclass,outfile1)
    outfile1.close()   
    outfile2 = open(folder+'/test'+str(k)+'.pkl','wb')
    pickle.dump(y_subclass,outfile2)
    outfile2.close()
    k += 1

In [16]:
# Trainging and storing alpha and bias on file system
# to increase the time by simultaneously running in different browsers
k = 1
infile = open(folder+'/train'+str(k)+'.pkl','rb')
X_subclass = pickle.load(infile)
infile.close()
infile = open(folder+'/test'+str(k)+'.pkl','rb')
y_subclass = pickle.load(infile)
infile.close()

alpha, bias = SMO(X_subclass, y_subclass, 0.1)

filename = folder+'/alpha'+str(k)+'.pkl'
outfile = open(filename,'wb')
pickle.dump(alpha,outfile)
outfile.close()
filename = folder+'/bias'+str(k)+'.pkl'
outfile = open(filename,'wb')
pickle.dump(bias,outfile)
outfile.close()


In [18]:
#Reading Test data
infile = open(folder+'/X_test.pkl','rb')
X_test = pickle.load(infile)
infile.close()
infile = open(folder+'/y_test.pkl','rb')
y_test = pickle.load(infile)
infile.close()

In [19]:
# implementation that takes stored trained model and run test on the test data
predicted_values = np.zeros((y_test.shape[0], 5))
OvO = list(combinations(['not_recom', 'recommend', 'very_recom', 'priority', 'spec_prior'], 2))
start = time.time()
k = 0
for i in OvO:
    #print(i)
    filename = folder+'/alpha'+str(k)+'.pkl'
    infile = open(filename,'rb')
    alpha = pickle.load(infile)
    infile.close
    filename = folder+'/bias'+str(k)+'.pkl'
    infile = open(filename,'rb')
    bias = pickle.load(infile)
    infile.close
    infile = open(folder+'/train'+str(k)+'.pkl','rb')
    X_subclass = pickle.load(infile)
    infile.close()
    infile = open(folder+'/test'+str(k)+'.pkl','rb')
    y_subclass = pickle.load(infile)
    infile.close()
    k += 1
    y_predict = np.zeros((y_test.shape[0], 1))
    for l in range(X_test.shape[0]):
        #print(f_x(X_subclass, y_subclass, alpha, bias, X_test[l, :], 3))
        if f_x(X_subclass, y_subclass, alpha, bias, X_test[l, :], 3) >= 0:
            y_predict[l] = 1.0
        else:
            y_predict[l] = -1.0
    for j in range(y_predict.shape[0]):
        if i[0] == "not_recom" and y_predict[j] == 1:
            predicted_values[j][0] += 1
        elif i[1] == "not_recom" and y_predict[j] == -1:
            predicted_values[j][0] += 1
        if i[0] == "recommend" and y_predict[j] == 1:
            predicted_values[j][1] += 1
        elif i[1] == "recommend" and y_predict[j] == -1:
            predicted_values[j][1] += 1
        if i[0] == "very_recom" and y_predict[j] == 1:
            predicted_values[j][2] += 1
        elif i[1] == "very_recom" and y_predict[j] == -1:
            predicted_values[j][2] += 1
        if i[0] == "priority" and y_predict[j] == 1:
            predicted_values[j][3] += 1
        elif i[1] == "priority" and y_predict[j] == -1:
            predicted_values[j][3] += 1
        if i[0] == "spec_prior" and y_predict[j] == 1:
            predicted_values[j][4] += 1
        elif i[1] == "spec_prior" and y_predict[j] == -1:
            predicted_values[j][4] += 1
print(accuracy_check(predicted_values))
end = time.time()
print(end - start)

0.6319845857418112
1882.4346897602081


In [27]:
print('Final Accuracy:', accuracy_check(predicted_values)*100)

Final Accuracy: 63.19845857418112


In [23]:
final_predict = predict(predicted_values)
y_encoded = label_encoding(y_test)
cm = confusion_matrix(y_encoded, final_predict, labels=[0, 1, 2, 3, 4])
print('Confusion matrix :')
print(cm)

Confusion matrix :
[[764 104   4   0   0]
 [  0   6   0   0   0]
 [  0  44  18   0   0]
 [  0 334 259 217  47]
 [  0  89  59  15 635]]


In [24]:
from sklearn.metrics import classification_report
target_names = ['class 0', 'class 1', 'class 2', 'class 3', 'class 4']
print(classification_report(y_encoded, final_predict, target_names=target_names))

              precision    recall  f1-score   support

     class 0       1.00      0.88      0.93       872
     class 1       0.01      1.00      0.02         6
     class 2       0.05      0.29      0.09        62
     class 3       0.94      0.25      0.40       857
     class 4       0.93      0.80      0.86       798

    accuracy                           0.63      2595
   macro avg       0.59      0.64      0.46      2595
weighted avg       0.93      0.63      0.71      2595



In [30]:
# Class vs Class confusion matrices
from sklearn.metrics import multilabel_confusion_matrix
multilabel_confusion_matrix(y_encoded, final_predict, labels=[0, 1, 2, 3, 4])

array([[[1723,    0],
        [ 108,  764]],

       [[2018,  571],
        [   0,    6]],

       [[2211,  322],
        [  44,   18]],

       [[1723,   15],
        [ 640,  217]],

       [[1750,   47],
        [ 163,  635]]], dtype=int64)

###### NOTE: The below cell will take approx. 3 hours to run

In [25]:
# This is complete implementation that is training and prediction without storing 
# cross validation   
C = [0.01, 0.1, 1, 50]
c_accuracy = [0,0,0,0]
predicted_values = np.zeros((y_test.shape[0], 5))
p = 0
a = {}
b = {}
OvO = list(combinations(['not_recom', 'recommend', 'very_recom', 'priority', 'spec_prior'], 2))
start = time.time()
for i in OvO:
    start2 = time.time()
    X_subclass, y_subclass = combination(X_train, y_train, i)
    X_subclass = X_subclass.reshape((np.int(X_subclass.shape[0] / 27), 27))
    y_subclass = y_subclass.astype(np.int32)

    y_predict = np.zeros((y_test.shape[0], 1))
    for c in C:
        alpha, bias = SMO(X_subclass, y_subclass, c)
        a[p] = alpha
        b[p] = bias           
        p += 1 

        for l in range(X_test.shape[0]):
            if f_x(X_subclass, y_subclass, alpha, bias, X_test[l, :], 3) >= 0:
                y_predict[l] = 1.0
            else:
                y_predict[l] = -1.0

        for j in range(y_predict.shape[0]):
            if i[0] == "not_recom" and y_predict[j] == 1:
                predicted_values[j][0] += 1
            elif i[1] == "not_recom" and y_predict[j] == -1:
                predicted_values[j][0] += 1
            if i[0] == "recommend" and y_predict[j] == 1:
                predicted_values[j][1] += 1
            elif i[1] == "recommend" and y_predict[j] == -1:
                predicted_values[j][1] += 1
            if i[0] == "very_recom" and y_predict[j] == 1:
                predicted_values[j][2] += 1
            elif i[1] == "very_recom" and y_predict[j] == -1:
                predicted_values[j][2] += 1
            if i[0] == "priority" and y_predict[j] == 1:
                predicted_values[j][3] += 1
            elif i[1] == "priority" and y_predict[j] == -1:
                predicted_values[j][3] += 1
            if i[0] == "spec_prior" and y_predict[j] == 1:
                predicted_values[j][4] += 1
            elif i[1] == "spec_prior" and y_predict[j] == -1:
                predicted_values[j][4] += 1
    c_accuracy[p] = accuracy_check(predicted_values)
    print(accuracy_check(predicted_values))
end = time.time()
print(end - start)

34
0.26086956521739135
34
0.26086956521739135
34
0.26086956521739135
34
0.26086956521739135
32
0.30434782608695654
30
0.34782608695652173
33
0.28260869565217395
22
0.5217391304347826
14
0.6956521739130435
3
0.9347826086956522
20.522088766098022
