In [95]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from mlxtend.plotting.decision_regions import plot_decision_regions

%matplotlib widget

Multi- Class SVM

defining kernel and poly functions

In [96]:

def linear_kernel(x1, x2):
    return x1.T @ x2

def poly_kernel(x1, x2, d = 2, c = 0):
    return (x1 @ x2.T + c)**d

deriving multiclass svm


In [97]:

class multi_class_svm():
    def __init__(self, kernel='linear', c=1.0, tol=1e-3, maxiter=1000):
        self._kernel = kernel
        self._tol = tol
        self._maxiter = maxiter
        self.epocs = 0.001
        
        if self._kernel == 'linear':
            self._k = linear_kernel
        elif self._kernel == 'poly':
            self._k = poly_kernel
        
        self._c = c
        
    def _init_params(self):
        self._error_cache = np.zeros(self._data.shape[0])
        self._alphas = np.ones(self._data.shape[0]) * .1
        self._b = 0
        
        if self._kernel == 'linear':
            self._weights = np.random.rand(self._data.shape[1])

    def predict_score(self, x):
        """Predicts a raw score (not classification)
        
        Arguments
            x, array (batch_size, n_features) - input samples.
        """
        u = 0
        if self._kernel == 'linear':
            u = self._weights @ x.T - self._b
        else:
            for i in range(self._data.shape[0]):
                u += self._targets[i] * self._alphas[i] * self._k(self._data[i], x)
            u -= self._b
        return u
        
    def predict(self, x):
        """Classifies input samples.
        
        Arguments
            x, array (batch_size, n_features) - input samples.
        """
        score = self.predict_score(x)

        if type(score) is np.ndarray:
            score[score < 0] = -1
            score[score >= 0] = 1

            return score
        else:
            return -1 if score < 0 else 1

    def smo_step(self, i1, i2):
        if i1 == i2:
            return 0

        x1 = self._data[i1]
        x2 = self._data[i2]
        y1 = self._targets[i1]
        y2 = self._targets[i2]
        alpha1 = self._alphas[i1]
        alpha2 = self._alphas[i2]

        # Compute errors for x1 and x2
        e1 = self.predict_score(x1) - y1
        e2 = self.predict_score(x2) - y2

        s = y1 * y2

        if s == 1:
            L = max(0, alpha2 + alpha1 - self._c)
            H = min(self._c, alpha2 + alpha1)
        else:
            L = max(0, alpha2 - alpha1)
            H = min(self._c, self._c + alpha2 - alpha1)

        if L == H:
            return 0

        k11 = self._k(x1, x1)
        k22 = self._k(x2, x2)
        k12 = self._k(x1, x2)

        eta = k11 + k22 - 2 * k12

        if eta > 0:
            a2 = alpha2 + y2 * (e1 - e2) / eta
            if a2 <= L:
                a2 = L
            elif a2 >= H:
                a2 = H
        # TODO: the negative case
        else:
            print(f"[DEBUG] smo_step: eta = {eta}")
            
            z1 = (y1*(e1 + self._b)) - (alpha1 * k11) - (s*alpha2*k12)
            z2 = (y2*(e2 + self._b)) - (s*alpha1*k12) - (alpha2*k22)
           
            L_1 = alpha1 + s*(alpha2 - L)
            H_1 = alpha1 + s*(alpha2 - H)
            
            L_ob = (L_1*z1) + (L*z2) + (0.5*(L_1**2)*k11) + (0.5*(L**2)*k22) + (s*L*L_1*k12)
            H_ob = (H_1*z1) + (H*z2) + (0.5*(H_1**2)*k11) + (0.5*(H**2)*k22) + (s*H*H_1*k12)


            if (L_ob < H_ob- self.epocs):

                a2 = L
            elif (L_ob > (H_ob + self.epocs)):

                a2 = H
            else:
                
                a2 = alpha2
            
        if np.abs(a2 - alpha2) < 1e-3 * (a2 + alpha2 + 1e-3):
            return 0

        a1 = alpha1 + s * (alpha2 - a2)

        # Update threshold to reflect change in Lagrange multipliers
        b1 = e1 + y1 * (a1 - alpha1) * k11 + y2 * (a2 - alpha2) * k12 + self._b
        b2 = e2 + y1 * (a1 - alpha1) * k12 + y2 * (a2 - alpha2) * k22 + self._b
        self._b = (b1 + b2) / 2

        # Update weight vector to reflect change in a1 & a2, if SVM is linear
        if self._kernel == 'linear':
            self._weights = np.sum((self._targets * self._alphas)[:, None] * self._data, axis=0)
        
        # Store a1 and a2 in alpha array
        self._alphas[i1] = a1
        self._alphas[i2] = a2

        # update error cache using new multipliers
        for i in range (self._data.shape[0]):
            self._error_cache[i] = self.predict_score(self._data[i]) - self._targets[i]

        return 1

    def examine(self, i2):
        x2 = self._data[i2]
        y2 = self._targets[i2]
        alpha2 = self._alphas[i2]
        e2 = self.predict_score(x2) - y2
        r2 = e2 * y2

        # Heuristic for picking the first multiplier
        if (r2 < -self._tol and alpha2 < self._c) or (r2 > self._tol and alpha2 > 0):
            f_idxs = np.where((self._alphas != 0) & (self._alphas != self._c))[0]

            if len(f_idxs) > 1:
                # Hueristic for second multiplier: get i1 with lowest absolute error |e1 - e2|

                # TODO: Clean this up
                if e2 > 0:
                    min_error = 999999
                    for i, v in enumerate(f_idxs):
                        if v == i2:
                            continue

                        if self._error_cache[v] == 0:
                            self._error_cache[v] = self.predict_score(self._data[v]) - self._targets[v]
                        error = np.abs(e2 - self._error_cache[v])

                        if error < min_error:
                            min_error = error
                            i1 = v
                else:
                    max_error = -999999
                    for i, v in enumerate(f_idxs):
                        if v == i2:
                            continue

                        if self._error_cache[v] == 0:
                            self._error_cache[v] = self.predict_score(self._data[v]) - self._targets[v]
                        error = np.abs(e2 - self._error_cache[v])

                        if error > max_error:
                            max_error = error
                            i1 = v

                if self.smo_step(i1, i2):
                    return 1
                
                # Loop over all non-zero and non-C alpha, starting at random point
                for i, v in enumerate(np.random.permutation(f_idxs)):
                    if self.smo_step(v, i2):
                        return 1
                
                # Loop over all possible i1, starting at a random point
                for i in range(self._data.shape[0]):
                    if i == i2:
                        continue
                    if self.smo_step(i, i2):
                        return 1
                
        return 0
    
    def fit(self, data, targets):
        self._data = data
        self._targets = targets
        
        self._init_params()
        
        n_changed = 0
        examine_all = True
        n_iter = 0
        
        while (n_changed > 0 or examine_all is True) and n_iter < self._maxiter:
            n_changed = 0
            n_iter += 1
            
            if examine_all is True:
                # loop over all training examples
                for i in range(data.shape[0]):
                    n_changed += self.examine(i)
            else:
                # loop over examples where alpha is not 0 & not C
                f_idxs = np.where((self._alphas != 0) & (self._alphas != self._c))[0]
                for i, v in enumerate(f_idxs):
                    n_changed += self.examine(v)
            
            if examine_all is True:
                examine_all = False
            elif n_changed == 0:
                examine_all = True
    def accuracy(self, B_pred,B):
         B_pred = [1 if pred == -1. else 0 for pred in B_pred]
         accuracy = np.sum(B == B_pred)/len(B)
        
         return accuracy
    


Loading iris dataset

In [98]:
data = datasets.load_iris()

A = data.data
B = data.target
A_train, A_test, B_train, B_test = train_test_split(A, B, random_state = 42, test_size = 0.1)

Training Multi-class


Model-1 - Type-A vs (Type-B and Type-C)

In [99]:
# Redefining B for this model
B_for_model1 = []

for i in range(len(B)):
    if(B[i] != 0):
        B_for_model1.append(1)
    else:
        B_for_model1.append(0)
B_for_model1 = np.array(B_for_model1)
print(B_for_model1)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1]


In [100]:
# data selected for testing sets and training
A_train_m1, A_test_m1, B_train_m1, B_test_m1 = train_test_split(A, B_for_model1, random_state = 42, test_size=0.1)

linear kernel

In [101]:
#Model 1 creation my model
linear_svm_m1 = multi_class_svm(c =5.0)

#Model 1 training and testing
linear_svm_m1.fit(A_train_m1, B_train_m1)
print(linear_svm_m1._weights)
print(linear_svm_m1._b)


pred_m1_svm = linear_svm_m1.predict(A_test_m1)
print("Accuracy of linear svm model-1= ", (linear_svm_m1.accuracy(pred_m1_svm, B_test_m1))*100, "%")

[DEBUG] smo_step: eta = 0.0
[32.02365608 14.30946243 21.41655925  6.40473122]
276.7611091439231
Accuracy of linear svm model-1=  0.0 %


In [102]:
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC


linear_sk_model1 = LinearSVC()

linear_sk_model1.fit(A_train_m1, B_train_m1.astype(np.int32))

print(f"coeff_={linear_sk_model1.coef_}")
print(f"intercept={linear_sk_model1.intercept_}")
pred_m1_svc = linear_sk_model1.predict(A_test_m1)
print("Accuracy of linear svc model-1= ", accuracy_score(B_test_m1, pred_m1_svc)*100)

coeff_=[[-0.18423705 -0.45122458  0.80794916  0.45071294]]
intercept=[-0.1095615]
Accuracy of linear svc model-1=  100.0


Poly kernel

In [103]:
poly_svm_m1 = multi_class_svm(c=5.0)

#training and testing of model-1

poly_svm_m1.fit(A_train_m1, B_train_m1)
#printing poly_svm
print(poly_svm_m1._b)


pred_m1_svm = poly_svm_m1.predict(A_test_m1)
print("Accuracy of poly svm model-1= ", (poly_svm_m1.accuracy(pred_m1_svm, B_test_m1))*100, "%")

[DEBUG] smo_step: eta = 0.0
274.12938764508664
Accuracy of poly svm model-1=  13.333333333333334 %


In [104]:
from sklearn.svm import SVC
poly_svc_m1 = SVC(kernel = 'poly', degree = 2)
poly_svc_m1.fit(A_train_m1, B_train_m1.astype(np.int32))

#printing intercept
print(f"intercept={poly_svc_m1.intercept_}")
#prediction of svc
pred_m1_svc = poly_svc_m1.predict(A_test_m1)
print("Accuracy of poly svc model 1 = ", accuracy_score(B_test_m1, pred_m1_svc)*100)

intercept=[-1.25416081]
Accuracy of poly svc model 1 =  100.0


Model-2 - Type-B vs (Type-A and Type-C)

In [105]:
# Redefining B for this model
B_for_model2 = []

for i in range(len(B)):
    if(B[i] != 0):
        B_for_model2.append(1)
    else:
        B_for_model2.append(0)
B_for_model2 = np.array(B_for_model2)
print(B_for_model2)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1]


In [106]:
# data selected for testing sets and training
A_train_m2, A_test_m2, B_train_m2, B_test_m2 = train_test_split(A, B_for_model2, random_state = 42, test_size=0.1)

linear kernel

In [107]:
#Model 2 creation my model
linear_svm_m2 = multi_class_svm(c =5.0)

#Model 2 training and testing
linear_svm_m2.fit(A_train_m2, B_train_m2)
print(linear_svm_m2._weights)
print(linear_svm_m2._b)


pred_m2_svm = linear_svm_m2.predict(A_test_m2)
print("Accuracy of linear svm model-2= ", (linear_svm_m2.accuracy(pred_m2_svm, B_test_m2))*100, "%")

[DEBUG] smo_step: eta = 0.0
[31.34194187 14.73677675 21.28935931  6.36838837]
201.7387423901552
Accuracy of linear svm model-2=  40.0 %


In [108]:
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC


linear_sk_model2 = LinearSVC()

linear_sk_model2.fit(A_train_m2, B_train_m2.astype(np.int32))

print(f"coeff_={linear_sk_model2.coef_}")
print(f"intercept={linear_sk_model2.intercept_}")
pred_m2_svc = linear_sk_model2.predict(A_test_m2)
print("Accuracy of linear svc model-2= ", accuracy_score(B_test_m2, pred_m2_svc)*100)

coeff_=[[-0.18424767 -0.4512312   0.80794563  0.45071398]]
intercept=[-0.10956268]
Accuracy of linear svc model-2=  100.0


poly kernel

In [118]:
poly_svm_m2 = multi_class_svm(c=5.0)

#training and testing of model-2

poly_svm_m2.fit(A_train_m2, B_train_m2)
#printing poly_svm
print(poly_svm_m2._b)


pred_m2_svm = poly_svm_m2.predict(A_test_m2)
print("Accuracy of poly svm model-2= ", (poly_svm_m2.accuracy(pred_m2_svm, B_test_m2))*100, "%")

[DEBUG] smo_step: eta = 0.0
0.6649364708001415
Accuracy of poly svm model-2=  40.0 %


In [110]:
from sklearn.svm import SVC
poly_svc_m2 = SVC(kernel = 'poly', degree = 2)
poly_svc_m2.fit(A_train_m2, B_train_m2.astype(np.int32))

#printing intercept
print(f"intercept={poly_svc_m2.intercept_}")
#prediction of svc
pred_m2_svc = poly_svc_m2.predict(A_test_m2)
print("Accuracy of poly svc model-2 = ", accuracy_score(B_test_m2, pred_m2_svc)*100)

intercept=[-1.25416081]
Accuracy of poly svc model-2 =  100.0


Model-3 - Type-C vs (Type-A and Type-B)

In [111]:
# Redefining B for this model
B_for_model3 = []

for i in range(len(B)):
    if(B[i] != 0):
        B_for_model3.append(1)
    else:
        B_for_model3.append(0)
B_for_model3 = np.array(B_for_model3)
print(B_for_model3)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1]


In [112]:
# data selected for testing sets and training
A_train_m3, A_test_m3, B_train_m3, B_test_m3 = train_test_split(A, B_for_model3, random_state = 42, test_size=0.1)

linear kernel

In [119]:
#Model 3 creation my model
linear_svm_m3 = multi_class_svm(c =5.0)

#Model 3 training and testing
linear_svm_m3.fit(A_train_m3, B_train_m3)
print(linear_svm_m3._weights)
print(linear_svm_m3._b)


pred_m3_svm = linear_svm_m3.predict(A_test_m3)
print("Accuracy of linear svm model-3= ", (linear_svm_m3.accuracy(pred_m3_svm, B_test_m3))*100, "%")

[DEBUG] smo_step: eta = 0.0
[35.82092337 16.52836935 24.42464636  7.26418467]
283.95927633255184
Accuracy of linear svm model-3=  26.666666666666668 %


In [114]:
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC


linear_sk_model3 = LinearSVC()

linear_sk_model3.fit(A_train_m3, B_train_m3.astype(np.int32))

print(f"coeff_={linear_sk_model3.coef_}")
print(f"intercept={linear_sk_model3.intercept_}")
pred_m3_svc = linear_sk_model3.predict(A_test_m3)
print("Accuracy of linear svc model-3= ", accuracy_score(B_test_m3, pred_m3_svc)*100)

coeff_=[[-0.18424169 -0.45122875  0.80794112  0.45071406]]
intercept=[-0.10956374]
Accuracy of linear svc model-3=  100.0


poly kernel

In [116]:
poly_svm_m3 = multi_class_svm(c=5.0)

#training and testing of model-3

poly_svm_m3.fit(A_train_m3, B_train_m3)

#printing poly_svm
print(poly_svm_m3._b)


pred_m3_svm = poly_svm_m3.predict(A_test_m3)
print("Accuracy of poly svm model-3= ", (poly_svm_m3.accuracy(pred_m3_svm, B_test_m3))*100, "%")

[DEBUG] smo_step: eta = 0.0
238.82194476875355
Accuracy of poly svm model-3=  0.0 %


In [117]:
from sklearn.svm import SVC
poly_svc_m3 = SVC(kernel = 'poly', degree = 2)
poly_svc_m3.fit(A_train_m3, B_train_m3.astype(np.int32))

#printing intercept
print(f"intercept={poly_svc_m3.intercept_}")
#prediction of svc
pred_m3_svc = poly_svc_m3.predict(A_test_m3)
print("Accuracy of poly svc model 1 = ", accuracy_score(B_test_m3, pred_m3_svc)*100)

intercept=[-1.25416081]
Accuracy of poly svc model 1 =  100.0
