<a href="https://colab.research.google.com/github/tanvu10/ML_rework_algorithm/blob/main/SVM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from __future__ import print_function
import numpy as np 
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
np.random.seed(22)
from cvxopt import matrix, solvers

# Synthetic, two-class toy data: two 2-D Gaussian clusters that share one
# covariance matrix and differ only in their mean.
means = [[2, 2], [4, 2]]
cov = [[.3, .2], [.2, .3]]
N = 10  # number of points drawn per class
X0 = np.random.multivariate_normal(means[0], cov, N)  # points labelled +1
X1 = np.random.multivariate_normal(means[1], cov, N)  # points labelled -1

# Append the class label as the last column. Each label column is sized from
# its own class's points so the two classes may differ in size.
class_1 = np.concatenate((X0, np.ones((X0.shape[0], 1))), axis=1)
class_2 = np.concatenate((X1, -np.ones((X1.shape[0], 1))), axis=1)

# Full training table: one row per point, columns = (x1, x2, label).
sample = np.concatenate((class_1, class_2), axis=0)

In [None]:

def data_preprocessing(dataframe):
    """Convert tabular input into a NumPy array.

    Args:
        dataframe: Any object ``np.array`` accepts (nested lists, an existing
            array, ...).

    Returns:
        The result of ``np.array(dataframe)``.
    """
    as_array = np.array(dataframe)
    return as_array


class SVM:
    """Hard-margin linear SVM trained by solving the dual QP with cvxopt.

    The training table holds one sample per row: the feature columns come
    first and the class label is the last column. Labels are expected to be
    +1 / -1 (this is not validated).

    Attributes set by ``__init__``:
        X: features, shape (num_feature, num_sample) -- one column per sample.
        y: labels, shape (num_sample,).
        num_sample: number of rows in the training table.
        num_feature: number of feature columns (table width minus the label).

    Attributes set by ``fit``:
        V: label-scaled features, ``V[:, i] = y[i] * X[:, i]``.
        K: Gram matrix ``V.T @ V`` used as the quadratic term of the dual.
        lambda_: dual variables returned by the solver, shape (num_sample, 1).
        weight: separating-hyperplane normal, shape (num_feature, 1).
        bias: separating-hyperplane offset (scalar).
    """

    # Dual variables at or below this value are treated as zero, i.e. the
    # corresponding sample is not considered a support vector.
    _SUPPORT_TOL = 1e-6

    def __init__(self, dataframe):
        """Split the training table into features and labels.

        Args:
            dataframe: 2-D array-like of shape (num_sample, num_feature + 1)
                whose last column is the label.
        """
        data = np.asarray(dataframe, dtype=float)
        self.X = data[:, :-1].T
        self.y = data[:, -1]  # 1-D label vector
        self.num_sample = data.shape[0]
        self.num_feature = data.shape[1] - 1  # last column is the label

    def fit(self):
        """Solve the dual problem and recover the separating hyperplane.

        The dual solved is::

            minimise   1/2 * lambda^T K lambda - 1^T lambda
            subject to lambda >= 0,  sum_i lambda_i * y_i = 0

        Returns:
            Tuple ``(weight, bias)``; ``weight`` has shape (num_feature, 1)
            and ``bias`` is a scalar.

        Raises:
            ValueError: if no dual variable exceeds the support tolerance, so
                no hyperplane can be recovered from the solution.
        """
        n = self.num_sample

        # Scale every sample (column of X) by its label via broadcasting.
        self.V = self.X * self.y
        self.K = self.V.T.dot(self.V)

        # Quadratic and linear terms of the dual objective.
        P = matrix(self.K, tc='d')
        q = matrix(-np.ones((n, 1)), tc='d')

        # Inequality constraint -lambda <= 0, i.e. lambda >= 0.
        G = matrix(-np.eye(n), tc='d')
        h = matrix(np.zeros((n, 1)), tc='d')

        # Equality constraint sum_i lambda_i * y_i = 0 (y as a 1 x n row).
        A = matrix(np.array([self.y]), tc='d')
        b = matrix(np.zeros((1, 1)), tc='d')

        # NOTE: this silences the solver globally (module-level cvxopt option).
        solvers.options['show_progress'] = False
        sol = solvers.qp(P, q, G, h, A, b)

        self.lambda_ = np.array(sol['x'])

        # Support set S: samples whose multiplier is numerically positive.
        support = self.lambda_.ravel() > self._SUPPORT_TOL
        if not support.any():
            raise ValueError(
                "no support vectors found: every dual variable is <= "
                f"{self._SUPPORT_TOL}"
            )

        lambda_s = self.lambda_[support, :]  # (s, 1)
        X_s = self.X[:, support]             # (num_feature, s)
        V_s = self.V[:, support]             # (num_feature, s)
        y_s = self.y[support]                # (s,)

        # w = sum_{i in S} lambda_i * y_i * x_i
        self.weight = V_s.dot(lambda_s)

        # b = mean over S of (y_i - w^T x_i). Both operands are laid out along
        # the sample axis, so the subtraction pairs each label with its own
        # sample instead of broadcasting to an (s, s) matrix.
        self.bias = np.mean(y_s - self.weight.T.dot(X_s))

        return self.weight, self.bias

    def predict(self, input):
        """Classify one sample or a batch of samples.

        Args:
            input: array-like. A 2-D array of shape (n, num_feature) is
                treated as ``n`` samples. Any other shape is flattened into a
                single sample (so a 1-D vector or a column vector of length
                num_feature also works). The name shadows the builtin but is
                kept so existing keyword callers continue to work.

        Returns:
            Array of shape (n, 1) holding ``sign(w^T x + b)`` for each sample.
        """
        samples = np.asarray(input, dtype=float)
        is_batch = samples.ndim == 2 and samples.shape[1] == self.weight.shape[0]
        if not is_batch:
            samples = samples.reshape(1, -1)
        return np.sign(self.bias + samples.dot(self.weight))


In [None]:
# Scratch check of the per-column scaling used when building V in SVM.fit:
# split a random table into features/labels, scale each feature column by its
# label, then pick entries out with a boolean mask.
A = np.random.rand(5, 3)
X = A[:, :-1].T  # features, one column per row of A
print(X)
y = A[:, -1]     # last column of A
print(y)

B = np.zeros(X.shape)
print(B)
print([y[0]])

# Broadcasting multiplies column i of X by y[i].
B[:] = X * y
print(B)
index = B < 0.3
print(index)
B[index]

[[0.82854036 0.8584532  0.95314825 0.673016   0.24212296]
 [0.74456446 0.00551241 0.94177639 0.16411433 0.15785591]]
[0.17753704 0.35764317 0.02304901 0.56813237 0.50356282]
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
[0.17753703793604714]
[[0.1470966  0.30701992 0.02196913 0.38236217 0.12192412]
 [0.13218777 0.00197148 0.02170702 0.09323867 0.07949037]]
[[ True False  True False  True]
 [ True  True  True  True  True]]


array([0.1470966 , 0.02196913, 0.12192412, 0.13218777, 0.00197148,
       0.02170702, 0.09323867, 0.07949037])

In [None]:
# Train the SVM on the synthetic two-class sample, then print the learned
# hyperplane parameters (weight first, bias second).
SVM_v1 = SVM(sample)
weight, bias = SVM_v1.fit()
for learned in (weight, bias):
    print(learned)

[[-2.00984381]
 [ 0.64068336]]
4.668560633868116


In [None]:
# Classify a single query point given as a 1 x 2 row vector.
query_point = np.array([[5, -1]])
prediction = SVM_v1.predict(query_point)
prediction

array([[-1.]])