In [None]:
import numpy as np 

class SVM:
    """Linear soft-margin SVM trained with mini-batch sub-gradient descent.

    The objective is ``0.5 * ||w||^2 + C * sum_i max(0, 1 - y_i * (w . x_i + b))``
    with labels expected to be -1 / +1.

    Attributes:
        C: weight of the hinge-loss term relative to the L2 regulariser.
        w: learned weights, shape ``(1, n_features)`` after ``fit`` (scalar 0 before).
        b: learned bias (0 before ``fit``).
    """

    def __init__(self, C = 1.0):
        self.C = C
        self.w = 0
        self.b = 0

    def hingeloss(self, w, b, x, y):
        """Return the regularised hinge loss over the whole data set as a float.

        Args:
            w: weight vector, any shape that flattens to ``(n_features,)``.
            b: scalar bias.
            x: samples, shape ``(n_samples, n_features)``.
            y: labels in {-1, +1}, one per sample.

        Returns:
            ``0.5 * ||w||^2 + C * sum(max(0, 1 - y * (x @ w + b)))``. For an
            empty data set this is just the regularisation term.
        """
        w_vec = np.asarray(w, dtype=float).reshape(-1)
        x = np.asarray(x, dtype=float)
        y = np.asarray(y, dtype=float).reshape(-1)

        # Squared L2 norm of the full weight vector, not an element-wise product.
        reg = 0.5 * np.dot(w_vec, w_vec)
        margins = y * (x @ w_vec + b)
        # Accumulate the hinge term over every sample, not only the last one.
        hinge_total = np.maximum(0.0, 1.0 - margins).sum()
        return float(reg + self.C * hinge_total)

    def fit(self, X, Y, batch_size=100, learning_rate=0.001, epochs=1000):
        """Train on ``X`` / ``Y`` and return ``(w, b, losses)``.

        Args:
            X: samples, shape ``(n_samples, n_features)``.
            Y: labels in {-1, +1}, length ``n_samples``.
            batch_size: number of samples per gradient step (>= 1).
            learning_rate: step size of each update.
            epochs: number of passes over the data.

        Returns:
            Tuple of the weights (shape ``(1, n_features)``), the bias, and a
            list holding the full-data loss measured at the start of each epoch.

        Raises:
            ValueError: if ``X`` is not 2-D, ``X`` and ``Y`` disagree on the
                number of samples, or ``batch_size`` is smaller than 1.
        """
        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y, dtype=float).reshape(-1)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if X.shape[0] != Y.shape[0]:
            raise ValueError("X and Y must contain the same number of samples")
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        number_of_samples, number_of_features = X.shape
        print("number of features", number_of_features)
        c = self.C

        # The visiting order is shuffled once (global NumPy RNG) and reused every epoch.
        ids = np.arange(number_of_samples)
        np.random.shuffle(ids)

        w = np.zeros((1, number_of_features))
        b = 0.0
        losses = []
        for _ in range(epochs):
            losses.append(self.hingeloss(w, b, X, Y))
            for batch_initial in range(0, number_of_samples, batch_size):
                batch = ids[batch_initial:batch_initial + batch_size]
                x_batch = X[batch]
                y_batch = Y[batch]

                # Only samples that are not strictly beyond the margin contribute
                # to the hinge sub-gradient.
                margins = y_batch * (x_batch @ w[0] + b)
                active = ~(margins > 1)
                gradw = c * (y_batch[active] @ x_batch[active])
                gradb = c * y_batch[active].sum()

                # "- learning_rate * w" is the gradient step of the L2 regulariser.
                w = w - learning_rate * w + learning_rate * gradw
                b = b + learning_rate * gradb

        self.w = w
        self.b = b

        return self.w, self.b, losses

    def predict(self, X):
        """Return the sign of the decision function ``w . x + b`` for ``X``.

        ``X`` may be one sample (1-D, gives a scalar) or a 2-D batch (gives one
        value per row). Points exactly on the decision boundary yield 0.

        Raises:
            RuntimeError: if called before ``fit``.
        """
        if np.ndim(self.w) == 0:
            raise RuntimeError("SVM.predict() called before fit()")
        prediction = np.dot(X, self.w[0]) + self.b # w.x + b
        return np.sign(prediction)

In [None]:
# Create the classifier with its default regularisation strength (C = 1.0).
svm_module = SVM()

In [None]:
# Upload the dataset through Colab's file picker.
# NOTE(review): this cell only runs inside Google Colab; elsewhere, place
# User_Data.csv next to the notebook and skip it.
from google.colab import files
uploaded = files.upload()

Saving User_Data.csv to User_Data.csv


In [None]:
import pandas as pd

In [None]:
# Load the uploaded CSV (read from the current working directory) into a DataFrame.
data = pd.read_csv('User_Data.csv')

In [None]:
# Preview the first five rows.
data.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Encoder that maps string categories to integer codes.
le = LabelEncoder()

In [None]:
# Overwrite the Gender column with integer codes; in the displayed frame
# Female becomes 0 and Male becomes 1.
data['Gender'] = le.fit_transform(data['Gender'])

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,1,19,19000,0
1,15810944,1,35,20000,0
2,15668575,0,26,43000,0
3,15603246,0,27,57000,0
4,15804002,1,19,76000,0
...,...,...,...,...,...
395,15691863,0,46,41000,1
396,15706071,1,51,23000,1
397,15654296,0,50,20000,1
398,15755018,1,36,33000,0


In [None]:
# Feature matrix: drop the first column (User ID) and the last one (Purchased),
# which leaves Gender, Age and EstimatedSalary.
data_x = data.iloc[:, 1:-1].values
# Show the first row as a sanity check of the selected columns.
data_x[0]

array([    1,    19, 19000])

In [None]:
# Standardise every feature column on its own (zero mean, unit variance).
# Using one global mean/std for the whole matrix would leave the salary column
# orders of magnitude larger than Gender and Age, so it would dominate the fit.
data_x = np.asarray(data_x).astype('float32')
feature_mean = data_x.mean(axis=0)
feature_std = data_x.std(axis=0)
feature_std[feature_std == 0] = 1.0  # a constant column stays at 0 instead of dividing by zero
data_x = (data_x - feature_mean) / feature_std

# Map the 0 labels to -1 as the hinge loss expects. np.where builds a new array,
# so the `Purchased` column of `data` is not modified through the `.values` view.
labels = data.iloc[:, -1].values
data_y = np.where(labels == 0, -1, labels)

In [None]:
# fit() shuffles the sample order with NumPy's global RNG, so seed it to make
# the training run reproducible.
np.random.seed(42)
svm_weights, svm_bias, svm_losses = svm_module.fit(data_x, data_y)
# Show the learned parameters and only the last few epoch losses instead of
# dumping the whole loss history.
svm_weights, svm_bias, svm_losses[-5:]

(array([[0.0661461 , 0.07713631, 0.93362108]]),
 -1.8100000000001055,
 [1.0,
  1.2051708415137128,
  1.4147265442681123,
  1.6286117358326286,
  1.8467715667506768,
  1.9856523357117235,
  2.082509651604392,
  2.0826253241258943,
  2.0957436214857528,
  2.128887257172232,
  2.1402518540629263,
  2.1564644080423734,
  2.1705934943652676,
  2.1802349315886826,
  2.1945504993607505,
  2.2088885417309707,
  2.2210033731582497,
  2.233138589758572,
  2.24529391210503,
  2.257469063465283,
  2.269663769778213,
  2.284191149665674,
  2.298738567454893,
  2.3087550056678183,
  2.3233451648079475,
  2.3333920567855686,
  2.345776020869978,
  2.3581773733570657,
  2.370595862110162,
  2.383031237456024,
  2.395483252163388,
  2.407951661421699,
  2.4204362228200154,
  2.432936696326088,
  2.4454528442656205,
  2.4579844313016905,
  2.4705312244143505,
  2.4830929928803984,
  2.49566950825331,
  2.508260544343346,
  2.520865877197817,
  2.5334852850815217,
  2.546118548457338,
  2.561934600463577

In [None]:
# Predict a single sample: passing one 1-D feature vector gives back a scalar sign.
pred_value = svm_module.predict(data_x[0])

In [None]:
# Predicted label for the first sample.
pred_value

-1.0

In [None]:
# True label of the first sample, to compare with pred_value.
data_y[0]

-1

In [None]:
# Compare predictions for the first ten samples with the labels the model was
# trained on. `data_y` holds the -1/+1 labels explicitly, whereas the `Purchased`
# column of `data` only matches them if it was mutated as a side effect.
pred_values = svm_module.predict(data_x[0:10])
print(pred_values)
data_y[0:10]

[-1. -1. -1. -1. -1. -1. -1.  1. -1. -1.]


0   -1
1   -1
2   -1
3   -1
4   -1
5   -1
6   -1
7    1
8   -1
9   -1
Name: Purchased, dtype: int64

In [None]:
# Draw a 500 x 4 array of standard-normal samples.
# NOTE(review): this rebinds `data`, which previously held the User_Data
# DataFrame, so the frame is no longer reachable under that name. The draw is
# also unseeded, so the values change on every run.
data = np.random.randn(500, 4)

In [None]:
# Display the random array (NumPy abbreviates the repr of large arrays).
data

array([[ 1.35305702e+00, -2.27850995e-01, -1.34218015e+00,
         1.46589103e-01],
       [ 3.43535117e-01, -4.17976227e-01,  4.54835439e-01,
         1.76302547e-01],
       [ 2.72034760e-01,  4.71673492e-02,  1.95230987e+00,
        -9.07944568e-02],
       ...,
       [ 1.66463847e+00, -6.76032418e-01,  5.04280379e-01,
         8.92697120e-04],
       [ 4.12990846e-01, -1.22480604e+00, -2.85179131e-01,
        -1.18512859e+00],
       [ 1.71092568e-01,  3.52952428e-01, -3.65781410e-01,
         2.58669442e+00]])