# Lesson 5 - Assignment

In this assignment, you will implement a Support Vector Machine Classifier  from scratch and compare the results to existing sklearn algorithm. 

In [49]:
# import packages
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from matplotlib.legend_handler import HandlerLine2D
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# make this notebook's output stable across runs
np.random.seed(0)

In [50]:
# This comes from AssemblyAI series on ML from scratch
# https://www.youtube.com/playlist?list=PLcWfeUsAys2k_xub3mHks85sBHZvg24Jd
# used this reference code to help understand and debug my implementation
class SupportVectorMachine:

    def __init__(self, learning_rate=0.001, lambda_param=0.01, n_iters=1000) -> None:
        self.lr = learning_rate
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.w = None   #weights
        self.b = None   #biases

    def fit(self, X: np.ndarray, y:np.ndarray):
        n_samples, n_features = X.shape
        y_ = np.where(y <= 0, -1, 1)

        # init the weights
        self.w = np.zeros(n_features)  # Again it would better to randomly initialize the weights
        self.b = 0

        for _ in range(self.n_iters):
            for idx, x_i in enumerate(X):
                condition = y_[idx] * (np.dot(x_i, self.w) - self.b) >= 1
                if condition:
                    self.w -= self.lr * (2 * self.lambda_param * self.w)
                else:
                    self.w -= self.lr * (2 * self.lambda_param * self.w - np.dot(x_i, y_[idx]))
                    self.b -= self.lr * y_[idx]

    def predict(self, X: np.ndarray):
        approx = np.dot(X, self.w) - self.b
        return np.sign(approx)

Question 1.1: Implement the cost function cost/objective function:

<img src="https://miro.medium.com/max/688/1*JAS6rUTO7TDlrv4XZSMbsA.png" alt="drawing" width="600"/>


In [51]:
# implement the cost/objective function for Support Vector Machine
# reg_strength is C in the above equation
def compute_cost(W, X, Y, regularization_strength=1000):
    # calculate hinge loss
    N = X.shape[0] # number of training examples
    distances = 1 - Y * (np.dot(W, X.T).flatten())
    distances[distances < 0] = 0  # convert anything less than 0 to 0

    hinge_loss = regularization_strength * (np.sum(distances) / N)

    # calculate cost
    cost = 0.5 * np.dot(W, W)**2 + hinge_loss
    return cost

def compute_cost2(W, X, Y, regularization_strength=1000):
    w_norm = 0.5*np.dot(W, W)**2
    offsets = Y * (np.dot(W, X.T).flatten())
    smax = np.clip(a=1 - offsets, a_min=0.0, a_max=np.inf)
    hinge_loss = regularization_strength * np.sum(smax)/len(smax)
    cost = w_norm + hinge_loss
    return cost

Question 1.2: Write a method that calculate the cost gradient:

<img src="https://miro.medium.com/max/866/1*ww3F21VMVGp2NKhm0VTesA.png" alt="drawing" width="600"/>

In [52]:
def calculate_cost_gradient(W, X_batch, Y_batch, regularization_strength=1000):

   dw = np.zeros(len(W))
   N = len(Y_batch)
   for x, y in zip(X_batch, Y_batch):
       hinge_loss = np.max((0, 1.0 - np.dot(W.T, x)))
       if hinge_loss == 0:
           dw = W
       else:
           dw = W - (regularization_strength * y * x)

   dw = dw/N  # average
   return dw

def calculate_cost_gradient2(W, X_batch, Y_batch, regularization_strength=1000):
   dw = np.zeros(W.shape[0])
   for x, y in zip(X_batch, Y_batch):
       hinge_loss = np.max((0, 1.0 - np.dot(W.T, x)*y))
       if hinge_loss == 0:
           dw += W
       else:
           dw += W - (regularization_strength * y * x)
   return dw/len(Y_batch)  # average

Question 1.3: Write a method that performs stochastic Gradient descent as follows:
- Calculate the gradient of cost function i.e. ∇J(w)
- Update the weights in the opposite direction to the gradient: w = w — ∝(∇J(w))
- Repeat until conversion or until 5000 epochs are reached

In [53]:
def sgd(data, outputs, learning_rate = 0.0001, max_epochs = 5000):
    print("Training using Stochastic Gradient Descent 2 method")
    weights = np.zeros(data.shape[1])
    nth = 0
    prev_cost = float("inf")
    cost_threshold = 0.01  # in percent
    # stochastic gradient descent
    for epoch in range(1, max_epochs):
        # shuffle to prevent repeating update cycles
        X, Y = shuffle(data, outputs)
        #for ind, x in enumerate(X):
        for x_chunk, y_chunk in zip(np.array_split(X, 100), np.array_split(Y, 100)):
            ascent = calculate_cost_gradient(weights, x_chunk, y_chunk)
            weights = weights - learning_rate * ascent
            # convergence check on 2^nth epoch
            if epoch == 2 ** nth or epoch == max_epochs - 1:
                cost = compute_cost(weights, data, outputs)
                print("Epoch is:{} and Cost is: {}".format(epoch, cost))
                # stoppage criterion
                if abs(prev_cost - cost) < cost_threshold * prev_cost:
                    return weights
                prev_cost = cost
                nth += 1
               
    return weights

def sgd2(data, outputs, learning_rate = 0.0001, max_epochs = 5000):
   print("Training using Stochastic Gradient Descent 2 method")
   weights = np.random.uniform(-1.0, 1.0, data.shape[1])
   nth = 1
   prev_cost = 0.0
   cost_threshold = 0.01  # in percent
   # stochastic gradient descent
   for epoch in range(1, max_epochs):
      X, Y = shuffle(data, outputs)
      for x_chunk, y_chunk in zip(np.array_split(X, 100), np.array_split(Y, 100)):
         ascent = calculate_cost_gradient(weights, x_chunk, y_chunk)
         weights = weights - learning_rate * ascent

      if epoch == 2 ** nth:
         cost = compute_cost(weights, X, Y)
         print(f"Epoch is:{epoch} and Cost is: {cost}")
         # stoppage criterion
         if epoch == max_epochs - 1:
            return weights
         prev_cost = cost
         nth += 1
   
   return weights

# Dataset

In [54]:
data = pd.read_csv('data_banknote_authentication.csv')

Y = data.iloc[:, -1]*2-1
X = data.iloc[:, 1:4]
X.insert(loc=len(X.columns), column='intercept', value=1)

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.4, random_state=42)

Question 4: Train and evaluate an SVC using the banknote_authentication data

In [55]:
# train the model
print("training started...")
W = sgd(X_train.to_numpy(), y_train.to_numpy())
print("training finished.")
print("weights are: {}".format(W))

# testing the model on test set
y_test_predicted = []
for i in range(X_test.shape[0]):
    yp = np.sign(np.dot(X_test.to_numpy()[i], W))
    y_test_predicted = np.append(y_test_predicted, yp)
print("accuracy on test dataset: {}".format(accuracy_score(y_test.to_numpy(), y_test_predicted)))

training started...
Training using Stochastic Gradient Descent 2 method
Epoch is:1 and Cost is: 760.9746869905896
Epoch is:2 and Cost is: 3002.8950286788067
Epoch is:4 and Cost is: 9351.74411788382
Epoch is:8 and Cost is: 63518.81816118541
Epoch is:16 and Cost is: 929557.8688096321
Epoch is:32 and Cost is: 19636087.966861937
Epoch is:64 and Cost is: 286094655.6427475
Epoch is:128 and Cost is: 3770759755.5630956
Epoch is:256 and Cost is: 49743003231.08576
Epoch is:512 and Cost is: 456000677754.7444
Epoch is:1024 and Cost is: 2649797901203.2236
Epoch is:2048 and Cost is: 6975087033201.291
Epoch is:4096 and Cost is: 9810148505873.498
Epoch is:4999 and Cost is: 9883564727670.102
training finished.
weights are: [-2070.40506174  -315.85191267   109.86784152  -218.20410039]
accuracy on test dataset: 0.6502732240437158


In [44]:
def accuracy(y_true, y_pred):
    '''
    Define a function to test accuracy of the model
    '''
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy

# This is using the reference code from AssemblyAI series on ML from scratch
clf = SupportVectorMachine()
clf.fit(X_train.to_numpy(), y_train.to_numpy())
predictions = clf.predict(X_test)
print (type(predictions))

accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

<class 'numpy.ndarray'>
Accuracy: 0.8214936247723132


[Bonus] Question 5: Train and evaluate an SKLEARN SVC model, and compare the results to your model 

In [45]:
# Train a Support Vector Machine model from Sklearn using the same data as inputs
from sklearn.svm import SVC
#clf = SVC(kernel='linear')
clf = SVC(kernel='rbf') # I tried all different kernels and rbf gave the best accuracy
clf.fit(X_train, y_train)
y_test_predicted = clf.predict(X_test)
print("accuracy on test dataset using sklearn: {}".format(accuracy_score(y_test.to_numpy(), y_test_predicted)))


accuracy on test dataset using sklearn: 0.9016393442622951


Question 6: Create a new text cell in your Notebook: Complete a 50-100 word summary (or short description of your thinking in applying this week's learning to the solution) of your experience in this assignment. Include: What was your incoming experience with this model, if any? what steps you took, what obstacles you encountered. how you link this exercise to real-world, machine learning problem-solving. (What steps were missing? What else do you need to learn?) This summary allows your instructor to know how you are doing and allot points for your effort in thinking and planning, and making connections to real-world work.