In [5]:
""" 
dependencies:
  - python=3.8.17
  - numpy=1.24.0
  - matplotlib=3.7.1
  - pandas=2.0.2 
"""
import os
import random
import datetime
from itertools import product 

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 

# import random
# random.seed(42)
# np.random.seed(42)
# np.random.RandomState(42)
# os.environ['TF_DETERMINISTIC_OPS'] = '1' 

finish_sound = "afplay /Users/mehmet/Documents/vs-code/winsquare.mp3"
# play sound when finished
# os.system(finish_sound)

In [6]:
# Read data from npy file ( already preprocessed )
X_train = np.load('dataset/numpy-arrays/X_train.npy')
X_val = np.load('dataset/numpy-arrays/X_val.npy')
X_test = np.load('dataset/numpy-arrays/X_test.npy')
y_train = np.load('dataset/numpy-arrays/y_train.npy')
y_val = np.load('dataset/numpy-arrays/y_val.npy')
y_test = np.load('dataset/numpy-arrays/y_test.npy')

# # Add Bias to X
# X_train = np.concatenate((np.ones((X_train.shape[0], 1)), X_train), axis=1)
# X_val = np.concatenate((np.ones((X_val.shape[0], 1)), X_val), axis=1)
# X_test = np.concatenate((np.ones((X_test.shape[0], 1)), X_test), axis=1)
# X_train.shape, X_val.shape, X_test.shape

# Remove one hot encoding from y
y_train = np.argmax(y_train, axis=1)
y_val = np.argmax(y_val, axis=1)
y_test = np.argmax(y_test, axis=1)

print(X_train.shape, y_train.shape,'\n', X_val.shape, y_val.shape,'\n', X_test.shape, y_test.shape)



(5120, 10859) (5120,) 
 (640, 10859) (640,) 
 (640, 10859) (640,)


In [7]:
class SVM:
    def __init__(self):
        self.weights = None
        self.bias = None

    def one_hot_encode(self, y, num_classes):
        y_one_hot = np.zeros((len(y), num_classes))
        y_one_hot[np.arange(len(y)), y] = 1
        return y_one_hot

    def hinge_loss(self, scores, correct_class):
        margins = np.maximum(0, scores - scores[correct_class] + 1)
        margins[correct_class] = 0
        loss = np.sum(margins)
        return loss

    def fit(self, X, y, learning_rate=0.01, lambda_param=0.01, num_epochs=1000):
        num_samples, num_features = X.shape
        num_classes = len(np.unique(y))

        self.weights = np.random.rand(num_features, num_classes)
        self.bias = np.zeros(num_classes)

        y_encoded = y
        y = np.argmax(self.one_hot_encode(y, num_classes), axis=1)

        for epoch in range(num_epochs):
            scores = X.dot(self.weights) + self.bias
            correct_class_mask = (np.arange(num_samples), y)

            margins = np.maximum(0, scores - scores[correct_class_mask][:, np.newaxis] + 1)
            margins[correct_class_mask] = 0

            loss = np.sum(margins)

            grad_mask = (margins > 0).astype(float)
            grad_mask[correct_class_mask] = -np.sum(grad_mask, axis=1)

            self.weights -= learning_rate * (X.T.dot(grad_mask) / num_samples + lambda_param * self.weights)
            self.bias -= learning_rate * np.sum(grad_mask, axis=0) / num_samples
            
            # For each 100 epochs print losses and accuracy
            if epoch % 100 == 0:
                # how to calculate accuracy
                predictions = self.predict(X)
                accuracy = np.mean(predictions == y)         
                print(f'Epoch: {epoch}, Loss: {loss}, Accuracy: {accuracy}')

    def predict(self, X):
        scores = X.dot(self.weights) + self.bias
        predictions = np.argmax(scores, axis=1)
        return predictions


In [10]:
model = SVM()
model.fit(X_train, y_train,
        learning_rate=0.1, lambda_param=0.01, num_epochs=1000)

# Val predictions
predictions = model.predict(X_val)
print('Val. Accuracy:',np.mean(predictions == y_val))

# Test predictions
predictions = model.predict(X_test)
print('Test Accuracy:',np.mean(predictions == y_test))


Epoch: 0, Loss: 236098.02074514746, Accuracy: 0.319140625
Epoch: 100, Loss: 67563.8051520372, Accuracy: 0.6787109375
Epoch: 200, Loss: 3699.0453832698927, Accuracy: 0.9435546875
Epoch: 300, Loss: 423.5297828278527, Accuracy: 0.98359375
Epoch: 400, Loss: 26.068458399992892, Accuracy: 0.9994140625
Epoch: 500, Loss: 0.0, Accuracy: 1.0
Epoch: 600, Loss: 0.0, Accuracy: 1.0
Epoch: 700, Loss: 1.021893126245267, Accuracy: 1.0
Epoch: 800, Loss: 0.0, Accuracy: 1.0
Epoch: 900, Loss: 758.8346585827685, Accuracy: 0.9796875
Val. Accuracy: 0.815625
Test Accuracy: 0.81875


In [23]:
X = X_train
y = y_train

# Initialize the weights (zero)

W = np.zeros((X.shape[1], 1))

num_train = X.shape[0]
delta = 1.0

# Compute for the scores
scores = X.dot(W)

# Record the score of the example's correct class
correct_class_score = scores[np.arange(num_train), y]

# Compute for the margin by getting the max between 0 and the computed expression
margins = np.maximum(0, scores - correct_class_score[:,np.newaxis] + delta)
margins[np.arange(num_train), y] = 0

# Add all the losses together
loss = np.sum(margins)
# Divide the loss all over the number of training examples
loss /= num_train
# Regularize
reg = 1e-3
loss += 0.5 * reg * np.sum(W * W)

# This mask can flag the examples in which their margin is greater than 0
X_mask = np.zeros(margins.shape)
X_mask[margins > 0] = 1

# As usual, we count the number of these examples where margin > 0
count = np.sum(X_mask,axis=1)
X_mask[np.arange(num_train),y] = -count

dW = X.T.dot(X_mask)

# Divide the gradient all over the number of training examples
dW /= num_train

# Regularize
dW += reg*W

IndexError: index 1 is out of bounds for axis 1 with size 1

In [8]:
model = SVM()
model.fit(X_train, y_train)


ValueError: shapes (10859,) and (4,) not aligned: 10859 (dim 0) != 4 (dim 0)

In [None]:
predictions = model.predict(X_test)
print("SVM classification accuracy:", model.accuracy(y_test, predictions))