In [1]:
import numpy as np
import pandas as pd

# WORKING OF MULTI LAYER SVM LOSS  WITH RELU ACTIVATION FUNC

In [2]:
class MultiLayerSVM:
    def __init__(self, num_layers, neurons_per_layer):
        self.num_layers = num_layers
        self.neurons_per_layer = neurons_per_layer
        self.weights = []
        self.biases = []
        self.LR = None
        self.num_iters = None
        self.X = None
        self.Y = None
        self.forward_logits = []
        self.lderivatives = []
    
    def relu(self, x):
        return np.maximum(0, x)

    def relu_derivative(self, x):
        if x > 0:
            return 1
        else:
            return 0
        
    def relu_backward(self, dout, derivative):
        return dout * derivative

    def forward_pass(self,X,W,b):
        output = np.dot(X,W) + b
        ldx = W
        ldw = X
        ldb = np.ones_like(b)
        return output, ldx, ldw, ldb
    
    def affine_backward_pass(self,ldx,ldw,ldb,ud):
        dx = np.dot(ud,ldx.T)
        dw = np.dot(ud.T,ldw)
        db = np.sum(ud,axis=0)
        return dx, dw, db
    
    def SVMLoss(self, logits):
        correct_scores = np.sum(logits * self.Y, axis=1, keepdims=True)
        diff = np.maximum(0, logits - correct_scores + 1.5)  
        diff[np.arange(logits.shape[0]), np.argmax(self.Y, axis=1)] = 0
        
        loss = np.sum(diff)
        
        gradient = np.zeros_like(logits)
        gradient[diff > 0] = 1
        gradient[np.arange(logits.shape[0]), np.argmax(self.Y, axis=1)] = -np.sum(gradient, axis=1)
        gradient /= logits.shape[0]
        
        return loss, gradient

    def forward_pass(self):
        for i in range(self.num_layers):
            logits, ldx, ldw, ldb = self.forward_pass(self.X, self.weights[i], self.biases[i])
            if i != self.num_layers-1:
                logits = self.relu(logits)
                ldx = self.relu_derivative(logits)
            self.forward_logits.append(logits)
            self.lderivatives.append((ldx,ldw,ldb))
        return logits
    
    def backward_pass(self, loss_derivative):
        ud = loss_derivative
        i = self.num_layers-1
        while i > -1:
            ldx,ldw,ldb = self.lderivatives[i]
            if i != 0 and i != self.num_layers-1:
                ud = self.relu_backward(ud,ldw)
            ud, dW, db = self.affine_backward_pass(ldx,ldw,ldb,ud)
            self.weights[i] -= self.LR * dW.reshape(self.weights[i].shape)
            self.biases[i] -= self.LR * db
            i = i - 1
    
    def Train(self, X, Y, alpha=0.01, max_iterations=100, printer=10):
        self.X = X
        self.Y = Y
        self.LR = alpha
        self.num_iters = max_iterations

        num_classes = Y.shape[1]
        num_features = X.shape[1]
        
        for i in range(self.num_layers):
            if i == self.num_layers - 1:
                self.weights.append(0.01 * np.random.randn(num_features, num_classes))
            else:
                self.weights.append(0.01 * np.random.randn(num_features, self.neurons_per_layer[i]))
            self.biases.append(np.zeros((1, self.neurons_per_layer[i])))
            num_features = self.neurons_per_layer[i]

        print("Initial Weights =", self.weights)
        print("Initial Biases =", self.biases)

        for i in range(self.num_iters):
            logits = self.forward_pass()
            loss, ud = self.SVMLoss(logits)
            self.backward_pass(ud)
            if i % printer == 0:
                print("Iteration {}: Loss {}".format(i, loss))


Dataset Loading

In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelBinarizer
import numpy as np

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Normalize the features
scaler = StandardScaler()
X_normalized = scaler.fit_transform(X)

# Convert labels to one-hot encoded vectors
encoder = LabelBinarizer()
y_one_hot = encoder.fit_transform(y)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_normalized, y_one_hot, test_size=0.2, random_state=42)


num_layers = 1
num_classes = y_train.shape[1]  
neurons_per_layer = [num_classes]
model = class MultiLayerSVM(num_layers, neurons_per_layer)

alpha = 0.01
max_iterations = 1000
printer = 100

# Train the model
model.Train(X_train, y_train, alpha, max_iterations, printer)


Initial Weights = [array([[ 0.00315003, -0.00746229,  0.00298828],
       [ 0.00661808, -0.01312527,  0.00067078],
       [-0.01306129, -0.0049745 , -0.02721426],
       [-0.00309587,  0.01137068, -0.00050199]])]
Initial Biases = [array([[0., 0., 0.]])]
Iteration 0: Loss 359.1599899850129
Iteration 100: Loss 270.2401529965622
Iteration 200: Loss 264.45365357166213
Iteration 300: Loss 262.3877517536679
Iteration 400: Loss 254.07195151527853
Iteration 500: Loss 243.1249106483762
Iteration 600: Loss 226.47654371026212
Iteration 700: Loss 207.58190050330668
Iteration 800: Loss 190.08942241763924
Iteration 900: Loss 172.8547695382083
