In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

In [2]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from keras.datasets import cifar10

In [3]:
def to_numpy(data_loader):
    """Drain a loader of ``(inputs, targets)`` batches into two NumPy arrays.

    Every item yielded by ``data_loader`` must unpack into two objects that
    expose a ``.numpy()`` method. Input batches are stacked with ``np.vstack``
    and target batches are joined with ``np.hstack``.

    Returns:
        tuple: ``(images, labels)`` as NumPy arrays.
    """
    # Walk the loader exactly once, converting each batch as it arrives.
    converted = [(inputs.numpy(), targets.numpy()) for inputs, targets in data_loader]
    images = np.vstack([batch_inputs for batch_inputs, _ in converted])
    labels = np.hstack([batch_targets for _, batch_targets in converted])
    return images, labels

In [4]:
import torch
import torchvision
import torchvision.transforms as transforms

# Convert each image to a tensor, then normalise every RGB channel with
# mean 0.5 and std 0.5, i.e. (x - 0.5) / 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Export to NumPy through dedicated loaders instead of `trainloader`:
#   * shuffle=False keeps the row order identical on every run, so later
#     seeded operations on `train_images` (e.g. a train/validation split)
#     are actually reproducible;
#   * a large batch size avoids iterating over thousands of 4-image batches.
# `trainloader` / `testloader` above are left untouched for any other use.
EXPORT_BATCH_SIZE = 1000
train_export_loader = torch.utils.data.DataLoader(
    trainset, batch_size=EXPORT_BATCH_SIZE, shuffle=False)
test_export_loader = torch.utils.data.DataLoader(
    testset, batch_size=EXPORT_BATCH_SIZE, shuffle=False)

# Flatten every 3x32x32 image into a 3072-element row vector.
train_images, train_labels = to_numpy(train_export_loader)
train_images = train_images.reshape(-1, 3072)
test_images, test_labels = to_numpy(test_export_loader)
test_images = test_images.reshape(-1, 3072)

Files already downloaded and verified
Files already downloaded and verified


## Neural Network without hyperparameter tuning

In [11]:
import numpy as np

class TwoLayerNN:
    """Fully connected classifier with one sigmoid hidden layer and a softmax output.

    All parameters are NumPy arrays stored on the instance. A forward pass
    caches its intermediate activations on the instance, and
    ``custom_backward_propagation`` consumes that cache to perform one
    gradient-descent step in place.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the parameters.

        Weights are drawn from a standard normal distribution scaled by 0.01;
        biases start at zero.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
        """
        self.weights_input_hidden = np.random.randn(input_size, hidden_size) * 0.01
        self.biases_input_hidden = np.zeros((1, hidden_size))
        self.weights_hidden_output = np.random.randn(hidden_size, output_size) * 0.01
        self.biases_hidden_output = np.zeros((1, output_size))

    def custom_sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

        ``exp`` is only evaluated on non-positive arguments, so inputs of
        large magnitude cannot trigger a floating-point overflow.
        """
        z = np.asarray(z)
        exp_neg_abs = np.exp(-np.abs(z))  # always in (0, 1]
        # For z >= 0 use 1 / (1 + e^-z); for z < 0 use the algebraically
        # equivalent e^z / (1 + e^z).
        return np.where(z >= 0, 1 / (1 + exp_neg_abs), exp_neg_abs / (1 + exp_neg_abs))

    def custom_softmax(self, z):
        """Return the row-wise softmax of the 2-D array ``z``."""
        # Subtracting the row maximum leaves the result unchanged but keeps
        # every exponent <= 0.
        max_z = np.max(z, axis=1, keepdims=True)
        exp_z = np.exp(z - max_z)
        exp_sum = np.sum(exp_z, axis=1, keepdims=True)
        return exp_z / exp_sum

    def custom_forward_propagation(self, X):
        """Run a forward pass and cache the intermediate results on ``self``.

        Args:
            X: Array of shape ``(n_samples, input_size)``.

        Returns:
            Array of shape ``(n_samples, output_size)`` holding the softmax
            output for every sample.
        """
        self.hidden_layer_input = np.dot(X, self.weights_input_hidden) + self.biases_input_hidden
        self.hidden_layer_activation = self.custom_sigmoid(self.hidden_layer_input)
        self.output_layer_input = np.dot(self.hidden_layer_activation, self.weights_hidden_output) + self.biases_hidden_output
        self.output_layer_activation = self.custom_softmax(self.output_layer_input)
        return self.output_layer_activation

    def custom_backward_propagation(self, X, Y, learning_rate):
        """Apply one gradient-descent step to all parameters, in place.

        The activations cached by the most recent
        ``custom_forward_propagation`` call are reused, so that call must
        have been made on the same ``X``.

        Args:
            X: Inputs of shape ``(n_samples, input_size)``.
            Y: Targets with the same shape as the network output
                (e.g. one-hot rows).
            learning_rate: Step size applied to every gradient.
        """
        m = X.shape[0]

        # Output layer: (softmax output - targets), averaged over the batch.
        output_error = self.output_layer_activation - Y
        hidden_layer_output_transposed = self.hidden_layer_activation.T
        d_weights_hidden_output = np.dot(hidden_layer_output_transposed, output_error) / m
        d_biases_hidden_output = np.sum(output_error, axis=0, keepdims=True) / m

        # Hidden layer: propagate the error back through the output weights
        # (before they are updated) and the sigmoid derivative a * (1 - a).
        output_error_hidden = np.dot(output_error, self.weights_hidden_output.T)
        sigmoid_derivative = self.hidden_layer_activation * (1 - self.hidden_layer_activation)
        hidden_layer_error = output_error_hidden * sigmoid_derivative
        input_data_transposed = X.T
        d_weights_input_hidden = np.dot(input_data_transposed, hidden_layer_error) / m
        d_biases_input_hidden = np.sum(hidden_layer_error, axis=0, keepdims=True) / m

        # Gradient-descent update of every parameter.
        self.weights_hidden_output -= learning_rate * d_weights_hidden_output
        self.biases_hidden_output -= learning_rate * d_biases_hidden_output
        self.weights_input_hidden -= learning_rate * d_weights_input_hidden
        self.biases_input_hidden -= learning_rate * d_biases_input_hidden

In [12]:

# Train the baseline network with full-batch gradient descent, then evaluate it.
input_size = 32 * 32 * 3
hidden_size = 128
output_size = 10
net = TwoLayerNN(input_size, hidden_size, output_size)

learning_rate = 0.01
epochs = 30

# The labels never change between epochs, so the sample count and the one-hot
# target matrix are built once here rather than on every iteration.
m = train_images.shape[0]
train_targets_one_hot = np.eye(output_size)[train_labels]
# Lower bound applied to probabilities before log(): a probability that
# underflows to exactly 0 would otherwise give 0 * -inf = nan in the loss.
log_epsilon = 1e-12

for epoch in range(epochs):
    # Forward pass
    train_output = net.custom_forward_propagation(train_images)

    # Compute loss (mean cross-entropy over the training set)
    loss = -np.sum(train_targets_one_hot * np.log(np.clip(train_output, log_epsilon, 1.0))) / m

    # Compute training accuracy
    train_predictions = np.argmax(train_output, axis=1)
    train_accuracy = np.mean(train_predictions == train_labels) * 100

    # Backward pass (reuses the activations cached by the forward pass above)
    net.custom_backward_propagation(train_images, train_targets_one_hot, learning_rate)

    print(f'Epoch {epoch+1}/{epochs}, Loss: {loss}, Training Accuracy: {train_accuracy:.2f}%')

# Test the network
test_output = net.custom_forward_propagation(test_images)
test_predictions = np.argmax(test_output, axis=1)
test_accuracy = np.mean(test_predictions == test_labels) * 100
print(f'Test Accuracy: {test_accuracy:.2f}%')


Epoch 1/30, Loss: 2.3030829168404936, Training Accuracy: 10.00%
Epoch 2/30, Loss: 2.3029045864193063, Training Accuracy: 10.00%
Epoch 3/30, Loss: 2.30273255223312, Training Accuracy: 10.00%
Epoch 4/30, Loss: 2.3025663781042875, Training Accuracy: 10.00%
Epoch 5/30, Loss: 2.3024056579999073, Training Accuracy: 10.00%
Epoch 6/30, Loss: 2.302250013831455, Training Accuracy: 10.00%
Epoch 7/30, Loss: 2.302099093426249, Training Accuracy: 10.00%
Epoch 8/30, Loss: 2.3019525686560267, Training Accuracy: 10.00%
Epoch 9/30, Loss: 2.3018101337092314, Training Accuracy: 10.00%
Epoch 10/30, Loss: 2.301671503494984, Training Accuracy: 10.00%
Epoch 11/30, Loss: 2.301536412167784, Training Accuracy: 10.00%
Epoch 12/30, Loss: 2.3014046117630094, Training Accuracy: 10.00%
Epoch 13/30, Loss: 2.3012758709342394, Training Accuracy: 10.00%
Epoch 14/30, Loss: 2.3011499737841437, Training Accuracy: 10.00%
Epoch 15/30, Loss: 2.3010267187815328, Training Accuracy: 10.00%
Epoch 16/30, Loss: 2.3009059177577145, T

## Neural Network with L2 regularisation & Hyperparameter tuning

In [13]:
# Hold out 20% of the training arrays as a validation set; random_state pins
# the seed used by the split.
X_train, X_val, y_train, y_val = train_test_split(
    train_images,
    train_labels,
    test_size=0.20,
    random_state=42,
)

In [14]:
class CustomTwoLayerNNWithRegularisation:
    """Sigmoid-hidden / softmax-output classifier trained with an L2 weight penalty.

    All parameters are NumPy arrays stored on the instance. A forward pass
    caches its intermediate activations on the instance, and
    ``custom_backward_propagation_regularisation`` consumes that cache to
    perform one regularised gradient-descent step in place.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the parameters.

        Weights are drawn from a standard normal distribution scaled by 0.01;
        biases start at zero.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
        """
        self.weights_input_hidden = np.random.randn(input_size, hidden_size) * 0.01
        self.biases_input_hidden = np.zeros((1, hidden_size))
        self.weights_hidden_output = np.random.randn(hidden_size, output_size) * 0.01
        self.biases_hidden_output = np.zeros((1, output_size))

    def custom_sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

        ``exp`` is only evaluated on non-positive arguments, so inputs of
        large magnitude cannot trigger a floating-point overflow.
        """
        z = np.asarray(z)
        exp_neg_abs = np.exp(-np.abs(z))  # always in (0, 1]
        # For z >= 0 use 1 / (1 + e^-z); for z < 0 use the algebraically
        # equivalent e^z / (1 + e^z).
        return np.where(z >= 0, 1 / (1 + exp_neg_abs), exp_neg_abs / (1 + exp_neg_abs))

    def custom_softmax(self, z):
        """Return the row-wise softmax of the 2-D array ``z``."""
        # Subtracting the row maximum leaves the result unchanged but keeps
        # every exponent <= 0.
        max_z = np.max(z, axis=1, keepdims=True)
        exp_z = np.exp(z - max_z)
        exp_sum = np.sum(exp_z, axis=1, keepdims=True)
        return exp_z / exp_sum

    def custom_forward_propagation(self, X):
        """Run a forward pass and cache the intermediate results on ``self``.

        Args:
            X: Array of shape ``(n_samples, input_size)``.

        Returns:
            Array of shape ``(n_samples, output_size)`` holding the softmax
            output for every sample.
        """
        self.hidden_layer_input = np.dot(X, self.weights_input_hidden) + self.biases_input_hidden
        self.hidden_layer_activation = self.custom_sigmoid(self.hidden_layer_input)
        self.output_layer_input = np.dot(self.hidden_layer_activation, self.weights_hidden_output) + self.biases_hidden_output
        self.output_layer_activation = self.custom_softmax(self.output_layer_input)
        return self.output_layer_activation

    def custom_backward_propagation_regularisation(self, X, Y, learning_rate, regularization_strength):
        """Apply one L2-regularised gradient-descent step to all parameters, in place.

        The activations cached by the most recent
        ``custom_forward_propagation`` call are reused, so that call must
        have been made on the same ``X``. The penalty term
        ``regularization_strength * W`` is added to both weight gradients
        before they are divided by the batch size; biases are not penalised.

        Args:
            X: Inputs of shape ``(n_samples, input_size)``.
            Y: Targets with the same shape as the network output
                (e.g. one-hot rows).
            learning_rate: Step size applied to every gradient.
            regularization_strength: Coefficient of the L2 weight penalty.
        """
        m = X.shape[0]

        # Output layer: data gradient plus L2 term, averaged over the batch.
        output_error = self.output_layer_activation - Y
        hidden_layer_output_transposed = self.hidden_layer_activation.T
        d_weights_hidden_output = (np.dot(hidden_layer_output_transposed, output_error) + regularization_strength * self.weights_hidden_output) / m
        d_biases_hidden_output = np.sum(output_error, axis=0, keepdims=True) / m

        # Hidden layer: propagate the error back through the output weights
        # (before they are updated) and the sigmoid derivative a * (1 - a).
        output_error_hidden = np.dot(output_error, self.weights_hidden_output.T)
        sigmoid_derivative = self.hidden_layer_activation * (1 - self.hidden_layer_activation)
        hidden_layer_error = output_error_hidden * sigmoid_derivative
        input_data_transposed = X.T
        d_weights_input_hidden = (np.dot(input_data_transposed, hidden_layer_error) + regularization_strength * self.weights_input_hidden) / m
        d_biases_input_hidden = np.sum(hidden_layer_error, axis=0, keepdims=True) / m

        # Gradient-descent update of every parameter.
        self.weights_hidden_output -= learning_rate * d_weights_hidden_output
        self.biases_hidden_output -= learning_rate * d_biases_hidden_output
        self.weights_input_hidden -= learning_rate * d_weights_input_hidden
        self.biases_input_hidden -= learning_rate * d_biases_input_hidden


In [15]:
# Grid search over learning rate and L2 strength with early stopping.
# Model selection is driven by VALIDATION accuracy; the test set is only used
# to report the performance of each trained network, never to choose one.
best_net = None
best_test_accuracy = 0
best_hyperparams = {'learning_rate': None, 'regularization_strength': None}
# Validation accuracy of the currently selected configuration.
best_selection_val_accuracy = -np.inf
input_size = 32 * 32 * 3
hidden_size = 128
output_size = 10
learning_rates = [0.01, 0.1, 0.2, 0.3, 0.5]
regularization_strengths = [0.0000001, 0.000001, 0.00001]

epochs = 30
patience = 5  # Number of epochs to wait before early stopping

# The training labels are fixed, so build the one-hot targets once for the
# whole search instead of twice per epoch.
y_train_one_hot = np.eye(output_size)[y_train]
# Lower bound applied to probabilities before log(): a probability that
# underflows to exactly 0 would otherwise give 0 * -inf = nan in the loss.
log_epsilon = 1e-12

for lr in learning_rates:
    for reg_strength in regularization_strengths:
        print(f'Training with learning rate: {lr}, regularization strength: {reg_strength}')

        # Initialize the neural network
        net = CustomTwoLayerNNWithRegularisation(input_size, hidden_size, output_size)

        # Initialize variables for early stopping
        best_val_accuracy = 0
        epochs_without_improvement = 0
        val_accuracy = 0.0  # validation accuracy of the network's current state

        # Train the network
        for epoch in range(epochs):
            # Forward pass
            train_output = net.custom_forward_propagation(X_train)

            # Compute loss (mean cross-entropy; the L2 penalty is not included)
            loss = -np.sum(y_train_one_hot * np.log(np.clip(train_output, log_epsilon, 1.0))) / X_train.shape[0]

            # Compute training accuracy
            train_predictions = np.argmax(train_output, axis=1)
            train_accuracy = np.mean(train_predictions == y_train) * 100
            print(f'Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}, Training Accuracy: {train_accuracy:.2f}%')

            # Backward pass with L2 regularization. This must run before the
            # validation forward pass below, which overwrites the cached
            # training activations.
            net.custom_backward_propagation_regularisation(X_train, y_train_one_hot, lr, reg_strength)

            # Calculate validation accuracy
            val_output = net.custom_forward_propagation(X_val)
            val_predictions = np.argmax(val_output, axis=1)
            val_accuracy = np.mean(val_predictions == y_val) * 100
            print(f'Validation Accuracy: {val_accuracy:.2f}%')

            # Check for early stopping
            if val_accuracy > best_val_accuracy:
                best_val_accuracy = val_accuracy
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1

            if epochs_without_improvement >= patience:
                print(f'Early stopping at epoch {epoch+1} due to no improvement in validation accuracy.')
                break

        # Calculate test accuracy after all epochs (reported only)
        test_output = net.custom_forward_propagation(test_images)
        test_predictions = np.argmax(test_output, axis=1)
        test_accuracy = np.mean(test_predictions == test_labels) * 100
        print(f'Final Test Accuracy: {test_accuracy:.2f}%')

        # Update best hyperparameters and best model. The comparison uses the
        # validation accuracy of the network as it stands after training, so
        # the test set does not influence which configuration is chosen.
        if val_accuracy > best_selection_val_accuracy:
            best_selection_val_accuracy = val_accuracy
            best_test_accuracy = test_accuracy
            best_hyperparams['learning_rate'] = lr
            best_hyperparams['regularization_strength'] = reg_strength
            best_net = net

print(f'Best hyperparameters: Learning Rate = {best_hyperparams["learning_rate"]}, Regularization Strength = {best_hyperparams["regularization_strength"]}, Test Accuracy = {best_test_accuracy:.2f}%')

Training with learning rate: 0.01, regularization strength: 1e-07
Epoch 1/30, Loss: 2.3057, Training Accuracy: 10.14%
Validation Accuracy: 9.44%
Epoch 2/30, Loss: 2.3054, Training Accuracy: 10.14%
Validation Accuracy: 9.44%
Epoch 3/30, Loss: 2.3051, Training Accuracy: 10.14%
Validation Accuracy: 9.44%
Epoch 4/30, Loss: 2.3049, Training Accuracy: 10.14%
Validation Accuracy: 9.44%
Epoch 5/30, Loss: 2.3046, Training Accuracy: 10.14%
Validation Accuracy: 9.44%
Epoch 6/30, Loss: 2.3044, Training Accuracy: 10.14%
Validation Accuracy: 9.44%
Early stopping at epoch 6 due to no improvement in validation accuracy.
Final Test Accuracy: 10.00%
Training with learning rate: 0.01, regularization strength: 1e-06
Epoch 1/30, Loss: 2.3034, Training Accuracy: 10.03%
Validation Accuracy: 9.86%
Epoch 2/30, Loss: 2.3032, Training Accuracy: 10.03%
Validation Accuracy: 9.86%
Epoch 3/30, Loss: 2.3031, Training Accuracy: 10.04%
Validation Accuracy: 9.86%
Epoch 4/30, Loss: 2.3029, Training Accuracy: 10.03%
Valid