In [None]:
# Importing necessary libraries and modules

import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
import pandas as pd
import numpy as np
from scipy.spatial import distance

import torch
from torchvision import datasets
import torchvision.transforms as transforms
from torchvision.models import (
    resnet34,
    ResNet34_Weights,
    resnet18,
    ResNet18_Weights,
    vgg11,
    VGG11_Weights,
)

from sklearn.metrics import confusion_matrix, f1_score, accuracy_score
from tqdm.notebook import tqdm
import seaborn as sb

from copy import deepcopy

# Apply seaborn's theme with its default settings (no arguments are passed),
# so any plots the notebook draws share one consistent look.
sb.set_theme()

In [None]:
# Pick the compute device once: the GPU when PyTorch can see a CUDA device,
# otherwise the CPU. The layer code below places its parameters on `device`.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

### Load Data

In [None]:
# Preprocessing pipeline applied to every image when it is read from a dataset.
transform = transforms.Compose(
    [
        # An integer size rescales the shorter edge to 256 pixels and keeps the
        # aspect ratio, so a square input image comes out as 256x256.
        transforms.Resize(256),
        # Keep only the central 224x224 region.
        transforms.CenterCrop(224),
        # Convert the image to a tensor.
        transforms.ToTensor(),
        # Per-channel normalization, (x - mean) / std. These values match the
        # ImageNet statistics listed in the torchvision model documentation.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
        ),
    ]
)

In [None]:
# Build the CIFAR-10 training split first, then the test split. Both are
# rooted at ./data, request a download, and share the preprocessing pipeline
# defined above.
train_data, test_data = (
    datasets.CIFAR10(root="data", train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

In [None]:
# Carve a validation set out of the training data: 85% stays for training and
# 15% is held out. A dedicated generator seeded with 1 makes the split
# repeatable across runs.
train_subset, val_subset = torch.utils.data.random_split(
    dataset=train_data,
    lengths=[0.85, 0.15],
    generator=torch.Generator().manual_seed(1),
)

### Dense (Fully Connected) Layer

In [None]:
class Dense:
    """Fully connected layer computing ``inputs @ weights + biases``.

    Gradients are derived by hand rather than through autograd. ``forward``
    and ``backward`` return nothing; they publish their results as attributes
    on the instance (``output``, ``weights_gradient``, ``biases_gradient``,
    ``inputs_gradient`` and ``b_output``).
    """

    def __init__(self, n_inputs, n_neurons):
        """Create the parameters for a layer mapping n_inputs to n_neurons.

        Weights have shape (n_inputs, n_neurons) and biases (1, n_neurons);
        both are moved to the notebook-level ``device``.
        """
        self.n_inputs = n_inputs
        self.n_neurons = n_neurons

        # He heuristic: standard-normal draws scaled by sqrt(2 / n_inputs).
        normal_draws = torch.randn(n_inputs, n_neurons)
        he_scale = torch.sqrt(torch.tensor(2.0 / n_inputs))
        self.weights = (normal_draws * he_scale).to(device)

        # Biases start at zero, kept as a single row.
        self.biases = torch.zeros(1, n_neurons).to(device)

    def forward(self, inputs):
        """Cache ``inputs`` and store the affine output in ``self.output``."""
        # The inputs are kept because backward() needs them for the weight gradient.
        self.inputs = inputs
        self.output = self.inputs @ self.weights + self.biases

    def backward(self, b_input):
        """Compute gradients from ``b_input``, the gradient w.r.t. this layer's output.

        Must be called after ``forward``, since it reads ``self.inputs``.
        """
        # dL/dW = X^T . dL/dY
        self.weights_gradient = self.inputs.T @ b_input
        # dL/db: add the upstream gradient over dim 0, keeping a leading axis of size 1.
        self.biases_gradient = b_input.sum(dim=0, keepdim=True)
        # dL/dX = dL/dY . W^T
        self.inputs_gradient = b_input @ self.weights.T

        # The gradient handed to the previous layer is the same tensor object.
        self.b_output = self.inputs_gradient

### Activation Functions

In [None]:
class ReLU:
    """Rectified linear unit activation with a hand-written backward pass.

    ``forward`` stores ``max(0, x)`` element-wise in ``self.output``.
    ``backward`` reads that cached output and stores the gradient with respect
    to the inputs in ``self.b_output``. Neither method returns a value.
    """

    def __repr__(self):
        return "ReLU"

    def forward(self, inputs):
        """Apply ReLU element-wise to ``inputs`` and cache it in ``self.output``.

        The result lives on the same device and has the same dtype as
        ``inputs``.
        """
        # Clamping from below at zero needs no helper tensor, so nothing is
        # allocated on the CPU and copied to the accelerator on each call, and
        # the layer no longer depends on where the notebook-level device points.
        self.output = torch.clamp(inputs, min=0)

    def backward(self, b_input):
        """Store in ``self.b_output`` the gradient passed back through ReLU.

        ``b_input`` is the gradient w.r.t. this layer's output. Must be called
        after ``forward``, since it reads ``self.output``.
        """
        # Units whose output was <= 0 were inactive and pass no gradient back.
        # masked_fill returns a new tensor, so the caller's b_input is untouched.
        self.b_output = b_input.masked_fill(self.output <= 0, 0)

In [None]:
class Sigmoid:
    """Element-wise logistic sigmoid activation with a hand-written backward pass.

    ``forward`` caches its result in ``self.output``; ``backward`` reads that
    cache and stores the gradient with respect to the inputs in
    ``self.b_output``. Neither method returns a value.
    """

    def __repr__(self):
        return "Sigmoid"

    def forward(self, inputs):
        """Store sigma(inputs) = 1 / (1 + exp(-inputs)) in ``self.output``."""
        denominator = 1 + torch.exp(-inputs)
        self.output = 1 / denominator

    def backward(self, b_input):
        """Store in ``self.b_output`` the gradient passed back through the sigmoid.

        ``b_input`` is the gradient w.r.t. this layer's output. Must be called
        after ``forward``, since it reads ``self.output``.
        """
        # Chain rule with sigma'(x) = sigma(x) * (1 - sigma(x)); the cached
        # forward output supplies sigma(x).
        activation = self.output
        self.b_output = b_input * (1 - activation) * activation