<a href="https://colab.research.google.com/github/smishr97/38616-Neural-Networks-Deep-Learning/blob/main/ShivamMishra_NN_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 38-616: Neural Networks and Deep Learning
### Homework #1: Neural Network from ***Scratch***
**Name: Shivam Mishra** <br>
**AndrewID: shivammi**

In [None]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from typing import Optional, List, Tuple, Dict
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go
plt.style.use('bmh')

In [None]:
# Pick the compute device: CUDA when PyTorch can see a GPU, CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# On macOS the Metal (MPS) backend can be selected instead:
# device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

In [None]:
class Transform:
    """
    Common interface for every layer in this notebook.

    All hooks below are no-ops that return None; concrete layers override
    the ones they need.
    """

    def __init__(self):
        """Set up any parameters the transform owns (the base class has none)."""

    def forward(self, x):
        """Run the forward computation; x should be passed as column vectors."""

    def backward(self, grad_wrt_out):
        """
        Compute and store the parameter gradients that step() will apply.

        Implementations return grad_wrt_x, which becomes the grad_wrt_out of
        the preceding Transform.
        """

    def step(self):
        """Update the parameters using the stored gradients."""

    def zerograd(self):
        """Reset the stored gradients; usually called before backward()."""

In [None]:
# ReLU activation layer implemented on top of the Transform interface
class ReLU(Transform):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        """
        Apply max(x, 0) element-wise.

        x: input tensor, documented by the notebook as (indim, batch_size)
        returns: tensor with the same shape as x
        """
        # Both the raw input and the activation are cached on the instance;
        # backward() reads the cached activation.
        self.x = x
        floor = torch.zeros_like(x)
        self.output = torch.maximum(x, floor)
        return self.output

    def backward(self, grad_wrt_out):
        """
        Propagate the gradient through the activation.

        grad_wrt_out: gradient of the loss with respect to this layer's output
        returns: gradient with respect to this layer's input
        """
        # The derivative is 1 where the cached activation is strictly positive
        # and 0 everywhere else.
        passthrough = (self.output > 0).float()
        return grad_wrt_out * passthrough

In [None]:
# Affine layer (W @ x + b) with hand-derived gradients, built on Transform
class LinearMap(Transform):
    def __init__(self, indim, outdim, lr=0.001):
        """
        Create the layer parameters.

        indim: input dimension
        outdim: output dimension
        lr: learning rate used by step()
        """
        super().__init__()
        # Parameters are plain tensors on purpose: gradients are computed by
        # hand in backward(), so autograd tracking is not needed. Requesting
        # requires_grad on torch.rand and then scaling by 0.01 would produce
        # non-leaf tensors and make every forward pass record an unused graph.
        self.weights = 0.01 * torch.rand((outdim, indim), dtype=torch.float64, device=device)
        self.bias = 0.01 * torch.rand((outdim, 1), dtype=torch.float64, device=device)
        self.lr = lr

    def forward(self, x):
        """
        Apply the affine map.

        x: input tensor with shape (indim, batch_size)
        returns: tensor with shape (outdim, batch_size)
        """
        # Cache the input; backward() needs it for the weight gradient.
        self.input = x
        return torch.matmul(self.weights, x) + self.bias

    def backward(self, grad_wrt_out):
        """
        Compute parameter gradients and propagate the gradient to the input.

        grad_wrt_out: gradient of the loss with respect to the output,
                      shape (outdim, batch_size)
        returns: gradient of the loss with respect to the input,
                 shape (indim, batch_size)
        """
        # dL/dW = dL/dout @ x^T ; dL/db sums dL/dout over the batch axis.
        self.weights.grad = torch.matmul(grad_wrt_out, self.input.t())
        self.bias.grad = torch.sum(grad_wrt_out, dim=1, keepdim=True)
        # dL/dx = W^T @ dL/dout
        return torch.matmul(self.weights.t(), grad_wrt_out)

    def step(self):
        """
        Apply one gradient-descent update to the weights and bias.

        backward() must have been called first so that .grad is populated.
        """
        # no_grad keeps the in-place update valid even if a caller has turned
        # autograd tracking on for the parameters.
        with torch.no_grad():
            self.weights -= self.lr * self.weights.grad
            self.bias -= self.lr * self.bias.grad

    def zerograd(self):
        """Zero the stored gradients of the weights and bias, if any exist."""
        if self.weights.grad is not None:
            self.weights.grad.zero_()
        if self.bias.grad is not None:
            self.bias.grad.zero_()

In [None]:
class SoftmaxCrossEntropyLoss(object):
    """
    Softmax followed by mean cross-entropy, with a hand-derived gradient.

    Layout used by every method:
      logits: (num_classes, batch_size) pre-softmax scores, one column per sample
      labels: (batch_size, num_classes) one-hot rows, one row per sample
              (the class index is read with argmax over dim=1)
    """

    def __init__(self):
        # Filled in by forward()
        self.num_classes, self.batch_size = None, None

    def _target_indices(self):
        """Return the class index of every sample, on the same device as the logits."""
        return self.labels.argmax(dim=1).to(self.logits.device)

    def forward(self, logits, labels):
        """
        Compute the mean cross-entropy loss of the batch.

        logits: pre-softmax scores, shape (num_classes, batch_size)
        labels: one-hot labels, shape (batch_size, num_classes)
        returns: scalar tensor holding the mean loss over the batch
        """
        self.logits = logits
        self.labels = labels
        self.num_classes = logits.shape[0]
        self.batch_size = logits.shape[1]

        # Work in log space: exponentiating raw logits overflows to inf (and
        # the ratio to NaN) for large scores, and log(0) gives inf for very
        # unlikely targets. logsumexp is stable in both cases.
        log_softmax = logits - torch.logsumexp(logits, dim=0, keepdim=True)
        self.probs = torch.exp(log_softmax)

        # Negative log-likelihood of the true class of each sample
        columns = torch.arange(self.batch_size, device=logits.device)
        log_probs = -log_softmax[self._target_indices(), columns]
        loss = torch.mean(log_probs)

        self.log_probs = log_probs
        return loss

    def backward(self):
        """
        Return the gradient of the mean loss with respect to the logits.

        returns: tensor of shape (num_classes, batch_size) equal to
                 (softmax - one_hot) / batch_size
        """
        grad_wrt_logits = self.probs.clone()
        columns = torch.arange(self.batch_size, device=grad_wrt_logits.device)
        grad_wrt_logits[self._target_indices(), columns] -= 1
        # The loss is a mean over the batch, so the gradient carries 1/batch_size
        grad_wrt_logits /= self.batch_size
        return grad_wrt_logits

    def getAccu(self):
        """
        Return the fraction of samples in the last forward() batch whose
        highest-probability class matches the label.
        """
        predicted = torch.argmax(self.probs, dim=0)
        hits = predicted == self._target_indices()
        accuracy = hits.sum().item() / self.labels.shape[0]
        return accuracy

In [None]:
class SingleLayerMLP(Transform):
    """
    One-hidden-layer perceptron: LinearMap -> ReLU -> LinearMap.
    """

    def __init__(self, indim, outdim, hidden_layer=100, lr=0.001):
        """
        indim: number of input features
        outdim: number of output logits (classes)
        hidden_layer: width of the hidden layer
        lr: learning rate handed to both linear layers
        """
        super().__init__()

        # Input -> hidden projection
        self.linear_map = LinearMap(indim, hidden_layer, lr=lr)

        # Hidden non-linearity
        self.relu = ReLU()

        # Hidden -> output projection. This must end in outdim so the logits
        # have one row per class; projecting to hidden_layer here would
        # silently ignore outdim.
        self.linear_map_2 = LinearMap(hidden_layer, outdim, lr=lr)

    def forward(self, x):
        """
        Run the network on a batch.

        x: input tensor of shape (indim, batch_size)
        returns: pre-softmax logits of shape (outdim, batch_size)
        """
        out = self.linear_map.forward(x)
        out = self.relu.forward(out)
        out = self.linear_map_2.forward(out)
        return out

    def backward(self, grad_wrt_out):
        """
        Back-propagate through the layers in reverse order.

        grad_wrt_out: gradient of the loss with respect to the logits,
                      shape (outdim, batch_size)
        returns: gradient of the loss with respect to the input,
                 shape (indim, batch_size)
        """
        grad_wrt_out = self.linear_map_2.backward(grad_wrt_out)
        grad_wrt_out = self.relu.backward(grad_wrt_out)
        grad_wrt_out = self.linear_map.backward(grad_wrt_out)
        return grad_wrt_out

    def step(self):
        """Apply one gradient-descent update on every layer."""
        self.linear_map.step()
        self.relu.step()
        self.linear_map_2.step()

    def zerograd(self):
        """Reset the stored gradients on every layer."""
        self.linear_map.zerograd()
        self.relu.zerograd()
        self.linear_map_2.zerograd()


In [None]:
class DS(Dataset):
    """
    Minimal map-style dataset over a pair of in-memory arrays.

    X: inputs, indexed as X[idx, :] (so at least two-dimensional)
    Y: targets, indexed as Y[idx]
    """

    def __init__(self, X: np.ndarray, Y: np.ndarray):
        self.X, self.Y = X, Y
        # Number of samples is taken from the inputs
        self.length = len(X)

    def __getitem__(self, idx):
        """Return the (input row, target) tuple stored at position idx."""
        return self.X[idx, :], self.Y[idx]

    def __len__(self):
        """Return how many samples the dataset holds."""
        return self.length

In [None]:
# Train a model for a number of epochs, evaluating on the test set after each one
def _prepare_batch(data, target):
    """Return (inputs, one-hot labels) in the layout and on the device the model uses."""
    # The one-hot conversion goes through NumPy, so it has to happen while the
    # labels are still on the CPU; only the finished tensor is moved to `device`.
    onehot = torch.from_numpy(labels2onehot(target.cpu().numpy())).to(device)
    # Samples arrive as rows; the layers expect one column per sample.
    inputs = data.t().to(device)
    return inputs, onehot


def train_test(train_loader, test_loader, model, num_epochs, loss_fn):
    """
    Run the full training loop and record per-epoch metrics.

    train_loader: iterable of (data, target) training batches
    test_loader: iterable of (data, target) test batches
    model: object exposing forward(x), backward(grad) and step()
    num_epochs: number of passes over train_loader
    loss_fn: object exposing forward(logits, labels), backward() and getAccu()

    returns: (training_loss, testing_loss, training_accuracy, test_accuracy),
             four lists with one entry per epoch. Each entry is the mean of the
             per-batch values of that epoch.
    """
    training_loss, testing_loss = [], []
    training_accuracy, test_accuracy = [], []

    for _ in range(num_epochs):
        # ---- training pass ----
        running_loss = 0.0
        train_acc = []
        for data, target in train_loader:
            inputs, labels_onehot = _prepare_batch(data, target)

            # Forward pass
            output = model.forward(inputs)
            loss = loss_fn.forward(output, labels_onehot)

            # Backward pass and parameter update
            grad = loss_fn.backward()
            model.backward(grad)
            model.step()

            running_loss += loss.item()
            # Accuracy of the batch as seen before this update was applied
            train_acc.append(loss_fn.getAccu())

        training_loss.append(running_loss / len(train_loader))
        training_accuracy.append(np.mean(train_acc))

        # ---- evaluation pass (no parameter updates) ----
        test_loss = 0.0
        test_acc = []
        with torch.no_grad():
            for data, target in test_loader:
                inputs, labels_onehot = _prepare_batch(data, target)

                output = model.forward(inputs)
                loss = loss_fn.forward(output, labels_onehot)

                test_loss += loss.item()
                test_acc.append(loss_fn.getAccu())

        testing_loss.append(test_loss / len(test_loader))
        test_accuracy.append(np.mean(test_acc))

    return training_loss, testing_loss, training_accuracy, test_accuracy

In [None]:
def labels2onehot(labels: np.ndarray, num_classes: int = 2):
    """
    Convert integer class labels to one-hot rows.

    labels: 1-D array-like of class indices (anything np.asarray accepts)
    num_classes: number of columns in the encoding; defaults to 2, the value
                 the notebook's binary task uses
    returns: int array of shape (len(labels), num_classes); a label outside
             range(num_classes) produces an all-zero row
    """
    flat = np.asarray(labels).reshape(-1)
    # Broadcasting a column of labels against a row of class ids yields the
    # full comparison table in one step, and keeps the (0, num_classes) shape
    # for empty input.
    return (flat[:, None] == np.arange(num_classes)).astype(int)

if __name__ == "__main__":
    # The dataset loaders were provided with the assignment. This cell builds
    # the model and runs the training/testing loop; the loss and accuracy
    # curves are plotted from the returned lists.

    # Hyper-parameters
    indim = 10
    outdim = 2
    hidden_dim = 100
    lr = 0.01
    batch_size = 64
    epochs = 200

    # Training data (tab-separated text files)
    Xtrain = np.loadtxt("/content/XTrain.txt", delimiter="\t")
    Ytrain = np.loadtxt("/content/yTrain.txt", delimiter="\t").astype(int)
    m1, n1 = Xtrain.shape
    print(m1, n1)
    train_ds = DS(Xtrain, Ytrain)
    train_loader = DataLoader(train_ds, batch_size=batch_size)

    # Test data
    Xtest = np.loadtxt("/content/XTest.txt", delimiter="\t")
    Ytest = np.loadtxt("/content/yTest.txt", delimiter="\t").astype(int)
    m2, n2 = Xtest.shape
    # Report the test-set shape (m2), not the training row count
    print(m2, n2)
    test_ds = DS(Xtest, Ytest)
    test_loader = DataLoader(test_ds, batch_size=batch_size)

    # Construct the model and the loss function
    model = SingleLayerMLP(indim, outdim, hidden_dim, lr)
    loss_fn = SoftmaxCrossEntropyLoss()

    # Train, evaluating on the test set after every epoch
    training_loss, testing_loss, training_acc, testing_acc = train_test(train_loader, test_loader, model, epochs, loss_fn)

500 10
500 10


In [None]:
# Smallest per-epoch mean training loss returned by train_test
min(training_loss)

0.1431847775174466

In [None]:
# Largest per-epoch mean training loss returned by train_test
max(training_loss)

4.585063581905664

In [None]:
# Plot the per-epoch training and test loss curves on one figure
fig = go.Figure()

train_epochs = list(range(len(training_loss)))
test_epochs = list(range(len(testing_loss)))
fig.add_trace(
    go.Scatter(x=train_epochs, y=training_loss, name='Training Loss', line={'color': 'green'})
)
fig.add_trace(
    go.Scatter(x=test_epochs, y=testing_loss, name='Test Loss', line={'color': 'red'})
)

# Locate the first epoch at which each curve reaches its minimum
lowest_train_loss = min(training_loss)
lowest_test_loss = min(testing_loss)
train_min_index = training_loss.index(lowest_train_loss)
test_min_index = testing_loss.index(lowest_test_loss)

# Label both minima; the arrows are offset in opposite vertical directions
fig.add_annotation(
    x=train_min_index,
    y=lowest_train_loss,
    text=f"Min Training Loss: {round(lowest_train_loss, 2)}",
    showarrow=True,
    arrowhead=1,
    ax=-50,
    ay=-50,
)
fig.add_annotation(
    x=test_min_index,
    y=lowest_test_loss,
    text=f"Min Test Loss: {round(lowest_test_loss, 2)}",
    showarrow=True,
    arrowhead=1,
    ax=-50,
    ay=50,
)

fig.update_layout(
    title='Training vs. Test Losses for Scratch NN Implementation',
    xaxis_title='Epochs',
    yaxis_title='Loss',
    height=600,
    width=800,
)
fig.show()

In [None]:
# Plot the per-epoch training and test accuracy curves on one figure
fig = go.Figure()

# Add training and test accuracy traces to the figure
fig.add_trace(go.Scatter(x=list(range(len(training_acc))), y=training_acc, name='Training Accuracy', line=dict(color='green')))
fig.add_trace(go.Scatter(x=list(range(len(testing_acc))), y=testing_acc, name='Test Accuracy', line=dict(color='red')))

# The annotations are labelled "Max", so point them at the epoch where each
# curve actually peaks rather than at the final epoch.
train_max_index = training_acc.index(max(training_acc))
test_max_index = testing_acc.index(max(testing_acc))

fig.add_annotation(x=train_max_index, y=training_acc[train_max_index], text=f"Max Training Accuracy: {round(training_acc[train_max_index], 2)}", showarrow=True, arrowhead=1, ax=-50, ay=-50)
fig.add_annotation(x=test_max_index, y=testing_acc[test_max_index], text=f"Max Test Accuracy: {round(testing_acc[test_max_index], 2)}", showarrow=True, arrowhead=1, ax=-50, ay=50)

fig.update_layout(title='Training vs. Test Accuracies for Scratch NN Implementation', xaxis_title='Epochs', yaxis_title='Accuracy', height=600, width= 800)
fig.show()