# Math 198 Homework 7

## Setup 

### Installs
Run this cell to ensure you have the required packages installed.

In [None]:
!pip install numpy
!pip install Pillow

### Imports
Run this cell to import the required libraries.

In [None]:
import numpy as np
from IPython.display import display
from PIL import Image

from loaders import MNISTLoader

### Load the Data
Run this cell to load the MNIST database.

In [None]:
# Fetch the MNIST training split (images first, then labels) from the loader.
train_images, train_labels = MNISTLoader.train_images(), MNISTLoader.train_labels()

# Layer sizes used when constructing the network further down.
output_dim = 10  # one output per possible label, 0 through 9
input_dim = len(train_images[0])  # length of a single input vector

### View the Data
Run this cell to view a random sample image from the MNIST database.

In [None]:
# Draw a random index into the training set.
num = np.random.randint(len(train_images))

# Lay the selected image out as a 28x28 pixel grid so it can be rendered.
image = np.reshape(train_images[num], (28, 28))
# The label is read off as the position of the largest entry of the label vector
# (presumably a one-hot encoding -- confirm against the loader).
label = np.argmax(train_labels[num])

print("Label: " + str(label))
# Leaving the image as the last expression makes the notebook render it.
Image.fromarray(image)

## Neural Network Code
The ```MNIST_Net``` class defines the structure of the neural network we will train. You will need to fill in a few lines of the ```backward``` function, which computes the gradient of the loss function with respect to each layer's weights and biases.

In [None]:
class MNIST_Net:
    """Two-layer fully connected network: linear -> ReLU -> linear.

    The prediction for an input ``x`` is ``W @ ReLU(V @ x + b) + c`` and the loss
    is the squared Euclidean distance between the prediction and the target.
    The biases are stored as column vectors, so inputs and targets are expected
    to be column vectors as well (shapes ``(input_dim, 1)`` and ``(output_dim, 1)``).
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Create a network with randomly initialised weights and zero biases.

        input_dim: The dimension of the input vectors
        hidden_dim: The dimension of the hidden layer
        output_dim: The dimension of the output layer (i.e. the number of classes)
        """
        self.output_dim = output_dim
        # Randomly initializes the weight layers with mean 0 and standard deviation .001,
        # and initializes the bias layers to all zeroes.
        self.V = np.random.normal(0, 1e-3, hidden_dim * input_dim).reshape((hidden_dim, input_dim))
        self.b = np.zeros((hidden_dim, 1))
        self.W = np.random.normal(0, 1e-3, output_dim * hidden_dim).reshape((output_dim, hidden_dim))
        self.c = np.zeros((output_dim, 1))

    @staticmethod
    def ReLU(x):
        """Return a new array equal to ``x`` with all negative entries set to 0.

        Defined as a method rather than a lambda stored on the instance so that
        model objects stay picklable; ``self.ReLU(x)`` works exactly as before.
        """
        return np.where(x < 0, np.zeros_like(x), x)

    def forward(self, x):
        """Generate a vector of predictions for the input ``x``.

        Returns ``(y_prime, cache)`` where ``y_prime`` is the prediction and
        ``cache = (x, h, h_prime)`` holds the input, the hidden pre-activation and
        the hidden activation, which ``backward`` needs to compute gradients.
        """
        # Apply the first layer of weights to the input
        h = (self.V @ x) + self.b
        # Apply ReLU
        h_prime = self.ReLU(h)
        # Apply the second layer of weights
        y_prime = (self.W @ h_prime) + self.c
        # Save intermediate values
        cache = (x, h, h_prime)
        return (y_prime, cache)

    def loss(self, y, y_prime):
        """Return the squared Euclidean distance between target ``y`` and prediction ``y_prime``."""
        return np.linalg.norm(y - y_prime) ** 2

    def backward(self, y, y_prime, cache):
        """Calculate the gradient of the loss w.r.t. each weight and bias layer.

        This function is called "backward" because it calculates gradients in
        reverse order, from the output back to the input. This is done so all
        calculations are matrix-vector products rather than matrix-matrix; this
        algorithm is known as "backpropagation".

        y: the target vector
        y_prime: the prediction returned by ``forward``
        cache: the ``(x, h, h_prime)`` tuple returned by ``forward``

        Returns ``(dV, db, dW, dc)``, each with the same shape as the parameter
        it corresponds to.
        """
        # The pre-activation h is part of the cache but is not needed here.
        x, _, h_prime = cache
        dy_prime = 2 * (y_prime - y)  # answer to 3.1
        dW = dy_prime @ h_prime.T  # answer to 3.2
        dc = dy_prime  # answer to 3.3
        dh_prime = self.W.T @ dy_prime  # answer to 3.4
        # The Jacobian of ReLU is diagonal (1 where the unit is active, 0 elsewhere),
        # so multiplying by it is an elementwise product; this avoids materialising
        # a hidden_dim x hidden_dim matrix for every sample.
        relu_mask = (h_prime > 0).astype(h_prime.dtype)
        dh = relu_mask * dh_prime  # answer to 3.5
        dV = dh @ x.T  # answer to 3.6
        db = dh  # answer to 3.7
        return (dV, db, dW, dc)

In [None]:
# Sanity check: push one random training image through a freshly initialised
# MNIST_Net and confirm that the forward and backward passes run end to end.
# Note: the prediction itself will probably be wrong, since nothing has been trained yet.

model = MNIST_Net(input_dim, 100, output_dim)

# Pick a random sample and run the forward pass on it.
num = np.random.randint(len(train_images))
image, label = train_images[num], train_labels[num]
pred, cache = model.forward(image)

print(
    "Predicted: {}, Actual: {}, Loss: {}".format(
        np.argmax(pred), np.argmax(label), model.loss(label, pred)
    )
)
display(Image.fromarray(image.reshape((28, 28))))

# Every gradient must have exactly the shape of the parameter it updates.
dV, db, dW, dc = model.backward(label, pred, cache)
assert model.V.shape == dV.shape, "dV has incorrect shape"
assert model.b.shape == db.shape, "db has incorrect shape"
assert model.W.shape == dW.shape, "dW has incorrect shape"
assert model.c.shape == dc.shape, "dc has incorrect shape"

## Training Code
The ```MNIST_Trainer``` class takes in an ```MNIST_Net``` and training data, trains the net for the given number of epochs, and prints the training accuracy after each epoch. You will need to fill in the weight-update step of the ```train``` function.

In [None]:
class MNIST_Trainer:
    """Trains a network one sample at a time and reports its accuracy.

    ``model`` must provide ``forward(x) -> (y_prime, cache)``,
    ``backward(y, y_prime, cache) -> (dV, db, dW, dc)`` and the parameter arrays
    ``V``, ``b``, ``W`` and ``c``, which ``train`` updates in place.
    """

    def __init__(self, model, train_images, train_labels):
        """Store the model together with the images and labels to train on."""
        self.model = model
        self.train_images = train_images
        self.train_labels = train_labels

    @staticmethod
    def _accuracy_percent(num_correct, num_guesses):
        """Return the share of correct guesses as a percentage rounded to 2 decimals.

        Returns 0.0 when no guesses were made, so an empty dataset is reported
        instead of raising ZeroDivisionError.
        """
        if num_guesses == 0:
            return 0.0
        return round((num_correct / num_guesses) * 100, 2)

    def train(self, learning_rate=1e-5, num_epochs=5):
        """Run per-sample gradient descent over the training data.

        learning_rate: factor each gradient is scaled by before it is subtracted
            from the corresponding parameter
        num_epochs: number of full passes over the training data

        Updates the model's parameters in place and prints the training accuracy
        after every epoch. The accuracy is measured with the weights as they were
        at the moment each sample was visited, i.e. while they are still changing.
        """
        for epoch in range(num_epochs):
            num_guesses = 0
            num_correct = 0

            for x, y in zip(self.train_images, self.train_labels):
                num_guesses += 1
                y_prime, cache = self.model.forward(x)
                # A guess is correct when the largest output is at the label's largest entry.
                if np.argmax(y_prime) == np.argmax(y):
                    num_correct += 1
                dV, db, dW, dc = self.model.backward(y, y_prime, cache)

                # perform weight updates
                self.model.V -= (dV * learning_rate)
                self.model.b -= (db * learning_rate)
                self.model.W -= (dW * learning_rate)
                self.model.c -= (dc * learning_rate)
            print("Training accuracy {}% after {} epochs".format(self._accuracy_percent(num_correct, num_guesses), epoch + 1))

    def validate(self, test_images, test_labels):
        """Print the model's accuracy on the given images and labels.

        Only runs the forward pass; the model's parameters are not modified.
        """
        num_guesses = 0
        num_correct = 0

        for x, y in zip(test_images, test_labels):
            num_guesses += 1
            y_prime, cache = self.model.forward(x)
            if np.argmax(y_prime) == np.argmax(y):
                num_correct += 1
        print("Test accuracy {}%".format(self._accuracy_percent(num_correct, num_guesses)))

In [None]:
# Start from a freshly initialised network with 100 hidden units, so re-running
# this cell always trains from scratch, then fit it with the trainer's defaults.
model = MNIST_Net(input_dim=input_dim, hidden_dim=100, output_dim=output_dim)
trainer = MNIST_Trainer(model=model, train_images=train_images, train_labels=train_labels)
trainer.train()

In [None]:
# Fetch the MNIST test split (images first, then labels) from the loader.
test_images, test_labels = MNISTLoader.test_images(), MNISTLoader.test_labels()

# Moment of truth: will your trained model label test data correctly?
# First look at a single random test sample...
num = np.random.randint(len(test_images))
image, label = test_images[num], test_labels[num]
pred, cache = model.forward(image)

print(
    "Predicted: {}, Actual: {}, Loss: {}".format(
        np.argmax(pred), np.argmax(label), model.loss(label, pred)
    )
)
display(Image.fromarray(image.reshape((28, 28))))

# ...then report the accuracy over the whole test set.
trainer.validate(test_images, test_labels)