# Building a simple NN from scratch with NumPy

Implement the missing functionalities one by one.

In [1]:
from os.path import join
import random

import numpy as np

from dreader import MnistDataloader, show_images


## Data Import (Given)

We use the MNIST dataset. It contains 28x28 images of handwritten digits, which we want to classify. The training set contains 60k images and the test set contains 10k.

In [2]:
# Root directory of the extracted MNIST files.
DATA_PATH = "../data/mnist/"

# Each IDX file sits in a directory that carries the same name as the file.
training_images_filepath = join(DATA_PATH, "train-images-idx3-ubyte/train-images-idx3-ubyte")
training_labels_filepath = join(DATA_PATH, "train-labels-idx1-ubyte/train-labels-idx1-ubyte")
test_images_filepath = join(
    DATA_PATH, "t10k-images-idx3-ubyte/t10k-images-idx3-ubyte"
)
test_labels_filepath = join(
    DATA_PATH, "t10k-labels-idx1-ubyte/t10k-labels-idx1-ubyte"
)

# The loader takes the four paths in the order: train images, train labels,
# test images, test labels.
mnist_dataloader = MnistDataloader(
    training_images_filepath, training_labels_filepath,
    test_images_filepath, test_labels_filepath,
)

# load_data() hands back a (train, test) pair, each an (images, labels) pair.
(x_train, y_train), (x_test, y_test) = mnist_dataloader.load_data()

In [None]:
# Preview 10 random training samples followed by 5 random test samples.
#
# random.randrange(n) draws from 0..n-1, so every valid index (including 0)
# can be selected and the index can never fall one past the end of the data,
# which random.randint(1, n) allowed because its upper bound is inclusive.
images_2_show = []
titles_2_show = []
for _ in range(10):
    r = random.randrange(len(x_train))
    images_2_show.append(x_train[r])
    titles_2_show.append(f"training image [{r}] = {y_train[r]}")

for _ in range(5):
    r = random.randrange(len(x_test))
    images_2_show.append(x_test[r])
    titles_2_show.append(f"test image [{r}] = {y_test[r]}")

show_images(images_2_show, titles_2_show)

## Transform Data for Numpy

In [None]:
# TBD (exercise): convert the loaded data into NumPy arrays and normalize the
# pixel values to the range 0-1 (currently they are 0-255).
# x = image array, y = label.
# The four assignments below are intentionally left blank; this cell does not
# run until they are filled in.
TRAINING_SIZE = 60000  # max 60000; presumably meant to cap the number of training samples used (not referenced in the visible code) - TODO confirm

x_train = 
y_train = 
x_test = 
y_test = 

# transform the data into 2D arrays with 28*28=784 pixels
# (x_train / x_test must be 3-D here: axis 0 indexes the image, axes 1 and 2
# are multiplied together to get the flattened length per image)
input_size = x_train.shape[1] * x_train.shape[2]
x_train = x_train.reshape(x_train.shape[0], input_size)
x_test = x_test.reshape(x_test.shape[0], input_size)

# Last expression of the cell: the notebook displays the resulting shape.
x_train.shape

## Setup the Network

Complete the function init_params so that it returns the arrays that define the tunable parameters (weights W and biases b for each layer).
We will build a network with two trainable layers of size 10 each. What is the size of the input layer?

In [5]:
def init_params():
    # Exercise stub: should create and return the trainable parameters of the
    # two layers (weights W1/W2 and biases b1/b2). The right-hand sides are
    # intentionally left blank for the reader to fill in.
    # NOTE(review): forward_prop is later called with x_test.T, so inputs are
    # presumably laid out as (784, m) columns; shapes such as W1 (10, 784),
    # b1 (10, 1), W2 (10, 10), b2 (10, 1) would fit - confirm when implementing.
    W1 = 
    b1 = 
    W2 = 
    b2 = 
    return W1, b1, W2, b2

## The Activation Functions

The activation functions are crucial for learning more complex patterns as they break up the currently linear flow by introducing non-linearity.

Set up a relu function (takes an np.array as an input and outputs an np.array where each val is the output of the relu).
Set up a softmax function for the output layer.

In [6]:
def relu(x):
    """Exercise stub for the element-wise ReLU activation.

    Intended (per the notebook text) to take an np.array and return an
    np.array in which each value is the ReLU of the corresponding input
    value. Not implemented yet: the body is ``pass``, so calling it
    returns ``None``.
    """
    pass


def softmax(x):
    """Exercise stub for the softmax activation of the output layer.

    Not implemented yet: the body is ``pass``, so calling it returns
    ``None``.
    """
    pass

## Forward Propagation

Now we want to implement the forward pass through the network. For each layer, first calculate the pre-activation values as Wx + b, then apply the activation function. Return the values before each activation function (Z1, Z2) as well as the values after the relu and the softmax (A1, A2).

Using the dot product of the weights with the previous input x and adding the bias b computes the value all at once, before the activation function.

In [7]:
def forward_prop(W1, b1, W2, b2, x):
    # Exercise stub for the forward pass; the four right-hand sides are
    # intentionally left blank for the reader to fill in.
    # Per the notebook text: Z1 / Z2 are the values before each activation
    # (weights dotted with the previous layer's output, plus the bias), and
    # A1 / A2 are the values after relu and softmax respectively.
    # The notebook calls this function with x_test.T as `x`.
    Z1 = 
    A1 = 
    Z2 = 
    A2 = 

    return Z1, A1, Z2, A2

## Calculate the error by Backpropagation

This is the core function for learning. After the forward pass, we compare the output layer with the expected result in one-hot encoding (e.g. 0 0 1 0 ... for label 2). Their difference gives us dZ2, the error in layer 2 (the output layer). Calculate dW2, db2, dZ1, dW1 and db1 accordingly.

In [8]:
# helper
def one_hot(y, num_classes=None):
    """Return the one-hot encoding of the integer labels *y*.

    Args:
        y: 1-D array of non-negative integer class labels.
        num_classes: Number of classes, i.e. rows of the result. When
            omitted it is inferred as ``y.max() + 1`` (the original
            behaviour). Pass it explicitly (e.g. 10 for digits) when *y*
            might not contain the highest label, otherwise the result has
            too few rows.

    Returns:
        A float array of shape ``(num_classes, y.size)`` in which each
        column is one example and holds a single 1 in the row given by
        that example's label.

    Raises:
        ValueError: If *y* is empty and ``num_classes`` is not given
            (raised by ``y.max()``).
        IndexError: If a label is not smaller than ``num_classes``.
    """
    y = np.asarray(y)
    if num_classes is None:
        # int() avoids wrap-around when the labels use a small unsigned dtype.
        num_classes = int(y.max()) + 1

    # Built directly in (classes, examples) layout, so no transpose is needed.
    one_hot_y = np.zeros((num_classes, y.size))
    one_hot_y[y, np.arange(y.size)] = 1

    return one_hot_y

In [9]:
def deriv_relu(x):
    """Exercise stub, by its name meant to be the derivative of relu.

    Not implemented yet: the body is ``pass``, so calling it returns
    ``None``.
    """
    pass


def back_prop(Z1, A1, Z2, A2, W1, W2, x, y):
    # Exercise stub for backpropagation; the six right-hand sides are
    # intentionally left blank for the reader to fill in.
    # Per the notebook text: dZ2 is the error of the output layer, obtained by
    # comparing the output layer with the one-hot encoded expected result;
    # dW2, db2, dZ1, dW1 and db1 are then calculated from it.
    # Only the weight and bias gradients are returned; dZ1 / dZ2 stay local.
    dZ2 = 
    dW2 = 
    db2 = 
    dZ1 = 
    dW1 = 
    db1 = 

    return dW1, db1, dW2, db2

## Update the parameters by the errors calculated through Backprop

Nudge each value by the calculated error (d..) times the learning rate and subtract this from the previous value.

In [10]:
def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    # Exercise stub for the parameter update; the four right-hand sides are
    # intentionally left blank for the reader to fill in.
    # Per the notebook text: each parameter is nudged by subtracting its
    # calculated error (d...) times the learning rate from the previous value.
    # `alpha` is presumably that learning rate - confirm when implementing.
    W1 = 
    b1 = 
    W2 = 
    b2 = 

    return W1, b1, W2, b2

## Accuracy and loss function

Used to get insights into training progress.

In [11]:
def get_predictions(A2):
    """Exercise stub that should turn the network output A2 into predictions.

    The notebook later indexes the result with a boolean mask built from
    y_test and compares it element-wise with the labels, so it is
    presumably one predicted label per example - confirm when implementing.
    Not implemented yet: the body is ``pass``, so calling it returns
    ``None``.
    """
    pass


def get_accuracy(A2, y):
    """Exercise stub for the accuracy of the output A2 against the labels y.

    The notebook prints the result as "Test accuracy". Not implemented
    yet: the body is ``pass``, so calling it returns ``None``.
    """
    pass


def get_loss(A2, y):
    """Exercise stub for the loss of the output A2 against the labels y.

    The notebook prints the result as "Test loss". Which loss function is
    meant is not stated in the notebook. Not implemented yet: the body is
    ``pass``, so calling it returns ``None``.
    """
    pass

## Implement learning algorithm (gradient descent)

In [12]:
def gradient_descent(x, y, iters, alpha):
    """Exercise stub for the training loop (gradient descent).

    Currently it only initializes the parameters with init_params(), runs
    an empty loop ``iters`` times and returns the parameters unchanged;
    ``x``, ``y`` and ``alpha`` are not used yet.

    Returns:
        The tuple ``(W1, b1, W2, b2)``.
    """
    W1, b1, W2, b2 = init_params()
    for i in range(iters):
        # Placeholder: the per-iteration training step is left to the reader.
        pass

    return W1, b1, W2, b2

## Start training

In [None]:
# Train on x_train / y_train: 1000 iterations with alpha = 0.08.
W1, b1, W2, b2 = gradient_descent(
    x_train,
    y_train,
    iters=1000,
    alpha=0.08,
)

## Test the model

In [None]:
# Forward pass over the test set with the trained parameters; x_test is
# transposed before it is handed to forward_prop.
Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, x_test.T)
# `predictions` is reused by the per-digit accuracy cell further down.
predictions = get_predictions(A2)

# Report the overall metrics, followed by the raw predictions and labels.
print("Test accuracy: ", get_accuracy(A2, y_test))
print("Test loss: ", get_loss(A2, y_test))
print("Predictions: ", predictions)
print("True values: ", y_test)

In [None]:
import matplotlib.pyplot as plt

# Accuracy per digit: for every label value present in y_test, the share of
# test samples carrying that label which were predicted correctly.
classes = np.unique(y_test)
accuracy_per_class = {
    digit: np.mean(predictions[y_test == digit] == y_test[y_test == digit])
    for digit in classes
}

# Bar chart with one bar per digit; the y axis is fixed to the range 0..1.
plt.figure(figsize=(10, 6))
plt.bar(
    accuracy_per_class.keys(),
    accuracy_per_class.values(),
    color='skyblue',
)
plt.xlabel('Digit')
plt.ylabel('Accuracy')
plt.title('Accuracy per Digit')
plt.xticks(classes)
plt.ylim([0, 1])
plt.grid(axis='y', linestyle='--', linewidth=0.5)
plt.show()

## Store the model

In [None]:
# Switch for the save step at the end of this cell: the trained parameters are
# only written to MODEL_PATH when this is True.
STORE = True

import numpy as np

def save_model(W1, b1, W2, b2, path):
    """Write the four parameter arrays to *path* with ``np.savez``.

    The arrays are stored under the keys ``W1``, ``b1``, ``W2`` and ``b2``,
    and a confirmation line is printed afterwards.
    """
    parameters = {"W1": W1, "b1": b1, "W2": W2, "b2": b2}
    np.savez(path, **parameters)
    print(f"Model saved to {path}")

def load_model(path):
    """Load the parameters written by ``save_model`` from an ``.npz`` file.

    Args:
        path: Location of the ``.npz`` archive.

    Returns:
        The tuple ``(W1, b1, W2, b2)`` of NumPy arrays.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If one of the four expected arrays is missing.
    """
    # np.load returns an NpzFile that keeps the archive open; the context
    # manager closes it again so the file handle is not leaked. The arrays
    # are read into memory on access, so they stay valid after closing.
    with np.load(path) as data:
        W1 = data['W1']
        b1 = data['b1']
        W2 = data['W2']
        b2 = data['b2']
    print(f"Model loaded from {path}")
    return W1, b1, W2, b2

# Path of the file the model parameters are stored in. It is built with
# join() (imported from os.path in the first cell) instead of string
# concatenation, so it stays correct even if DATA_PATH has no trailing slash.
MODEL_PATH = join(DATA_PATH, 'model_parameters.npz')

if STORE:
    # Save the model parameters
    save_model(W1, b1, W2, b2, MODEL_PATH)


In [None]:
W1_loaded, b1_loaded, W2_loaded, b2_loaded = load_model(MODEL_PATH)

# Round-trip check: every array read back from disk must equal the in-memory
# parameter it was saved from.
for name, trained, restored in (
    ("W1", W1, W1_loaded),
    ("b1", b1, b1_loaded),
    ("W2", W2, W2_loaded),
    ("b2", b2, b2_loaded),
):
    assert np.array_equal(trained, restored), f"{name} does not match!"

# Run the test data through the network again, this time with the reloaded
# parameters, and report the accuracy.
Z1, A1, Z2, A2 = forward_prop(
    W1_loaded, b1_loaded, W2_loaded, b2_loaded, x_test.T
)
print("Test accuracy after loading model:", get_accuracy(A2, y_test))