In [2]:
import torchvision.datasets as datasets
from PIL.ImageShow import show
from PIL.Image import Image
from typing import List, Union
import random
import pytest

In [3]:
# Constants
# Learning rate: scales the averaged weight deltas in the weight update step
ALPHA = 0.01
# Divisor applied to every raw pixel value before it is fed to the network
NORMALISATION = 255
# Number of samples taken from the training list per training step
BATCH_SIZE = 16
# Number of neurons (rows of the weight matrix) in the single layer
NO_NEURONS_LAYER_0 = 784

In [4]:
# Load the MNIST training split via torchvision, using ./data as the root folder
train_data = datasets.MNIST(root="./data", train=True, download=True, transform=None)

# Get the first sample; each item unpacks into an (image, label) pair
train_image_zero, train_target_zero = train_data[0]
image_zero_pixels = train_image_zero.getdata()
# number_pixels is reused by later cells as the number of weights per neuron
number_pixels = len(image_zero_pixels)

# Display image
train_image_zero.show()
print(f"The first label is a {train_target_zero}.")
print(f"There are {number_pixels} pixels in the first image.")

# Materialise the data set as a plain list of (image, label) tuples so that later
# cells can slice it into batches. Note: this rebinds train_data from the
# torchvision data set object to a list.
train_data = [(image, label) for image, label in train_data]

The first label is a 5.
There are 784 pixels in the first image.


In [18]:
# The code avoids libraries in order to build an understanding of how neural
# networks work under the hood. The functions defined in the following cells
# are used by the network to make predictions.

def initialise_random_weights(m=NO_NEURONS_LAYER_0, n=number_pixels):
    """
    Builds a randomly initialised weight matrix for one layer.

    Every entry is drawn independently with random.uniform(-0.5, 0.5).

    Args:
        m (int): Number of rows, one per neuron (default is NO_NEURONS_LAYER_0).
        n (int): Number of columns, one per input feature (default is
            number_pixels, the pixel count of the first MNIST image).

    Returns:
        List[List[float]]: A list of m rows, each holding n floats.
    """
    weight_matrix = []
    for _ in range(m):
        # Fill one neuron's row, drawing its weights left to right.
        neuron_weights = []
        for _ in range(n):
            neuron_weights.append(random.uniform(-0.5, 0.5))
        weight_matrix.append(neuron_weights)
    return weight_matrix

def w_sum(inputs: List[List[Union[float, int]]], weights: List[List[float]]) -> List[List[float]]:
    """
    Calculates the weighted sum (dot product) of every sample with every neuron.

    The result is indexed neuron-first: outputs[i][j] is the dot product of
    neuron i's weights with sample j.

    Args:
        inputs (List[List[Union[float, int]]]): A matrix of features, one row per sample
        weights (List[List[float]]): A matrix of weights, one row per neuron

    Returns:
        List[List[float]]: One row per neuron, holding one dot product per sample.
            For an empty batch every neuron gets an empty row.

    Raises:
        ValueError: If the samples do not all have the same number of features,
            or a neuron's number of weights differs from the number of features.
    """
    if not inputs:
        # Empty batch: there is nothing to predict for any neuron.
        return [[] for _ in weights]

    num_features = len(inputs[0])

    # Fail loudly on shape mismatches instead of silently ignoring extra values.
    if any(len(sample) != num_features for sample in inputs):
        raise ValueError("All samples need to have the same number of features")
    if any(len(neuron_weights) != num_features for neuron_weights in weights):
        raise ValueError("Number of weights per neuron needs to match number of features")

    return [
        [sum(feature * weight for feature, weight in zip(sample, neuron_weights)) for sample in inputs]
        for neuron_weights in weights
    ]

In [19]:
# Tests for above functions
def test_initialise_random_weights():
    """Checks the shape of the weight matrix built with the default arguments."""
    default_matrix = initialise_random_weights()

    # One row per neuron ...
    row_count = len(default_matrix)
    assert row_count == NO_NEURONS_LAYER_0

    # ... and one weight per pixel in the first row.
    first_row_width = len(default_matrix[0])
    assert first_row_width == number_pixels
    
def test_w_sum():
    """Compares w_sum with hand-computed dot products for two samples and two neurons."""
    samples = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
    neuron_weights = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
    expected_outputs = [[1.4, 3.2], [3.2, 7.7]]

    outputs = w_sum(samples, neuron_weights)

    # Floats are compared with a relative tolerance rather than exact equality.
    for i, neuron_outputs in enumerate(outputs):
        for j, value in enumerate(neuron_outputs):
            assert value == pytest.approx(expected_outputs[i][j], rel=1e-9), f"Mismatch at {i}, {j}"

# Run tests: a failed check raises AssertionError and stops the cell
test_initialise_random_weights()
test_w_sum()

In [11]:
def calculate_deltas(predicted_labels: List[float], labels: List[Union[float, int]]) -> List[float]:
    """
    Calculates the signed deviation (predicted minus actual) for each sample in a batch.

    The sign is kept on purpose: a negative delta means the prediction was
    below the actual label.

    Args:
        predicted_labels (List[float]): A list of predicted labels per sample in batch
        labels (List[Union[float, int]]): A list of actual labels per sample in batch

    Returns:
        deltas (List[float]): A list with predicted_labels[i] - labels[i] for each sample

    Raises:
        ValueError: If the two lists do not have the same length.
    """
    if len(labels) != len(predicted_labels):
        raise ValueError("Vectors need to be of same len")

    return [predicted - actual for predicted, actual in zip(predicted_labels, labels)]

def calculate_errors(deltas: List[float]) -> List[float]:
    """
    Squares every delta of a batch.

    Args:
        deltas (List[float]): Differences between predicted and actual labels, one per sample.

    Returns:
        errors (List[float]): The squared delta for each sample, in the same order.
    """
    return [delta ** 2 for delta in deltas]
    
def calculate_weight_deltas(inputs: List[List[Union[float, int]]], deltas: List[float]) -> List[List[float]]:
    """
    Calculates the weight deltas for each feature of each sample in the batch as the
    product of the feature value and the delta of that sample.

    Args:
        inputs (List[List[Union[float, int]]]): A matrix of input features where each row
            represents a sample in the batch and each column represents a feature.
        deltas (List[float]): One delta per sample in the batch, i.e. the deviation
            of the prediction from the actual value.

    Returns:
        weight_deltas (List[List[float]]): One row per sample, where element j of row i
            is inputs[i][j] * deltas[i]. An empty batch gives an empty list.

    Raises:
        ValueError: If the number of deltas differs from the number of samples.
    """
    if len(deltas) != len(inputs):
        raise ValueError("Number of deltas needs to match number of samples")

    return [
        [feature * delta for feature in sample]
        for sample, delta in zip(inputs, deltas)
    ]

In [14]:
# Tests for the functions above
def test_calculate_deltas():
    """Checks that each delta is the predicted value minus the actual value."""
    predicted = [1, 2, 3]
    actual = [5, 5, 5]
    expected = [-4, -3, -2]
    assert calculate_deltas(predicted, actual) == expected

def test_calculate_errors():
    """Checks that every delta is squared."""
    sample_deltas = [-4, -3, -2]
    expected = [16, 9, 4]
    assert calculate_errors(sample_deltas) == expected

def test_calculate_weight_deltas():
    """Checks that every feature of a sample is multiplied by that sample's delta."""
    samples = [[1, 2, 3], [4, 5, 6]]
    sample_deltas = [-4, -3]
    expected = [[-4, -8, -12], [-12, -15, -18]]

    result = calculate_weight_deltas(samples, sample_deltas)

    assert result == expected

# Run tests: a failed check raises AssertionError and stops the cell
test_calculate_deltas()
test_calculate_errors()
test_calculate_weight_deltas()

[[-4, -8, -12], [-12, -15, -18]]


In [33]:
def forward_propagation(inputs, weights, targets):
    """
    Runs one pass over a batch and collects predictions, squared errors and weight deltas.

    Every neuron's predictions are compared with the same list of targets.

    Args:
        inputs: A matrix of features, one row per sample in the batch.
        weights: A matrix of weights, one row per neuron.
        targets: The actual label of each sample in the batch.

    Returns:
        A tuple (preds, errors, weight_deltas). Each element holds one entry per neuron:
        the predictions per sample, the squared errors per sample, and the
        per-sample, per-feature weight deltas.
    """
    preds = w_sum(inputs, weights)

    errors = []
    weight_deltas = []
    for neuron_preds in preds:
        neuron_deltas = calculate_deltas(neuron_preds, targets)
        errors.append(calculate_errors(neuron_deltas))
        weight_deltas.append(calculate_weight_deltas(inputs, neuron_deltas))

    return preds, errors, weight_deltas

def back_propagation(
    weights: List[List[float]],
    weight_deltas: List[List[List[float]]],
    alpha: Union[float, None] = None,
) -> List[List[float]]:
    """
    Returns a new weight matrix after one gradient step averaged over the batch.

    For each neuron i and weight j the weight deltas of all samples are averaged
    and the result, scaled by the learning rate, is subtracted from weights[i][j].
    The input matrices are not modified.

    Args:
        weights (List[List[float]]): Current weights, one row per neuron.
        weight_deltas (List[List[List[float]]]): Weight deltas indexed as
            [neuron][sample][weight].
        alpha (float, optional): Learning rate. Defaults to the module-level ALPHA.

    Returns:
        List[List[float]]: The updated weights, same shape as weights.

    Raises:
        ValueError: If weight_deltas does not hold exactly one entry per neuron.
    """
    if not weights:
        return []

    num_neurons = len(weights)
    if len(weight_deltas) != num_neurons:
        raise ValueError("weight_deltas needs one entry per neuron")

    # Only fall back to the global when the caller did not pass a learning rate.
    learning_rate = ALPHA if alpha is None else alpha
    num_weights = len(weights[0])
    batch_size = len(weight_deltas[0])

    updated_weights = []
    for i in range(num_neurons):
        updated_row = []
        for j in range(num_weights):
            # Accumulate term by term (not with sum()) so the floating point
            # rounding matches a plain left-to-right addition.
            avg_delta = 0
            for k in range(batch_size):
                avg_delta += weight_deltas[i][k][j] / batch_size
            updated_row.append(weights[i][j] - learning_rate * avg_delta)
        updated_weights.append(updated_row)

    return updated_weights

In [36]:
# Tests for the functions above
def test_forward_propagation():
    """Checks predictions, squared errors and weight deltas for 2 samples and 3 neurons."""
    samples = [[1, 2, 3], [4, 5, 3]]
    neuron_weights = [[1, 2, 2], [2, 1, 1], [1, 2, 1]]
    labels = [2, 4]

    expected_preds = [[11, 20], [7, 16], [8, 17]]
    expected_errors = [[81, 256], [25, 144], [36, 169]]
    expected_weight_deltas = [
        [[9, 18, 27], [64, 80, 48]],
        [[5, 10, 15], [48, 60, 36]],
        [[6, 12, 18], [52, 65, 39]],
    ]

    preds, errors, weight_deltas = forward_propagation(samples, neuron_weights, labels)

    assert preds == expected_preds
    assert errors == expected_errors
    assert weight_deltas == expected_weight_deltas

def test_back_propagation():
    """
    Checks the updated weights for 3 neurons and a batch of 2 samples.

    The expected values assume the learning rate ALPHA is 0.01. Floats are
    compared with a tolerance, as in test_w_sum, so the test does not depend
    on the exact rounding of the last digit.
    """
    weights = [[1, 2, 2], [2, 1, 1], [1, 2, 1]]
    weight_deltas = [
        [[9, 18, 27], [64, 80, 48]],
        [[5, 10, 15], [48, 60, 36]],
        [[6, 12, 18], [52, 65, 39]],
    ]
    expected = [[0.635, 1.51, 1.625], [1.735, 0.65, 0.745], [0.71, 1.615, 0.715]]

    result = back_propagation(weights, weight_deltas)

    assert len(result) == len(expected)
    for i, (row, expected_row) in enumerate(zip(result, expected)):
        # approx on a flat list also checks that both rows have the same length.
        assert row == pytest.approx(expected_row, rel=1e-9), f"Mismatch in row {i}"

# Run tests: a failed check raises AssertionError and stops the cell
test_forward_propagation()
test_back_propagation()

In [47]:
def train(data):
    """
    Trains the single layer on data, one batch of BATCH_SIZE samples at a time.

    The weights start from a fresh random matrix. For every batch the pixel values
    are divided by NORMALISATION, a forward pass computes errors and weight deltas,
    and the weights are replaced by the result of back_propagation. The average
    squared error is printed for the first batch and every 100th batch after it.

    Args:
        data: A sliceable sequence of (image, label) pairs, where image provides
            getdata() returning its pixel values.

    Returns:
        The weight matrix after the last batch (the initial random weights if
        data is empty).
    """
    weights = initialise_random_weights(NO_NEURONS_LAYER_0, number_pixels)

    # Iterate over the data passed in by the caller, not the global train_data.
    for i in range(0, len(data), BATCH_SIZE):
        batch = data[i: i + BATCH_SIZE]
        input_data = [[pixel / NORMALISATION for pixel in sample[0].getdata()] for sample in batch]
        targets = [sample[1] for sample in batch]

        preds, errors, weight_deltas = forward_propagation(input_data, weights, targets)
        weights = back_propagation(weights, weight_deltas)

        # Mean over the samples of each neuron, then mean over all neurons.
        batch_mse = sum(sum(neuron_errors) / len(neuron_errors) for neuron_errors in errors) / NO_NEURONS_LAYER_0

        if i % (100 * BATCH_SIZE) == 0:
            print(f"Batch {(i / BATCH_SIZE) + 1} with Avg MSE: {batch_mse}")

    return weights

In [48]:
# Train on the full training list. NOTE(review): the recorded run of this cell was
# interrupted (KeyboardInterrupt) after the first progress line, which suggests a
# full pass is slow in pure Python - confirm before relying on Run All.
weights = train(train_data)


Batch 1.0 with Avg MSE: 24.172855841331703


KeyboardInterrupt: 