In [74]:
import numpy as np
import pandas as pd
# This to_categorical function is *only* used for the one-hot-encoding of labels.
from tensorflow.keras.utils import to_categorical

# Import all datasets.
mnist_data = pd.read_csv("assets/training60000.csv", header=None)
mnist_labels = pd.read_csv("assets/training60000_labels.csv", header=None)
mnist_testing_data = pd.read_csv("assets/testing10000.csv", header=None)
mnist_testing_labels = pd.read_csv("assets/testing10000_labels.csv", header=None).to_numpy()

In [107]:
def logistic(z):
    """
    The logistic activation function.
    """
    return 1.0 / (1.0 + np.exp(-z))


def delta_logistic(z):
    """
    Derivative of the logistic function used for computing deltas.
    """
    return logistic(z) * (1 - logistic(z))


def softmax(z):
    """
    Softmax function used for converting inputs into values that sum to 1.
    Used as the activation function for the output layer.
    """
    return np.exp(z) / sum(np.exp(z))


def encoder(labels):
    """
    Returns one-hot-encoded label vectors using the keras.to_categorical function.
    """
    return to_categorical(labels)

def forward_pass(input_layer, Wh, Wo, input_bias):
    """
    Forward pass portion of the backpropagation algorithm.
    The matrix multiplications propagate the activations for the mini
    batch forward through the each layer of the network.
    """
    # Z's are obtained by matrix multiplication
    # of the layer's weights with the activations from the preceding layer.
    zh = np.dot(Wh, input_layer)

    # Activation function is applied to each element of the previous result. This
    # generates the activations for each neuron in the layer for each example in the batch.
    hidden_activations = logistic(zh)

    # Repeat for each layer:
    hidden_layer = hidden_activations
    # Concatenate bias vector to the top of the hidden_layer array
    hidden_layer = np.vstack([input_bias, hidden_layer])
    zo = np.dot(Wo, hidden_layer)
    output_activations = softmax(zo)
    return output_activations, hidden_activations, hidden_layer

In [123]:
def dnn():
    """
    Neural network function that generates a predictive model
    for recognizing hand-written numerical digits from the
    MNIST dataset.
    """
    # Create mini batches for both training and testing data
    batch_size = 100
    batches = np.array_split(mnist_data, 60000/batch_size)
    label_batches = np.array_split(mnist_labels, 60000/batch_size)
    test_batches = np.array_split(mnist_testing_data, 10000/batch_size)
    test_labels = np.array_split(mnist_testing_labels, 10000/batch_size)
    # Initialize network parameters
    i_neurons = 784
    h_neurons = 120
    o_neurons = 10
    num_epochs = 40
    learning_rate = 0.001

    # Initialize the weight matrix for each layer
    Wh = np.random.uniform(-0.5, 0.5, size=(h_neurons, i_neurons+1))
    Wo = np.random.uniform(-0.5, 0.5, size=(o_neurons, h_neurons+1))

    for epoch in range(num_epochs):
        for batch, labels in zip(batches, label_batches):
            # Forward pass
            # Transpose input layer and concatenate a vector of bias values (ones).
            input_layer = batch.transpose()
            input_bias = [1] * batch_size
            input_layer = np.vstack([input_bias, input_layer])
            
            # Obtain output layer activations, hidden layer activations, and hidden layer activations
            # with the bias vector concatenated.
            output_activations, activations, hidden_layer = forward_pass(input_layer, Wh, Wo, input_bias)
            # End forward pass
            
            # Begin backward pass
            
            # Create an array of one-hot-encoded vectors representing each label
            enc_labels = encoder(labels)
            enc_labels = enc_labels.transpose()

            # FOR EACH NEURON IN OUTPUT LAYER
            # Calculate deltas for neurons in output layer using SOFTMAX
            delta_outputs = output_activations - enc_labels

            # Calculate deltas for neurons in HIDDEN LAYER
            # dk = logistic_derivative(z) x (sum_weights x delta_i)
            delta_activations = delta_logistic(activations)
            # Concatenate bias values
            delta_activations = np.vstack([input_bias, delta_activations])
            # Weight^T x Deltas
            weight_t_deltas = np.dot(Wo.transpose(), delta_outputs)
            # Multiply these two results
            delta_hidden = np.multiply(weight_t_deltas, delta_activations)
            
            # Remove bias row for matrix multiplications in backprop
            delta_hidden_bias_deleted = np.delete(delta_hidden, 1, 0)


            # DELTA_weight = weight + delta & activation
            # Dot product of deltas and hidden layer activations
            output_delta_weights = np.dot(hidden_layer, delta_outputs.transpose())
            # Transpose after matrix multiplications
            output_delta_weights = output_delta_weights.transpose()
            # Hidden delta weights = dot product of input layer activations and hidden layer deltas
            hidden_delta_weights = np.dot(input_layer, delta_hidden_bias_deleted.transpose())
            # Transpose after matrix multiplications
            hidden_delta_weights = hidden_delta_weights.transpose()

            # FOR EACH WEIGHT IN NETWORK
            # Update the weights after summing error gradients over a complete pass
            # Weight = weight - learning_rate * delta_weight
            Wh = Wh - learning_rate * hidden_delta_weights
            Wo = Wo - learning_rate * output_delta_weights

    print("Done training model.\n")
    
    print("Testing model...\n")

    # Establish counter for keeping track of how many are correct
    count_correct = 0
    # Iterate through batches of test data and test labels
    for batch, label in zip(test_batches, test_labels):
        # Prepare input layer by transposing and concatenating bias vector
        input_layer = batch.transpose()
        input_bias = [1] * batch_size
        input_layer = np.vstack([input_bias, input_layer])
        
        # Forward pass of backpropagation algorithm. throw_1 and throw_2 are not necessary for testing.
        output, throw_1, throw_2 = forward_pass(input_layer, Wh, Wo, input_bias)

        # Iterate through the test batch and compare against labels.
        for i in range(len(test_labels)):
            # Use np.argmax to convert one-hot-encoded NN outputs to an integer prediction
            prediction = np.argmax(output.transpose()[i])
            # Gather label using iterator
            lbl = label[i]
            # Compare and mark correct if equal.
            if (prediction == lbl):
                count_correct += 1
    
    print("Results:\n")
    print("Network Properties:")
    print("Input Neurons:", i_neurons)
    print("Hidden Neurons:", h_neurons)
    print("Output Neurons:", o_neurons)
    print("Number of Hidden Layers: 1")
    print("Number of Epochs:", num_epochs)
    print("Batch Size:", batch_size)
    print("Learning Rate:", learning_rate)
    print()
    print("Correct Classification:", count_correct)
    print("Incorrect Classification:", 10000-count_correct)
    print("Percent correct:", 100*(count_correct/10000), "%")
                

In [124]:
dnn()

Done training model.

Testing model...

Results:

Network Properties:
Input Neurons: 784
Hidden Neurons: 120
Output Neurons: 10
Number of Hidden Layers: 1
Number of Epochs: 40
Batch Size: 100
Learning Rate: 0.001

Correct Classification: 9288
Incorrect Classification: 712
Percent correct: 92.88 %
