# Neural Network on MNIST Dataset

This script implements training and testing a neural network on the MNIST dataset for the final report of \
**Neele Elbersgerd (1496121)** in the course \
Real and Artificial Neural Networks, University of Melbourne. \
The code is partly taken and adapted from *Make Your Own Neural Network* (c) Tariq Rashid, 2016; the license is GPLv2.

In [123]:
#---import packages
import numpy as np
import scipy
import time
import pickle # for storing results easily
import glob # for searching folders
import os # for soft paths
from os.path import join as opj
from groo.groo import get_root

#---set path
# I use the groo package to set up a root folder
# and build all relative paths from there.
# If you want to run this as a guest, set root = "your/path" instead.
root = get_root(".neuralroot")

## Define ANN class
Define a class that initialises neural network objects; adapted from Rashid, 2016

In [124]:
class neuralnet:
    """Three-layer (input, hidden, output) feed-forward network with sigmoid units.

    Adapted from Rashid, 2016. Both weight matrices are updated in place by
    ``train``, one record per call; ``query`` only runs the forward pass.
    """

    def __init__(self, inodes: int, hnodes: int, onodes: int, lr: float) -> None:
        """Set up the layer sizes, the learning rate and random initial weights.

        Args:
            inodes (int): number of input nodes
            hnodes (int): number of hidden nodes
            onodes (int): number of output nodes
            lr (float): learning rate used to scale every weight update
        """
        # import the submodule explicitly: a bare `import scipy` is not
        # guaranteed to make `scipy.special` available (older SciPy releases
        # do not load submodules implicitly)
        from scipy.special import expit

        self.inodes = inodes
        self.hnodes = hnodes
        self.onodes = onodes
        self.lr = lr

        # initialise weights with normally distributed randoms; the standard
        # deviation is 1/sqrt(number of incoming links) of the receiving layer
        self.wih = np.random.normal(0.0, pow(self.inodes, -0.5), (self.hnodes, self.inodes))
        self.who = np.random.normal(0.0, pow(self.hnodes, -0.5), (self.onodes, self.hnodes))

        # sigmoid as activation function
        self.activationf = expit

    def __str__(self): #descriptor method
        """Return a human-readable summary of the architecture and learning rate."""
        desc = f"This neural network has {self.inodes} input, {self.hnodes} hidden, and {self.onodes} output nodes. "
        desc = desc + f"The learning rate is: {self.lr}. "
        desc = desc + "The activation function used is sigmoid with random normally distributed weights."
        return desc

    def _forward(self, inputarr):
        """Propagate a column array through both layers; return (hidden_out, final_out)."""
        hidden_in = np.dot(self.wih, inputarr) #into hidden layer
        hidden_out = self.activationf(hidden_in) #from hidden layer
        final_in = np.dot(self.who, hidden_out) #into final output layer
        final_out = self.activationf(final_in) #from final output layer
        return hidden_out, final_out

    def train(self, inputs: list, targets: list):
        """Run one gradient-descent step on a single record; updates weights in place.

        Args:
            inputs (list): input values for one record (one value per input node)
            targets (list): desired output values (one value per output node)
        """
        # convert the flat inputs/targets to 2d column arrays
        inputarr = np.array(inputs, ndmin=2).T
        targetarr = np.array(targets, ndmin=2).T

        # calculate signals
        hidden_out, final_out = self._forward(inputarr)

        # output layer error is (target - actual)
        output_err = targetarr - final_out
        # hidden layer error is output_err, split by weights, recombined at hidden nodes
        # (computed BEFORE self.who is changed, so it uses the pre-update weights)
        hidden_err = np.dot(self.who.T, output_err)

        # update the weights for the links between the hidden and output layers
        self.who += self.lr * np.dot(output_err * final_out * (1.0 - final_out), hidden_out.T)
        # update the weights for the links between the input and hidden layers
        self.wih += self.lr * np.dot(hidden_err * hidden_out * (1.0 - hidden_out), inputarr.T)

    def query(self, inputs: list):
        """Return the network's output activations for one record.

        Args:
            inputs (list): input values for one record (one value per input node)

        Returns:
            numpy array: output layer activations as a 2d column array
        """
        # convert inputs list to 2d column array
        inputarr = np.array(inputs, ndmin=2).T
        _, final_out = self._forward(inputarr)
        return final_out
    

## Define functions for training and testing

In [125]:
def train_mnist(ann, epochs: int):
    """Implement training of a neuralnet object based on the original MNIST dataset.

    Every record of "mnist_train.csv" is presented to the network once per
    epoch, in file order. The network is modified in place; nothing is returned.

    Args:
        ann (neuralnet): object from neuralnet class
        epochs (int): number of epochs to run training on
    """
    # load the mnist training data CSV file into a list;
    # the context manager closes the file even if reading fails
    with open(opj(root, "project", "data", "mnist_train.csv"), 'r') as train_file:
        train_data = train_file.readlines()

    for e in range(epochs):
        # go through all records in the training data set
        for record in train_data:
            if not record.strip():
                continue  # skip blank lines (e.g. a trailing newline at the end of the file)
            all_values = record.split(',')  #split the record
            # scale inputs from 0..255 to 0.01..1.0
            # (np.asfarray was removed in NumPy 2.0; asarray with dtype=float is equivalent)
            inputs = (np.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
            # create the target output values (all 0.01, except the desired label which is 0.99)
            targets = np.zeros(ann.onodes) + 0.01
            targets[int(float(all_values[0]))] = 0.99
            ann.train(inputs, targets)
        print(f"-- Epoch {e+1} of {epochs} done --")
    print("-- Done with training --")

In [126]:
def test_mnist(ann, filename: str):
    """Implement testing of a neuralnet object on an (augmented) MNIST test set.

    Args:
        ann (neuralnet): object from neuralnet class
        filename (str): filename (without ".csv") of the dataset to use in testing

    Returns:
        ytrue (list): true labels of test set
        ypred (list): predicted labels of test set
        confmat (numpy array): confusion matrix, rows = true label, columns = predicted label
        confidence (numpy array): matrix of average confidence values; row i is the
            mean output vector over all test images whose true label is i
            (NaN for labels that do not occur in the test set)
    """
    # load the test data into a list;
    # the context manager closes the file even if reading fails
    with open(opj(root, "project", "data", filename+".csv"), 'r') as test_file:
        test_data = test_file.readlines()

    # initialise performance measures:
    ytrue = [] # true labels
    ypred = [] # prediction of ann
    confmat = np.zeros((ann.onodes,ann.onodes))  # confusion matrix
    confidence = np.zeros((ann.onodes,ann.onodes)) # confidence matrix

    # go through all the records in the test data set
    for record in test_data:
        if not record.strip():
            continue  # skip blank lines (e.g. a trailing newline at the end of the file)
        all_values = record.split(',') # split the record by commas
        # parse via float so labels stored as e.g. "7.0" are accepted, as in train_mnist
        correct_label = int(float(all_values[0])) # correct label
        ytrue.append(correct_label) # add correct label
        # scale the inputs from 0..255 to 0.01..1.0
        # (np.asfarray was removed in NumPy 2.0; asarray with dtype=float is equivalent)
        inputs = (np.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
        outputs = ann.query(inputs) #query the network
        guess = np.argmax(outputs) #index of highest value is the guess
        ypred.append(guess) #add network's guess
        confmat[correct_label, guess] += 1 #add count to cell of confusion matrix
        # add output vector (confidence) to the row of the true label
        confidence[correct_label] += outputs.ravel()

    #row-wise sum (how many images of each digit in test data)
    count = np.sum(confmat, axis=1)
    # average each ROW by the number of images of that digit; the counts must be
    # a column vector here, otherwise broadcasting would divide column j by count[j]
    with np.errstate(invalid="ignore", divide="ignore"):
        confidence = confidence / count[:, np.newaxis]
    return ytrue, ypred, confmat, confidence

**Define function for performance score**

In [127]:
def perform_mnist(ytrue: list, ypred: list):
    """Assess neural network performance by generating a performance score.

    The score is the fraction of positions at which the predicted label
    equals the true label. It is also printed.

    Args:
        ytrue (list): true labels of test set
        ypred (list): predicted labels of test set

    Returns:
        performance (float): share of correct predictions; NaN if both lists are empty

    Raises:
        ValueError: if ytrue and ypred differ in length
    """
    # zip() would silently drop the surplus labels, giving a misleading score
    if len(ytrue) != len(ypred):
        raise ValueError(
            f"ytrue and ypred must have the same length, got {len(ytrue)} and {len(ypred)}"
        )

    if len(ytrue) == 0:
        # no records: the score is undefined (avoids a 0/0 division warning)
        performance = float("nan")
    else:
        hits = sum(1 for true, pred in zip(ytrue, ypred) if true == pred)
        performance = hits / len(ytrue)
    print(f"-- The performance is {performance} --")
    return performance

## Train the network
In the following cell, a neural network will be trained on the original MNIST data set. \
The hyperparameters, as well as training time, will be saved to the dictionary *res*.

In [128]:
#---result dictionary: holds the model hyperparameters now, results are added later
res = {
    "hidden_nodes": 200,
    "learning_rate": 0.1,
    "epochs": 1,
}

#---build the network from the stored hyperparameters
ann = neuralnet(
    inodes=784,
    hnodes=res["hidden_nodes"],
    onodes=10,
    lr=res["learning_rate"],
)
print(ann)

#---train the network and keep the elapsed wall-clock time
start = time.time()
train_mnist(ann, epochs=res["epochs"])
time_train = time.time() - start
res["train_time"] = time_train

This neural network has 784 input, 200 hidden, and 10 output nodes. The learning rate is: 0.1. The activation function used is sigmoid with random normally distributed weights.


-- Epoch 1 of 1 done --
-- Done with training --


## Test the network
The next part implements the testing of the network with different (augmented) test datasets. \
I use the package pickle to save python objects (the dictionary *res*) to a file to be able to read them in later as the same data type. 

In [129]:
#---collect the names (without ".csv") of all augmented test files in the data folder,
#---leaving out any file whose name contains "META"
pattern = opj(root, "project", "data", "augdata*.csv")
files = [
    os.path.splitext(os.path.basename(path))[0]
    for path in glob.glob(pattern)
    if "META" not in os.path.basename(path)
]

In [131]:
#---make sure the output folder exists BEFORE testing, so a missing folder
#---does not crash the run after the (slow) evaluation has already finished
results_dir = opj(root, "project", "data", "results")
os.makedirs(results_dir, exist_ok=True)

#---loop to get results for all augmented test sets
for f in files:
    res["testfile"] = f
    print(f"-- Testing the ANN on {f} --")

    #---test
    start = time.time() # start time
    ytrue, ypred, confmat, confidence = test_mnist(ann, filename = res["testfile"])
    time_test = time.time()-start # elapsed time
    res["test_time"] = time_test

    #---save performance indices
    res["performance"] = perform_mnist(ytrue, ypred)
    res["ypred"] = ypred
    res["ytrue"] = ytrue
    res["confmat"] = confmat
    res["confidence"] = confidence

    #---save results: one pickle per test file, holding hyperparameters and results
    with open(opj(results_dir, (f+"RES.pkl")), "wb") as fb:
        pickle.dump(res, fb, protocol=pickle.HIGHEST_PROTOCOL)

-- Testing the ANN on mnist_test --
-- The performance is 0.9659090909090909 --
