In [2]:
import numpy as np
import pandas as pd
import math as m

#IN ORDER TO RUN TRAINING, you'll need to download the .csv version of the MNIST dataset from https://www.kaggle.com/datasets/oddrationale/mnist-in-csv and place mnist_train.csv and mnist_test.csv
#in the mnist-dataset directory.

class Neural_Network:
    
    def __init__(self, layers, epochs, learningRate):
        
        self.layers = layers
        self.epochs = epochs
        self.learningRate = learningRate

        #Reads in MNIST training set
        self.data_initial = pd.read_csv('./mnist-dataset/mnist_train.csv')
        self.labels = self.data_initial['label'] #(60000 imgs,)
        self.data = self.data_initial.drop('label', axis=1) #(60000 imgs, 784, pixels)

        #Reads in MNIST test set
        self.testInitial = pd.read_csv('./mnist-dataset/mnist_test.csv')
        self.testLabels = self.testInitial['label'] #(10000 imgs,)
        self.testData = self.testInitial.drop('label', axis=1) #(10000 imgs, 784, pixels)

        self.a_0 = np.empty([784, 1]) 

        self.W_1 = np.random.default_rng().normal(0, 1, size=(16,784))
        self.b_1 = np.random.default_rng().normal(0, 1, size=(16,1))
        self.z_1 = np.empty([16, 1])
        self.a_1 = np.empty([16, 1])
        self.error_1 = np.empty([16, 1])
        
        self.W_2 = np.random.default_rng().normal(0, 1, size=(16,16))
        self.b_2 = np.random.default_rng().normal(0, 1, size=(16,1))
        self.z_2 = np.empty([16, 1])
        self.a_2 = np.empty([16, 1])
        self.error_2 = np.empty([16, 1])
       
        self.W_3 = np.random.default_rng().normal(0, 1, size=(10,16))
        self.b_3 = np.random.default_rng().normal(0, 1, size=(10,1))
        self.z_3 = np.empty([10, 1])
        self.a_3 = np.empty([10, 1])
        self.error_out = np.empty([10, 1])
        
        self.y = np.zeros([1,10]) #This is a row vector, will be transposed. 

    def feedForward(self, x, dataset):
        #Calculates all the activations in the network for the training example, x.

        #Grabs image pixel information from the xth row of the dataset. 
        #This gives us a numpy (784,1) colm vector of activations for a training example, x, 
        #on Layer 0 (input layer)

        #1 means feedForward a training example from the training dataset
        if (dataset == 1):
            self.a_0 = (self.data.iloc[x]).values.reshape((784,1))

        #2 means feedForward a training example from the test dataset.
        if (dataset == 2):
            self.a_0 = (self.testData.iloc[x]).values.reshape((784,1))

        #Going into Layer 1
        self.z_1 = (np.dot(self.W_1, self.a_0)) + self.b_1
        self.a_1 = self.sigmoid(self.z_1)

        #Going into Layer 2
        self.z_2 = (np.dot(self.W_2, self.a_1)) + self.b_2
        self.a_2 = self.sigmoid(self.z_2)

        #Going into Layer 3 (output layer)
        self.z_3 = (np.dot(self.W_3, self.a_2)) + self.b_3
        self.a_3 = self.sigmoid(self.z_3)
        
#    
    def backProp(self, x): 
        #Calculates the "error" on all the neurons in the network for a training example, x.
        
        #Creates the y column vector that represents the ideal output for all the output neurons for the spesific training example.
        label = self.labels.iloc[x]
        self.y[0,label] = label
        self.y = np.transpose(self.y)
        
        #Calculuate the error on the output neurons
        self.error_out = (self.a_3 - self.y) * self.dSigmoid(self.z_3)

        #Calculate the error on each of neurons on each of the layers. Calculating backwards.
        self.error_2 = np.dot((np.transpose(self.W_3)), self.error_out) * self.dSigmoid(self.z_2)

        self.error_1 = np.dot((np.transpose(self.W_2)), self.error_2) * self.dSigmoid(self.z_1)

        self.y = np.zeros([1,10])

        #for (matrix)(colmVector) --> np.dot computes what we want.

    def gradDescent(self):
        #Updates all the weights and biases based on the calculated errors from backprop.
                
        self.W_1 = self.W_1 - self.learningRate*(np.dot(self.error_1, np.transpose(self.a_0)))
        self.b_1 = self.b_1 - self.learningRate*(self.error_1)
        
        self.W_2 = self.W_2 - self.learningRate*(np.dot(self.error_2, np.transpose(self.a_1)))
        self.b_2 = self.b_2 - self.learningRate*(self.error_2)

        self.W_3 = self.W_3 - self.learningRate*(np.dot(self.error_out, np.transpose(self.a_2)))
        self.b_3 = self.b_3 - self.learningRate*(self.error_out)

    def startTraining(self):
        for epochs in range(self.epochs):
            print("So far finished ", epochs, " epoch")
            for x in range(60000): #size of the MNIST dataset.
                self.feedForward(x, 1)
                self.backProp(x)
                self.gradDescent()

    def evaluate(self):
        
        correct = 0
        
        for x in range(10000):
            label = self.testLabels.iloc[x]
            self.feedForward(x, 2)
            
            #Determine which activation is biggest in final layer
            a_3R = (self.a_3).reshape(1,10).flatten()
            
            biggest = 0
            theIndexOfBiggest = 0
            for i in range(a_3R.size):
                if (a_3R[i] > biggest):
                    biggest = a_3R[i]
                    theIndexOfBiggest = i

            if (theIndexOfBiggest == label):
                correct = correct + 1
                
        return (correct/10000)
        
    def sigmoid(self, colVector):
        return 1/(1 + m.e**(-1*colVector))

    def dSigmoid(self, colVector):
        return (self.sigmoid(colVector)) * (1 - self.sigmoid(colVector))

#12960 weights, 42 biases
nn = Neural_Network(4, 2, 0.1)
nn.startTraining()
print(nn.evaluate())

#So far, a very chopped implementation. 
#If you run this python program block, the model will train on 2 total epochs then evaluate the accuracy based on 10,000 test images.

So far finished  0  epoch


  return 1/(1 + m.e**(-1*colVector))


So far finished  1  epoch
0.1009
