In [7]:
import numpy as np
import pandas as pd
import math as m

#IN ORDER TO RUN TRAINING, you'll need to download the .csv version of the MNIST dataset from https://www.kaggle.com/datasets/oddrationale/mnist-in-csv and place mnist_train.csv and mnist_test.csv
#in the mnist-dataset directory.

class Neural_Network:
    
    def __init__(self, layers, epochs, learningRate):
        
        self.layers = layers
        self.epochs = epochs
        self.learningRate = learningRate

        #Reads in MNIST training set
        self.data_initial = pd.read_csv('./mnist-dataset/mnist_train.csv')
        self.labels = self.data_initial['label'] #(60000 imgs,)
        self.data = self.data_initial.drop('label', axis=1) #(60000 imgs, 784, pixels)

        #Reads in MNIST test set
        self.testInitial = pd.read_csv('./mnist-dataset/mnist_test.csv')
        self.testLabels = self.testInitial['label'] #(10000 imgs,)
        self.testData = self.testInitial.drop('label', axis=1) #(10000 imgs, 784, pixels)

        self.a_0 = np.empty([784, 1]) 

        self.W_1 = np.random.default_rng().normal(0, 1, size=(16,784))
        self.b_1 = np.random.default_rng().normal(0, 1, size=(16,1))
        self.z_1 = np.empty([16, 1])
        self.a_1 = np.empty([16, 1])
        self.error_1 = np.empty([16, 1])
        
        self.W_2 = np.random.default_rng().normal(0, 1, size=(16,16))
        self.b_2 = np.random.default_rng().normal(0, 1, size=(16,1))
        self.z_2 = np.empty([16, 1])
        self.a_2 = np.empty([16, 1])
        self.error_2 = np.empty([16, 1])
       
        self.W_3 = np.random.default_rng().normal(0, 1, size=(10,16))
        self.b_3 = np.random.default_rng().normal(0, 1, size=(10,1))
        self.z_3 = np.empty([10, 1])
        self.a_3 = np.empty([10, 1])
        self.error_out = np.empty([10, 1])
        
        self.y = np.zeros([1,10]) #This is a row vector, will be transposed. 

    def feedForward(self, x, dataset):
        #Calculates all the activations in the network for the training example, x.

        #Grabs image pixel information from the xth row of the dataset. 
        #This gives us a numpy (784,1) colm vector of activations for a training example, x, 
        #on Layer 0 (input layer)

        #1 means feedForward a training example from the training dataset
        if (dataset == 1):
            self.a_0 = (self.data.iloc[x]).values.reshape((784,1))

        #2 means feedForward a training example from the test dataset.
        if (dataset == 2):
            self.a_0 = (self.testData.iloc[x]).values.reshape((784,1))

        #Going into Layer 1
        self.z_1 = (np.dot(self.W_1, self.a_0)) + self.b_1
        self.a_1 = self.sigmoid(self.z_1)

        #Going into Layer 2
        self.z_2 = (np.dot(self.W_2, self.a_1)) + self.b_2
        self.a_2 = self.sigmoid(self.z_2)

        #Going into Layer 3 (output layer)
        self.z_3 = (np.dot(self.W_3, self.a_2)) + self.b_3
        self.a_3 = self.sigmoid(self.z_3)
        
#    
    def backProp(self, x): 
        #Calculates the "error" on all the neurons in the network for a training example, x.
        
        #Grabs label for the training example from the label column vector initialized at start.
        #Then creates the y column vector that represents the ideal output for all the output neurons 
        #(for the spesific training example).
        label = self.labels.iloc[x]
        self.y[0,label] = label
        self.y = np.transpose(self.y)
        
        #Calculuate the error on the output neurons
        self.error_out = (self.a_3 - self.y) * self.dSigmoid(self.z_3)

        #Calculate the error on each of neurons on each of the layers. Calculating backwards.
        self.error_2 = np.dot((np.transpose(self.W_3)), self.error_out) * self.dSigmoid(self.z_2)

        self.error_1 = np.dot((np.transpose(self.W_2)), self.error_2) * self.dSigmoid(self.z_1)

        self.y = np.zeros([1,10])

        #for (matrix)(colmVector) --> np.dot computes what we want.

    def gradDescent(self):
        #Updates all the weights and biases based on the calculated errors from backprop.
                
        self.W_1 = self.W_1 - self.learningRate*(np.dot(self.error_1, np.transpose(self.a_0)))
        self.b_1 = self.b_1 - self.learningRate*(self.error_1)
        
        self.W_2 = self.W_2 - self.learningRate*(np.dot(self.error_2, np.transpose(self.a_1)))
        self.b_2 = self.b_2 - self.learningRate*(self.error_2)

        self.W_3 = self.W_3 - self.learningRate*(np.dot(self.error_out, np.transpose(self.a_2)))
        self.b_3 = self.b_3 - self.learningRate*(self.error_out)

    def startTraining(self):
        for epochs in range(self.epochs):
            print("So far finished ", epochs, " epoch")
            for x in range(60000): #size of the MNIST dataset.
                self.feedForward(x, 1)
                self.backProp(x)
                self.gradDescent()

    def evaluate(self):
        correct = 0
        
        for x in range(10000):
            label = self.testLabels.iloc[x]
            self.feedForward(x, 2)
            
            #Determine which activation is biggest in final layer
            a_3R = (self.a_3).reshape(1,10).flatten()
            
            biggest = 0
            theIndexOfBiggest = 0
            for i in range(a_3R.size):
                if (a_3R[i] > biggest):
                    biggest = a_3R[i]
                    theIndexOfBiggest = i

            if (theIndexOfBiggest == label):
                correct = correct + 1
                
        return (correct/10000)
        
    def sigmoid(self, colVector):
        return 1/(1 + m.e**(-1*colVector))

    def dSigmoid(self, colVector):
        return (self.sigmoid(colVector)) * (1 - self.sigmoid(colVector))

#12960 weights, 42 biases
nn = Neural_Network(4, 2, 0.1)
nn.startTraining()
print(nn.evaluate())

#So far, a very chopped implementation. 
#If you run this python program block, the model will train on 2 total epochs then evaluate the accuracy based on 10,000 test images.

So far finished  0  epoch


  return 1/(1 + m.e**(-1*colVector))


So far finished  1  epoch
So far finished  2  epoch
So far finished  3  epoch
So far finished  4  epoch
So far finished  5  epoch
So far finished  6  epoch
So far finished  7  epoch
So far finished  8  epoch
So far finished  9  epoch
So far finished  10  epoch
So far finished  11  epoch
So far finished  12  epoch
So far finished  13  epoch
So far finished  14  epoch
0.1018


### Reading in Data Test

In [22]:
import pandas as pd
import numpy as np
#np.transpose works on pandas DataFrames

data_initial = pd.read_csv('./mnist-dataset/mnist_train.csv')
labels = data_initial['label']
data = data_initial.drop('label', axis=1)

#Sanity Check
print(data_initial.shape)
print(data.shape) #Should be (60000, 784), 60k training examples (rows), each 784 pixels
print(labels.shape) #Should be (60000, ), 60k labels.

(60000, 785)
(60000, 784)
(60000,)


### Reading in a Image Test

In [24]:
# Returns 784,1 colm vector of the first img.

firstImg = data.iloc[0]
print(firstImg)
print(firstImg.shape)

1x1      0
1x2      0
1x3      0
1x4      0
1x5      0
        ..
28x24    0
28x25    0
28x26    0
28x27    0
28x28    0
Name: 0, Length: 784, dtype: int64
(784,)


### Reading in Label and Creating Y Colm Vector Test

In [66]:
y = np.zeros([1, 10])
y0 = labels.iloc[0]
y[0,y0] = y0
y = np.transpose(y)

print(y)

#I currently don't think transpose OR reshaping to the transpose actually does anything.

[[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [5.]
 [0.]
 [0.]
 [0.]
 [0.]]


### Testing Why Gradient Descent is Broken

In [88]:
W_1 = np.random.default_rng().normal(0, 1, size=(16,784))
error_1 = np.empty([16, 1])
learningRate = 0.1

a_0 = (data.iloc[0]).values.reshape((784,1))
print(a_0.shape)

W_1 = W_1 - learningRate*(np.dot(error_1, np.transpose(a_0)))

print(W_1)

#Turns out, it was because my a_0 was still considered a panda series, not an numpy array. 
#That's why .transpose wasn't working.

(784, 1)
[[-0.40528597 -0.40384543  0.09419205 ... -0.28404044  0.66003423
   1.98236918]
 [-0.20340656 -1.18015369  1.25078338 ...  0.50561667  1.33540059
  -0.58290972]
 [ 0.95574544 -0.32636246 -0.95895502 ...  0.62481656 -0.4425282
   2.36529777]
 ...
 [-1.39743211 -0.96980717 -0.71149636 ... -0.77834672  1.31991467
  -0.39134117]
 [-0.04281638  0.94211695 -0.02820208 ... -0.93458087 -0.08543699
  -0.02447355]
 [-0.06146447 -0.32451686 -0.49651673 ... -0.32895833  0.76089135
   0.03232471]]


### Testing Why Feedforward is (Now) Broken, after using ^

In [91]:
# I am going into this debug assuming that Feedforward broke on the first iteration on the model intially.
# Not later.

import math as m

def sigmoid(colVector):
    return 1/(1 + m.e**(-1*colVector))

a_0 = (data.iloc[0]).values.reshape((784,1))

W_1 = np.random.default_rng().normal(0, 1, size=(16,784))
b_1 = np.random.default_rng().normal(0, 1, size=(16,1))

W_2 = np.random.default_rng().normal(0, 1, size=(16,16))
b_2 = np.random.default_rng().normal(0, 1, size=(16,1))

W_3 = np.random.default_rng().normal(0, 1, size=(10,16))
b_3 = np.random.default_rng().normal(0, 1, size=(10,1))

#Going into Layer 1
z_1 = (np.dot(W_1, a_0)) + b_1
a_1 = sigmoid(z_1)
print(a_1.shape)

#Going into Layer 2
z_2 = (np.dot(W_2, a_1)) + b_2
a_2 = sigmoid(z_2)
print(a_2.shape)

#Going into Layer 3 (output layer)
z_3 = (np.dot(W_3, a_2)) + b_3
a_3 = sigmoid(z_3) 
print(a_3.shape)

(16, 1)
(16, 1)
(10, 1)


  return 1/(1 + m.e**(-1*colVector))


### Testing my Algorithm For Determing Accuracy

In [4]:
import numpy as np
import pandas as pd

testInitial = pd.read_csv('./mnist-dataset/mnist_test.csv')
testLabels = testInitial['label'] #(10000 imgs,)
testData = testInitial.drop('label', axis=1) #(10000 imgs, 784, pixels)
correct = 0

label = testLabels.iloc[0]

#Determine which activation is biggest in final layer
a_3R = np.array([0.2,0.3,0.1,0.5,0.1,0.2,0.4,0.9,0.3,0.8]) #7
a_3R = np.array([0.2,0.3,0.1,0.5,0.1,0.2,0.4,0.7,0.3,0.8]) #9
a_3R = np.array([0.2,0.3,0.1,0.9,0.1,0.2,0.4,0.3,0.3,0.6]) #4


biggest = 0
for i in range(a_3R.size):
    if (a_3R[i] > biggest):
        biggest = a_3R[i]
        theIndexOfBiggest = i
    #how do I now return the index of the biggest thing?
        
if (theIndexOfBiggest == label):
    correct = correct + 1

print(theIndexOfBiggest)
print(biggest)
print(correct/10)

7
0.9
1.0


### Checking Sizes

In [1]:
import numpy as np

a_0 = np.array([1,2,3,4]).reshape(4,1)
W_1 = np.array([[1,2,3,4], [5, 6, 7, 8]]).reshape(2,4)
b_1 = np.array([[1], [1]]).reshape(2,1)
z_1 = (np.dot(W_1, a_0)) + b_1

print(a_0)
print(W_1)
print(z_1)

[[1]
 [2]
 [3]
 [4]]
[[1 2 3 4]
 [5 6 7 8]]
[[31]
 [71]]


### Checking Sigmoid (Old Test)

In [None]:
import math as m

def sigmoid(colVector):
    return 1/(1 + np.exp(-1*colVector))

a_0 = np.arange(4).reshape(4, 1)
W_1 = np.random.default_rng().normal(0, 1, size=(2,4))
b_1 = np.random.default_rng().normal(0, 1, size=(2,1))
z_1 = (np.dot(W_1, a_0)) + b_1

a_1 = sigmoid(z_1)

print(a_1)

### Checking Sigmoid Again (Old Test)

In [None]:
import math as m

def sigmoid(colVector):
    return 1/(1 + np.exp(-1*colVector))

a_0 = np.arange(784).reshape(784, 1)
W_1 = np.random.default_rng().normal(0, 1, size=(16,784))
b_1 = np.random.default_rng().normal(0, 1, size=(16,1))
z_1 = (np.dot(W_1, a_0)) + b_1

a_1 = sigmoid(z_1)