<a href="https://colab.research.google.com/github/tomaszfrelek2/Neural-Networks/blob/main/mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [None]:
# Load the MNIST training CSV into a DataFrame (path is where Colab places uploaded files).
TRAIN_CSV_PATH = "/content/mnist_train.csv"
data = pd.read_csv(TRAIN_CSV_PATH)


In [None]:
# Preview the first five rows to sanity-check the column layout (label, then pixels).
data.head(n=5)

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Fix the global NumPy RNG seed so the shuffle below (and any later code that
# draws from np.random, such as the random weight initialisation) is
# reproducible under Restart & Run All.
SEED = 42
np.random.seed(SEED)

# Copy the DataFrame into a NumPy array under a NEW name: `data` stays a
# DataFrame, so earlier cells such as data.head() still work when re-run.
dataArray = np.array(data)

# m = number of examples (rows); n = number of features (columns minus the label column)
m = dataArray.shape[0]
n = dataArray.shape[1] - 1

# Shuffle the rows in place. np.array() made a copy, so `data` itself is untouched.
np.random.shuffle(dataArray)

# Transpose so that every column is one example.
trainingData = dataArray.transpose()

# Parallel arrays: expectedVals is the label row (the y vector); xVals holds the
# pixel rows of each input picture, divided by 255 (assumes 8-bit pixel values,
# which maps them into the 0-1 range).
expectedVals = trainingData[0]
xVals = trainingData[1:] / 255
print(expectedVals)
print(xVals.shape)



[5 4 0 ... 3 4 3]
(784, 60000)


In [None]:

def init_params():
  """Create the randomly initialised weights and biases of the network.

  Every entry is drawn uniformly from [-0.5, 0.5) using NumPy's global RNG.

  Returns:
    Tuple (w1, b1, w2, b2):
      w1 (10x784), b1 (10x1): input layer -> hidden layer
      w2 (10x10),  b2 (10x1): hidden layer -> output layer
  """
  # np.random.rand yields values in [0, 1); subtracting 0.5 centres them on 0.
  # The draw order (w1, b1, w2, b2) is kept so a fixed seed gives fixed values.
  centred = lambda rows, cols: np.random.rand(rows, cols) - 0.5

  w1 = centred(10, 784)
  b1 = centred(10, 1)
  w2 = centred(10, 10)
  b2 = centred(10, 1)

  return w1, b1, w2, b2

#ReLU activation: negative entries become 0, everything else passes through unchanged
def ReLU(z):
  """Apply the rectified linear unit element-wise.

  Args:
    z: array of pre-activation values.

  Returns:
    Array with the same shape as z holding max(0, z) for every entry.
  """
  lowerBound = 0
  return np.maximum(lowerBound, z)


#derivative of ReLU: 1 where z > 0, else 0 (ReLU is not differentiable at z = 0; that point is treated as 0)
def ReLUDerivative(z):
  """Return the element-wise ReLU derivative as a boolean mask.

  Args:
    z: array of pre-activation values.

  Returns:
    Boolean array, True where z > 0. When multiplied with a numeric array the
    mask acts as 1/0.
  """
  isPositive = z > 0
  return isPositive

#Jacobian of softmax for a single input vector (not called by backwardPropagation)
def softmaxDerivative(z):
  """Return the softmax Jacobian diag(s) - s s^T, where s = softmax(z).

  NOTE(review): np.diag only builds a diagonal matrix from a 1-D input, so this
  formula is meaningful for a single vector z, not for a matrix whose columns
  are separate samples.

  Args:
    z: 1-D array of scores.

  Returns:
    Square array whose (i, j) entry is d softmax(z)[i] / d z[j].
  """
  probs = softmax(z)
  diagonalPart = np.diag(probs)
  outerPart = np.outer(probs, probs)
  return diagonalPart - outerPart
#softmax activation: turns every column of scores into probabilities that sum to 1
#z should be an array whose columns are individual samples (a single 1-D vector also works)
def softmax(z):
    """Column-wise softmax.

    Args:
        z: array of scores; for a 2-D array each column is normalised on its own.

    Returns:
        Array with the same shape as z whose entries along axis 0 sum to 1.
    """
    # Subtract each column's OWN maximum before exponentiating. Softmax is
    # unchanged by a per-column shift, and this guarantees every column has at
    # least one exp(0) = 1 term, so the denominator can never underflow to 0.
    # (Shifting by the global maximum instead yields 0/0 = NaN for any column
    # that lies far below that maximum.)
    shifted = z - np.max(z, axis=0, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / exp_z.sum(axis=0)

#forward pass of the network: one ReLU hidden layer followed by a softmax output layer
def forwardPropagation(w1,b1,w2,b2,x):
   """Run the inputs through both layers and keep every intermediate value.

   Args:
     w1, b1: weights and biases from the input to the hidden layer.
     w2, b2: weights and biases from the hidden layer to the output layer.
     x: input array with one example per column.

   Returns:
     Tuple (z1, a1, z2, a2): pre-activation and activation of the hidden layer,
     then pre-activation and softmax activation of the output layer.
   """
   #hidden layer: weighted sum of the inputs plus bias, then ReLU
   hiddenPre = np.dot(w1, x) + b1
   hiddenAct = ReLU(hiddenPre)

   #output layer: weighted sum of the hidden activations plus bias, then softmax
   outputPre = np.dot(w2, hiddenAct) + b2
   outputAct = softmax(outputPre)

   return hiddenPre, hiddenAct, outputPre, outputAct

#takes m integer labels and returns their one-hot encoding as a (numClasses x m) matrix
def oneHotEncoding(y, numClasses=10):
  """One-hot encode a vector of integer class labels.

  Args:
    y: array-like of m integer labels (a flat vector or an m x 1 column).
    numClasses: number of rows in the encoding; defaults to 10 (the digits 0-9).

  Returns:
    Array of shape (numClasses, m) where column i has a single 1 in row y[i]
    and 0 everywhere else.

  Raises:
    IndexError: if a label is out of range for numClasses or is not an integer.
  """
  #flatten so an (m, 1) column is treated the same as a length-m vector
  labels = np.asarray(y).ravel()
  #build the encoding as (m x numClasses), then transpose at the end
  oneHotY = np.zeros((labels.size, numClasses))
  if labels.size:
    #set all m entries in one vectorized assignment instead of a Python loop
    oneHotY[np.arange(labels.size), labels] = 1
  return oneHotY.transpose()

# computes the gradients needed for one step of gradient descent (backpropagation)
#NOTE- the softmax Jacobian is never formed explicitly: the output-layer error is taken
#directly as (A2 - oneHotY), which is the gradient for softmax combined with cross-entropy loss
def backwardPropagation(Z1, A1, Z2, A2, W1, W2, X, Y):
    """Compute the gradients of the loss with respect to all weights and biases.

    Args:
        Z1: hidden-layer pre-activations from the forward pass.
        A1: hidden-layer activations from the forward pass.
        Z2: output-layer pre-activations (unused; kept for a stable signature).
        A2: output-layer softmax activations.
        W1: input -> hidden weights (unused; kept for a stable signature).
        W2: hidden -> output weights.
        X: inputs, one example per column.
        Y: integer labels, one per example.

    Returns:
        Tuple (dW1, db1, dW2, db2). dW1 and dW2 have the shapes of W1 and W2;
        db1 and db2 are column vectors with one entry per unit of their layer.
    """
    # Average over the batch that was actually passed in rather than relying on
    # a notebook-level global for the example count.
    numSamples = Y.size
    oneHotY = oneHotEncoding(Y)

    # Output-layer error. The constant factor 2 only rescales the gradient, so
    # it acts like a larger learning rate.
    dZ2 = 2 * (A2 - oneHotY)
    dW2 = 1 / numSamples * dZ2.dot(A1.T)
    # Sum across examples (axis=1) to get one bias gradient PER UNIT. Summing
    # every entry into one scalar would be ~0 here, because each column of A2
    # and of oneHotY sums to 1, and the output biases would never be trained.
    db2 = 1 / numSamples * np.sum(dZ2, axis=1, keepdims=True)

    # Push the error back through W2 and gate it with the ReLU derivative.
    dZ1 = W2.T.dot(dZ2) * ReLUDerivative(Z1)
    dW1 = 1 / numSamples * dZ1.dot(X.T)
    db1 = 1 / numSamples * np.sum(dZ1, axis=1, keepdims=True)
    return dW1, db1, dW2, db2
#applies one gradient-descent step to every weight and bias using the backpropagation gradients
def updateParams(w1,b1,w2,b2,dw1,db1,dw2,db2,learningRate):
  """Move each parameter against its gradient, scaled by the learning rate.

  The parameter arrays are modified in place and also returned.

  Args:
    w1, b1, w2, b2: current weights and biases.
    dw1, db1, dw2, db2: matching gradients.
    learningRate: step size multiplier.

  Returns:
    Tuple (w1, b1, w2, b2) holding the updated parameters.
  """
  #scale every gradient first, then apply the steps
  stepW1 = learningRate * dw1
  stepB1 = learningRate * db1
  stepW2 = learningRate * dw2
  stepB2 = learningRate * db2

  w1 -= stepW1
  b1 -= stepB1
  w2 -= stepW2
  b2 -= stepB2
  return w1, b1, w2, b2


def get_predictions(A2):
    """Pick the predicted class of every example.

    Args:
        A2: output activations, one column per example.

    Returns:
        Array holding, for each column, the row index of its largest value.
    """
    predictedClasses = np.argmax(A2, axis=0)
    return predictedClasses

#compares the predictions with the true labels and returns the fraction that match
def get_accuracy(predictions, Y):
    """Return the share of predictions that equal the corresponding label.

    Also prints both arrays so the predictions can be eyeballed next to the labels.

    Args:
        predictions: predicted class per example.
        Y: true class per example.

    Returns:
        Number of matching entries divided by the number of labels.
    """
    print(predictions, Y)
    isCorrect = predictions == Y
    numCorrect = np.sum(isCorrect)
    return numCorrect / Y.size


#trains the network with full-batch gradient descent and returns the learned parameters
def gradientDescent(X, Y, alpha, iterations):
    """Train the network on (X, Y).

    Every iteration runs a forward pass, backpropagation and a parameter update
    over the whole of X. Every 10th iteration the accuracy on X/Y is printed;
    it is computed from that iteration's forward pass, i.e. before the update.

    Args:
        X: inputs, one example per column.
        Y: integer labels, one per example.
        alpha: learning rate.
        iterations: number of gradient-descent steps.

    Returns:
        Tuple (W1, b1, W2, b2) with the trained weights and biases.
    """
    W1, b1, W2, b2 = init_params()
    for step in range(iterations):
        forwardCache = forwardPropagation(W1, b1, W2, b2, X)
        gradients = backwardPropagation(*forwardCache, W1, W2, X, Y)
        W1, b1, W2, b2 = updateParams(W1, b1, W2, b2, *gradients, alpha)

        # only report progress on every 10th step
        if step % 10 != 0:
            continue
        print("Iteration: ", step)
        outputActivations = forwardCache[3]
        predictions = get_predictions(outputActivations)
        print(get_accuracy(predictions, Y))
    return W1, b1, W2, b2



In [None]:
# Train on the full training set: learning rate 0.2, 500 gradient-descent iterations.
W1, b1, W2, b2 = gradientDescent(xVals, expectedVals, alpha=0.2, iterations=500)

Iteration:  0
[4 4 3 ... 4 4 5] [5 4 0 ... 3 4 3]
0.08631666666666667
Iteration:  10
[2 5 5 ... 5 8 2] [5 4 0 ... 3 4 3]
0.22781666666666667
Iteration:  20
[1 6 0 ... 3 9 3] [5 4 0 ... 3 4 3]
0.49566666666666664
Iteration:  30
[1 4 0 ... 3 9 3] [5 4 0 ... 3 4 3]
0.63975
Iteration:  40
[8 9 0 ... 3 9 3] [5 4 0 ... 3 4 3]
0.6118333333333333
Iteration:  50
[8 4 0 ... 3 9 3] [5 4 0 ... 3 4 3]
0.71445
Iteration:  60
[8 4 0 ... 3 9 3] [5 4 0 ... 3 4 3]
0.7410666666666667
Iteration:  70
[8 4 0 ... 3 4 3] [5 4 0 ... 3 4 3]
0.7633666666666666
Iteration:  80
[8 4 0 ... 3 4 3] [5 4 0 ... 3 4 3]
0.7797333333333333
Iteration:  90
[8 4 0 ... 3 4 3] [5 4 0 ... 3 4 3]
0.7940166666666667
Iteration:  100
[8 4 0 ... 3 4 3] [5 4 0 ... 3 4 3]
0.8062666666666667
Iteration:  110
[8 4 0 ... 3 4 3] [5 4 0 ... 3 4 3]
0.81735
Iteration:  120
[8 4 0 ... 3 4 3] [5 4 0 ... 3 4 3]
0.8255833333333333
Iteration:  130
[8 4 0 ... 3 4 3] [5 4 0 ... 3 4 3]
0.8339
Iteration:  140
[8 4 0 ... 3 4 3] [5 4 0 ... 3 4 3]
0.83975

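Using our home-brewed neural network, we can reach an accuracy of roughly 90% on the MNIST training set (the log shown above covers only the first 140 of the 500 iterations, by which point accuracy is about 84%). Note that this is accuracy on the same data the network was trained on. Pretty good for an unoptimised hundred-or-so lines of code!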