<a href="https://colab.research.google.com/github/rstar900/ML_And_Neural_Nets/blob/main/MNIST_Neural_Network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import the dataset from a repo
!git clone https://github.com/mnielsen/neural-networks-and-deep-learning.git

Cloning into 'neural-networks-and-deep-learning'...
remote: Enumerating objects: 1163, done.[K
remote: Total 1163 (delta 0), reused 0 (delta 0), pack-reused 1163[K
Receiving objects: 100% (1163/1163), 20.42 MiB | 20.03 MiB/s, done.
Resolving deltas: 100% (577/577), done.


In [15]:
# Install NumPy in case it is not installed
!pip install numpy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [17]:
# Imports
import numpy as np
import random
import pickle as cPickle # Python 3.x fix for working with cPickle library
import gzip

In [35]:
# Code for loading of MNIST dataset
# load_data() loads data into 3 tuples -> training_data, validation_data, test_data
# load_data_wrapper() modifies the format of training data a little for use in our Neural Network

def load_data(path='/content/neural-networks-and-deep-learning/data/mnist.pkl.gz'):
  """Read the gzip-compressed MNIST pickle and return its three splits.

  Args:
    path: Location of the ``.pkl.gz`` archive. The default is the copy inside
      the repository cloned at the top of this notebook (Colab's /content).

  Returns:
    The tuple ``(training_data, validation_data, test_data)`` exactly as it
    was stored in the archive.

  Raises:
    OSError: If the file cannot be opened or is not valid gzip data.
    ValueError: If the archive does not unpack into exactly three objects.
  """
  # NOTE(security): unpickling can execute arbitrary code; only load archives you trust.
  # encoding='bytes' keeps byte strings from an older pickle as bytes instead of
  # decoding them (presumably the archive was written by Python 2 -- see the
  # cPickle alias in the imports cell).
  # The with-block closes the file even when unpickling fails.
  with gzip.open(path, 'rb') as f:
    training_data, validation_data, test_data = cPickle.load(f, encoding='bytes')
  return (training_data, validation_data, test_data)

def load_data_wrapper():
    """Return the MNIST splits as lists of ``(input, label)`` pairs.

    Every input is reshaped into a (784, 1) column. Training labels are
    converted with ``vectorized_result``; validation and test labels are
    passed through exactly as ``load_data`` returned them.

    Returns:
        The tuple ``(training_data, validation_data, test_data)``, each a list
        of 2-tuples.
    """
    def as_columns(images):
        # Turn every image of a split into a (784, 1) column vector.
        return [np.reshape(image, (784, 1)) for image in images]

    train_split, validation_split, test_split = load_data()

    train_columns = as_columns(train_split[0])
    train_targets = list(map(vectorized_result, train_split[1]))
    training_data = list(zip(train_columns, train_targets))

    validation_data = list(zip(as_columns(validation_split[0]), validation_split[1]))
    test_data = list(zip(as_columns(test_split[0]), test_split[1]))

    return (training_data, validation_data, test_data)

def vectorized_result(j, num_classes=10):
    """Return a one-hot column vector that marks class ``j``.

    Args:
        j: Integer class index, with ``0 <= j < num_classes``.
        num_classes: Number of rows in the returned vector. Defaults to 10,
            the value this function previously hard-coded.

    Returns:
        A float array of shape ``(num_classes, 1)`` holding 1.0 in row ``j``
        and 0.0 everywhere else.

    Raises:
        IndexError: If ``j`` lies outside ``[0, num_classes)``.
    """
    # Without this check a negative label would wrap around through NumPy's
    # negative indexing (e.g. -1 would silently be encoded as the last class).
    if not 0 <= j < num_classes:
        raise IndexError(
            "class index {} is out of range for {} classes".format(j, num_classes))
    e = np.zeros((num_classes, 1))
    e[j] = 1.0
    return e

In [9]:
# Sigmoid function
def sigmoid(z):
  """Logistic function 1 / (1 + e^(-z)).

  The computation goes through ``np.exp``, so ``z`` may be a scalar or a NumPy
  array; for an array the function is applied element-wise.
  """
  exp_neg_z = np.exp(-z)
  return 1.0 / (1.0 + exp_neg_z)

# Sigmoid function's derivative
def sigmoid_prime(z):
  """Derivative of the sigmoid: sigmoid(z) * (1 - sigmoid(z)).

  Accepts whatever ``sigmoid`` accepts and returns a value of the same shape.
  """
  # Evaluate the sigmoid once and reuse it; the result is identical to calling
  # it twice but costs half the exponentials (this runs for every layer of
  # every training example during backprop).
  s = sigmoid(z)
  return s * (1 - s)
  

In [12]:
# The Network Class for defining our whole Neural Network
class Network:
  """Fully connected feed-forward network with sigmoid activations.

  Parameters are initialised randomly in the constructor and trained with
  mini-batch stochastic gradient descent (``SGD``), using gradients computed
  by ``backprop``.
  """

  # Constructor
  def __init__(self, sizes):
    """Create the network with randomly initialised weights and biases.

    Args:
      sizes: Number of neurons in each layer, input layer first,
        e.g. ``[784, 30, 10]``.
    """
    self.num_layers = len(sizes)
    self.sizes = sizes
    # One (y, 1) bias column for every layer except the input layer.
    self.biases = [np.random.randn(y,1) for y in sizes[1:]]
    # weights[i] has shape (size of layer i+1, size of layer i), so that
    # np.dot(w, a) maps the activations of layer i onto layer i+1.
    self.weights = [np.random.randn(y,x) for x,y in zip(sizes[:-1], sizes[1:])]


  # Feed forward function for getting activations of current layer based on activations of previous layer
  def feedforward(self, a):
    """Return the output-layer activations for the network input ``a``."""
    for b, w in zip(self.biases, self.weights):
      a = sigmoid(np.dot(w,a) + b)
    return a


  # Top level Stochastic Gradient Descent (SGD) function for dividing the data into mini_batches and managing epochs
  def SGD(self, training_data, epochs, mini_batch_size, eta, test_data = None):
    """Train the network with mini-batch stochastic gradient descent.

    Args:
      training_data: Iterable of ``(x, y)`` pairs accepted by ``backprop``.
        It is copied, so the caller's object is never reordered.
      epochs: Number of full passes over the training data.
      mini_batch_size: Number of examples per parameter update; must be
        positive.
      eta: Learning rate.
      test_data: Optional iterable of ``(x, y)`` pairs accepted by
        ``evaluate``. When it is given and non-empty, the number of correct
        classifications is printed after every epoch; otherwise only a
        completion message is printed.

    Raises:
      ValueError: If ``mini_batch_size`` is not positive.
    """
    # A negative step would make range() below yield no batches at all, i.e.
    # "train" without ever updating anything; fail loudly instead.
    if mini_batch_size <= 0:
      raise ValueError("mini_batch_size must be a positive integer")

    # Private copies: shuffling must not reorder the caller's list, and a
    # one-shot iterator such as zip(...) has no len() and cannot be re-read.
    training_data = list(training_data)
    if test_data is not None:
      test_data = list(test_data)

    n_test = len(test_data) if test_data else 0
    n = len(training_data)
    for j in range(epochs):
      random.shuffle(training_data)
      mini_batches = [training_data[k : k + mini_batch_size] for k in range(0, n, mini_batch_size)]
      for mini_batch in mini_batches:
        self.update_mini_batch(mini_batch, eta)
      if test_data:
        print("Epoch {}: {} / {}".format(j, self.evaluate(test_data), n_test))
      else:
        print("Epoch {} complete".format(j))


  # update_mini_batch with new weights and biases
  def update_mini_batch(self, mini_batch, eta):
    """Apply one gradient-descent step computed from a single mini batch.

    Args:
      mini_batch: Sized collection of ``(x, y)`` training pairs.
      eta: Learning rate; the summed gradient is scaled by
        ``eta / len(mini_batch)``.
    """
    # Nothing to learn from an empty batch; returning early also avoids the
    # division by zero in the step size below.
    if len(mini_batch) == 0:
      return

    nabla_w = [np.zeros(w.shape) for w in self.weights]
    nabla_b = [np.zeros(b.shape) for b in self.biases]
    # Sum the per-example gradients over the whole batch.
    for x, y in mini_batch:
      delta_nabla_b, delta_nabla_w = self.backprop(x,y)
      nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
      nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

    step = eta / len(mini_batch)
    self.biases = [b - step * nb for b, nb in zip(self.biases, nabla_b)]
    self.weights = [w - step * nw for w, nw in zip(self.weights, nabla_w)]


  # Backpropagation: gradient of the cost for a single training example
  def backprop(self, x, y):
    """Return the cost gradient for one training example.

    Args:
      x: Network input for the example.
      y: Desired output-layer activations for the example.

    Returns:
      ``(delta_nabla_b, delta_nabla_w)``: two lists holding one array per
      layer, shaped like the entries of ``self.biases`` and ``self.weights``.
    """
    delta_nabla_w = [np.zeros(w.shape) for w in self.weights]
    delta_nabla_b = [np.zeros(b.shape) for b in self.biases]

    # Feedforward, remembering every weighted input and activation
    activation = x
    activations = [x]
    zs = [] # List to store weighted sums (z) for each node, layer by layer
    for b, w in zip(self.biases, self.weights):
      z = np.dot(w, activation) + b
      activation = sigmoid(z)
      zs.append(z)
      activations.append(activation)

    # Find the error (delta) and the weight and bias gradients for the last layer
    delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
    delta_nabla_b[-1] = delta
    delta_nabla_w[-1] = np.dot(delta, activations[-2].transpose())

    # Back propagation: l counts layers from the end (l = 2 is the second-to-last layer)
    for l in range(2, self.num_layers):
      z = zs[-l]
      sp = sigmoid_prime(z)
      # Pull the error of the following layer back through its weights.
      delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
      delta_nabla_b[-l] = delta
      delta_nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())

    return (delta_nabla_b, delta_nabla_w)

  # Cost derivative function
  def cost_derivative(self, output_activations, y):
    """Return ``output_activations - y``.

    This is the gradient of the quadratic cost ``0.5 * ||a - y||^2`` with
    respect to the output activations ``a``.
    """
    return (output_activations - y)

  # Evaluate the accuracy of the model with test data
  def evaluate(self, test_data):
    """Return how many ``(x, y)`` pairs in ``test_data`` are classified correctly.

    The prediction for ``x`` is the index of the largest output activation,
    and it is compared directly with ``y``.
    """
    # argmax will return the index of the highest activation value of the last layer
    test_results = [(np.argmax(self.feedforward(x)), y) for (x, y) in test_data]
    return sum(int(x == y) for (x, y) in test_results)

In [45]:
# Load the dataset and train the network on it.
# Seed both generators so a re-run reproduces the same numbers:
# Network.__init__ draws its parameters from np.random, and SGD shuffles with random.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# Hyperparameters
EPOCHS = 30
MINI_BATCH_SIZE = 10
LEARNING_RATE = 3.0

training_data, validation_data, test_data = load_data_wrapper()
net = Network([784, 30, 10])
net.SGD(training_data, EPOCHS, MINI_BATCH_SIZE, LEARNING_RATE, test_data=test_data)

Epoch 0: 9118 / 10000
Epoch 1: 9259 / 10000
Epoch 2: 9309 / 10000
Epoch 3: 9382 / 10000
Epoch 4: 9410 / 10000
Epoch 5: 9429 / 10000
Epoch 6: 9419 / 10000
Epoch 7: 9432 / 10000
Epoch 8: 9393 / 10000
Epoch 9: 9426 / 10000
Epoch 10: 9467 / 10000
Epoch 11: 9420 / 10000
Epoch 12: 9480 / 10000
Epoch 13: 9481 / 10000
Epoch 14: 9472 / 10000
Epoch 15: 9449 / 10000
Epoch 16: 9465 / 10000
Epoch 17: 9493 / 10000
Epoch 18: 9476 / 10000
Epoch 19: 9466 / 10000
Epoch 20: 9487 / 10000
Epoch 21: 9485 / 10000
Epoch 22: 9501 / 10000
Epoch 23: 9497 / 10000
Epoch 24: 9533 / 10000
Epoch 25: 9492 / 10000
Epoch 26: 9500 / 10000
Epoch 27: 9497 / 10000
Epoch 28: 9512 / 10000
Epoch 29: 9497 / 10000
