In [1]:
import numpy as np
import random

In [2]:
class Network(object):
    """Fully connected feedforward network with sigmoid activations, trained by mini-batch SGD.

    Layer ``l + 1`` is computed from layer ``l`` as ``sigmoid(w . a + b)``. The
    gradients produced by ``backprop`` use ``(output - y) * sigmoid_prime(z)`` as
    the output-layer error, i.e. the gradient of the quadratic cost
    ``0.5 * ||output - y||^2``.

    Relies on module-level ``sigmoid`` and ``sigmoid_prime`` functions, which are
    looked up when the methods are called.

    Attributes:
        num_layers: number of layers, input and output layers included.
        sizes: the ``sizes`` sequence passed to the constructor.
        biases: list of ``(y, 1)`` arrays, one per non-input layer.
        weights: list of ``(y, x)`` arrays, where ``x`` is the size of the layer
            feeding in and ``y`` the size of the layer being computed.
    """

    def __init__(self, sizes):
        """Create a network whose i-th layer has ``sizes[i]`` neurons.

        Biases and weights are drawn from a standard normal distribution via
        ``np.random.randn`` (biases first, then weights), so seeding
        ``np.random`` beforehand makes the initialisation reproducible.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # sizes[0] is the input layer: it has no biases and no incoming weights.
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        # One (next_layer_size x current_layer_size) matrix per pair of adjacent layers.
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """Return the output-layer activations for input ``a``.

        ``a`` is expected to be a column vector of shape ``(sizes[0], 1)`` so that
        ``np.dot(w, a)`` lines up with the ``(y, 1)`` bias vectors.
        """
        for b, w in zip(self.biases, self.weights):
            # Activations of one layer become the input of the next.
            a = sigmoid(np.dot(w, a) + b)
        return a

    def SGD(self, training_data, num_epochs, mini_batch_size, eta, test_data=None):
        """Train the network with mini-batch stochastic gradient descent.

        Args:
            training_data: iterable of ``(x, y)`` pairs. It is copied into a local
                list, so the caller's sequence is never reordered and one-shot
                iterators (e.g. ``zip`` objects) are accepted.
            num_epochs: number of full passes over the training data.
            mini_batch_size: number of examples per weight update; must be positive.
            eta: learning rate.
            test_data: optional iterable of ``(x, label)`` pairs. When given and
                non-empty, the number of correctly classified test examples is
                printed after every epoch; otherwise only a completion line is
                printed.

        Raises:
            ValueError: if ``mini_batch_size`` is not positive.
        """
        if mini_batch_size <= 0:
            # A negative step would make range() yield nothing and silently skip training.
            raise ValueError("mini_batch_size must be a positive integer")

        # Work on private copies: shuffling must not leak into the caller's data.
        training_data = list(training_data)
        n = len(training_data)
        if test_data is not None:
            test_data = list(test_data)
        n_test = len(test_data) if test_data else 0

        for epoch in range(num_epochs):
            random.shuffle(training_data)
            # One epoch = one update per consecutive slice of the shuffled data.
            for k in range(0, n, mini_batch_size):
                self.update_mini_batch(training_data[k:k + mini_batch_size], eta)
            if test_data:
                print("Epoch {0} {1}/{2}".format(epoch, self.evaluate(test_data), n_test))
            else:
                print("Epoch {0} complete".format(epoch))

    def update_mini_batch(self, mini_batch, eta):
        """Apply one gradient-descent step using the average gradient of ``mini_batch``.

        ``mini_batch`` is a sequence of ``(x, y)`` pairs and ``eta`` the learning
        rate. An empty mini-batch leaves the parameters untouched.
        """
        batch_len = len(mini_batch)
        if batch_len == 0:
            return  # nothing to average over; avoids dividing by zero below

        del_b_sum = [np.zeros(b.shape) for b in self.biases]
        del_w_sum = [np.zeros(w.shape) for w in self.weights]

        for x, y in mini_batch:
            # Per-example gradients, accumulated so they can be averaged afterwards.
            del_b, del_w = self.backprop(x, y)
            del_b_sum = [dbs + db for dbs, db in zip(del_b_sum, del_b)]
            del_w_sum = [dws + dw for dws, dw in zip(del_w_sum, del_w)]

        step = eta / batch_len
        self.biases = [b - step * db_sum for b, db_sum in zip(self.biases, del_b_sum)]
        self.weights = [w - step * dw_sum for w, dw_sum in zip(self.weights, del_w_sum)]

    def backprop(self, x, y):
        """Return ``(del_b, del_w)``, the cost gradient for the single example ``(x, y)``.

        ``del_b`` and ``del_w`` are lists of arrays with the same shapes as
        ``self.biases`` and ``self.weights`` respectively.
        """
        # Forward pass: keep every weighted input z and every activation,
        # because the backward pass needs both.
        a = x
        activations = [a]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, a) + b
            zs.append(z)
            a = sigmoid(z)
            activations.append(a)

        # Every slot is assigned below (index -1, then -2 .. -(num_layers - 1)),
        # so placeholders are enough and no zero arrays need to be allocated.
        del_b = [None] * len(self.biases)
        del_w = [None] * len(self.weights)

        # Backward pass, output layer: error is the Hadamard product of the cost
        # derivative and the activation derivative.
        delta = (activations[-1] - y) * sigmoid_prime(zs[-1])
        del_b[-1] = delta
        del_w[-1] = np.dot(delta, activations[-2].transpose())

        # Remaining layers, walking backwards; l counts from the end (l = 2 is the
        # second-to-last layer), so negative indices address the right arrays.
        for l in range(2, self.num_layers):
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sigmoid_prime(zs[-l])
            del_b[-l] = delta
            del_w[-l] = np.dot(delta, activations[-l - 1].transpose())

        return del_b, del_w

    def evaluate(self, test_data):
        """Return how many ``(x, label)`` pairs in ``test_data`` are classified correctly.

        The predicted class is the index of the largest output activation, which
        is compared with ``label`` for equality.
        """
        return sum(int(np.argmax(self.feedforward(x)) == y) for x, y in test_data)


In [3]:
# Miscellaneous functions   
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)), applied element-wise when z is an array."""
    exp_neg_z = np.exp(-z)
    return 1.0 / (1.0 + exp_neg_z)

def sigmoid_prime(z):
    """Derivative of the sigmoid function, sigmoid(z) * (1 - sigmoid(z)).

    Works element-wise on arrays. The sigmoid is evaluated once and reused,
    so the exponential is computed a single time per call.
    """
    s = sigmoid(z)
    return s*(1-s)

In [4]:
import sys

# Add the parent directory to the import search path (presumably where
# mnist_loader.py lives; confirm against the repository layout).
sys.path.append("../")

import mnist_loader

training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

# Materialise each split as a list: under Python 3 the loader's return values
# are zip objects, which do not support len(). Note that the validation list
# is bound to `valid_data`; `validation_data` still refers to the loader's
# original object.
training_data, valid_data, test_data = (
    list(training_data),
    list(validation_data),
    list(test_data),
)

In [5]:
# Three layers counting input and output: 784 inputs, one hidden layer of 30 neurons, 10 outputs.
net = Network(sizes=[784, 30, 10])

# 20 epochs, mini-batches of 10 examples, learning rate 3.0. Supplying test_data
# makes SGD print the number of correctly classified test examples after each epoch.
net.SGD(training_data, num_epochs=20, mini_batch_size=10, eta=3.0, test_data=test_data)

Epoch 0 9084/10000
Epoch 1 9241/10000
Epoch 2 9338/10000
Epoch 3 9367/10000
Epoch 4 9367/10000
Epoch 5 9424/10000
Epoch 6 9436/10000
Epoch 7 9395/10000
Epoch 8 9409/10000
Epoch 9 9465/10000
Epoch 10 9437/10000
Epoch 11 9471/10000
Epoch 12 9474/10000
Epoch 13 9470/10000
Epoch 14 9479/10000
Epoch 15 9490/10000
Epoch 16 9517/10000
Epoch 17 9503/10000
Epoch 18 9487/10000
Epoch 19 9509/10000
