### Fully Vectorized Implementation

In [1]:
import numpy as np
import random

In [2]:
class VectorizedNetwork(object):
    """Fully connected sigmoid network trained with mini-batch SGD.

    Each mini-batch is pushed through the network as a single matrix whose
    columns are the individual examples, so the forward and backward passes
    are matrix products rather than per-example loops.

    Attributes:
        num_layers: number of layers, counting input and output layers.
        sizes: the list of layer widths passed to the constructor.
        biases: one ``(n_out, 1)`` column vector per non-input layer.
        weights: one ``(n_out, n_in)`` matrix per pair of adjacent layers.
        mini_batch_size: nominal batch size of the most recent ``SGD`` call
            (set by ``SGD`` only; kept for backward compatibility).
    """

    def __init__(self, sizes):
        """Create a network with standard-normal random weights and biases.

        Args:
            sizes: sequence of layer widths; ``sizes[0]`` is the input layer
                and therefore gets no bias vector.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # Biases are drawn before weights so a seeded RNG yields the same
        # parameters as earlier versions of this class.
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        # weights[i] maps layer i (x neurons) to layer i + 1 (y neurons).
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """Return the network output for input activation(s) ``a``.

        ``a`` is multiplied on the left by each weight matrix in turn, so its
        leading dimension must equal ``sizes[0]``.
        """
        for b, w in zip(self.biases, self.weights):
            # Activations of one layer feed the next layer.
            a = sigmoid(np.dot(w, a) + b)
        return a

    def SGD(self, training_data, num_epochs, mini_batch_size, eta, test_data=None):
        """Train the network with mini-batch stochastic gradient descent.

        Args:
            training_data: list of ``(x, y)`` examples. It is shuffled IN
                PLACE at the start of every epoch.
            num_epochs: number of passes over ``training_data``.
            mini_batch_size: number of examples per gradient step; the last
                batch of an epoch may be smaller.
            eta: learning rate.
            test_data: optional list of ``(x, label)`` pairs. When given and
                non-empty, accuracy on it is printed after every epoch.
        """
        self.mini_batch_size = mini_batch_size
        n_test = len(test_data) if test_data else 0
        n = len(training_data)

        for epoch in range(num_epochs):
            random.shuffle(training_data)
            # One epoch = one update per consecutive slice of the shuffled data.
            for start in range(0, n, mini_batch_size):
                self.update_mini_batch(
                    training_data[start:start + mini_batch_size], eta)

            if test_data:
                print("Epoch {0} {1}/{2}".format(
                    epoch, self.evaluate(test_data), n_test))
            else:
                print("Epoch {0} complete".format(epoch))

    def update_mini_batch(self, mini_batch, eta):
        """Apply one gradient-descent step computed from ``mini_batch``.

        Args:
            mini_batch: sequence of examples; element ``[0]`` of each example
                is the input and element ``[1]`` the target.
                (assumes both are column vectors matching ``sizes[0]`` and
                ``sizes[-1]`` -- TODO confirm against the data loader)
            eta: learning rate.

        The gradient is averaged over the ACTUAL number of examples in the
        batch, so a short final batch is handled correctly. An empty batch
        is a no-op.
        """
        batch_len = len(mini_batch)
        if batch_len == 0:
            return

        # Stack all inputs / targets side by side in a single call instead of
        # growing the matrix one column at a time. The cast keeps the float64
        # dtype the matrices have always had.
        X = np.column_stack(
            [example[0] for example in mini_batch]).astype(float, copy=False)
        Y = np.column_stack(
            [example[1] for example in mini_batch]).astype(float, copy=False)

        del_B, del_w = self.backprop(X, Y)

        # float() guards against integer division when eta is an int on
        # Python 2.
        step = eta / float(batch_len)
        # del_B holds per-example bias gradients; sum them across the batch.
        self.biases = [b - step * dB.sum(axis=1, keepdims=True)
                       for b, dB in zip(self.biases, del_B)]
        self.weights = [w - step * dw
                        for w, dw in zip(self.weights, del_w)]

    def backprop(self, X, Y):
        """Back-propagate a whole batch at once.

        Args:
            X: 2-D array of inputs, one example per column.
            Y: 2-D array of targets, one example per column.

        Returns:
            ``(del_B, del_w)``: ``del_B[i]`` has one column of bias gradients
            per example (not yet summed); ``del_w[i]`` is the weight gradient
            already summed over the batch and has the shape of
            ``self.weights[i]``.
        """
        # Forward pass, remembering every weighted input and activation.
        Zs = []
        Activations = [X]
        A = X
        for b, w in zip(self.biases, self.weights):
            # The (n, 1) bias broadcasts across however many columns the
            # batch has, so no dependence on a stored batch size is needed.
            Z = np.dot(w, A) + b
            Zs.append(Z)
            A = sigmoid(Z)
            Activations.append(A)

        del_B = [None] * len(self.biases)
        del_w = [None] * len(self.weights)

        # Backward pass: output-layer error first (quadratic-cost form).
        delta = (Activations[-1] - Y) * sigmoid_prime(Zs[-1])
        del_B[-1] = delta
        del_w[-1] = np.dot(delta, Activations[-2].transpose())

        # l counts layers from the end: l = 2 is the last hidden layer.
        for l in range(2, self.num_layers):
            delta = (np.dot(self.weights[-l + 1].transpose(), delta)
                     * sigmoid_prime(Zs[-l]))
            del_B[-l] = delta
            del_w[-l] = np.dot(delta, Activations[-l - 1].transpose())

        return del_B, del_w

    def evaluate(self, test_data):
        """Return how many ``(x, y)`` pairs in ``test_data`` are classified
        correctly, where the prediction is the index of the largest output
        activation and ``y`` is compared against that index.
        """
        return sum(int(np.argmax(self.feedforward(x)) == y)
                   for (x, y) in test_data)


In [3]:
# Miscellaneous functions   
def sigmoid(z):
    """Logistic function 1 / (1 + e**(-z)); element-wise for NumPy arrays."""
    exp_neg_z = np.exp(-z)
    return 1.0 / (1.0 + exp_neg_z)

def sigmoid_prime(z):
    """Derivative of the sigmoid function, evaluated element-wise at z."""
    s = sigmoid(z)  # evaluate once; the derivative is s * (1 - s)
    return s * (1 - s)

In [4]:
import sys
sys.path.append("../")

import mnist_loader
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
# On Python 3 the splits may be zip iterators, which do not support len() and
# can only be consumed once, so materialise every split as a list.
training_data = list(training_data)
validation_data = list(validation_data)
valid_data = validation_data  # alias kept for code that uses the shorter name
test_data = list(test_data)

In [5]:
# 784 inputs, one hidden layer of 30 neurons, 10 outputs.
net = VectorizedNetwork(sizes=[784, 30, 10])
# 30 epochs, mini-batches of 10 examples, learning rate 3.0; accuracy on
# test_data is printed after every epoch.
net.SGD(
    training_data,
    num_epochs=30,
    mini_batch_size=10,
    eta=3.0,
    test_data=test_data,
)

Epoch 0 9079/10000
Epoch 1 9229/10000
Epoch 2 9252/10000
Epoch 3 9312/10000
Epoch 4 9320/10000
Epoch 5 9343/10000
Epoch 6 9363/10000
Epoch 7 9373/10000
Epoch 8 9364/10000
Epoch 9 9379/10000
Epoch 10 9410/10000
Epoch 11 9393/10000
Epoch 12 9390/10000
Epoch 13 9395/10000
Epoch 14 9404/10000
Epoch 15 9423/10000
Epoch 16 9453/10000
Epoch 17 9419/10000
Epoch 18 9426/10000
Epoch 19 9426/10000
Epoch 20 9433/10000
Epoch 21 9418/10000
Epoch 22 9463/10000
Epoch 23 9443/10000
Epoch 24 9440/10000
Epoch 25 9447/10000
Epoch 26 9455/10000
Epoch 27 9457/10000
Epoch 28 9416/10000
Epoch 29 9448/10000
