In [1]:
import numpy as np
import pandas as pd
from mnist import MNIST
import random

In [2]:
mndata = MNIST("dat")
train = np.array(mndata.load_training()).T
test = np.array(mndata.load_testing()).T

def printShape(name, arr):
    print("{0} shape:".format(name) + str(arr.shape))

In [13]:
class NeuralNetwork(object):
    def __init__(self, sizes):
        """
        constructor for a neural network.
        from https://github.com/mnielsen/neural-networks-and-deep-learning/
             blob/master/src/network.py
        """
        self.nLayers = len(sizes)
        self.sizes = sizes
        
        # array of bias vectors
        
        self.biases = [np.random.randn(nextLay, 1) \
                       for nextLay in sizes[1:]]
        
        # array of weight matrices
        
        self.weights = [np.random.randn(nextLay, prevLay) \
                        for prevLay, nextLay in zip(sizes[:-1], sizes[1:])]
        
    def feedFwd(self, arr):
        """
        forward feeding
        """
        for b, w in zip(self.biases, self.weights):
            arr = self.smd(np.dot(w, arr) + b)
        
        return arr
    
    def stochasticGradDesc(self, trainDat, nEpoch, sSSize, rate, testDat = None):
        
        if testDat is not None: 
            nTest = len(testDat)
        
        nTrain = len(trainDat)
        
        for i in range(nEpoch):
            random.shuffle(trainDat)
            
            subSets = [train[k : k + sSSize] for k in range(0, nTrain, sSSize)]
            
            for subSet in subSets:
                self.updSubSet(subSet, rate)
                
            if testDat is not None:
                print("Epoch {0} accuracy: ".format(
                    i) + str((self.evaluate(testDat)/nTest)*100) + "%")
                
            else:
                print("Epoch {0} complete".format(i))
        
        
        
    
    def updSubSet(self, subSet, rate):
        
        # initialise weight gradients and bias gradients
        
        nabB = [np.zeros(b.shape) for b in self.biases]
        nabW = [np.zeros(w.shape) for w in self.weights]
        
        sSSize = len(subSet)
        
        # update
        
        for x, y in subSet:
            delNabB, delNabW = self.propBwd(x, y)
            
            nabB = [nB + dnB for nB, dnB in zip(nabB, delNabB)]
            nabW = [nW + dnW for nW, dnW in zip(nabW, delNabW)]
            
            
        self.weights = [Wi - (rate / sSSize) * nabWi \
                        for Wi, nabWi in zip(self.weights, nabW)]
        
        self.biases = [Bi - (rate / sSSize) * nabBi  \
                       for Bi, nabBi in zip(self.biases, nabB)]
        
        
    def evaluate(self, testDat):
        results = [(np.argmax(self.feedFwd(x)), y) for (x, y) in testDat] # tuple (int, int)
        
        return sum(int(x == y) for x, y in results)
        
        
    def difCost(self, outAct, y):
        return outAct - y
        
    def propBwd(self, x, y):
        
        # initialise weight gradients and bias gradients
        
        nabB = [np.zeros(b.shape) for b in self.biases]
        nabW = [np.zeros(w.shape) for w in self.weights]
        
        # forward propagation
        
        act = np.array([x]).T
        
        acts = [act]
        
        zVecs = []
        
        # feedFwd is not called to store the z and a values
        
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, act) + b
            
            zVecs.append(z)
            
            act = self.smd(z)
            acts.append(act)
            
        # bwd        
        delta = self.difCost(acts[-1], self.oneHot(y)) * self.difsmd(zVecs[-1])
        
        nabB[-1] = delta
        nabW[-1] = np.dot(delta, np.array(acts[-2]).T)
        
        # for the rest of the network
        
        for l in range(2, self.nLayers):
            z = zVecs[-l]
            
            dsmd = self.difsmd(z)
            delta = np.dot(self.weights[-l + 1].T, delta) * dsmd
            
            nabB[-l] = delta
            nabW[-l] = np.dot(delta, acts[-l - 1].T)
            
        return nabB, nabW
    
    
    ####### activation functions #######
            
    def smd(self, z):
        """sigmoid function"""
        return 1 / (1 + np.exp(-z))
    
    def difsmd(self, z):
        return self.smd(z) * (1 - self.smd(z))
    
    def oneHot(self, y):
        arr = np.zeros(10)
        arr[y] = 1
        return np.array([arr]).T
    

In [16]:
neuNet = NeuralNetwork([784, 16, 10])

neuNet.stochasticGradDesc(train, 50, 5, 0.2, testDat = test)

  return 1 / (1 + np.exp(-z))


Epoch 0 accuracy: 0.0%
Epoch 1 accuracy: 0.0%
Epoch 2 accuracy: 16.75%
Epoch 3 accuracy: 0.0%
Epoch 4 accuracy: 16.75%
Epoch 5 accuracy: 0.0%
Epoch 6 accuracy: 0.0%
Epoch 7 accuracy: 0.0%
Epoch 8 accuracy: 0.0%
Epoch 9 accuracy: 0.0%
Epoch 10 accuracy: 0.0%
Epoch 11 accuracy: 0.0%
Epoch 12 accuracy: 0.0%
Epoch 13 accuracy: 0.0%
Epoch 14 accuracy: 0.0%
Epoch 15 accuracy: 0.0%
Epoch 16 accuracy: 0.0%
Epoch 17 accuracy: 0.0%
Epoch 18 accuracy: 0.0%
Epoch 19 accuracy: 0.0%
Epoch 20 accuracy: 0.0%
Epoch 21 accuracy: 0.0%
Epoch 22 accuracy: 0.0%
Epoch 23 accuracy: 0.0%
Epoch 24 accuracy: 0.0%
Epoch 25 accuracy: 0.0%
Epoch 26 accuracy: 0.0%
Epoch 27 accuracy: 0.0%
Epoch 28 accuracy: 0.0%
Epoch 29 accuracy: 0.0%
Epoch 30 accuracy: 0.0%
Epoch 31 accuracy: 0.0%
Epoch 32 accuracy: 0.0%
Epoch 33 accuracy: 0.0%
Epoch 34 accuracy: 0.0%
Epoch 35 accuracy: 0.0%
Epoch 36 accuracy: 0.0%
Epoch 37 accuracy: 0.0%
Epoch 38 accuracy: 0.0%
Epoch 39 accuracy: 0.0%
Epoch 40 accuracy: 0.0%
Epoch 41 accuracy: 0.0