In [1]:
import numpy as np
from mnist import MNIST
import random

In [2]:
mndata = MNIST("dat")


def pairSamples(images, labels):
    """
    Pair every image with its label in an (N, 2) object array, so that
    each row unpacks as `x, y`.

    The array is filled cell by cell instead of calling
    np.array((images, labels)): that pair is ragged (one member is
    nested one level deeper than the other), and NumPy >= 1.24 raises
    ValueError for ragged input unless the layout is spelled out.
    """
    pairs = np.empty((len(labels), 2), dtype=object)

    for i, (image, label) in enumerate(zip(images, labels)):
        pairs[i, 0] = image
        pairs[i, 1] = label

    return pairs


# NOTE(review): assumes load_training() / load_testing() each return an
# (images, labels) pair of equal length - confirm against the mnist package.
train = pairSamples(*mndata.load_training())
test = pairSamples(*mndata.load_testing())

In [3]:
def printShape(name, arr):
    """Print `name` followed by the `shape` attribute of `arr`."""
    label = "{0} shape:".format(name)
    shapeText = str(arr.shape)
    print(label + shapeText)

class NeuralNetwork(object):
    """
    Fully connected feed-forward network trained with mini-batch
    stochastic gradient descent.

    Every layer, the output layer included, uses the ReLU activation and
    the cost derivative is the plain difference `activation - target`.

    Samples are (x, y) pairs: x is a flat sequence of sizes[0] values
    (or an already shaped column vector) and y is the integer index of
    the correct output unit.
    """

    def __init__(self, sizes):
        """
        constructor for a neural network.
        from https://github.com/mnielsen/neural-networks-and-deep-learning/
             blob/master/src/network.py

        sizes - list of layer widths, input layer first, output layer last
        """
        self.nLayers = len(sizes)
        self.sizes = sizes

        # array of bias vectors, one (size * 1) column per non-input layer

        self.biases = [np.random.randn(nextLay, 1)
                       for nextLay in sizes[1:]]

        # array of weight matrices, if the previous layer is
        # sized a, and next layer b, the weight matrix would
        # be sized (b * a) to accommodate transformation
        # (b * 1) = (b * a) . (a * 1)

        self.weights = [np.random.randn(nextLay, prevLay)
                        for prevLay, nextLay in zip(sizes[:-1], sizes[1:])]

    @staticmethod
    def _asColumn(x):
        """
        return x as a float ndarray, turning a flat (m,) input into an
        (m * 1) column; inputs with two or more axes pass through as is.
        """
        col = np.asarray(x, dtype=float)

        if col.ndim == 1:
            col = col.reshape(-1, 1)

        return col

    def feedFwd(self, arr):
        """
        forward feeding

        arr - input activations, flat sequence or (m * 1) column
        returns the output layer activations, (out * 1)
        """
        # a flat (m,) vector added to an (n * 1) bias would broadcast
        # to an (n * n) matrix, so force column shape first
        arr = self._asColumn(arr)

        for b, w in zip(self.biases, self.weights):
            arr = self.reLU(np.dot(w, arr) + b)

        return arr

    def stochasticGradDesc(self, trainDat, nEpoch, sSSize, rate, testDat = None):
        """
        train the network with mini-batch stochastic gradient descent.

        trainDat - indexable collection of (x, y) samples, left unmodified
        nEpoch   - number of passes over trainDat
        sSSize   - samples per mini-batch, must be >= 1
        rate     - learning rate
        testDat  - optional collection of (x, y) samples; when given and
                   non-empty the accuracy on it is printed after each epoch

        raises ValueError if sSSize is smaller than 1
        """
        if sSSize < 1:
            raise ValueError("sSSize must be a positive integer")

        nTrain = len(trainDat)
        nTest = len(testDat) if testDat is not None else 0

        for i in range(nEpoch):
            # shuffle indices rather than the data: random.shuffle swaps
            # ndarray rows through views, which duplicates samples, and
            # it would also reorder the caller's data in place
            order = list(range(nTrain))
            random.shuffle(order)

            for k in range(0, nTrain, sSSize):
                subSet = [trainDat[j] for j in order[k : k + sSSize]]
                self.updSubSet(subSet, rate)

            if nTest > 0:
                print("Epoch {0} accuracy: ".format(i)
                      + str((self.evaluate(testDat) / nTest) * 100) + "%")

            else:
                print("Epoch {0} complete".format(i))

    def updSubSet(self, subSet, rate):
        """
        apply one gradient descent step using the gradients averaged
        over the samples of `subSet`; an empty subSet is a no-op.
        """
        sSSize = len(subSet)

        if sSSize == 0:
            return

        # initialise weight gradients and bias gradients

        nabB = [np.zeros(b.shape) for b in self.biases]
        nabW = [np.zeros(w.shape) for w in self.weights]

        # accumulate the per-sample gradients

        for x, y in subSet:
            delNabB, delNabW = self.propBwd(x, y)

            nabB = [nB + dnB for nB, dnB in zip(nabB, delNabB)]
            nabW = [nW + dnW for nW, dnW in zip(nabW, delNabW)]

        # update

        step = rate / sSSize

        self.weights = [Wi - (step * nabWi)
                        for Wi, nabWi in zip(self.weights, nabW)]

        self.biases = [Bi - (step * nabBi)
                       for Bi, nabBi in zip(self.biases, nabB)]

    def evaluate(self, testDat):
        """
        return how many (x, y) samples in testDat have y equal to the
        index of the largest output activation.
        """
        results = [(np.argmax(self.feedFwd(x)), y) for (x, y) in testDat]
        # tuple (int, int)
        return sum(int(guess == label) for (guess, label) in results)

    def difCost(self, outAct, y):
        """derivative of the cost with respect to the output activations"""
        return outAct - y # (out * 1)

    def propBwd(self, x, y):
        """
        backwards propagation

        x - activation layer: flat sequence of m values or (m * 1) column
        y - output: int, converted to an (out * 1) ndarray with oneHot(y).

        returns (nabB, nabW), lists of per-layer gradients shaped like
        self.biases and self.weights.
        """

        # initialise weight gradients and bias gradients

        nabB = [np.zeros(b.shape) for b in self.biases]
        nabW = [np.zeros(w.shape) for w in self.weights]

        # forward propagation

        act = self._asColumn(x)

        acts = [act]

        zVecs = []

        # feedFwd is not called to store the z and a values

        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, act) + b # (m * 1)

            zVecs.append(z)

            act = self.reLU(z)
            acts.append(act)

        # bwd
        # the output error is scaled by the DERIVATIVE of the activation
        # at z, not by the activation itself
        delta = self.difCost(acts[-1], self.oneHot(y)) * self.difReLU(zVecs[-1])

        nabB[-1] = delta # (out * 1)
        nabW[-1] = np.dot(delta, acts[-2].T)

        # for the rest of the network

        for l in range(2, self.nLayers):
            z = zVecs[-l]

            dReLU = self.difReLU(z)
            delta = np.dot(self.weights[-l + 1].T, delta) * dReLU

            nabB[-l] = delta # (m * 1)
            nabW[-l] = np.dot(delta, acts[-l - 1].T) # (m * 1) . (1 * a) = (m * a)

        return nabB, nabW


    ####### activation functions #######

    def smd(self, z):
        """sigmoid function"""
        return 1 / (1 + np.exp(-z))

    def difsmd(self, z):
        """derivative of the sigmoid function"""
        s = self.smd(z)
        return s * (1 - s)

    def oneHot(self, y):
        """
        (out * 1) column of zeros with a 1 at index y, where out is the
        width of the output layer.
        """
        arr = np.zeros((self.sizes[-1], 1))
        arr[y, 0] = 1
        return arr

    def reLU(self, z):
        """rectified linear unit, max(0, z) element-wise"""
        return np.maximum(0, z)

    def difReLU(self, z):
        """derivative of reLU: True where z > 0, False elsewhere"""
        return z > 0
        
    

In [4]:
# Seed both random sources used during construction and training
# (np.random.randn for the initial parameters, the `random` module for
# shuffling) so that a re-run of this cell reproduces the same numbers.
SEED = 0
np.random.seed(SEED)
random.seed(SEED)

LAYER_SIZES = [784, 16, 10]   # input, hidden, output layer widths
N_TRAIN = 3001                # number of leading training samples used
N_EPOCH = 20
BATCH_SIZE = 5
LEARN_RATE = 0.5

neuNet = NeuralNetwork(LAYER_SIZES)

neuNet.stochasticGradDesc(train[:N_TRAIN], N_EPOCH, BATCH_SIZE, LEARN_RATE, testDat = test)

Epoch 0 accuracy: 9.8%
Epoch 1 accuracy: 9.8%


KeyboardInterrupt: 