In [1]:
import numpy as np
from mnist import MNIST
import random

In [2]:
def count_numspercent(num, arr):
    """
    Print the fraction of entries in `arr` that are equal to `num`.

    Despite the name, the printed value is a fraction in [0, 1], not a
    percentage.

    num - value to look for.
    arr - array-like of values; plain Python sequences are accepted.

    An empty `arr` is not handled (the division is by zero).
    """
    # Coerce first: for a plain list `arr == num` is a single False, which
    # would silently report 0.0 no matter what the list contains.
    arr = np.asarray(arr)

    # Divide by the element count so multi-dimensional input is also
    # reported as a true fraction (len() would only count rows).
    print(np.count_nonzero(arr == num) / arr.size)

In [3]:
mndata = MNIST("dat")

# The loader result is packed into one array and transposed so that each row
# is a (sample, label) pair (column 1 is read as the labels below, and the
# network code unpacks rows as `x, y`).
# The two members of the loader result are presumably nested to different
# depths (image vectors vs. scalar labels) -- TODO confirm against the MNIST
# loader. Such ragged input must be requested as dtype=object explicitly:
# recent NumPy releases raise ValueError instead of guessing.
train = np.array(mndata.load_training(), dtype=object).T
test = np.array(mndata.load_testing(), dtype=object).T

testLabels = test[:, 1]

# Sanity check: print the share of every digit class in the test labels.
for i in range(10):
    count_numspercent(i, testLabels)

0.098
0.1135
0.1032
0.101
0.0982
0.0892
0.0958
0.1028
0.0974
0.1009


In [4]:
def printShape(name, arr):
    """
    Debug helper: print `name`, the text " shape:" and then `arr.shape`.

    name - label to show in front of the shape.
    arr  - any object exposing a `.shape` attribute.
    """
    prefix = "{0} shape:".format(name)
    dims = str(arr.shape)
    print(prefix + dims)

class NeuralNetwork(object):
    """
    Fully connected feed-forward network trained with mini-batch stochastic
    gradient descent on a quadratic cost.

    Every layer, including the output layer, uses the reLU activation.
    Samples are handled as column vectors of shape (size, 1).

    Structure adapted from
    https://github.com/mnielsen/neural-networks-and-deep-learning/
         blob/master/src/network.py
    """

    def __init__(self, sizes):
        """
        constructor for a neural network.

        sizes - list of layer widths, input layer first and output layer
                last, e.g. [784, 16, 10, 10].
        """
        self.nLayers = len(sizes)
        self.sizes = sizes

        # NOTE(review): the uniform(-0.5, 0.5) initialisation is not scaled
        # by layer width; with large unnormalised inputs the reLU units may
        # all end up at zero -- consider scaling the input data.

        # list of bias column vectors, one (size * 1) vector for every
        # layer after the input layer
        self.biases = [np.random.uniform(-0.5, 0.5, (nextSize, 1))
                       for nextSize in sizes[1:]]

        # list of weight matrices: if the previous layer is sized a and the
        # next layer b, the matrix is (b * a) to accommodate the
        # transformation (b * 1) = (b * a) . (a * 1)
        self.weights = [np.random.uniform(-0.5, 0.5, (nextSize, prevSize))
                        for prevSize, nextSize in zip(sizes[:-1], sizes[1:])]

    def _asColumn(self, x):
        """Return `x` as a float array; 1-D input becomes an (n * 1) column."""
        col = np.asarray(x, dtype=float)
        if col.ndim == 1:
            col = col.reshape(-1, 1)
        return col

    def feedFwd(self, arr):
        """
        forward feeding

        arr - one sample, either flat (length n) or as an (n * 1) column.

        Returns the output layer activations as a column vector.
        """
        # A flat sample must become a column first: otherwise
        # (m,) + (m * 1) broadcasts to an (m * m) matrix.
        arr = self._asColumn(arr)

        for b, w in zip(self.biases, self.weights):
            arr = self.reLU(np.dot(w, arr) + b)

        return arr

    def stochasticGradDesc(self, trainDat, nEpoch, sSSize, rate, testDat = None):
        """
        Train the network with mini-batch stochastic gradient descent.

        trainDat - indexable collection of (x, y) pairs; it is not modified.
        nEpoch   - number of passes over trainDat.
        sSSize   - mini-batch (subset) size.
        rate     - learning rate.
        testDat  - optional collection of (x, y) pairs; when given, the
                   accuracy on it is printed after every epoch.
        """
        print("starting ...")

        nTrain = len(trainDat)
        nTest = len(testDat) if testDat is not None else 0

        # Shuffle indices rather than the data itself: random.shuffle on a
        # 2-D ndarray swaps row *views* and ends up duplicating samples, and
        # the caller's data should not be reordered behind its back.
        order = list(range(nTrain))

        for i in range(nEpoch):
            random.shuffle(order)

            for k in range(0, nTrain, sSSize):
                subSet = [trainDat[j] for j in order[k : k + sSSize]]
                self.updSubSet(subSet, rate)

            if testDat is not None:
                # evaluate once per epoch and reuse the count
                nCorrect = self.evaluate(testDat)
                percent = (nCorrect / nTest) * 100 if nTest else 0.0
                print("Epoch {0} accuracy: {1}% = 100 * {2}/{3}.".format(
                    i, percent, nCorrect, nTest))
            else:
                print("Epoch {0} complete".format(i))

        print("end.")

    def updSubSet(self, subSet, rate):
        """
        Apply one gradient descent step using the mean gradient over a
        mini-batch.

        subSet - sequence of (x, y) pairs.
        rate   - learning rate.
        """
        set_size = len(subSet)
        if set_size == 0:
            # nothing to learn from; also avoids dividing by zero below
            return

        # initialise weight gradients and bias gradients
        nabB = [np.zeros(b.shape) for b in self.biases]
        nabW = [np.zeros(w.shape) for w in self.weights]

        # accumulate the per-sample gradients
        for x, y in subSet:
            delNabB, delNabW = self.propBwd(x, y)

            nabB = [nB + dnB for nB, dnB in zip(nabB, delNabB)]
            nabW = [nW + dnW for nW, dnW in zip(nabW, delNabW)]

        step = rate / set_size

        self.weights = [Wi - (step * nabWi)
                        for Wi, nabWi in zip(self.weights, nabW)]

        self.biases = [Bi - (step * nabBi)
                       for Bi, nabBi in zip(self.biases, nabB)]

    def evaluate(self, testDat):
        """
        Return how many (x, y) pairs in testDat are classified correctly,
        i.e. the index of the largest output activation equals y.
        """
        return sum(int(np.argmax(self.feedFwd(x)) == y) for (x, y) in testDat)

    def difCost(self, outAct, y):
        """Derivative of the quadratic cost w.r.t. the output activations."""
        return outAct - y # (out * 1)

    def propBwd(self, x, y):
        """
        backwards propagation

        x - input sample: flat sequence or ndarray, size m (or m * 1)
        y - target class: int, converted to an (out * 1) ndarray
            with oneHot(y).

        Returns (nabB, nabW): lists holding the cost gradient for every
        bias vector and weight matrix, laid out like self.biases and
        self.weights.
        """

        # initialise weight gradients and bias gradients
        nabB = [np.zeros(b.shape) for b in self.biases]
        nabW = [np.zeros(w.shape) for w in self.weights]

        # forward propagation; feedFwd is not called because the z and a
        # values of every layer have to be stored
        act = self._asColumn(x)
        acts = [act]
        zVecs = []

        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, act) + b # (m * 1)
            zVecs.append(z)

            act = self.reLU(z)
            acts.append(act)

        # bwd: output layer error. The output layer is also passed through
        # reLU, so the chain rule needs the activation derivative here too.
        delta = self.difCost(acts[-1], self.oneHot(y)) * self.difReLU(zVecs[-1])

        nabW[-1] = np.dot(delta, acts[-2].T)
        nabB[-1] = delta # (out * 1)

        # for the rest of the network, walking from the back

        for l in range(2, self.nLayers):
            z = zVecs[-l]

            delta = (self.weights[-l + 1].T @ delta) * self.difReLU(z)

            nabW[-l] = delta @ acts[-l - 1].T # (m * 1) . (1 * k) = (m * k)
            nabB[-l] = delta # (m * 1)

        return nabB, nabW


    ####### activation functions #######

    def smd(self, z):
        """sigmoid function"""
        return 1 / (1 + np.exp(-z))

    def difsmd(self, z):
        """derivative of the sigmoid function"""
        s = self.smd(z)
        return s * (1 - s)

    def oneHot(self, y):
        """
        Return class index y as a one-hot column vector with as many rows
        as the output layer.
        """
        arr = np.zeros(self.sizes[-1])
        arr[y] = 1
        return arr.reshape(-1, 1)

    def reLU(self, z):
        """rectified linear unit: element-wise max(0, z)"""
        return np.maximum(0, z)

    def difReLU(self, z):
        """
        derivative of reLU: True where z > 0, False elsewhere
        (element-wise, same shape as z).
        """
        return np.asarray(z) > 0

    def softMaxOne(self, z, z_arr):
        """
        Softmax value of the single logit z relative to all logits z_arr.
        """
        z_arr = np.asarray(z_arr, dtype=float)

        # Shifting by the largest logit leaves the ratio unchanged but keeps
        # exp() from overflowing.
        shift = np.max(z_arr)
        return np.exp(z - shift) / np.sum(np.exp(z_arr - shift), axis=0)
        
    

In [5]:
# Layout: 784 inputs, hidden layers of 16 and 10 units, 10 outputs.
neuNet = NeuralNetwork(sizes = [784, 16, 10, 10])

# 11 epochs, mini-batches of 10 samples, learning rate 0.1; passing the test
# data makes the accuracy get printed after every epoch.
neuNet.stochasticGradDesc(trainDat = train,
                          nEpoch = 11,
                          sSSize = 10,
                          rate = 0.1,
                          testDat = test)

starting ...
Epoch 0 accuracy: 9.8% = 100 * 980/10000.
Epoch 1 accuracy: 9.8% = 100 * 980/10000.
Epoch 2 accuracy: 9.8% = 100 * 980/10000.
Epoch 3 accuracy: 9.8% = 100 * 980/10000.
Epoch 4 accuracy: 9.8% = 100 * 980/10000.
Epoch 5 accuracy: 9.8% = 100 * 980/10000.
Epoch 6 accuracy: 9.8% = 100 * 980/10000.
Epoch 7 accuracy: 9.8% = 100 * 980/10000.
Epoch 8 accuracy: 9.8% = 100 * 980/10000.
Epoch 9 accuracy: 9.8% = 100 * 980/10000.
Epoch 10 accuracy: 9.8% = 100 * 980/10000.


In [6]:
# Broadcasting check: multiplying a (3, 3) matrix element-wise by a (3, 1)
# column scales row r of the matrix by the r-th entry of the column.
arr_1 = np.arange(9).reshape((3, 3))
arr_2 = np.arange(3)[:, np.newaxis]
print(arr_2.shape)
print(arr_1 * arr_2)

(3, 1)
[[ 0  0  0]
 [ 3  4  5]
 [12 14 16]]
