In [2]:
import numpy as np
from numpy import random

#sigmoid(z)
def sigmoid(z): 
    return 1/(1+np.exp(-z)) 

#MSE
def cost(yPred, y): 
    yPred, y = np.array(yPred), np.array(y)
    return 0.5*(yPred-y)**2

#derivative of cost 
def derivateCost(yPred, y): 
    return yPred - y

#iterator
def dataIter(batchSize, data):
    #random.shuffle(data)
    batches = []

    for i in range(0, np.array(data[0]).shape[0], batchSize):
        batchX, batchY = np.array(data[0][i:i+batchSize]), np.array(data[1][i:i+batchSize])
        batches.append([batchX, batchY])
    return batches

#accuracy
def accuracy(predicted, actual):
    predicted = predicted > 0.5
    actual = np.array(actual).reshape(actual.shape[0])
    predicted = np.array(predicted).reshape(predicted.shape[0])
    
    acc = (np.sum(actual == predicted)/len(actual)) * 100
    return acc

#normalize, scale btw 0 and 1
def normalize(x):
    x = x - np.amin(x)
    x = x/np.amax(x)
    return x

#calculates error of entire batch.
def calculateError(net, dataset):
    j = 0
    yPred = net.forwardPass(dataset[0])
    y = dataset[1]
    costEx = cost(yPred, y) #error of single example.
    j = costEx.sum()
    return j/len(dataset[0]) #taking mean
    

In [26]:
import numpy as np
from matplotlib import pyplot as plt 
#from helperFunctions import *
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.datasets import mnist 

(x_train, y_train), (x_test, y_test) = mnist.load_data()

trainY = np.array(y_train).reshape(-1, 1)
x_train = np.array(x_train).reshape(60000, 28*28)
train = [x_train, trainY]
testY = np.array(y_test).reshape(-1, 1)

test = [x_test, testY]
trainError = []
trainAccuracy = []
testError = []

batchSize = 32
class MyNeuralNet:
    def __init__(self, layerArray):
        
        """ 
        layerArray: 
            array of dimensions of layers. 
            size of layerArray is the number of layers in our network
        """
        trainError = []
        trainAccuracy = []
        testError = []


        self.layers = layerArray #layes in our network
        self.B = [] #bias matrix
        self.W = [] #weights matrix
        self.input = None 

        for layerNum in range(1, len(layerArray)): #1st layer is input so we exclude that
            biasVector = np.zeros((layerArray[layerNum], 1)) #bias zero initialized 
            self.B.append(biasVector)
            weightsMatrix = np.random.normal(loc = 0, scale = 1, size = (layerArray[layerNum], layerArray[layerNum-1])) #weights initialized with normal dist
            self.W.append(weightsMatrix)

    
    def netSize(self):
        """ 
        number of layers in the network excluding the input layer
        """    
        return len(self.layers) - 1
    
    def activateLayer(self, z):
        """
        applies activation function to the layer z. 
        activation : sigmiod
        """
        activatedLayer = sigmoid(z)
        return activatedLayer
    
    def derivatieActivateLayer(self, z):
        """ 
        applies derivate of activation function to the layer z. 
        activation : sigmiod
        """
        z = np.array(z)
        sigmoid = self.activateLayer(z)
        return sigmoid*(1-sigmoid)


    def forwardPass(self, layer):
        """
        passes through the network, calculates linear score and then applies sigmoid to it.
        """
        layer = layer.reshape((batchSize, -1, 1))
        for i in range(self.netSize()):
            print(self.W[i].shape, self.B[i].shape, layer.shape)
            layer = np.dot(self.W[i], layer) + self.B[i]
            layer = self.activateLayer(layer, func)
        return layer
    
    
    def backPropagate(self, x, y):
        """
        Backpropagates through the network to calculate gradients. 
        """

        #dW and dB hold the gradients of cost wrt. weights and biases. initilially zero
        dW = []
        dB = []
        for i in range(self.netSize()):
            dW.append(np.zeros(self.W[i].shape))
            dB.append(np.zeros(self.B[i].shape))
        

        outputLayers = [] #Z's
        activeOutputLayers = [] #Sigmoid of Z's 
        n = self.netSize()
        activeOutput = x #input layer 
        activeOutputLayers.append(activeOutput)
        activeOutput = activeOutput.reshape((-1, 1))
        
        for b,w in zip(self.B, self.W):
            output = np.dot(w, activeOutput) + b
            outputLayers.append(output)
            activeOutput = self.activateLayer(output)
            activeOutputLayers.append(activeOutput)

        outputLayers = np.array(outputLayers)
        activeOutputLayers = np.array(activeOutputLayers)
        n = self.netSize()
        dZ = derivateCost(activeOutput, y) * self.derivatieActivateLayer(output)

        dW[n-1] = np.dot(dZ, activeOutputLayers[-2].T)
        dB[n-1] = dZ
        for l in range(2, n):
            dZ = np.dot(self.W[-l+1].T, dZ) * self.derivatieActivateLayer(outputLayers[-l])
            dB[-l] = dZ
            dW[-l] = np.dot(dZ, activeOutputLayers[-l-1].T)

        return (np.array(dB), np.array(dW))
   

    def train(self, train, test, epochs, batchSize, learningRate, validation = None):
        """
        Trains the network using mini-batch gradient descent.
        """

        for i in range(epochs):
            for batch in dataIter(batchSize, train):
                xBatch, yBatch = batch[0], batch[1]
                dW = []
                dB = []
                #print(xBatch)

                #initialize gradients
                for j in range(self.netSize()):
                    dW.append(np.zeros(self.W[j].shape))
                    dB.append(np.zeros(self.B[j].shape))

                for x, y in zip(xBatch, yBatch):
                    #x = x.reshape(14, 1)
                    #obtain gradients by backpropagating
                    gradB, gradW = self.backPropagate(x, y)
                    n = self.netSize()

                    #summing weights and biases for all examples in the mini batch
                    dW = [w + gradw for w, gradw in zip(dW, gradW)]
                    dB = [b + gradb for b, gradb in zip(dB, gradB)]
                    break
                for j in range(self.netSize()):
                    self.W[j] = self.W[j] - (learningRate/batchSize)*dW[j]
                    self.B[j] = self.B[j] - (learningRate/batchSize)*dB[j]
                break
            trainError.append(calculateError(self, train))
            trainAccuracy.append(accuracy(self.forwardPass(train[0]), train[1]))
            testError.append(calculateError(self, test))

            print("Epoch no: {}\n----------".format(i+1))
            print("Train Error: {0} \n Train Accuracy: {1} \n Test Error: {2} \n \n \n".format(trainError[i], trainAccuracy[i], testError[i]))
        plt.plot(range(epochs), trainError, label = 'Train Error')
        plt.plot(range(epochs), testError, label = 'Test Error')
        plt.legend()
        plt.title("Train and Test loss vs epochs")
        plt.show()

        plt.plot(range(epochs), trainAccuracy, label = 'Train Accuracy')
        plt.legend()
        plt.title("Train accuracy vs epochs")
        #plt.show()
        
        print("TEST Accuracy: ", accuracy(self.forwardPass(test[0]), test[1]))


        
network = MyNeuralNet([train[0].shape[1], 30, 10])
network.train(train, test, 10, batchSize, 0.03)

(30, 784) (30, 1) (32, 1470000, 1)


ValueError: shapes (30,784) and (32,1470000,1) not aligned: 784 (dim 1) != 1470000 (dim 1)