# Neural Network 
## with MNIST Dataset

In [0]:
import numpy as np
import keras
%matplotlib inline
import matplotlib.pyplot as plt
import random


In [2]:
# Load Data
(train_x_orig, train_y_orig), (test_x_orig, test_y_orig) = keras.datasets.mnist.load_data()

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


In [3]:
# Data Values

m_train = train_x_orig.shape[0]
num_px = train_x_orig.shape[1]
m_test = test_x_orig.shape[0]

print ("Number of training examples: " + str(m_train))
print ("Number of testing examples: " + str(m_test))
print ("Each image is of size: (" + str(num_px) + ", " + str(num_px) + ", 3)")
print ("train_x_orig shape: " + str(train_x_orig.shape))
print ("train_y shape: " + str(train_y_orig.shape))
print ("test_x_orig shape: " + str(test_x_orig.shape))
print ("test_y shape: " + str(test_y_orig.shape))

Number of training examples: 60000
Number of testing examples: 10000
Each image is of size: (28, 28, 3)
train_x_orig shape: (60000, 28, 28)
train_y shape: (60000,)
test_x_orig shape: (10000, 28, 28)
test_y shape: (10000,)


In [0]:
# Prepare Training Labels

digits = 10
examples = train_y_orig.shape[0]

train_y_orig = train_y_orig.reshape(1, examples)

train_y = np.eye(digits)[train_y_orig.astype('int32')]
train_y = train_y.T.reshape(digits, examples)
#print(train_y)

In [0]:
# Prepare Test Labels

m_test = test_y_orig.shape[0]
test_y_orig = test_y_orig.reshape(1, m_test)

test_y = np.eye(digits)[test_y_orig.astype('int32')]
test_y = test_y.T.reshape(digits, m_test)
#print(test_y)

## NN

In [6]:
#Model data For Neural Network

# Convert into shape of (784,60000)
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T  
# Convert into shape of (784,10000)
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T

train_x = train_x_flatten / 255.
test_x = test_x_flatten / 255.

print ("train_x's shape: " + str(train_x.shape))
print ("test_x's shape: " + str(test_x.shape))

train_y = train_y
test_y = test_y
print ("train_y's shape: " + str(train_y.shape))
print ("test_y's shape: " + str(test_y.shape))



train_x's shape: (784, 60000)
test_x's shape: (784, 10000)
train_y's shape: (10, 60000)
test_y's shape: (10, 10000)


In [0]:
# Initializing hyperparameters
n_x = train_x.shape[0]
n_h = 50
layers = [n_x, n_h]
learning_rate = 1
output = 10
m = train_x.shape[1]
epochs = 1000

In [0]:
class NeuralNet:
    def __init__(self, epochs=1000, layers = [784, 50], output=10):
        # Intialize weights and biasis
        self.epochs = epochs
        self.layers = layers
        self.output = output
        self.W1 = np.random.randn(layers[1], layers[0])
        self.b1 = np.zeros((layers[1], 1))
        self.W2 = np.random.randn(output, layers[1])
        self.b2 = np.zeros((output, 1))
        self.costs = []
    
    
    # activation Functions
    def sigmoid(self,value):
        return 1.0 / ( 1.0+np.exp(-value))

    def sigmoid_prime(self,value):
        return self.sigmoid(value) * (1- self.sigmoid(value))

    def softmax(self,value):
        return np.exp(value) / (np.sum(np.exp(value), axis = 0))

    def sofmax_prime(self,value):
        return self.softmax(value) * (1- self.softmax(value))
    
    # Cost Function
    def compute_cost(self,Y, Y_hat):
        cost_sum = np.sum(np.multiply(Y, np.log(Y_hat)))
        m = Y.shape[1]
        cost = -(1/m) * cost_sum
        return cost

    # Running the Neural Network
    def fit(self, train_x, train_y):
        m = train_x.shape[1]
        for i in range(self.epochs):
            # Forward Propogation
            Z1 = np.matmul(self.W1,train_x) + self.b1
            A1 = self.sigmoid(Z1)     # Activation Sigmoid
            Z2 = np.matmul(self.W2,A1) + self.b2
            A2 = self.softmax(Z2)     # Activation Softmax
            
            cost = self.compute_cost(train_y, A2)
            
            # Back Propogation
            dZ2 = A2 - train_y
            dW2 = (1./m) * np.matmul(dZ2, A1.T)
            db2 = (1./m) * np.sum(dZ2, axis=1, keepdims=True)

            dA1 = np.matmul(self.W2.T,dZ2)
            dZ1 = dA1 * self.sigmoid_prime(Z1)
            dW1 = (1./m) * np.matmul(dZ1, train_x.T)
            db1 = (1./m) * np.sum(dZ1, axis = 1, keepdims=True)

            # Updating Weights and biasis
            self.W2 = self.W2 - learning_rate * dW2
            self.b2 = self.b2 - learning_rate * db2
            self.W1 = self.W1 - learning_rate * dW1
            self.b1 = self.b1 - learning_rate * db1

            if (i % 100 == 0):
                print("Epoch", i, "cost: ", cost)

        print("Final cost:", cost)
    
    
    def predict(self, test_x, test_y):
        Z1 = np.matmul(self.W1, test_x) + self.b1
        A1 = self.sigmoid(Z1)
        Z2 = np.matmul(self.W2, A1) + self.b2
        A2 = self.softmax(Z2)
        predictions = np.argmax(A2, axis=0)
        labels = np.argmax(test_y, axis=0)

        correct = 0 
        for i in range(labels.size):
            if predictions[i] == labels[i]:
                correct +=1
        print('Test Accuracy : {}'.format(100*correct/labels.size))
        
        return predictions
    

In [19]:
nn = NeuralNet()
nn.fit(train_x, train_y)

Epoch 0 cost:  6.89133706375447
Epoch 100 cost:  0.756986723610496
Epoch 200 cost:  0.5708993450546843
Epoch 300 cost:  0.49369314952488197
Epoch 400 cost:  0.4475272593978021
Epoch 500 cost:  0.41523351480846243
Epoch 600 cost:  0.3905839974564431
Epoch 700 cost:  0.37074442312719047
Epoch 800 cost:  0.35419709447735975
Epoch 900 cost:  0.34003993350927725
Final cost: 0.3278180022413853


In [21]:
nn.predict(test_x, test_y)

Test Accuracy : 90.24


array([7, 2, 1, ..., 4, 5, 6])

## CNN

In [0]:
def relu(x):
    return np.maximum(0, x)


def sigmoid(x):
    sig = 1 / (1 + np.exp(-x))
    return sig


def dSigmoid(x):
    dsig = sigmoid(x) * (1 - sigmoid(x))
    return dsig


def softmax(x):
    maxs = np.amax(x)
    expScores = np.exp(x - maxs)
    out = expScores / np.sum(expScores, axis=0, keepdims=True)
    return out


def AutoDiff_CL(partialL_Z, prevOut, summedVals):
    dB = partialL_Z

    dW = np.dot(partialL_Z, prevOut.transpose())

    return dB, dW, partialL_Z


def AutoDiff_FC(partialL_Z, prevWeights, prevOut, summedValues):
    deltaSig = dSigmoid(summedValues)

    partialLZ = np.dot(prevWeights.transpose(), partialL_Z) * deltaSig

    dB = partialLZ

    d0, d1, d2 = prevOut.shape
    prevOut = prevOut.reshape((1, d0 * d1 * d2))
    dW = np.dot(partialLZ, prevOut)
    dW = dW.reshape((partialLZ.shape[0], d0, d1, d2))

    return dB, dW, partialLZ


def AutoDiff_PL(partialL_Z, prevWeights, prevOut, maxIndices, poolSize, output):
    x, y, z = output.shape

    a, b, c, d = prevWeights.shape

    prevWeights = prevWeights.reshape((a, b * c * d))

    output = output.reshape((x * y * z, 1))

    maxIndices = maxIndices.reshape((x, y * z, 2))

    sp = 1

    partialL_Z = np.dot(prevWeights.transpose(), partialL_Z) * sp

    partialL_Z = partialL_Z.reshape((x, y * z))
    output = output.reshape((x, y * z))

    depth, height, width = prevOut.shape

    partialNew = np.zeros((depth, height, width))

    for d in range(depth):

        row = 0
        col = 0

        for i in range(maxIndices.shape[1]):

            section = prevOut[d][row:row + poolSize[0], col:col + poolSize[0]]

            partialPool = getPartialSec(output[d][i], partialL_Z[d][i], section)

            partialNew[d][row:row + poolSize[0], col:col + poolSize[0]] = partialPool

            col += poolSize[1]

            if col >= width:
                col = 0
                row += poolSize[1]

    return partialNew


def AutoDiff_ConvL(partialL_Z, prevWeights, stride, im, summedValues):
    numFilters, depth, filterSize, filterSize = prevWeights.shape

    deltaB = np.zeros((numFilters, 1))
    deltaW = np.zeros((prevWeights.shape))

    convOutNum = (partialL_Z.shape[1]) * (partialL_Z.shape[2])

    partialL_Z = partialL_Z.reshape((partialL_Z.shape[0], partialL_Z.shape[1] * partialL_Z.shape[2]))

    for i in range(numFilters):

        row = 0
        col = 0

        for j in range(convOutNum):

            sec = im[:, row:row + filterSize, col:col + filterSize]

            deltaW[i] += sec * partialL_Z[i][j]

            deltaB[i] += partialL_Z[i][j]

            col += stride

            if (col + filterSize) - stride >= im.shape[2]:
                col = 0
                row += stride

    return deltaB, deltaW


def getPartialSec(val, partialL_Z, section):
    dim1, dim2 = section.shape

    section = section.reshape((dim1 * dim2))

    partialSection = np.zeros((section.shape))

    for i in range(len(section)):

        num = section[i]

        if num < val:

            partialSection[i] = 0
        else:

            partialSection[i] = partialL_Z

    return partialSection.reshape((dim1, dim2))


In [0]:
class CNN(object):

    def __init__(self, inputShape, layers):

        self.inputShape = inputShape

        layerClasses = {'Convolution': ConvolutionalLayer,
                        'Pooling': PoolingLayer,
                        'fullyConnected': FullyConnectedLayer,
                        'outputLayer': ClassificationLayer}

        CNNLayers = []
        shape = inputShape
        for i in range(len(layers)):
            layerName = list(layers[i].keys())[0]

            layerClass = layerClasses[layerName]

            layerArguments = list(layers[i].values())[0]

            currentLayer = layerClass(shape, **layerArguments)

            shape = currentLayer.output.shape

            CNNLayers.append(currentLayer)

        self.layers = CNNLayers

        self.weightShapes = [currentLayer.weights.shape for currentLayer in self.layers if
                             type(currentLayer).__name__ != 'PoolingLayer']
        self.biasShapes = [currentLayer.biases.shape for currentLayer in self.layers if
                           type(currentLayer).__name__ != 'PoolingLayer']

    def forwardPass(self, im):

        previousOutput = im

        for currentLayer in self.layers:

            inputData = previousOutput

            className = type(currentLayer).__name__

            if className == 'ConvolutionalLayer':
                currentLayer.convolution(inputData)

            if className == 'PoolingLayer':
                currentLayer.pool(inputData)

            if className == 'FullyConnectedLayer':
                currentLayer.forwardPass(inputData)

            if className == 'ClassificationLayer':
                currentLayer.classify(inputData)

            previousOutput = currentLayer.output

        finalOutput = previousOutput
        return finalOutput

    def train(self, trainingData, batchSize, learningRate, numEpochs, lamdaVal=None):

        trainSize = len(trainingData)

        meanError = []

        epochNum = 1

        numTrainingRuns = len(trainingData) / batchSize * numEpochs * 1.0
        currentRun = 1

        for currentEpoch in range(numEpochs):
            if epochNum %100 == 0:
                print('Starting Epoch ', epochNum, ' of ', numEpochs)

            random.shuffle(trainingData)

            batches = [trainingData[i:i + batchSize] for i in range(0, trainSize, batchSize)]

            losses = 0

            batchNum = 1

            cn = 0
            for currentBatch in batches:

                batchNum += 1
                currentRun += 1
                cn += 1

                batchLoss = self.updateLoss(currentBatch, learningRate)
                losses = losses + batchLoss

            meanError.append(round(losses / cn, 2))
            epochNum += 1

        print('Done Training')

    def updateLoss(self, batch, LearningRate):

        derivW = [np.zeros(shape) for shape in self.weightShapes]
        derivB = [np.zeros(shape) for shape in self.biasShapes]

        batchLength = len(batch)

        for image, label in batch:
            im = image.reshape((1, 28, 28))

            flag = self.forwardPass(im)

            finalO, partialB, partialW = self.backpropogate(im, label)

            derivB = [nb + db for nb, db in zip(derivB, partialB)]
            derivW = [nw + dw for nw, dw in zip(derivW, partialW)]

        error = crossELoss(label, finalO)

        ind = 0

        wIndex = []

        for layer in self.layers:

            if type(layer).__name__ != 'PoolingLayer':
                wIndex.append(ind)

            ind += 1

        for iterationNum, (lnw, lnb) in enumerate(zip(derivW, derivB)):
            layer = self.layers[wIndex[iterationNum]]

            layer.weights -= LearningRate * lnw / batchLength
            layer.biases -= LearningRate * lnb / batchLength

        return error

    def backpropogate(self, im, label):

        derivW = [np.zeros(shape) for shape in self.weightShapes]
        derivB = [np.zeros(shape) for shape in self.biasShapes]

        prediction = self.layers[len(self.layers) - 1].output

        partialL_Z = (prediction - label)

        for layerNum in range(len(self.layers) - 1, -1, -1):

            layer2 = layerNum
            layer1 = layerNum - 1

            currentLayer = self.layers[layer2]

            if layer1 > -1:
                prevOut = self.layers[layer1].output


            elif layer1 == -1:
                prevOut = im

            if layer1 > -1: layer1Name = type(self.layers[layer1]).__name__
            if layer1 == -1: layer1Name = 'image'
            layer2Name = type(self.layers[layer2]).__name__

            if layer1Name == 'FullyConnectedLayer' and layer2Name == 'ClassificationLayer':
                deltaB, deltaW, partialL_Z = AutoDiff_CL(partialL_Z, prevOut,
                                                         currentLayer.summedValues)

            if layer1Name == 'PoolingLayer' and layer2Name == 'FullyConnectedLayer':
                deltaB, deltaW, partialL_Z = AutoDiff_FC(partialL_Z, prevWeights,
                                                         prevOut, currentLayer.summedValues)

            if layer1Name == 'ConvolutionalLayer' and layer2Name == 'PoolingLayer':
                partialL_Z = AutoDiff_PL(partialL_Z, prevWeights, prevOut,
                                         currentLayer.maxIndices, currentLayer.poolSize, currentLayer.output)

            if layer1Name == 'image' and layer2Name == 'ConvolutionalLayer':
                prevWeights = currentLayer.weights

                deltaB, deltaW = AutoDiff_ConvL(partialL_Z, prevWeights, currentLayer.stride, im,
                                                currentLayer.outputValues)

            if not (layer1Name == 'ConvolutionalLayer' and layer2Name == 'PoolingLayer'):

                if layer1 == -1:
                    layer1 = 0

                derivB[layer1], derivW[layer1] = deltaB, deltaW
                prevWeights = currentLayer.weights

        return self.layers[-1].output, derivB, derivW


class ConvolutionalLayer(object):

    def __init__(self, inputShape, filterSize, numFilters, stride):

        self.depth = inputShape[0]
        self.height = inputShape[1]
        self.width = inputShape[2]

        self.filterSize = filterSize
        self.stride = stride
        self.numFilters = numFilters
        self.padding = 0

        self.weights = np.random.randn(numFilters, self.depth, filterSize, filterSize)
        self.biases = np.random.rand(self.numFilters, 1)

        self.outputRows = int((self.height - self.filterSize + 2 * self.padding) / self.stride + 1)
        self.outputCols = int((self.width - self.filterSize + 2 * self.padding) / self.stride + 1)

        self.output = np.zeros((self.numFilters, self.outputRows, self.outputCols))
        self.outputValues = np.zeros((self.numFilters, self.outputRows, self.outputCols))

        print('Convolutional Layer Initialized')

    def convolution(self, inputData):

        self.outputValues = self.outputValues.reshape((self.numFilters, self.outputRows * self.outputCols))
        self.output = self.output.reshape((self.numFilters, self.outputRows * self.outputCols))

        outputLength = self.outputRows * self.outputCols

        for i in range(self.numFilters):
            col = 0
            row = 0

            for j in range(outputLength):

                dotProduct = inputData[:, row:row + self.filterSize, col:col + self.filterSize] * self.weights[i]

                sumValue = np.sum(dotProduct)

                self.outputValues[i][j] = sumValue + self.biases[i]

                self.output[i][j] = sigmoid(self.outputValues[i][j])

                col += self.stride

                if col + self.filterSize - self.stride >= self.width:
                    col = 0
                    row += self.stride

        self.outputValues = self.outputValues.reshape((self.numFilters, self.outputRows, self.outputCols))
        self.output = self.output.reshape((self.numFilters, self.outputRows, self.outputCols))


class PoolingLayer(object):

    def __init__(self, inputShape, poolSize):

        self.depth = inputShape[0]
        self.height = inputShape[1]
        self.width = inputShape[2]

        self.poolSize = poolSize
        self.stride = 2

        self.outputHeight = (self.height - self.poolSize[0]) / self.stride + 1
        self.outputWidth = (self.width - self.poolSize[0]) / self.stride + 1

        self.outputHeight = int(self.outputHeight)
        self.outputWidth = int(self.outputWidth)
        self.output = np.empty((self.depth, self.outputHeight, self.outputWidth))

        self.maxIndices = np.empty((self.depth, self.outputHeight, self.outputWidth, 2))

        print('Pooling Layer Initialized')

    def pool(self, inputData):

        self.Length = self.outputHeight * self.outputWidth

        self.output = self.output.reshape((self.depth, self.Length))
        self.maxIndices = self.maxIndices.reshape((self.depth, self.Length, 2))

        for i in range(self.depth):
            row = 0
            col = 0

            for j in range(self.Length - 1):

                section = inputData[i][row:row + self.poolSize[0], col:col + self.poolSize[0]]

                maxVal = np.amax(section)
                self.output[i][j] = maxVal

                maxIndex = np.where(section == np.max(section))
                if len(maxIndex[0]) > 1:
                    maxIndex = [maxIndex[0][0], maxIndex[1][0]]

                maxIndex = int(maxIndex[0]) + row, int(maxIndex[1]) + col

                self.maxIndices[i][j] = maxIndex

                col += self.stride

                if col >= self.width:
                    col = 0
                    row += self.stride

        self.output = self.output.reshape((self.depth, self.outputHeight, self.outputWidth))
        self.maxIndices = self.maxIndices.reshape((self.depth, self.outputHeight, self.outputWidth, 2))


class SingleLayer(object):

    def __init__(self, inputShape, outputNum):
        self.output = np.ones((outputNum, 1))
        self.summedValues = np.ones((outputNum, 1))


class FullyConnectedLayer(SingleLayer):

    def __init__(self, inputShape, numOutput):
        super(SingleLayer, self).__init__()

        self.output = np.ones((numOutput, 1))
        self.summedValues = np.ones((numOutput, 1))

        self.depth = inputShape[0]
        self.width = inputShape[1]
        self.height = inputShape[2]

        self.numOutput = numOutput

        self.weights = np.random.randn(self.numOutput, self.depth, self.height, self.width)
        self.biases = np.random.rand(self.numOutput, 1)

        print('Fully Connected Layer Initialized')

    def forwardPass(self, inputData):
        self.weights = self.weights.reshape((self.numOutput, self.depth * self.height * self.width))
        inputData = inputData.reshape((self.depth * self.height * self.width, 1))

        self.summedValues = np.dot(self.weights, inputData) + self.biases

        self.output = sigmoid(self.summedValues)

        self.weights = self.weights.reshape((self.numOutput, self.depth, self.height, self.width))


class ClassificationLayer(SingleLayer):

    def __init__(self, inputShape, numClasses):
        super(SingleLayer, self).__init__()

        self.output = np.ones((numClasses, 1))
        self.summedValues = np.ones((numClasses, 1))

        self.numClasses = numClasses

        self.weights = np.random.randn(self.numClasses, inputShape[0])
        self.biases = np.random.randn(self.numClasses, 1)

        print('Classification Layer Initialized')

    def classify(self, data):
        self.summedValues = np.dot(self.weights, data) + self.biases

        self.output = softmax(self.summedValues)


def getAccuracy(net, testData):
    print('Begin Testing')
    numCorrect = 0
    for i in range(len(testData)):
        im = testData[i][0].reshape(1, 28, 28)
        label = testData[i][1]
        prediction = np.argmax(net.forwardPass(im))

        if prediction == label: numCorrect += 1

        if (i + 1) % int(0.1 * len(testData)) == 0:
            print('{0}% Completed'.format(int(float(i + 1) / len(testData) * 100)))

    print('Accuracy: ', numCorrect / len(testData) * 100)


def loss(desired, final):
    return .5 * np.sum(1.0 * desired - final) ** 2


def crossELoss(desired, final):
    return -np.sum(desired * np.log(final + .000000000000000000000000000001))


In [0]:
trainData = train_x_orig
testData = test_x_orig

In [0]:
def oneHot(i):
    vec = np.zeros((10,1))
    vec[i] = 1
    return vec



trainLabel = [oneHot(x) for x in train_y_orig]
testLabel = test_y_orig

In [0]:
training = [trainData,trainLabel]
testing = [testData,testLabel]
training = list(zip(trainData,trainLabel))
testing = list(zip(testData,testLabel))


In [77]:
smax = 1
x, y = training[0][0].shape
inputShape = (1, x, y)

layers = [
    {'Convolution': {'filterSize': 5, 'stride': 1, 'numFilters': 20}},
    {'Pooling': {'poolSize': (2, 2)}},
    {'fullyConnected': {'numOutput': 50}},
    {'outputLayer': {'numClasses': 10}}
]

test = CNN(inputShape, layers)
print(' ')
print('Model Initialized')

batchSize = 10
if smax:
    learningRate = .1
else:
    learningRate = 1.5
numEpochs = 200
test.train(training, batchSize, learningRate, numEpochs)


Convolutional Layer Initialized
Pooling Layer Initialized
Fully Connected Layer Initialized
Classification Layer Initialized
 
Model Initialized


  


Starting Epoch  100  of  200
Starting Epoch  200  of  200
Done Training


In [78]:
def getAccuracy(net, testData):
    print('Begin Testing')
    numCorrect = 0

    for i in range(len(testData)):

        im = testData[i][0].reshape(1, 28, 28)
        label = testData[i][1]

        prediction = np.argmax(net.forwardPass(im))
#         print(prediction)
#         print(label[i])

        if prediction == label[i]: numCorrect += 1

    print('Accuracy: {}%'.format(numCorrect / len(testData) * 100)) 


getAccuracy(test, testing)


Begin Testing
9
7
Accuracy: 79.84%


  
