In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
import random

In [None]:
import keras

Importing the libraries to be used.

In [None]:
from keras.datasets import mnist

Using keras only to import the dataset.

In [None]:
# Fetch MNIST through keras. The loader hands back one (images, labels) pair
# for the training split and one for the test split.
train_split, test_split = mnist.load_data()
train_x, train_y = train_split
test_x, test_y = test_split

# Report the container type and shape of the training images, then of the labels.
for split_part in (train_x, train_y):
    print(type(split_part))
    print(split_part.shape)

In [None]:
# Preview the leading training images side by side in a single row.
n_preview = 6
fig = plt.figure()

for i in range(n_preview):
    # Each new subplot becomes the current axes, so plt.imshow draws into it.
    fig.add_subplot(1, n_preview, i + 1)
    plt.imshow(train_x[i], cmap='gray', vmin=0, vmax=255)

plt.show()

Displaying first few digits of the training dataset.

In [None]:
# Flatten every training image into one 784-element row and scale the pixel
# values by 1/256. A single vectorised reshape + divide replaces the per-image
# Python loop; the explicit float64 cast keeps the dtype the loop version had
# (it filled an np.zeros array), so the resulting values are unchanged.
train_x_upd = train_x.reshape(train_x.shape[0], 784).astype(np.float64) / 256

print(type(train_x_upd[1]))
print(train_x_upd[1].shape)

In [None]:
# Flatten every test image into one 784-element row and scale the pixel values
# by 1/256. A single vectorised reshape + divide replaces the per-image Python
# loop; the explicit float64 cast keeps the dtype the loop version had (it
# filled an np.zeros array), so the resulting values are unchanged.
test_x_upd = test_x.reshape(test_x.shape[0], 784).astype(np.float64) / 256

print(type(test_x_upd[1]))
print(test_x_upd[1].shape)

In [None]:
# Print the test labels exactly as returned by mnist.load_data(). They stay in
# this raw form (no one-hot encoding is applied to test_y in this notebook)
# because the accuracy check compares them directly with the network's argmax.
print(test_y)

Flatten each 28x28 image into a 784-element vector. The pixel values are then normalised by dividing them by 256, so that the input-layer activations lie between 0 and 1.

In [None]:
def output(x, num_classes=10):
  """Return the one-hot encoding of the class label ``x``.

  Parameters
  ----------
  x : int
      Index of the class to encode.
  num_classes : int, optional
      Length of the returned vector. Defaults to 10, the length this
      function always produced before the parameter existed.

  Returns
  -------
  numpy.ndarray
      Float vector of length ``num_classes`` holding 1.0 at position ``x``
      and 0.0 everywhere else.

  Raises
  ------
  IndexError
      If ``x`` is out of bounds for a vector of length ``num_classes``.
  """
  out = np.zeros(num_classes)
  out[x] = 1.0
  return out

# Sanity check: draw the first flattened training image (reshaped back to
# 28x28), then print its label and what output() returns for that label.
fig = plt.figure(figsize=(2, 2))
first_image = train_x_upd[0].reshape(28, 28)
plt.imshow(first_image, vmin=0, vmax=1, cmap='gray')
plt.show()

first_label = train_y[0]
print(first_label)
w = output(first_label)
print(w)

In [None]:
print(train_y.shape)

# Encode only while train_y still holds the raw 1-D labels. This cell rebinds
# train_y, so without the guard a second run would try to one-hot encode an
# array that is already encoded and fail.
if train_y.ndim == 1:
  # Vectorised one-hot encoding: row k receives a 1.0 in column train_y[k].
  # This replaces a Python loop with one call per label.
  output_arr = np.zeros((train_y.shape[0], 10))
  output_arr[np.arange(train_y.shape[0]), train_y] = 1.0
  train_y = output_arr.copy()

print(train_y.shape)

Convert each output label into a one-hot vector of length 10, which can be compared directly with the output-layer activations.

In [None]:
def sigmoid_num(x):
  """Logistic sigmoid 1 / (1 + exp(-x)) of a single number.

  Inputs above 20 or below -20 short-circuit to exactly 1.0 and 0.0, so
  np.exp is never evaluated for arguments of large magnitude.

  Parameters
  ----------
  x : float
      A scalar pre-activation value.

  Returns
  -------
  float
      The sigmoid of ``x``. Every branch returns a float; the saturated
      branches must not return the ints 1 / 0 (see the note on ``sigmoid``).
  """
  if x > 20:
    return 1.0
  if x < -20:
    return 0.0
  return 1.0 / (1.0 + np.exp(-x))

# otypes pins the result dtype to float. Without it np.vectorize infers the
# dtype from the result for the FIRST element only; an int result there
# (a saturated input) would cast the whole output array to integers and
# truncate every other activation to 0 or 1.
sigmoid = np.vectorize(sigmoid_num, otypes=[float])
sigmoid(-5)

In [None]:
def sigmoid_prime_num(x):
  """Derivative of the logistic sigmoid at a single number.

  Computed as s * (1 - s) with s = sigmoid_num(x), evaluating the sigmoid
  only once.

  Parameters
  ----------
  x : float
      A scalar pre-activation value.

  Returns
  -------
  float
      The derivative value; the explicit float() guarantees a float even if
      sigmoid_num hands back an integer for a saturated input.
  """
  s = sigmoid_num(x)
  return float(s * (1 - s))

# otypes pins the result dtype to float. Without it np.vectorize infers the
# dtype from the result for the first element only, so an integer result
# there would truncate every derivative in the array to 0.
sigmoid_prime = np.vectorize(sigmoid_prime_num, otypes=[float])

The sigmoid function is the activation function. It has been vectorised so that it can be applied element-wise to numpy arrays. To avoid np.exp overflow issues, sigmoid is defined explicitly for |x| > 20 (1 for x > 20, 0 for x < -20).

In [None]:
class Network(object):
    """Three-layer fully connected network (input, one hidden, output) with sigmoid activations.

    The class holds the weights, biases and the most recent neuron activations
    as attributes, together with every method needed to train the network:
    a forward pass, backpropagation, one gradient-descent step, and a
    mini-batch training loop.
    """

    def __init__(self, size_layers):
        """Allocate activations and draw random initial parameters.

        Parameters
        ----------
        size_layers : sequence of int
            ``[n_input, n_hidden, n_output]`` - number of neurons per layer.
        """
        # Activations of the three layers.
        self.inputlayer = np.zeros(size_layers[0])
        self.hiddenlayer = np.zeros(size_layers[1])
        self.outputlayer = np.zeros(size_layers[2])

        # Weighted inputs (pre-activations) of the hidden and output layers.
        self.hid_z = np.zeros(size_layers[1])
        self.out_z = np.zeros(size_layers[2])

        # Backpropagated error terms of the output and hidden layers.
        self.out_error = np.zeros(size_layers[2])
        self.hid_error = np.zeros(size_layers[1])

        # Weight matrices are stored as (neurons in next layer, neurons in previous layer).
        self.weights_inp_hid = np.random.normal(size = (size_layers[1],size_layers[0]))
        self.weights_hid_out = np.random.normal(size = (size_layers[2],size_layers[1]))

        self.bias_hid = np.random.normal(size = size_layers[1])
        self.bias_out = np.random.normal(size = size_layers[2])

    def feedfwd(self, a):
        """Run a forward pass for one input vector.

        Given an input image ``a`` (a vector with one entry per input neuron),
        compute the neuron activations at each layer and store them, along with
        the pre-activations, on the instance. Nothing is returned; read
        ``self.outputlayer`` for the result.
        """
        self.inputlayer = a

        self.hid_z = np.matmul(self.weights_inp_hid, self.inputlayer) + self.bias_hid
        self.hiddenlayer = sigmoid(self.hid_z)

        self.out_z = np.matmul(self.weights_hid_out, self.hiddenlayer) + self.bias_out
        self.outputlayer = sigmoid(self.out_z)

    def backprop(self, expected_res):
        """Backpropagate the error of the most recent forward pass.

        Must be called after ``feedfwd`` because it reads the activations and
        pre-activations stored by it.

        Parameters
        ----------
        expected_res : numpy.ndarray
            Target output vector, same length as the output layer.

        Returns
        -------
        tuple
            ``(grad_weight_inp_hid, grad_weight_hid_out, grad_bias_hid,
            grad_bias_out)`` - the gradient terms for this single input, each
            shaped like the parameter it belongs to.
        """
        # Output error: (activation - target) scaled by the sigmoid slope.
        self.out_error = np.multiply(self.outputlayer - expected_res, sigmoid_prime(self.out_z))
        # Hidden error: output error pulled back through the output weights.
        self.hid_error = np.multiply(np.matmul(np.transpose(self.weights_hid_out), self.out_error), sigmoid_prime(self.hid_z))

        grad_bias_out = self.out_error
        grad_bias_hid = self.hid_error

        grad_weight_inp_hid = np.outer(self.hid_error, self.inputlayer)
        grad_weight_hid_out = np.outer(self.out_error, self.hiddenlayer)

        return grad_weight_inp_hid, grad_weight_hid_out, grad_bias_hid, grad_bias_out

    def gradient_desc(self, sample, learn_rate):
        """Apply one gradient-descent step computed from ``sample``.

        Parameters
        ----------
        sample : sequence of (x, y) pairs
            ``x`` is the input-layer vector and ``y`` the expected output.
        learn_rate : float
            Learning rate. The step applied to every parameter is
            ``(learn_rate / len(sample)) * mean_gradient``; i.e. the learning
            rate is scaled by the batch size on top of the averaging. This
            scale is kept deliberately so existing ``learn_rate`` values keep
            their meaning.

        An empty ``sample`` leaves the parameters untouched.
        """
        length = len(sample)
        if length == 0:
            # Nothing to learn from; also avoids dividing by zero below.
            return

        # Accumulate plain sums in locals only - the average must not depend on
        # any loop counter living outside this method.
        sum_grad_weight_inp_hid = np.zeros(shape = self.weights_inp_hid.shape)
        sum_grad_weight_hid_out = np.zeros(shape = self.weights_hid_out.shape)
        sum_grad_bias_hid = np.zeros(shape = self.bias_hid.shape)
        sum_grad_bias_out = np.zeros(shape = self.bias_out.shape)

        for pair in sample:
            self.feedfwd(pair[0])
            grad_weight_inp_hid, grad_weight_hid_out, grad_bias_hid, grad_bias_out = self.backprop(pair[1])

            sum_grad_weight_inp_hid += grad_weight_inp_hid
            sum_grad_weight_hid_out += grad_weight_hid_out
            sum_grad_bias_hid += grad_bias_hid
            sum_grad_bias_out += grad_bias_out

        step = learn_rate / length

        self.weights_inp_hid -= step * (sum_grad_weight_inp_hid / length)
        self.weights_hid_out -= step * (sum_grad_weight_hid_out / length)
        self.bias_hid -= step * (sum_grad_bias_hid / length)
        self.bias_out -= step * (sum_grad_bias_out / length)

    def stochastic_batch(self, train_x_upd, train_y, batch_size, epochs, learn_rate, test_x_upd, test_y):
        """Train with mini-batch stochastic gradient descent and report test accuracy.

        Instead of averaging the gradient over the whole training set, the
        training data is shuffled and cut into mini-batches of ``batch_size``
        inputs; the weights and biases are updated once per mini-batch. Each
        epoch goes over all inputs of the training set once and then prints
        the accuracy on the test set.

        Parameters
        ----------
        train_x_upd : numpy.ndarray
            Training inputs, one input vector per row.
        train_y : numpy.ndarray
            Expected output vector for each training input (one per row).
        batch_size : int
            Number of inputs per mini-batch (the last batch may be smaller).
        epochs : int
            Number of passes over the training set.
        learn_rate : float
            Passed unchanged to ``gradient_desc`` for every mini-batch.
        test_x_upd : numpy.ndarray
            Test inputs, one input vector per row.
        test_y : sequence of int
            Class index of each test input; compared directly with the argmax
            of the output layer.
        """
        n_test = test_x_upd.shape[0]

        for epoch in range(epochs):
            train = list(zip(train_x_upd, train_y))
            random.shuffle(train)
            for start in range(0, len(train), batch_size):
                self.gradient_desc(train[start: start+batch_size], learn_rate)

            evaluate = 0

            for i in range(n_test):
                self.feedfwd(test_x_upd[i])
                if test_y[i] == self.outputlayer.argmax():
                    evaluate += 1
            # The denominator is the test set that was passed in, not a global.
            print("Epoch no. {0} done. Accuracy {1} pc".format(epoch, evaluate/n_test*100))

        

In [None]:
# Seed both random number generators the training code relies on - numpy draws
# the initial weights and biases, `random` shuffles the mini-batches - so that
# a fresh Restart & Run All reproduces the same training run.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

net = Network(size_layers=[784, 20, 10])

net.stochastic_batch(train_x_upd, train_y, 600, 30, 3000.0, test_x_upd, test_y)

Epoch no. 0 done. Accuracy 9.69 pc


In [None]:
# Keep training the same network: mini-batches of 60, 30 more epochs.
net.stochastic_batch(
    train_x_upd,
    train_y,
    batch_size=60,
    epochs=30,
    learn_rate=300.0,
    test_x_upd=test_x_upd,
    test_y=test_y,
)

In [None]:
# Keep training the same network: mini-batches of 10, 30 more epochs.
net.stochastic_batch(
    train_x_upd,
    train_y,
    batch_size=10,
    epochs=30,
    learn_rate=30.0,
    test_x_upd=test_x_upd,
    test_y=test_y,
)

In [None]:
# Another pass with mini-batches of 60 for 30 epochs on the same network.
net.stochastic_batch(
    train_x_upd,
    train_y,
    batch_size=60,
    epochs=30,
    learn_rate=300.0,
    test_x_upd=test_x_upd,
    test_y=test_y,
)

In [None]:
# Longest run: mini-batches of 10 for 100 epochs on the same network.
net.stochastic_batch(
    train_x_upd,
    train_y,
    batch_size=10,
    epochs=100,
    learn_rate=90.0,
    test_x_upd=test_x_upd,
    test_y=test_y,
)

In [None]:
# Persist each learned parameter array to its own comma-separated text file.
for csv_name, params in (
    ("weights_inp_hid.csv", net.weights_inp_hid),
    ("weights_hid_out.csv", net.weights_hid_out),
    ("bias_hid.csv", net.bias_hid),
    ("bias_out.csv", net.bias_out),
):
    np.savetxt(csv_name, params, delimiter=',')

In [None]:
# Accuracy on the training set: count the inputs for which the strongest
# output neuron coincides with the position of the maximum in the target row.
n_train = train_x_upd.shape[0]
evaluate = 0

for i in range(n_train):
    net.feedfwd(train_x_upd[i])
    predicted = net.outputlayer.argmax()
    expected = train_y[i].argmax()
    if expected == predicted:
        evaluate += 1

print("Accuracy {0} pc on training dataset".format(evaluate/n_train*100))
