Load the Python libraries

In [0]:
import sys,numpy as np
from keras.datasets import mnist

Using TensorFlow backend.


In [0]:
sum(np.array([1,3,4]) < np.array([4,6,3]))

2

Load the MNIST dataset

In [0]:
(xtrain,ytrain),(xtest,ytest)=mnist.load_data()

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


In [0]:
xtrain.shape

(60000, 28, 28)

Convert the images of size 28*28 into single array of 784 values. This is called as straightening. 

In [0]:
images=xtrain[0:60000].reshape(60000,28*28)/255

In [0]:
labels=ytrain[0:60000]

In [0]:
images.shape

(60000, 784)

Convert the categorical labels values to numeric values using one hot encoding

In [0]:
one_hot_labels=np.zeros((len(labels),10))

In [0]:
for i,l in enumerate(labels):
  one_hot_labels[i][l]=1

labels=one_hot_labels

Reshape the testdata and hot encode the test labels data.

In [0]:
testimages=xtest.reshape(len(xtest),28*28)/255

In [0]:
testlabels=np.zeros((len(ytest),10))

In [0]:
for i,l in enumerate(ytest):
  testlabels[i][l]=1

In [0]:
np.random.seed(1)

Function to return the Relu value

In [0]:
def relu(x):
    return (x>=0)*x

Function to return the derivative of Relu 

In [0]:
def relu2deriv(output):
    return output>=0

Define the learning rate, no. of iterations, hidden layer size, no of labels in the output layer

In [0]:
alpha=0.001
iterations=10
hidden_size1=100
pixels=784
num_labels=10
hidden_size2=100

Set the batch size for mini batch gradient descent

In [0]:
batchsize=3

Initialise the weights for 0 to 1 layer with random values

In [0]:
weights_0_1=0.2*np.random.random((pixels,hidden_size1))-0.1

Initialise the weights for 1 to 2 and 2 to 3 layers with random values

In [0]:
weights_1_2=0.2*np.random.random((hidden_size1,hidden_size2))-0.1
weights_2_3=0.2*np.random.random((hidden_size2,num_labels))-0.1

In [0]:
from keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range = 0.1, # Randomly zoom image 
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images
        vertical_flip=False)  #

In [0]:
#datagen.fit(xtrain)

ValueError: ignored

Algorithm to train the model

In [0]:
#Iteratively carry out the forward and back propagation
for j in range(iterations):
  error,correct_cnt=(0.0,0)

  #Carry out mini batch gradient descent. Calculate the batches and iterate over it.
  for i in range(int(len(images)/batchsize)):

    #Calculate the batch start and end value to fetch to those images.
    batchstart,batchend=((i*batchsize),((i+1)*batchsize))

    #Select the first layer as the images from the batch size.
    layer_0=images[batchstart:batchend]

    #Create the first layer by multiplying the imput layer with the weights and passing it through Relu activation function. 
    layer_1=relu(np.dot(layer_0,weights_0_1))

    #Drop few neurons from the layers to avoid overfitting the model
    dropout_mask=np.random.randint(2,size=layer_1.shape)
    layer_1*=dropout_mask*2

    #Create the second layer by multiplying layer 1 with its input layer's weights
    layer_2=np.dot(layer_1,weights_1_2)

    #Create the third layer by multiplying layer 2 with its input layer's weights
    layer_3=np.dot(layer_2,weights_2_3)

    #Calculate the error by finding the difference between training final layer data and the calculated final layer data
    error+=np.sum((labels[batchstart:batchend]-layer_3)**2)

    #Conduct the backpropagation
    for k in range(batchsize):

      correct_cnt+=int(np.argmax(layer_3[k:k+1])==np.argmax(labels[batchstart+k:batchstart+k+1]))

      #Calculate the third layer delta by taking the difference of expected and actual output
      layer_3_delta=(labels[batchstart:batchend]-layer_3)/batchsize

      #Calculate the second layer delta by multiplying the third layer delta and weights from second layer
      layer_2_delta=layer_3_delta.dot(weights_2_3.T)

      #Calculate the second layer delta by multiplying the third layer delta and weights from second layer and its Relu derivative. 
      #As Relu is applied on this layer.
      layer_1_delta=layer_2_delta.dot(weights_1_2.T)*relu2deriv(layer_1)

      #Update the weight values
      weights_2_3+=alpha*layer_2.T.dot(layer_3_delta)
      weights_1_2+=alpha*layer_1.T.dot(layer_2_delta)
      weights_0_1+=alpha*layer_0.T.dot(layer_1_delta)


  #Print the ratio of error and correct values for every iteration
  print("I"+str(j))
  print("Error "+str(error/float(len(images)))[0:5])
  print("Correct "+str(correct_cnt/float(len(images))))

I0
Error 0.549
Correct 0.6986166666666667
I1
Error 0.451
Correct 0.7779333333333334
I2
Error 0.427
Correct 0.7961333333333334
I3
Error 0.418
Correct 0.802
I4
Error 0.411
Correct 0.8077833333333333
I5
Error 0.409
Correct 0.8099
I6
Error 0.409
Correct 0.8116333333333333
I7
Error 0.408
Correct 0.813
I8
Error 0.408
Correct 0.8113
I9
Error 0.409
Correct 0.8113666666666667


Function to test the model

In [0]:
if(j%10==0 or j==iterations-1):
  error,correct_cnt=(0.0,0)

  #For the test images as the input, apply the calculated weight values during the training phase to find the labels of the images.
  for i in range(len(testimages)):

    layer_0=testimages[i:i+1]
    layer_1=relu(np.dot(layer_0,weights_0_1))
    layer_2=np.dot(layer_1,weights_1_2)
    layer_3=np.dot(layer_2,weights_2_3)

    #Calculate the difference between the expected and actual values. Calculate the error and correct values predicted.
    error+=np.sum((testlabels[i:i+1]-layer_3)**2)
    correct_cnt+=int(np.argmax(layer_3)==np.argmax(testlabels[i:i+1]))
    


print("shape",testlabels.shape)
print("shape",layer_3.shape)
print("Error "+str(error/float(len(testimages)))[0:5])
print("Correct "+str(correct_cnt/float(len(testimages))))


shape (10000, 10)
shape (1, 10)
Error 0.314
Correct 0.8575
