In [58]:
import numpy as np
import h5py
import time
import copy
from random import randint

#READING THE DATA
# Load the train/test arrays from the HDF5 file. The context manager closes the
# file even if one of the reads raises, so the handle is never leaked.
# Images are cast to float32; labels are taken from column 0 of the stored
# label arrays and cast to int32.
with h5py.File('MNISTdata.hdf5', 'r') as MNIST_data:
    x_train = np.float32(MNIST_data['x_train'][:])
    y_train = np.int32(MNIST_data['y_train'][:, 0])
    x_test = np.float32(MNIST_data['x_test'][:])
    y_test = np.int32(MNIST_data['y_test'][:, 0])

In [59]:
# Reshape every sample into a 28x28 image. -1 lets NumPy infer the number of
# samples, so the cell also works on a subset instead of requiring exactly
# 60000 training and 10000 test images.
x_train = x_train.reshape(-1, 28, 28)
x_test = x_test.reshape(-1, 28, 28)

In [60]:
def convolve2d(image, kernel):
    """Slide a multi-channel kernel over an image (no padding, stride 1).

    At every position the kernel is multiplied element-wise with the image
    patch underneath it and the product is summed over the two spatial axes,
    giving one value per channel. The kernel is not flipped, so strictly
    speaking this is a cross-correlation.

    Args:
        image: numpy array of shape [image_height, image_width] or
            [image_height, image_width, channels]. A 2-D image is treated as
            having one trailing channel. The channel axis must broadcast
            against the kernel's channel axis.
        kernel: numpy array of shape
            [kernel_height, kernel_width, no. of channels].

    Returns:
        A numpy array of shape
        [image_height - kernel_height + 1,
         image_width - kernel_width + 1,
         no. of channels].
    """
    # The patch slice below indexes three axes, so give a plain 2-D image a
    # singleton channel axis that broadcasts against every kernel channel.
    if image.ndim == 2:
        image = image[:, :, np.newaxis]

    # Loop-invariant sizes, computed once instead of on every iteration.
    k_h = kernel.shape[0]
    k_w = kernel.shape[1]
    out_h = image.shape[0] - k_h + 1
    out_w = image.shape[1] - k_w + 1

    output = np.zeros((out_h, out_w, kernel.shape[2]))  # convolution output

    for x in range(out_h):      # every valid top-left corner of the kernel
        for y in range(out_w):
            # element-wise product of kernel and patch, reduced over height/width
            output[x, y] = np.sum(kernel * image[x:x + k_h, y:y + k_w, :], axis=(0, 1))
    return output

In [61]:
def initialize_filter(ky, kx, C):
    """Create a randomly initialised kernel (filter).

    Args:
        ky: kernel height.
        kx: kernel width.
        C: number of channels.

    Returns:
        A numpy array of shape [ky, kx, C] holding standard-normal samples
        divided by sqrt(28).
    """
    # NOTE(review): 28 presumably is the image side length - confirm.
    return np.random.randn(ky, kx, C) / np.sqrt(28)

In [62]:
# Filter hyper-parameters: kernel height (ky), kernel width (kx) and
# number of channels (C).
ky, kx = 3, 3
C = 5

In [63]:
# Initial model parameters.
# W: one weight tensor per class (10 classes), each the size of the
#    convolution output (28 - ky + 1) x (28 - kx + 1) x C, drawn from a
#    standard normal and divided by the square root of the number of weights
#    per class.
W = np.random.randn(10, 28 - ky + 1, 28 - kx + 1, C)
W = W / np.sqrt(W[0].size)
# b: one bias per class, starting at zero.
b = np.zeros(10)
# Kernel: the convolution filter, drawn after W so the random stream order
# stays the same.
Kernel = initialize_filter(ky, kx, C)

In [64]:
# Training configuration: number of epochs and the initial learning rate.
EPOCHS, LR = 10, 0.01

In [65]:
#training
# Each epoch performs len(x_train) single-sample SGD steps on images drawn at
# random (with replacement), then reports the accuracy on the whole test set.
for epoch in range(EPOCHS):

    # Step-size schedule: the configured LR for the first 5 epochs, then a
    # smaller fixed value for each further group of 5 epochs.
    # A local `lr` is used so the configured LR is never overwritten and
    # re-running this cell starts from the same step size again.
    if epoch > 14:
        lr = 0.00001
    elif epoch > 9:
        lr = 0.0001
    elif epoch > 4:
        lr = 0.001
    else:
        lr = LR

    for each_image in range(len(x_train)):
        n_random = randint(0, len(x_train)-1) #random number bw 0 and len(x_train) - 1
        x = x_train[n_random]

        # add a trailing channel axis so the image broadcasts against the kernel
        x_t = x.reshape(28,28,1)

        #Forward step
        Z = convolve2d(x_t, Kernel) #convolution
        H = np.maximum(Z,0) #ReLU
        U = np.sum(W*H, axis=(1,2,3)) + b #one score per class

        m = U - np.max(U) #Trick to stabilize exponentiation
        y_hat = np.exp(m)/np.sum(np.exp(m)) #softmax layer

        #Calculating loss (kept for inspection; it does not enter the update)
        loss = - np.log(y_hat[y_train[n_random]]) #Cross entropy

        #Calculating gradient
        e_Y = np.zeros(10)
        e_Y[y_train[n_random]] = 1.0 #One hot encoding of y
        db = - (e_Y - y_hat) #dL/db (equals dL/dU)
        dW = db.reshape(10,1,1,1) * H #dL/dW
        delta = np.sum(db.reshape(10,1,1,1) * W, axis=0) #dL/dH

        dZ = ((Z > 0) * 1.0) #ReLU derivative: 1 where Z > 0, else 0
        # dL/dKernel: slide dL/dZ (= dZ * delta) over the input image
        dk = convolve2d(x_t, dZ * delta)

        #Updating parameters
        b = b - lr * db
        Kernel = Kernel - lr * dk
        W = W - lr * dW

    #Accuracy
    total_correct = 0
    for n in range(len(x_test)):
        y = y_test[n]
        image = x_test[n]
        Z = convolve2d(image.reshape(28,28,1), Kernel)
        H = np.maximum(Z,0)
        U = np.sum(W*H, axis=(1,2,3)) + b
        m = U - np.max(U) #stabilize exponentiation
        y_hat = np.exp(m)/np.sum(np.exp(m))
        prediction = np.argmax(y_hat)
        if (prediction == y):
            total_correct += 1
    # builtin float: the np.float alias was removed in NumPy 1.24
    print("Test accuracy after epoch no. " + str(epoch+1) + ": " + str(total_correct/float(len(x_test))))

Test accuracy after epoch no. 1: 0.9626
Test accuracy after epoch no. 2: 0.9691
Test accuracy after epoch no. 3: 0.9734
Test accuracy after epoch no. 4: 0.9752
Test accuracy after epoch no. 5: 0.973
Test accuracy after epoch no. 6: 0.9791
Test accuracy after epoch no. 7: 0.98
Test accuracy after epoch no. 8: 0.98
Test accuracy after epoch no. 9: 0.9792
Test accuracy after epoch no. 10: 0.9793
