In [1]:
import numpy as np
from mnist import MNIST
import os
import h5py
import matplotlib.pyplot as plt

%matplotlib inline
plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

%load_ext autoreload
%autoreload 2

np.random.seed(1)

In [2]:
def convert_to_one_hot(Y, C):
    Y = np.eye(C)[Y.reshape(-1)].T
    return Y

In [3]:
def flattening(X):
    (a,b,c,d) = X.shape
    X = np.reshape(X,(a,b*c*d))
    return X.T

In [4]:
def inv_flattening(X,a,b,c,d):
    X = np.reshape(X,(a,b,c,d))
    return X

In [5]:
def relu(z):
    return np.maximum(0,z)

In [6]:
def relu_back(dA,Z):
    Z[Z>0] = 1
    Z[Z<0] = 0
    return np.multiply(dA,Z)

In [7]:
def sigmoid(Z):
    return 1/(1+np.exp(-Z))

In [8]:
def softmax(z):
    A = np.exp(z)
    B = np.sum(np.exp(z))
    return A/B

In [9]:
def zero_pad(X, pad):
    X_pad = np.pad(X,((0,0),(pad,pad),(pad,pad),(0,0)), 'constant', constant_values = (0,0))
    return X_pad

In [10]:
def create_mask_from_window(x):

    mask = (x == np.max(x))
    
    return mask

In [11]:
def distribute_value(dz, shape):
    
    (n_H, n_W) = shape
    
    average = dz/(n_H*n_W)
    
    a = np.full((n_H,n_W), average)
    
    return a

In [12]:
def conv_single_step(a_slice_prev, W, b):

    s = np.multiply(a_slice_prev,W)
    
    Z = np.sum(s)
    
    Z = Z+b

    return Z

In [13]:
def costfun(Y,Y_hat):
    (m,n) = Y.shape
    A = np.multiply(Y,np.log(Y_hat))
    cost1 = np.sum(A, axis = 0)
    cost = (-1/m)*np.sum(cost1)
    return cost

In [14]:
def batchnormalization(z):
    mean = np.mean(z)
    std = np.std_dev(z)
    z = np.divide((z-mean),std)
    return z

In [15]:
def conv_forward(A_prev, W, b, hparameters):
      
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    
    (f, f, n_C_prev, n_C) = W.shape
    
    stride = hparameters['stride']
    pad = hparameters['pad']
    
    n_H = int((n_H_prev-f+2*pad)/stride) + 1 
    n_W = int((n_W_prev-f+2*pad)/stride) + 1
    
    Z = np.zeros((m,n_H,n_W,n_C))
    
    A_prev_pad = zero_pad(A_prev, pad)
    
    for i in range(m):                               
        a_prev_pad = A_prev_pad[i,:,:,:]                               
        for h in range(n_H):                           
            for w in range(n_W):                       
                for c in range(n_C):                   
                    
                    vert_start = h *stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f
                    
                    a_slice_prev = a_prev_pad[vert_start:vert_end,horiz_start:horiz_end,:]
                    
                    Z[i, h, w, c] = conv_single_step(a_slice_prev, W[:,:,:,c], b[:,:,:,c])
                                        
       
    assert(Z.shape == (m, n_H, n_W, n_C))
    
    cache = (A_prev, W, b, hparameters)
    
    return Z, cache

In [16]:
# Backward convolution 
def conv_backward(dZ, cache):
    
    (A_prev, W, b, hparameters) = cache
    
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    
    (f, f, n_C_prev, n_C) = W.shape
    
    stride = hparameters["stride"]
    pad = hparameters["pad"]
    
    (m, n_H, n_W, n_C) = dZ.shape
    
    dA_prev = np.zeros((m, n_H_prev, n_W_prev, n_C_prev))                           
    dW = np.zeros((f, f, n_C_prev, n_C))
    db = np.zeros((1,1,1,n_C))

    A_prev_pad = zero_pad(A_prev, pad)
    dA_prev_pad = zero_pad(dA_prev, pad)
    
    for i in range(m):                       
        
        a_prev_pad = A_prev_pad[i,:,:,:]
        da_prev_pad = dA_prev_pad[i,:,:,:]
        
        for h in range(n_H):                   
            for w in range(n_W):               
                for c in range(n_C):           
                    
                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f
                    
                    a_slice = A_prev_pad[i,vert_start:vert_end,horiz_start:horiz_end,:]

                    da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] += W[:,:,:,c] * dZ[i, h, w, c]
                    dW[:,:,:,c] += a_slice * dZ[i, h, w, c]
                    db[:,:,:,c] += dZ[i, h, w, c]
                    
        dA_prev[i, :, :, :] = da_prev_pad[pad:-pad,pad:-pad,:]
    
    assert(dA_prev.shape == (m, n_H_prev, n_W_prev, n_C_prev))
    
    return dA_prev, dW, db

In [17]:
def pool_forward(A_prev, hparameters, mode = "max"):
    
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    
    f = hparameters["f"]
    stride = hparameters["stride"]
    
    n_H = int(1 + (n_H_prev - f) / stride)
    n_W = int(1 + (n_W_prev - f) / stride)
    n_C = n_C_prev
    
    A = np.zeros((m, n_H, n_W, n_C))              
    
    for i in range(m):                         
        for h in range(n_H):                     
            for w in range(n_W):                 
                for c in range (n_C):            
                    
                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f
                    
                    a_prev_slice = A_prev[vert_start:vert_end,horiz_start:horiz_end,:]
                    
                    if mode == "max":
                        A[i, h, w, c] = np.max(a_prev_slice)
                    elif mode == "average":
                        A[i, h, w, c] = np.mean(a_prev_slice)
    
        
    cache = (A_prev, hparameters)
    
    assert(A.shape == (m, n_H, n_W, n_C))
    
    return A, cache

In [18]:
#Backward pooling 
def pool_backward(dA, cache, mode = "max"):
    
    (A_prev, hparameters) = cache
    
    stride = hparameters['stride']
    f = hparameters['f']
    
    m, n_H_prev, n_W_prev, n_C_prev = A_prev.shape
    m, n_H, n_W, n_C = dA.shape
    
    dA_prev = np.zeros((A_prev.shape))
    
    for i in range(m):                       
        
        a_prev = A_prev[i,:,:,:]
        
        for h in range(n_H):                   
            for w in range(n_W):               
                for c in range(n_C):           
                    
                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f
                    
                    if mode == "max":
                        
                        a_prev_slice = a_prev[vert_start:vert_end,horiz_start:horiz_end,c]
                        
                        mask = create_mask_from_window(a_prev_slice)
        
                        dA_prev[i, vert_start: vert_end, horiz_start: horiz_end, c] += mask * dA[i,h,w,c]
                        
                    elif mode == "average":
                        
                        da = dA[i,h,w,c]
                
                        shape = (f,f)
                        
                        dA_prev[i, vert_start: vert_end, horiz_start: horiz_end, c] += distribute_value(da, shape)
    
    assert(dA_prev.shape == A_prev.shape)
    
    return dA_prev

In [30]:
np.random.seed(1)
mndata = MNIST('/home/shabaz/Documents/python-mnist/')
X_train, y_train = mndata.load_training() 

X_train = np.array(X_train)  #Data Scaling 
y_train = np.array(y_train)

X_train = X_train.reshape(X_train.shape[0],28,28,1)

X = X_train[:500,:,:,:]
y_train = y_train.reshape(X_train.shape[0],1)
Y = y_train[:500,:]
Y = convert_to_one_hot(Y, 10).T
(n,l,m,o) = X.shape

cost = []

##################### Hyperparameter declaration ############
# for Forward convolution
W = np.random.randn(3,3,1,8)
b = np.random.randn(1,1,1,8)

alpha = 0.001   # for Convolotion part
alpha1 = 0.001  # for first layer of neural network  (RelU)
alpha2 = 0.001  # for second layer of neural network (Softmax)

In [33]:
for i in range(100):
    # for Forward convolution
    hparameters = {"pad" : 1,"stride": 1}
    Z, cache_conv = conv_forward(X, W, b, hparameters)

    # Implementing RelU 
    R1 = relu(Z)

    # Pooling 
    hparameters = {"stride" : 2, "f": 2}
    A, cache = pool_forward(R1, hparameters,mode = "max")
    (a,l,m,o) = A.shape

    ########## Hyperparameter for forward and backward ########
    # Weight for neural layers
    W1 = np.random.randn(l*m*o,10)*0.01
    W2 = np.random.randn(10,10)*0.01

    # bias for neural layers
    b1 = np.random.randn(1,1)*0.01
    b2 = np.random.randn(1,1)*0.01

    # Flattening
    S = flattening(A)
    
    ###################### Forward Prop #######################
    ###################### Layer 1 ############################
    Z1 = np.dot(S.T,W1)+b1
    A1 = sigmoid(Z1)
    
    ###################### Layer 2 ############################
    Z2 = np.dot(A1,W2)+b2
    A2 = softmax(Z2)

    ##############1####### Backward Prop #######################
    ################### Layer 2 ###############################
    dZ2 = dZ2 = A2 - Y  
    dW2 = (1/n)*np.dot(A1.T,dZ2)
    W2 = W2 - alpha2*dW2
    db2 = (1/n)*np.sum(dZ2)
    b2 = b2 - alpha2*db2

    ###################### Layer 1 ############################
    dA1 = np.dot(dZ2,W2)
    dZ1_temp = np.dot(dZ2,W2)
    dZ1 = relu_back(dZ1_temp,Z1)
    dW1 = (1/n)*np.dot(S,dZ1)
    W1 = W1 - alpha1*dW1
    db1 = (1/n)*np.sum(dZ1)
    b1 = b1 - alpha1*db1

    ##################### Cost calculation ####################
    cost.append(costfun(Y,A2))
    print(cost)
    
    ##################### Backward pooling ####################
    dS_prev = np.dot(W1,dZ1.T)
    dS = inv_flattening(dS_prev,a,l,m,o)
    dA_prev = pool_backward(dS, cache, mode = "max")

    #################### Backward RelU ########################
    dR1 = relu_back(dA_prev,R1)

    #################### Backwar convolution ##################
    dA, dW, db = conv_backward(Z, cache_conv)

    #################### Parameters Update ####################
    W = W - alpha*dW
    b = b - alpha*db

  


[8.51664664546852, 8.517899848430664, 8.518427967178082, 8.517070559057213, 8.516985984461927, 8.51798951597624, 8.518502207241127, 8.51660700077845, 8.517707706300577, 8.517658234270971, 8.516197042821839]
[8.51664664546852, 8.517899848430664, 8.518427967178082, 8.517070559057213, 8.516985984461927, 8.51798951597624, 8.518502207241127, 8.51660700077845, 8.517707706300577, 8.517658234270971, 8.516197042821839, 8.517246015856951]
[8.51664664546852, 8.517899848430664, 8.518427967178082, 8.517070559057213, 8.516985984461927, 8.51798951597624, 8.518502207241127, 8.51660700077845, 8.517707706300577, 8.517658234270971, 8.516197042821839, 8.517246015856951, 8.516314543196247]
[8.51664664546852, 8.517899848430664, 8.518427967178082, 8.517070559057213, 8.516985984461927, 8.51798951597624, 8.518502207241127, 8.51660700077845, 8.517707706300577, 8.517658234270971, 8.516197042821839, 8.517246015856951, 8.516314543196247, 8.518147743079094]
[8.51664664546852, 8.517899848430664, 8.518427967178082, 8

In [34]:
######################## Accuracy Calculation ########################
A2[np.reshape(np.amax(A2,axis=1),(n,1))==A2] = 1
A2[np.reshape(np.amax(A2,axis=1),(n,1))!=A2] = 0

acc_mat = np.multiply(A2,Y)
acc = np.sum(acc_mat)/n

print("Train accuracy of the model is", acc)

Train accuracy of the model is 0.1
