In [1]:
import numpy as np

In [2]:
# Input image geometry: width, height and number of channels.
image_width, image_height, image_channel = 32, 32, 1

# Training hyperparameters used by update(): number of steps per call (also the
# gradient scaling divisor), step size, and the `mu` coefficient.
batch_size = 32
learning_rate, mu = 0.009, 0.95

In [3]:
def parameter_initializer(seed=None):
    """
    Create the small random starting weights and biases of the network.

    arguments:
    seed=optional integer; when given, values are drawn from a private
         np.random.RandomState(seed) so the result is reproducible.
         When None (default) the global np.random state is used.

    returns:
    params=dict with keys 'W1','b1','W2','b2','W3','b3':
        W1 (3,3,4)   / b1 (1,1,4)  first convolution layer, 4 filters
        W2 (16,16,4) / b2 (1,1,4)  second convolution layer, 4 filters
        W3 (10,1024) / b3 (10,1)   fully connected layer, 1024 inputs -> 10 outputs
    """
    rng = np.random if seed is None else np.random.RandomState(seed)
    scale = 0.01  # keep the weights and biases small at the start

    # W3 is stored as (outputs, inputs) = (10, 1024): convolution() computes
    # W3.dot(fc1) with fc1 of shape (1024, 1), W3.T.dot(dout) with dout of shape
    # (10, 1), and a weight gradient dout.dot(fc1.T) of shape (10, 1024).
    # NOTE(review): with a 32x32 input and stride 2 (as used in convolution()),
    # the first layer yields 15x15 maps, which are smaller than the 16x16 second
    # filter, so the flattened size will not be 1024 -- layer sizes need confirming.
    layer_shapes = (
        ("W1", (3, 3, 4)),
        ("b1", (1, 1, 4)),
        ("W2", (16, 16, 4)),
        ("b2", (1, 1, 4)),
        ("W3", (10, 1024)),
        ("b3", (10, 1)),
    )
    return {name: rng.randn(*shape) * scale for name, shape in layer_shapes}

In [4]:
def conv_single_step(a_slice_prev,W,b):
    """
    Apply one filter to a single window (a_slice_prev) of the output activation
    of the previous layer.

    arguments:
    a_slice_prev=window cut from the previous layer output
    W=filter weights, multiplied element-wise with a_slice_prev
      (NumPy broadcasting rules apply when the shapes differ)
    b=bias holding exactly one value: a scalar or any size-1 array, e.g. shape (1,1)

    returns:
    Z=sum(a_slice_prev*W)+bias as a scalar
    """
    weighted = np.multiply(a_slice_prev, W)
    # float() on an array with ndim > 0 is deprecated since NumPy 1.25, so the
    # size-1 bias is squeezed down to a 0-d value before the conversion.
    # A bias with more than one element still raises TypeError.
    bias = float(np.squeeze(b))
    return np.sum(weighted) + bias

In [5]:
def conv_forward(a_prev,w,b,stride):
    """
    Forward pass of one convolution layer followed by ReLU, for a single example.

    arguments:
    a_prev=previous layer output, shape=(nh_prev,nw_prev,nc_prev)
    w=filter bank, shape=(f,f,nc); filter c is the 2-D slice w[:,:,c], applied to
      every input channel with the per-channel products summed together
    b=one bias per filter: any array holding nc values, e.g. shape=(1,1,nc)
    stride=step in pixels between neighbouring windows (positive integer)

    returns:
    Z=activations after ReLU, shape=(nh,nw,nc)
    cache=(a_prev,w,b), the inputs needed by conv_backward

    raises:
    ValueError if stride<1, the filter is not square or is larger than the input,
    or b does not hold exactly nc values
    """
    (nh_prev,nw_prev,nc_prev)=a_prev.shape
    (f,f_width,nc)=w.shape
    if f!=f_width:
        raise ValueError("filters must be square, got %dx%d" % (f, f_width))
    if stride<1:
        raise ValueError("stride must be a positive integer, got %r" % (stride,))
    if f>nh_prev or f>nw_prev:
        raise ValueError("filter size %d exceeds input size %dx%d" % (f, nh_prev, nw_prev))
    bias=np.reshape(b,-1)  # flat view: bias[c] is a scalar whatever the layout of b
    if bias.size!=nc:
        raise ValueError("expected %d bias values, got %d" % (nc, bias.size))

    nh=(nh_prev-f)//stride+1
    nw=(nw_prev-f)//stride+1
    Z=np.zeros((nh,nw,nc))
    # The loop indices are named row/col so that they do not shadow the filter
    # argument `w`, which is still needed for the cache below.
    for row in range(nh):
        vert_start = row*stride
        vert_end = vert_start+f
        for col in range(nw):
            horiz_start = col*stride
            horiz_end = horiz_start+f
            a_slice_prev = a_prev[vert_start:vert_end,horiz_start:horiz_end,:]
            for c in range(nc):
                # w[:, :, c, None] has shape (f,f,1) so it broadcasts across the
                # input channels of the (f,f,nc_prev) window.
                Z[row,col,c]=conv_single_step(a_slice_prev,w[:,:,c,None],bias[c])
    Z[Z<=0]=0 #relu layer
    cache=(a_prev,w,b)
    return Z,cache
    
    

In [6]:
def softmax_cost(out,label):
    """
    Softmax layer with cross-entropy cost.

    arguments:
    out=raw scores (logits), classes along axis 0, e.g. shape (n_classes,1)
    label=one-hot target with the same shape as out

    returns:
    cost=-log of the probability assigned to the labelled class
         (shape (1,) for a column input, scalar for a 1-D input)
    probs=softmax probabilities, same shape as out
    """
    # np.float was removed in NumPy 1.24; np.float64 is the explicit equivalent.
    scores=np.asarray(out,dtype=np.float64)
    # Subtracting the maximum leaves the softmax unchanged but keeps exp() from
    # overflowing to inf (which would turn the probabilities into NaN).
    eout=np.exp(scores-np.max(scores,axis=0))
    probs=eout/np.sum(eout,axis=0)
    p=np.sum(label*probs,axis=0)
    cost=-np.log(p)
    return cost,probs

In [7]:
def conv_backward(dz,cache,stride):
    """
    Backpropagation for one convolution layer (single example).

    arguments:
    dz=gradient of the cost with respect to this layer's output, shape=(nh,nw,nc),
       with this layer's own ReLU already accounted for by the caller
    cache=tuple whose first three items are (a_prev,w,b) as stored by conv_forward;
          any further items are ignored
    stride=stride that was used in the forward pass

    returns:
    da_prev=gradient passed on to the layer below, shape of a_prev. It is zeroed
            wherever a_prev<=0, i.e. the ReLU of the layer that produced a_prev is
            applied here (assumes a_prev is a ReLU output, as conv_forward produces)
    dw=gradient for the filters, same shape as w=(f,f,nc)
    db=gradient for the biases, shape=(1,1,nc)
    """
    # Slicing accepts both the 3-item cache built by conv_forward and longer ones.
    (a_prev,W,b)=cache[:3]
    f=W.shape[0]
    (nh,nw,nc)=dz.shape
    da_prev=np.zeros(a_prev.shape)
    dw=np.zeros(W.shape)
    db=np.zeros((1,1,nc))
    # row/col are used as loop indices so the weights are never shadowed.
    for row in range(nh):
        vert_start = row*stride
        vert_end = vert_start+f
        for col in range(nw):
            horiz_start = col*stride
            horiz_end = horiz_start+f
            window = a_prev[vert_start:vert_end,horiz_start:horiz_end,:]
            # One 2-D filter is shared by all input channels, so its gradient is
            # the window summed over the channel axis.
            window_sum = window.sum(axis=2)
            for c in range(nc):
                grad = dz[row,col,c]
                # (f,f,1) filter slice broadcasts over the input channels.
                da_prev[vert_start:vert_end,horiz_start:horiz_end,:]+=W[:,:,c,None]*grad
                # Accumulate over every window position instead of overwriting.
                dw[:,:,c]+=window_sum*grad
                db[:,:,c]+=grad
    # ReLU derivative: the gate depends on the forward activation, not on the
    # sign of the gradient.
    da_prev[a_prev<=0]=0#relu
    return da_prev,dw,db
                    
                    

In [8]:
def convolution(image,label,params):
    """
    With all the helper functions in place, this combines the forward pass and
    backpropagation for one training example:

    input->convlayer1->convlayer2->flatten->fullyconnected->softmax->cost

    arguments:
    image=input array, shape=(height,width,channels)
    label=one-hot target, reshaped here to a column vector
    params=dict with keys 'W1','b1','W2','b2','W3','b3'; W3 must have shape
           (n_classes, n_flattened) and b3 shape (n_classes,1)

    returns:
    dw1,db1,dw2,db2,dw3,db3=gradients for the matching entries of params
    cost=softmax cross-entropy cost of this example

    raises:
    ValueError if W3 does not match the flattened size of the second conv layer
    """
    conv_stride=2  # both convolution layers use the same stride

    # forward pass
    conv1,conv1_cache=conv_forward(image,params["W1"],params["b1"],conv_stride)
    conv2,conv2_cache=conv_forward(conv1,params["W2"],params["b2"],conv_stride)
    fc1=conv2.reshape((-1,1))  # flatten to a column, whatever the map size
    W3=params["W3"]
    if W3.shape[1]!=fc1.shape[0]:
        raise ValueError("W3 has shape %s but the flattened conv output has %d values"
                         % (W3.shape, fc1.shape[0]))
    fc2=W3.dot(fc1)+params["b3"]  # (n_classes,1)

    # backprop
    label=np.asarray(label,dtype=float).reshape((-1,1))  # avoid (n,)-(n,1) broadcasting
    cost,probs=softmax_cost(fc2,label)
    dout=probs-label  # gradient of softmax + cross-entropy w.r.t. fc2
    dw3=dout.dot(fc1.T)
    db3=dout  # the bias is added directly to fc2, so its gradient equals dout
    dfc1=W3.T.dot(dout)
    dconv2=dfc1.reshape(conv2.shape)
    dconv2[conv2<=0]=0  # ReLU of the second conv layer: no gradient where it output 0
    # conv_backward applies its own relu step to the gradient it hands down, so
    # da2_prev is passed to the first layer as returned.
    da2_prev,dw2,db2=conv_backward(dconv2,conv2_cache,conv_stride)
    da1_prev,dw1,db1=conv_backward(da2_prev,conv1_cache,conv_stride)
    return dw1,db1,dw2,db2,dw3,db3,cost
        

In [9]:
def update(image_batch=None,label_batch=None):
    """
    Kept as a separate function to show the batch gradient descent update;
    otherwise the update could be done directly in the backpropagation function.

    mu is the momentum coefficient: each parameter keeps a velocity
    v = mu*v - learning_rate*gradient/batch_size, and the parameter moves by v.

    arguments:
    image_batch=sequence of input images; when omitted, the notebook-level
                `image` is reused for batch_size steps
    label_batch=sequence of one-hot labels matching image_batch; when omitted,
                the notebook-level `label` is reused in the same way

    returns:
    params=dict of trained parameters
    finalcost=list with one scaled cost entry per processed example
    """
    if image_batch is None or label_batch is None:
        image_batch=[image]*batch_size
        label_batch=[label]*batch_size
    n_examples=len(image_batch)

    finalcost=[]
    params=parameter_initializer()
    # One velocity array per parameter, starting at rest.
    velocity={name:np.zeros_like(value) for name,value in params.items()}
    for img,lbl in zip(image_batch,label_batch):
        # convolution() is the combined forward/backward pass of this notebook.
        dw1,db1,dw2,db2,dw3,db3,cost=convolution(img,lbl,params)
        grads={"W1":dw1,"b1":db1,"W2":dw2,"b2":db2,"W3":dw3,"b3":db3}
        for name,grad in grads.items():
            # Momentum step. Adding mu*params to params itself would scale the
            # weights by (1+mu) on every iteration.
            velocity[name]=mu*velocity[name]-(learning_rate*grad/n_examples)
            params[name]+=velocity[name]
        finalcost.append(cost/n_examples)
    return params,finalcost
    