In [1]:
import numpy as np
import gzip

In [2]:
def convolution(image,filt,bias,s=1):
    '''
    Slide a bank of square filters over a square, multi-channel image
    (no padding) and return the resulting feature maps.

    Parameters
    ----------
    image : ndarray of shape (n_c, in_dim, in_dim)
    filt  : ndarray of shape (n_f, n_c, f, f)
    bias  : array-like holding one value per filter; both (n_f,) and
            (n_f, 1) layouts are accepted
    s     : stride between consecutive window positions (default 1)

    Returns
    -------
    ndarray of shape (n_f, out_dim, out_dim), where out_dim is the number of
    window positions that fit along one side of the image.

    Raises
    ------
    AssertionError if the image and the filters disagree on channel count.
    '''
    (n_f,n_c_f,f,_) = filt.shape
    n_c,in_dim,_ = image.shape

    assert n_c==n_c_f, "image and filter must have the same number of channels"

    # Flatten so bias_flat[k] is a true scalar; indexing an (n_f, 1) bias
    # yields a 1-element array, and storing that into a single output cell
    # relies on NumPy's deprecated array-to-scalar conversion.
    bias_flat = np.asarray(bias).reshape(-1)

    # Top-left corners of every window that fits entirely inside the image.
    # Sizing the output from this range keeps shape and loops in agreement.
    starts = range(0, in_dim - f + 1, s)
    out_dim = len(starts)

    out = np.zeros((n_f,out_dim,out_dim))

    for curr_f in range(n_f):
        for out_y, curr_y in enumerate(starts):
            for out_x, curr_x in enumerate(starts):
                patch = image[:,curr_y:curr_y + f,curr_x:curr_x + f]
                out[curr_f,out_y,out_x] = np.sum(filt[curr_f] * patch) + bias_flat[curr_f]

    return out

In [3]:
def max_pooling(image,f=2,s=2):
    '''
    Downsample each channel of `image` by taking the maximum over f x f
    windows placed every `s` pixels.

    Parameters
    ----------
    image : ndarray of shape (n_c, h_prev, w_prev)
    f     : side length of the pooling window (default 2)
    s     : stride between windows (default 2)

    Returns
    -------
    ndarray of shape (n_c, h, w), where h and w are the number of windows
    that fit along the height and the width respectively.
    '''
    n_c,h_prev,w_prev = image.shape

    # Top-left corners of every window that fits inside the image; height
    # and width are handled independently so non-square inputs work.
    row_starts = range(0, h_prev - f + 1, s)
    col_starts = range(0, w_prev - f + 1, s)

    downsampled = np.zeros((n_c,len(row_starts),len(col_starts)))

    for i in range(n_c):
        for out_y, curr_y in enumerate(row_starts):
            for out_x, curr_x in enumerate(col_starts):
                downsampled[i,out_y,out_x] = np.max(image[i,curr_y:curr_y+f,curr_x:curr_x + f])

    return downsampled

In [4]:
def softmax(raw_pred):
    '''
    Turn raw scores into a probability distribution over ALL elements of
    `raw_pred` (the normalisation is over the whole array, not per axis).

    The maximum score is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps np.exp from overflowing to
    inf (and the quotient from becoming NaN) for large scores.
    '''
    raw_pred = np.asarray(raw_pred)
    if raw_pred.size == 0:
        # np.max is undefined for empty input; nothing to normalise anyway.
        return np.exp(raw_pred)

    out = np.exp(raw_pred - np.max(raw_pred))
    return out/np.sum(out)

In [5]:
def lossFunction(prob,label):
    '''
    Categorical cross-entropy: -sum(label * log(prob)).

    Entries whose label is 0 contribute exactly 0 to the loss, even when the
    corresponding probability is 0. Evaluating 0 * log(0) directly would
    give NaN and poison the whole sum, so those entries are masked out
    before the logarithm is taken.

    Parameters
    ----------
    prob  : array-like of predicted probabilities
    label : array-like of target weights (e.g. a one-hot vector),
            broadcastable against `prob`

    Returns
    -------
    The scalar loss.
    '''
    prob = np.asarray(prob)
    label = np.asarray(label)

    active = label != 0
    # Substitute 1.0 (log -> 0) where the label is zero so that log() is
    # only ever evaluated on probabilities that actually matter.
    safe_prob = np.where(active, prob, 1.0)
    return -np.sum(np.where(active, label*np.log(safe_prob), 0.0))

In [6]:
def extract_data(filename,num_images,IMAGE_WIDTH):
    '''
    Read `num_images` square images from a gzip-compressed file.

    The first 16 bytes of the decompressed stream are skipped (presumably a
    file header -- confirm against the data format), then one unsigned byte
    is read per pixel.

    Returns a float32 array of shape (num_images, IMAGE_WIDTH*IMAGE_WIDTH),
    one flattened image per row.
    '''
    print('Extracting',filename)

    pixels_per_image = IMAGE_WIDTH*IMAGE_WIDTH

    with gzip.open(filename) as stream:
        stream.read(16)  # discard the leading 16 bytes
        raw = stream.read(pixels_per_image*num_images)

    pixels = np.frombuffer(raw,dtype=np.uint8).astype(np.float32)
    return pixels.reshape(num_images,pixels_per_image)

In [7]:
def extract_label(filename,num_images):
    '''
    Read `num_images` one-byte labels from a gzip-compressed file.

    The first 8 bytes of the decompressed stream are skipped (presumably a
    file header -- confirm against the data format). Each following byte is
    interpreted as an unsigned integer label.

    Returns a 1-D int64 array with one entry per label read.
    '''
    print('Extracting',filename)

    with gzip.open(filename) as stream:
        stream.read(8)  # discard the leading 8 bytes
        raw = stream.read(1*num_images)

    return np.frombuffer(raw,dtype=np.uint8).astype(np.int64)

In [8]:
def filterInitializer(size,scale = 1.0):
    '''
    Draw a filter tensor of shape `size` from a zero-mean normal
    distribution whose standard deviation is scale / sqrt(number of
    elements in the tensor).
    '''
    n_weights = np.prod(size)
    sigma = scale/np.sqrt(n_weights)
    return np.random.normal(loc=0.0,scale=sigma,size=size)

In [9]:
def weightInitializer(size):
    '''
    Draw a weight array of shape `size` from a standard normal distribution
    and scale it by 0.01.
    '''
    unit_normal = np.random.standard_normal(size=size)
    return unit_normal*0.01

In [13]:
def convolutionBackward(dconv_prev, conv_in, filt, s):
    '''
    Backward pass of a strided, unpadded convolution layer.

    Parameters
    ----------
    dconv_prev : gradient of the loss w.r.t. the layer output, indexed as
                 [filter, out_row, out_col]
    conv_in    : the layer input, shape (n_c, orig_dim, orig_dim)
    filt       : the layer filters, shape (n_f, n_c, f, f)
    s          : stride that was used in the forward pass

    Returns
    -------
    (dout, dfilt, dbias): gradients w.r.t. the input (same shape as
    conv_in), the filters (same shape as filt) and the biases (n_f, 1).
    '''
    n_filters, _, k, _ = filt.shape
    _, in_size, _ = conv_in.shape

    grad_input = np.zeros(conv_in.shape)
    grad_filt = np.zeros(filt.shape)
    grad_bias = np.zeros((n_filters,1))

    # (output index, input offset) pairs for every window position along
    # one side; the same list serves rows and columns.
    offsets = list(enumerate(range(0, in_size - k + 1, s)))

    for idx in range(n_filters):
        for row, top in offsets:
            for col, left in offsets:
                upstream = dconv_prev[idx, row, col]
                window = (slice(None), slice(top, top + k), slice(left, left + k))

                # Each output cell sends its gradient to the filter (weighted
                # by the input patch) and to the input (weighted by the filter).
                grad_filt[idx] += upstream * conv_in[window]
                grad_input[window] += upstream * filt[idx]

        # The bias is added to every output cell of this filter.
        grad_bias[idx] = np.sum(dconv_prev[idx])

    return grad_input, grad_filt, grad_bias

def nanargmax(arr):
    '''
    Locate the largest non-NaN entry of `arr` and return its position as a
    tuple of per-axis indices (a (row, col) pair for a 2-D array).
    '''
    flat_pos = np.nanargmax(arr)
    return np.unravel_index(flat_pos, arr.shape)

def maxpoolBackward(dpool, orig, f, s):
    '''
    Backpropagation through a maxpooling layer. Each pooled cell's gradient
    is routed to the position that held the maximum (ignoring NaNs) inside
    its window during the forward step.

    Parameters
    ----------
    dpool : gradient w.r.t. the pooled output, indexed [channel, out_row, out_col]
    orig  : the input that was pooled, shape (n_c, height, width)
    f     : side length of the pooling window
    s     : stride between windows

    Returns
    -------
    ndarray with the shape of `orig` holding the gradient w.r.t. the input.
    '''
    (n_c, orig_h, orig_w) = orig.shape

    dout = np.zeros(orig.shape)

    # Height and width are walked independently so non-square inputs are
    # handled the same way as in the forward pooling step.
    row_starts = range(0, orig_h - f + 1, s)
    col_starts = range(0, orig_w - f + 1, s)

    for curr_c in range(n_c):
        for out_y, curr_y in enumerate(row_starts):
            for out_x, curr_x in enumerate(col_starts):
                window = orig[curr_c, curr_y:curr_y+f, curr_x:curr_x+f]
                (a, b) = np.unravel_index(np.nanargmax(window), window.shape)

                # Accumulate rather than assign: when windows overlap (s < f)
                # the same input cell can be the maximum of several windows
                # and must receive the sum of their gradients.
                dout[curr_c, curr_y+a, curr_x+b] += dpool[curr_c, out_y, out_x]

    return dout

In [12]:
def conVet(image,label,params,conv_s,pool_f,pool_s):
    '''
    Run one forward and backward pass of the network on a single example.

    Architecture: conv -> ReLU -> conv -> ReLU -> max-pool -> flatten ->
    dense -> ReLU -> dense -> softmax, scored with cross-entropy.

    Parameters
    ----------
    image  : input passed to the first convolution
    label  : target vector with the same shape as the network output
    params : [f1, f2, w3, w4, b1, b2, b3, b4] -- the two filter banks, the
             two dense weight matrices, and their respective biases
    conv_s : stride for both convolutions
    pool_f : max-pool window size
    pool_s : max-pool stride

    Returns
    -------
    (grades, loss) where grades = [df1, df2, dw3, dw4, db1, db2, db3, db4]
    are the gradients in the same order as `params`, and loss is the scalar
    cross-entropy for this example.
    '''
    [f1,f2,w3,w4,b1,b2,b3,b4] = params

    conv1 = convolution(image,f1,b1,conv_s) #First convolutional operation
    conv1[conv1<=0] = 0 #Introducing non linearity (RELU)

    conv2 = convolution(conv1,f2,b2,conv_s) #Second convolutional operation
    # The assignment is required: a bare `conv2[conv2<=0]` only builds a
    # temporary and leaves the activations un-rectified, which would make
    # the forward pass disagree with the ReLU mask used in the backward pass.
    conv2[conv2<=0] = 0 #Introducing non linearity (RELU)

    pooled = max_pooling(conv2,pool_f,pool_s) #max pooling the output of convolutional operation

    (n_f,dim_,_) = pooled.shape
    fc = pooled.reshape((n_f*dim_*dim_,1)) #flatten

    z = w3.dot(fc) + b3 #first dense layer
    z[z<=0] = 0 #RELU

    out = w4.dot(z) + b4 #second dense layer (raw class scores)

    probs = softmax(out)

    loss = lossFunction(probs,label)

    #Backpropagation

    dout = probs - label #gradient of softmax + cross-entropy w.r.t. the raw scores
    dw4 = dout.dot(z.T)
    db4 = np.sum(dout,axis=1).reshape(b4.shape)

    dz = w4.T.dot(dout)
    dz[z<=0] = 0 #backprop through RELU
    dw3 = dz.dot(fc.T)
    db3 = np.sum(dz,axis= 1).reshape(b3.shape)

    dfc = w3.T.dot(dz)
    dpool = dfc.reshape(pooled.shape) #undo the flatten

    dconv2 = maxpoolBackward(dpool,conv2,pool_f,pool_s)
    dconv2[conv2<=0] = 0 #backprop through RELU

    dconv1, df2, db2 = convolutionBackward(dconv2, conv1, f2, conv_s)
    dconv1[conv1<=0] = 0 #backprop through RELU

    dimage,df1,db1 = convolutionBackward(dconv1,image,f1,conv_s)

    grades = [df1,df2,dw3,dw4,db1,db2,db3,db4]

    return grades,loss