In [1]:
import pandas as pd
import numpy as np
import math
import tensorflow as tf
import matplotlib.pyplot as plt
np.random.seed(1)
dataX=np.load('ex5_train_x.npy')
dataY=np.load('ex5_train_y.npy')

  from ._conv import register_converters as _register_converters


In [2]:
import h5py

In [3]:
dataset=h5py.File("Weights.hdf5","r")

In [4]:
def sigmoid(Z):
    A = 1/(1+np.exp(-Z))
    return A

In [5]:
def relu(Z):    
    A = np.maximum(0,Z)
    return A

In [6]:
def CVrelu(Z):    
    A = np.maximum(0,Z)
    cache = Z
    return A,cache

In [7]:
def linear_forward_sigmoid(A, W, b):
    Z = np.dot(W, A) + b
    A1=sigmoid(Z)
    cache = ((A, W, b), Z)
    return A1, cache

In [8]:
def linear_forward_relu(A, W, b):
    Z = np.dot(W, A) + b
    A1=relu(Z)
    cache = ((A, W, b), Z)
    return A1, cache

In [9]:
def linear_backward_sigmoid(dA, cache):
    linear,Z = cache
    
    s = 1/(1+np.exp(-Z))
    dZ = dA * s * (1-s)
    
    A_prev, W, b = linear
    m = A_prev.shape[1]    
    dW = (1/m) * np.dot(dZ, linear[0].T)
    db = (1/m) * np.sum(dZ, axis = 1, keepdims = True)
    dA_prev = np.dot(linear[1].T, dZ)
    
    return dA_prev, dW, db

In [10]:
def backward_relu(dA,cache):
    Z=cache
    dZ = np.array(dA, copy=True) 
    dZ[Z <= 0] = 0
    return dZ

In [11]:
def linear_backward_relu(dA, cache):
    linear,Z=cache
    
    dZ = np.array(dA, copy=True) 
    dZ[Z <= 0] = 0
    
    A_prev, W, b = linear
    m = A_prev.shape[1]    
    dW = (1/m) * np.dot(dZ, linear[0].T)
    db = (1/m) * np.sum(dZ, axis = 1, keepdims = True)
    dA_prev = np.dot(linear[1].T, dZ)
    
    return dA_prev, dW, db

In [12]:
def one_hot_Encoding(Y,classes):
    Yout=np.zeros((Y.shape[0],len(classes)))
    for i in range(0,len(Y)-1):
        indx=classes.index(Y[i])
        Yout[i,indx]=1
    return Yout

In [13]:
def zero_pad(X, pad):
    X_pad = np.pad(X, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'constant', constant_values=0)
    return X_pad

In [14]:
def conv_single_step(Inp, W, b):
    s = np.multiply(Inp, W) + b
    Z = np.sum(s)
    return Z

In [15]:
def conv_forward(A_prev, parameters):
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    (f, f, n_C_prev, n_C) = parameters['W'].shape
    stride = parameters['stride']
    pad = parameters['pad']
    n_H = int((n_H_prev - f + 2 * pad) / stride) + 1
    n_W = int((n_W_prev - f + 2 * pad) / stride) + 1
    Z = np.zeros((m, n_H, n_W, n_C))
    A_prev_pad = zero_pad(A_prev, pad)
    
    for i in range(m):                                
        a_prev_pad = A_prev_pad[i]                     
        for h in range(n_H):                          
            for w in range(n_W):                      
                for c in range(n_C):                   
                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f
                    a_slice_prev = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
                    Z[i, h, w, c] = conv_single_step(a_slice_prev, parameters['W'][...,c], parameters['b'][...,c])
    
    cache = (A_prev,parameters)
    return Z, cache

In [16]:
def conv_backward(dZ, cache):
    grads = {}
    A_prev,parameters = cache
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    (f, f, n_C_prev, n_C) = parameters['W'].shape
    stride = parameters["stride"]
    pad = parameters["pad"]
    (m, n_H, n_W, n_C) = dZ.shape
    dA_prev = np.zeros((m, n_H_prev, n_W_prev, n_C_prev))                           
    dW = np.zeros((f, f, n_C_prev, n_C))
    db = np.zeros((1, 1, 1, n_C))
    A_prev_pad = zero_pad(A_prev, pad)
    dA_prev_pad = zero_pad(dA_prev, pad)
    
    for i in range(m):                      
        a_prev_pad = A_prev_pad[i]
        da_prev_pad = dA_prev_pad[i]
        
        for h in range(n_H):                
            for w in range(n_W):               
                for c in range(n_C):           
                    vert_start = h
                    vert_end = vert_start + f
                    horiz_start = w
                    horiz_end = horiz_start + f
                    a_slice = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
                    da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] += parameters['W'][:,:,:,c] * dZ[i, h, w, c]
                    dW[:,:,:,c] += a_slice * dZ[i, h, w, c]
                    db[:,:,:,c] += dZ[i, h, w, c]
        if pad !=0:
            dA_prev[i, :, :, :] = da_prev_pad[pad:-pad, pad:-pad, :]
        else:
            dA_prev[i, :, :, :] = da_prev_pad[:,:,:]
        grads["dW"]=dW
        grads["dA"]=dA_prev
        grads["db"]=db
    return grads

In [17]:
def pool_forward(A_prev, hparameters, mode = "max"):
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    f = hparameters["f"]
    stride = hparameters["stride"]
    n_H = int(1 + (n_H_prev - f) / stride)
    n_W = int(1 + (n_W_prev - f) / stride)
    n_C = n_C_prev
    A = np.zeros((m, n_H, n_W, n_C))              
    for i in range(m):                           
        for h in range(n_H):                     
            for w in range(n_W):                
                for c in range (n_C):            
                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end  = horiz_start + f
                    a_prev_slice = A_prev[i, vert_start:vert_end, horiz_start:horiz_end, c]
                    if mode == "max":
                        A[i, h, w, c] = np.max(a_prev_slice)
                    elif mode == "average":
                        A[i, h, w, c] = np.mean(a_prev_slice)
    cache = (A_prev, hparameters)
    return A, cache

In [18]:
def create_mask_from_window(x):
    mask = x == np.max(x)
    return mask

In [19]:
def distribute_value(dz, shape):
    (n_H, n_W) = shape
    average = dz / (n_H * n_W)
    a = np.ones(shape) * average
    return a

In [20]:
def pool_backward(dA, cache, mode = "max"):
    grads = {}
    (A_prev, hparameters) = cache
    stride = hparameters["stride"]
    f = hparameters["f"]
    m, n_H_prev, n_W_prev, n_C_prev = A_prev.shape
    m, n_H, n_W, n_C = dA.shape
    dA_prev = np.zeros(A_prev.shape)
    
    for i in range(m):                       
        a_prev = A_prev[i]
        for h in range(n_H):                   
            for w in range(n_W):               
                for c in range(n_C):           
                    vert_start = h
                    vert_end = vert_start + f
                    horiz_start = w
                    horiz_end = horiz_start + f
                    if mode == "max":
                        a_prev_slice = a_prev[vert_start:vert_end, horiz_start:horiz_end, c]
                        mask = create_mask_from_window(a_prev_slice)
                        dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += np.multiply(mask, dA[i, h, w, c])
                        
                    elif mode == "average":
                        da = dA[i, h, w, c]
                        shape = (f, f)
                        dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += distribute_value(da, shape)
    grads["dA"]=dA_prev
    return grads

In [21]:
def fc_initialize_parameters(layers):
    parameters = {}
    Len = len(layers)
    for i in range(1, Len):
        parameters['W' + str(i)] = np.random.randn(layers[i], layers[i-1]) * 0.008
        parameters['b' + str(i)] = np.random.randn(layers[i], 1)*0.008      
    return parameters

In [22]:
def cost_function(A, Y,parameters=0,lambd=0):
    m = Y.shape[1]
    #print(m)
    cost = None
    #sumW=0
    #Len = len(parameters) // 2
    #for i in range(1,Len):
    #    sumW=sumW+np.sum(np.square(parameters["W" + str(i)]))
     
    cost = (-1/m)*np.sum(np.multiply(Y, np.log(A)) + np.multiply(1-Y, np.log(1 - A))) #+ (lambd/(2*m))*sumW
    cost = np.squeeze(cost) 
    return cost

In [23]:
def gradientDescendent(FCparameters,conv1_param,conv2_param, gradsFC,gradsC1,gradsC2, lr,m=0,lambd=0):
    L = len(FCparameters) // 2 
    for l in range(L):
        FCparameters["W" + str(l+1)] = FCparameters["W" + str(l+1)] - lr * gradsFC["dW" + str(l+1)]#-((lr)/m)*FCparameters["W" + str(l+1)]
        FCparameters["b" + str(l+1)] = FCparameters["b" + str(l+1)] - lr * gradsFC["db" + str(l+1)]
    conv1_param['W']=conv1_param['W']-lr *gradsC1['dW']
    conv1_param['b']=conv1_param['b']-lr *gradsC1['db']
    conv2_param['W']=conv2_param['W']-lr *gradsC2['dW']
    conv2_param['b']=conv2_param['b']-lr *gradsC2['db']
    #return parameters

In [24]:
def fc_backpropogation(AL, Y, caches):
    grads = {}
    L = len(caches)
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL)) 
    current_cache = caches[L - 1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_backward_relu(dAL, current_cache)
    
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_backward_sigmoid(grads["dA" + str(l + 2)], current_cache)
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
    return grads

In [25]:
def fc_forward_propogation(X, parameters):
    caches = []
    A = X
    Len = len(parameters) // 2                  
    for i in range(1, Len):
        A_prev = A 
        W = parameters["W" + str(i)]
        b = parameters["b" + str(i)]
        A, cache = linear_forward_relu(A_prev, W, b)
        caches.append(cache)
    W = parameters["W" + str(Len)]
    b = parameters["b" + str(Len)]
    AL, cache = linear_forward_sigmoid(A, W, b)
    caches.append(cache)
    return AL, caches

In [26]:
def softMax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=0)

In [27]:
def conv_initialize_parameters(prev_c,curr_c,filterSize,stride,pad):
    parameters = {}
    parameters['W'] = np.random.randn(filterSize[0],filterSize[1],prev_c,curr_c) * 0.008
    parameters['b'] = np.random.randn(1,1,1,curr_c)*0.008    
    parameters['stride'] = stride
    parameters['pad'] = pad
    return parameters

In [28]:
def pool_initialize_parameters(stride,f):
    parameters = {}   
    parameters['stride'] = stride
    parameters['f'] = f
    return parameters

In [29]:
def forwardAll(X,conv1_param,conv2_param,pool1_param,pool2_param,fc_params):
    Z1,cache1 = conv_forward(X,conv1_param)
    A1,cache2=CVrelu(Z1)
    A2, cache3=pool_forward(A1,pool1_param)
    Z2, cache4=conv_forward(A2,conv2_param)
    A3,cache5=CVrelu(Z2)
    A4,cache6=pool_forward(A3,pool2_param)
    A4=A4.reshape(A4.shape[0],(A4.shape[1]*A4.shape[2]*A4.shape[3]))
    A5,cache7=fc_forward_propogation(A4.T,fc_params)
    out=softMax(A5)
    return out,cache1,cache2,cache3,cache4,cache5,cache6,cache7

In [30]:
def backpropogationAll(A, Y, cache1,cache2,cache3,cache4,cache5,cache6,cache7):
    gradsFC=fc_backpropogation(A, Y,cache7)
    dAfc=np.ndarray(gradsFC["dA1"].shape)
    dAfc=dAfc.T
    dAfc=dAfc.reshape(dAfc.shape[0],9,9,16)
    gradsPool2=pool_backward(dAfc, cache6)
    #print(gradsPool2['dA'].shape)
    dAPool2=gradsPool2['dA']
    dZ1=backward_relu(dAPool2,cache5)
    gradsC2=conv_backward(dZ1,cache4)
    gradsPool1=pool_backward(gradsC2['dA'], cache3)
    dAPool1=gradsPool1['dA']
    dZ2=backward_relu(dAPool1,cache2)
    gradsC1=conv_backward(dZ2,cache1)
    return gradsFC,gradsPool2,gradsC2,gradsPool1,gradsC1

In [31]:
def model(X, Y, iteration,lr):
    
    conv1_param = conv_initialize_parameters(3,8,[4,4],2,1)
    conv2_param = conv_initialize_parameters(8,16,[4,4],2,0)
    pool1_param = pool_initialize_parameters(1,5)
    pool2_param = pool_initialize_parameters(1,5)
    ##Load weights from file
    conv1_param['W']=np.array(dataset["c1W"])
    conv1_param['b']=np.array(dataset["c1b"])
    conv2_param['W']=np.array(dataset["c2W"])
    conv2_param['b']=np.array(dataset["c2b"])
    layers=[1296,108,6]
    fc_params = fc_initialize_parameters(layers)
    fc_params['W1']=np.array(dataset["FCw1"])
    fc_params['b1']=np.array(dataset["FCb1"])
    fc_params['W2']=np.array(dataset["FCw2"])
    fc_params['b2']=np.array(dataset["FCb2"])
    for i in range(0, iteration):
        A,cache1,cache2,cache3,cache4,cache5,cache6,cache7=forwardAll(X,conv1_param,conv2_param,pool1_param,pool2_param,fc_params)
        cost = cost_function(A, Y.T)
        gradsFC,gradsPool2,gradsC2,gradsPool1,gradsC1 = backpropogationAll(A, Y.T, cache1,cache2,cache3,cache4,cache5,cache6,cache7)
        gradientDescendent(fc_params,conv1_param,conv2_param,gradsFC,gradsC1,gradsC2, lr)
        print ("Cost : %f" %(cost))
    return conv1_param,conv2_param,fc_params
    

In [33]:
Y=one_hot_Encoding(dataY,[0,1,2,3,4,5])
X=(dataX/255)-0.5
conv1_param,conv2_param,fc_params=model(X,Y,1,0.2)

Cost : 2.701776
