In [1]:
from __future__ import division
import pandas as pd
import numpy as np
%matplotlib inline 
import matplotlib.pyplot as plt
import tensorflow as tf


In [2]:
""" Reading data"""

data_x = np.load('ex5_train_x.npy', mmap_mode='r')
data_y = np.load('ex5_train_y.npy', mmap_mode='r')

"""normalising data_x"""
data_x1 = (data_x / 255) - 0.5
print(data_y.shape)
data_y1 = data_y.reshape(1020,1)
print(data_y1.shape)


(1020,)
(1020, 1)


In [3]:
"""first filter set"""

#filter_set1 = 0.01 * np.random.randn(8, 4, 4)
#filter_set2 = 0.01 * np.random.randn(16, 4, 4)


'first filter set'

In [4]:
def zero_padding(data, padding):
    m = len(data)
    data_padded = np.zeros((data.shape[0], data.shape[1]+2, data.shape[2]+2, data.shape[3]))
    for i in range(m):
        data_T = data[i].T
        data_temp = np.zeros((data_T.shape[0], data_T.shape[1]+2, data_T.shape[2]+2))
        for j in range(0, len(data_T)):
            temp = np.pad(data_T[j], (padding, padding), 'constant', constant_values=(0))
            data_temp[j] = temp
        data_padded[i] = data_temp.T
    return data_padded
            
    

In [5]:
def conv_forward(padding, stride, data, filter_size, filters, bias):
    
    m = len(data)
    image_size = data.shape[1]
    filters_number = filters.shape[3]
    if(padding > 0):
        data_aftr_pad = zero_padding(data_x1, padding)
    else:
        data_aftr_pad = data
        
    output_size = ((image_size + 2*padding - filter_size) / stride) + 1
    #output_size = ((image_size - filter_size) / stride) + 1
    #print("this is output size %s " %output_size)
    output = np.zeros((m, int(output_size), int(output_size), filters_number)) 
    n_h = output.shape[1]
    n_w = output.shape[2]
    n_ch = output.shape[3]
    for i in range(m):
        #data_each = data[i, :]
        data_each = data_aftr_pad[i, :]
        for h in range(n_h):
            vert_start = h * stride
            vert_end = h * stride + filter_size
            if(vert_end > data_each.shape[0]):
                break
            for w in range(n_w):
                horiz_start = w * stride
                horiz_end = w * stride + filter_size
                if(horiz_end > data_each.shape[1]):
                    break
                for c in range(n_ch):
                    #finding corner of the slice with filter size
                    #vert_start = h * stride
                    #vert_end = h * stride + filter_size
                    #horiz_start = w * stride
                    #horiz_end = w * stride + filter_size
                   # print("vert start   %s" %(vert_start))
                    #print("vert  end %s" %(vert_end))
                    #print("hori start  %s" %(horiz_start))
                    #print("hori  end %s" %(horiz_end))
                    a_slice = data_each[vert_start:vert_end, horiz_start:horiz_end, :]
                    #print(a_slice)
                    output[i, h, w, c] = conv_single_step1(a_slice, filters[:, :, :, c], bias[:, :, :, c])
    #print("endtime %s")
    #print(time.time())
    cache = (data, filters, bias, padding, stride)
    return output, cache      
        
    

In [6]:
def conv_single_step1(a_slice_prev, W, bias):
   
    sum = np.multiply(a_slice_prev, W) + bias
    Z = np.sum(sum)
    
    return Z

In [7]:
"""convolution single step - a filter(channel) and a part of data which is filtersize(data_part) is passed"""

def conv_single_step(data_part, channel, bias):
    #temp_matrix = np.multiply(data_part, channel)
    m = channel.shape[0]
    ch = data_part.shape[2]   #no.of channels in previous layer or in the slice
    #print("this is no.of channels %s" %ch)
    temp = 0
    for i in range(m):
        for j in range(m):
            for c in range(ch):
                #print(data_part[i][j][c])
                temp += data_part[i][j][c] * channel[i][j] 
    temp = temp + bias
    return temp

    

In [8]:
#conv1_data, cache_conv1 = conv_forward(1, 2, data_x1, 4, filter_set1, conv_bias1)

In [9]:
#print(conv1_data.shape)
#print(conv1_data)

In [10]:
#conv1_data_relu = relu_activation(conv1_data)
#print(conv1_data_relu.shape)
#print(conv1_data_relu)

In [11]:
"""pooling - mode is either 'max' or 'avg' """

def pool_forward(padding, stride, data, filter_size, mode):
    m = len(data)
    image_size = data.shape[1]
    filters_number = data.shape[3]
    output_size = ((image_size + 2 * padding - filter_size) / stride) + 1
    output = np.zeros((m, int(output_size), int(output_size), filters_number)) 
    n_h = output.shape[1]
    n_w = output.shape[2]
    n_ch = output.shape[3]
    for i in range(m):
        for h in range(n_h):
            vert_start = h * stride
            vert_end = h * stride + filter_size
            if(vert_end > data.shape[1]):
                break
            for w in range(n_w):
                horiz_start = w * stride
                horiz_end = w * stride + filter_size
                if(horiz_end > data.shape[2]):
                    break
                for c in range(n_ch):
                    a_slice = data[i,vert_start:vert_end, horiz_start:horiz_end, c]
                    if (mode == 'max'):
                        output[i, h, w, c] = np.max(a_slice)
                    elif (mode == 'avg'):
                        output[i, h, w, c] = np.mean(a_slice)
    cache = (data, stride, padding, filter_size)
    return output, cache                    
                        

In [12]:
#pool1_data, cache_pool1 = pool_forward(0, 1, conv1_data_relu, 5, 'max')
#print(pool1_data.shape)

In [13]:
##conv2_data, cache_conv2 = conv_forward(0, 2, pool1_data, 4, filter_set2, conv_bias2)
#print(conv2_data.shape)
#conv2_data_relu = relu_activation(conv2_data)
#print(conv2_data_relu)

In [14]:
#pool2_data, cache_pool2 = pool_forward(0, 1, conv2_data_relu, 5, 'max')
#print(pool2_data.shape)

In [15]:
#samples = pool2_data.shape[0]
#flatten_size = pool2_data.shape[1] * pool2_data.shape[2] * pool2_data.shape[3]
#flatten_data = pool2_data.reshape(samples, flatten_size )
#print(flatten_data.shape)

In [16]:
"""ReLU activation"""

def relu_activation(data_array):
    return np.maximum(data_array, 0)

In [17]:
def dRelu(data_array):
    data_array[data_array<=0] = 0
    data_array[data_array>0] = 1
    return data_array

In [18]:
"""Function for One hot encoding. input Y as an array and no.of samples is passed to the function"""

def One_Hot_Encoding(arr, samples_num):
    encode_matrix = np.zeros((samples_num, 6))
    for i in range(samples_num):
        encode_matrix[i][arr[i][0]] = 1
    return encode_matrix

In [19]:
Y_en = One_Hot_Encoding(data_y1, len(data_y1))

In [20]:
def initalize_parameters():

    #np.random.seed(1)
    
    W_first = 0.01 * np.random.randn(108, 1296)  # 108 X 1296
    W_second = 0.01 * np.random.randn(6, 108)  # 6 X 108

    
    bias1 = np.zeros((108, 1)) # 108 X 1
    bias2 = np.zeros((6, 1)) # 10 X 1
    
    
    return W_first, W_second, bias1, bias2

In [21]:
def forwardpropagation(W1, W2, A, b1, b2):
    
    """ performing forward propagation. returns new neurons and Zs"""

    part1 = np.dot(A, W1.T)      #3500 x 25
    Z1 = np.add(part1.T,b1)     #25 X 3500
    #A1 = sigmoid(Z1)            #25 X 3500
    A1 = relu_activation(Z1)
    
    part2 = np.dot(W2,A1) 
    Z2 = np.add(part2,b2)       #10 X 3500
    A2 = sigmoid(Z2)           # 25 X 3500
    A2 = A2.T                   #3500 X 10

     
    return A1, A2, Z1, Z2
         

In [22]:
def sigmoid(z):
    
    """ sigmoid function when an array is passed as parameter"""
    
    return 1/(1 + np.exp(-z))   

In [23]:
def dSigmoid(A):
    
    "derivative of sigmoid function or sigmoid prime"

    return np.multiply(A, (1 - A))

In [24]:

def backwardpropagation(A1, A2, Z1, Y, X):
    
    """ backward propagation function. the function ouputs gradient of weights and bias"""
    
    m = len(A1)
    dZ2 = A2 - Y   # 3500 X 10
    #print(dZ2.shape)
    dW2 = (1/m) * np.dot(dZ2.T, A1.T) # 10 X 25
    db2 = (1/m) * np.sum(dZ2.T, axis=1, keepdims = True) 
    #gdashZ1 = dSigmoid(A1)  # 25 X 3500
    gdashZ1 = dRelu(A1)
    dZ1_firstpart = np.dot(dZ2, W2) # 3500 X 25
    dZ1 = np.multiply(dZ1_firstpart.T, gdashZ1) # 25 X 3500
    dW1 = (1/m) * np.dot(dZ1, X) # 25 X 400
    db1 = (1/m) * np.sum(dZ1, axis=1, keepdims = True) 
    dA0 = np.dot(dZ1.T, W1)
        
    return dW1, dW2, db1, db2, dA0

In [25]:
def gradientDescent(dW1, dW2, db1, db2, B1, B2, W1, W2, alpha):
    
    
    """ gradient descent calcualtes new weights and bais"""

    
    w2 = W2 - alpha * dW2
    w1 = W1 - alpha * dW1
    b1 = B1 - alpha * db1
    b2 = B2 - alpha * db2
    
    return w1, w2, b1, b2 

In [26]:
def lossFunction(y_en, final_A1):
    
    """ calculating loss functions - y (is given data Y) and y1(is A2) are arrays """
    import math
    m = len(final_A1)
    final_A = np.clip(final_A1, 0.0001, 0.99999)
    loss = (-1/m) * np.sum(np.multiply(y_en, np.log(final_A)) + np.multiply(1 - y_en, np.log(1 - final_A)))
    
    return loss

In [27]:
def softmax(x):
    
    """ softmax function final A2 is passed as parameter (e power x) / sum(e power x)"""
    E = np.exp(x)
    deno =  np.sum(np.exp(x), axis=1, keepdims=True)
    deno = deno.reshape(deno.shape[0],1)
    probs = E / deno
    return probs

In [28]:
#W1, W2, B1, B2  = initalize_parameters()

In [29]:
#A1, A2, Z1, Z2 = forwardpropagation(W1, W2, flatten_data, B1, B2)

In [30]:
#print(A1.shape)

In [31]:
#l = lossFunction(Y_en, A2)
#print(l)

In [32]:
#dW1, dW2, db1, db2, dA0 = backwardpropagation(A1, A2, Z1, Y_en, flatten_data)

In [33]:
#print(dA0.shape)


In [34]:
#W1, W2, B1, B2 = gradientDescent(dW1, dW2, db1, db2, B1, B2, W1, W2, 0.01)

In [35]:
def pool_backward(dFA0, cache_pool, mode):
    
    """ cache_pool output from pooling forward"""
    """ dP is output of first layer fully connected NN during back propagation """
    
    data_prev, stride, padding, filter_size = cache_pool
    m, n_h_prev, n_w_prev, n_ch_prev = data_prev.shape
    m, n_h, n_w, n_ch = dFA0.shape
    dA = np.zeros(data_prev.shape)
    
    for i in range(m):
        data_each = data_prev[i]
        for h in range(n_h):
            vert_start = h * stride
            vert_end = h * stride + filter_size
            if(vert_end > data_prev.shape[1]):
                break
            for w in range(n_w):
                horiz_start = w * stride
                horiz_end = w * stride + filter_size
                if(horiz_end > data_prev.shape[2]):
                    break
                for c in range(n_ch):
                    if(mode == 'max'):
                        a_slice = data_each[vert_start:vert_end, horiz_start:horiz_end, c]
                        mask = a_slice == np.max(a_slice)
                        dA[i, vert_start:vert_end, horiz_start:horiz_end, c ] += np.multiply(mask , dFA0[i, h, w, c])
                    
                        
    return dA                    
    

In [36]:
#dA0_unflatten = dA0.reshape(pool2_data.shape)
#dA2 = pool_backward(dA0_unflatten, cache_pool2, 'max')
#print(dA2.shape)

In [37]:
def conv_backward(data_poolback, con_frwd_data, learning_rate):
    """data_poolback is output from pooling layer during back propagation. ouput of pool_backward function"""
    """con_frwd_data is output of conv_forward function for this layer. this is stored as cache in conv_forward function"""
    data_prev, filters, bias, padding, stride = con_frwd_data
    m, n_h, n_w, n_ch = data_poolback.shape
    print(data_poolback.shape)
    m, n_h_prev, n_w_prev, n_ch_prev= data_prev.shape
    filter_size = filters.shape[0]
    #filters_T = filters.T
    dA_prev = np.zeros((m, n_h_prev, n_w_prev, n_ch_prev))
    dW = np.zeros(filters.shape)
    #print("shape od dw")
    #print(dW.shape)
    db = np.zeros(bias.shape)
    if (padding > 0):
        data_prev_pad = zero_padding(data_prev, padding)
        dA_prev_pad = zero_padding(dA_prev, padding)
    else:
        data_prev_pad = data_prev
        dA_prev_pad = dA_prev
    for i in range(m):
        data_prev_pad_each =  data_prev_pad[i]
        dA_prev_pad_each = dA_prev_pad[i]
        for h in range(n_h):
            vert_start = h * stride
            vert_end = h * stride + filter_size
            if(vert_end > data_prev_pad.shape[1]):
                break
            for w in range(n_w):
                horiz_start = w * stride
                horiz_end = w * stride + filter_size
                if(horiz_end > data_prev_pad.shape[2]):
                    break
            for c in range(n_ch):
                a_slice = data_prev_pad_each[vert_start:vert_end, horiz_start:horiz_end, :]
                #print(data_poolback[i, h, w, c].shape)
                dA_prev_pad_each[vert_start:vert_end, horiz_start:horiz_end, :] += filters[:,:,:,c] * data_poolback[i, h, w, c]
                dW[:,:,:,c] += a_slice * data_poolback[i, h, w, c]
                db[:,:,:,c] += data_poolback[i, h, w, c]
        if(padding > 0):
            dA_prev[i, :, :, :] = dA_prev_pad_each[padding:-padding, padding:-padding, :]
        else:
            dA_prev[i, :, :, :] = dA_prev_pad_each
    new_W = filters - learning_rate * dW
    new_bias = bias - learning_rate * db
    
    return dA_prev, new_W, new_bias
                
            
    
    
    
    

In [49]:
np.random.seed(1)
filter_set1 = 0.01 * np.random.randn(4, 4, 3, 8)
filter_set2 = 0.01 * np.random.randn(4, 4, 8, 16)
conv_bias1 = np.zeros((1, 1, 1, 8))
conv_bias2 = np.zeros((1, 1, 1, 16))
#filter_set1 = 0.01 * np.random.randn(2, 2, 3, 10)
#filter_set2 = 0.01 * np.random.randn(2, 2, 10, 18)

print(filter_set1.shape)
#print(filter_set1)

(4, 4, 3, 8)


In [50]:
W1, W2, B1, B2  = initalize_parameters()

In [51]:
for i in range (15):
    
    #print("convolution forwd for 1st CNN layer - padding 1, stride 2, filters- 8x4x4 ")
    conv1_data, cache_conv1 = conv_forward(1, 2, data_x1, 4, filter_set1, conv_bias1) #4 should be here as 4th param
    ##print("shape of output of conv_forward 1 ")
    #print(conv1_data.shape)
    #print("passing conv_forward 1 ouput to ReLU activation")
    conv1_data_relu = relu_activation(conv1_data)
    #print("shape of output after Relu activation in CNN layer 1")
    #print(conv1_data_relu.shape)
    #print("passing relu output to pooling forward function in CNN layer 1 with padding 0, stride 1, filter- 5x5, mode-max")
    pool1_data, cache_pool1 = pool_forward(0, 1, conv1_data_relu, 5, 'max') # 5 here as 4th param
    #print("output shape after pool forward function in CNN layer 1")
    #print(pool1_data.shape)
    #print("passing pool forwd output from CNN layer 1 to conv_forward in CNN layer 2, padding 0, stride 2, filters-16x4x4")
    conv2_data, cache_conv2 = conv_forward(0, 2, pool1_data, 4, filter_set2, conv_bias2) #4 should be here as 4th param
    #print("output of conv_forward in CNN layer 2")
    #print(conv2_data.shape)
    #print("passing conv_forward 2 ouput to ReLU activation")
    conv2_data_relu = relu_activation(conv2_data)
    #print("shape of output after Relu activation in CNN layer 1")
    #print(conv2_data_relu.shape)
    #print("passing relu output to pooling forward function in CNN layer 2 with padding 0, stride 1, filter- 5x5, mode-max")
    pool2_data, cache_pool2 = pool_forward(0, 1, conv2_data_relu, 5, 'max') # 5 here as 4th param
    #print("output shape after pool forward function in CNN layer 2")
    #print(pool2_data.shape)
    samples = pool2_data.shape[0]
    flatten_size = pool2_data.shape[1] * pool2_data.shape[2] * pool2_data.shape[3]
    flatten_data = pool2_data.reshape(samples, flatten_size )
    #print("shape after flattening the pool_forward CNN layer 2 output")
    #print("input to fully connected layer")
    #print(flatten_data.shape)
    #print("forward propagation in fully connected NN")
    A1, A2, Z1, Z2 = forwardpropagation(W1, W2, flatten_data, B1, B2)
    #print("backward propagation of fully connected NN")
    print("##############################")
    loss = lossFunction(Y_en, A2)
    print("iteration i %s" %i)
    print("loss after forward %s iteration " %i)
    print(loss)
    print("#################################")
    #print("backward prop in fully connected NN")
    dW1, dW2, db1, db2, dA0 = backwardpropagation(A1, A2, Z1, Y_en, flatten_data)
    #print("gradient descent")
    W1, W2, B1, B2 = gradientDescent(dW1, dW2, db1, db2, B1, B2, W1, W2, 0.1)
    #print("unflatting the ouput of fully connected back prop")
    dA0_unflatten = dA0.reshape(pool2_data.shape)
    #print("shape of unflattened")
    #print(dA0_unflatten.shape)
    #print("pool_backward in CNN layer 2")
    dA_pool2 = pool_backward(dA0_unflatten, cache_pool2, 'max')
    #print("output shape of pool_backward in CNN layer 2")
    ##print(dA_pool2.shape)
    #print("conv_backward CNN layer 2")
    print("relu backeward start")
    dA_relu_back2= dRelu(dA_pool2)
    print("relu backward done")
    dA_cnn2, filter_set2, conv_bias2 = conv_backward(dA_relu_back2, cache_conv2, 0.1)
    #print("ouput shap eof conv_backward CNN layer 2")
    #print(dA_cnn2.shape)
    #print("pool_backward in CNN layer 1")
    dA_pool1 = pool_backward(dA_cnn2, cache_pool1, 'max')
    dA_relu_back1= dRelu(dA_pool1)
    ##print("output shape of pool_backward in CNN layer 1")
    #print(dA_pool1.shape)
    #print("conv_backward CNN layer 1")
    dA_cnn1, filter_set1, conv_bias1 = conv_backward(dA_relu_back1, cache_conv1, 0.1)
    #print("ouput shap eof conv_backward CNN layer 1")
    #print(dA_cnn1.shape)
    
    







##############################
iteration i 0
loss after forward 0 iteration 
4.158971527488293
#################################
relu backeward start
relu backward done
(1020, 13, 13, 16)
(1020, 32, 32, 8)
##############################
iteration i 1
loss after forward 1 iteration 
3.60162480790713
#################################
relu backeward start
relu backward done
(1020, 13, 13, 16)
(1020, 32, 32, 8)
##############################
iteration i 2
loss after forward 2 iteration 
3.2726633014458653
#################################
relu backeward start
relu backward done
(1020, 13, 13, 16)
(1020, 32, 32, 8)
##############################
iteration i 3
loss after forward 3 iteration 
3.072835634690749
#################################
relu backeward start
relu backward done
(1020, 13, 13, 16)
(1020, 32, 32, 8)
##############################
iteration i 4
loss after forward 4 iteration 
2.947247060498529
#################################
relu backeward start
relu backward done
(1020, 

In [52]:
#f.close()
import h5py

f = h5py.File("weights_rma.hdf5", "w")
dataset = f.create_dataset("filter_set1", data=filter_set1)
dataset = f.create_dataset("filter_set2", data=filter_set2)
dataset = f.create_dataset("conv_bias1", data=conv_bias1)
dataset = f.create_dataset("conv_bias2", data=conv_bias2)
dataset = f.create_dataset("W1", data=W1)
dataset = f.create_dataset("W2", data=W2)
dataset = f.create_dataset("B1", data=B1)
dataset = f.create_dataset("B2", data=B2)
f.close()



  from ._conv import register_converters as _register_converters


In [53]:
print(dA0)

[[ 4.81591845e-04  2.25639984e-05 -3.90978663e-05 ... -1.02174805e-03
   5.19311410e-05  5.76008548e-04]
 [ 4.81591845e-04  2.25639984e-05 -3.90978663e-05 ... -1.02174805e-03
   5.19311410e-05  5.76008548e-04]
 [ 4.81591845e-04  2.25639984e-05 -3.90978663e-05 ... -1.02174805e-03
   5.19311410e-05  5.76008548e-04]
 ...
 [ 4.81591845e-04  2.25639984e-05 -3.90978663e-05 ... -1.02174805e-03
   5.19311410e-05  5.76008548e-04]
 [-2.09653006e-04 -4.68454847e-05 -2.18185998e-04 ...  1.30092301e-04
   3.99819846e-04 -1.58624437e-04]
 [ 4.29742295e-04  3.99328698e-04 -1.40306337e-04 ... -1.37993080e-03
   2.80866303e-04 -5.69217093e-04]]
