In [1]:
import matplotlib.pyplot as plt
import numpy as np
import h5py
import math

In [42]:
# Convolve window
# Create mask
# -------------


# Pooling forward (max, average)
# Pooling backward (max, average)
# Convolution forward (for all data)
# Convolution backward (for all data)

# Forward propagation (including all steps)
# Backward propagation (including all steps)
# Parameter updating (Gradient descent or other optimization method)


In [3]:
def ryan_cal_Z(W,b,A):
    Z = np.dot(W, A) + b
    cache = [A,W,b,Z]
    return Z, cache

def ryan_initweight_convol(fil_size):
    np.random.seed(1)
    params ={}
    temp = 2/math.sqrt(1020+6)
    for i in range(1, len(fil_size)+1):
        params['W' + str(i)] = temp*np.random.randn(fil_size[i-1][0],fil_size[i-1][1],fil_size[i-1][2],fil_size[i-1][3])
        params['b' + str(i)] = temp*np.random.randn(1,1,1,fil_size[i-1][3])
        
    return params
        
def ryan_initweight(topo, params):
    np.random.seed(1)
    temp = int(len(params)//2)
    for i in range(1, len(topo)):
        params['W' + str(i+temp)] = 0.01*np.random.randn(topo[i], topo[i-1])
        params['b' + str(i+temp)] = 0.01*np.zeros(shape=(topo[i], 1))
    return params

def ryan_save_model(params, path):
    f = h5py.File(path,'w')
    for key, value in params.items():
        f.create_dataset(key, data=value)
    f.close()
    return f

def ryan_load_model(path):
    dataset = h5py.File(path,'r')
    params = {}
    for i in dataset.keys():
        params[i] = np.array(dataset[i])
    
    dataset.close()
    return params
        
def ryan_onehot(labels, df=1):
    if df == 1:
        labels = labels.as_matrix()
    temp = []
    for val in labels:
        if val not in temp:
            temp.append(val)
    temp.sort()
    
    result = np.zeros(shape=(len(labels),len(temp)))
    for key, val in enumerate(labels):
        result[key][temp.index(val)] = 1
    return result

def ryan_padding(X, pads):
    result = np.pad(X,((pads[0],pads[0]),(pads[1],pads[1]),(pads[2],pads[2]),(pads[3],pads[3])),'constant');
    return result

def ryan_convol_step(one_slice, W, b):
    Z = np.multiply(one_slice, W) + b
    Z = np.sum(Z)
    return Z
    
def ryan_compute_cost(lastA, Y):
    t1 = np.multiply(Y, np.log(lastA))
    t2 = np.multiply(1.0 - Y, np.log(1.0 - lastA))
    cost = (-1.0/Y.shape[1]) * np.sum(t1 + t2)
    return cost

# activation functions
def ryan_relu(Z):
    A = np.maximum(0, Z)
    return A

def ryan_relu_back(dA, Z):
    t = Z >= 0
    dZ = np.multiply(dA, t)
    return dZ

def ryan_sigmoid(Z):
    A = 1.0/(1 + np.exp(-1.0*(Z+1e-7)))
    return A

def ryan_sigmoid_back(dA, Z):
    temp = 1.0/(1 + np.exp(-1*Z))
    dZ = np.multiply(dA, np.multiply(temp, (np.subtract(np.ones(temp.shape), temp))))
    return dZ

def ryan_init_Z(A, W, s, p):

    (m, dim, _, _) = A.shape
    (f, _, _, c) = W.shape
    
    dim = int((dim+2*p-f)/s)+1
    
    Z = np.zeros((m, dim, dim, c))
    A1 = np.zeros((m, dim, dim, c))
    return Z, A1, dim, f
    
def ryan_convol_forward(A, W, b, s, p):
    """
    Implements the forward propagation for a convolution function
    
    Arguments:
    A_prev -- output activations of the previous layer, numpy array of shape (m, n_H_prev, n_W_prev, n_C_prev)
    W -- Weights, numpy array of shape (f, f, n_C_prev, n_C)
    b -- Biases, numpy array of shape (1, 1, 1, n_C)
    hparameters -- python dictionary containing "stride" and "pad"
        
    Returns:
    Z -- conv output, numpy array of shape (m, n_H, n_W, n_C)
    cache -- cache of values needed for the conv_backward() function
    """
    
    Z, A1, dim, f = ryan_init_Z(A, W, s, p)
    A_pad = ryan_padding(A, [0,p,p,0])
    for i in range(Z.shape[0]):                                 # loop over the batch of training examples
        a_prev_pad = A_pad[i]                     # Select ith training example's padded activation
        for h in range(dim):                           # loop over vertical axis of the output volume
            for w in range(dim):                       # loop over horizontal axis of the output volume
                for c in range(Z.shape[3]):                   # loop over channels (= #filters) of the output volume
                    # Find the corners of the current "slice" (≈4 lines)
                    vert_start = h * s
                    vert_end = vert_start + f
                    horiz_start = w * s
                    horiz_end = horiz_start + f
                    # Use the corners to define the (3D) slice of a_prev_pad (See Hint above the cell). (≈1 line)
                    a_slice_prev = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
                    # Convolve the (3D) slice with the correct filter W and bias b, to get back one output neuron. (≈1 line)
                    Z[i, h, w, c] = ryan_convol_step(a_slice_prev, W[..., c], b[..., c])
                    A1[i, h, w, c] = ryan_relu(Z[i, h, w, c])
                    
    cache = [A, W, b, s, p, Z]
    
    return A1, cache

def ryan_pool_forward(A, s, f, mode = "max"):
    """
    Implements the forward pass of the pooling layer
    
    Arguments:
    A_prev -- Input data, numpy array of shape (m, n_H_prev, n_W_prev, n_C_prev)
    hparameters -- python dictionary containing "f" and "stride"
    mode -- the pooling mode you would like to use, defined as a string ("max" or "average")
    
    Returns:
    A -- output of the pool layer, a numpy array of shape (m, n_H, n_W, n_C)
    cache -- cache used in the backward pass of the pooling layer, contains the input and hparameters 
    """
    
  
#     (m, n_H_prev, n_W_prev, n_C_prev) = A.shape
    # Define the dimensions of the output
    dim = int(1 + (A.shape[1] - f) / s)
    
    
    # Initialize output matrix A
    A_pool = np.zeros((A.shape[0], dim, dim, A.shape[3]))

    
    ### START CODE HERE ###
    for i in range(A.shape[0]):                           # loop over the training examples
        for h in range(dim):                     # loop on the vertical axis of the output volume
            for w in range(dim):                 # loop on the horizontal axis of the output volume
                for c in range (A.shape[3]):            # loop over the channels of the output volume
                    
                    # Find the corners of the current "slice" (≈4 lines)
                    vert_start = h * s
                    vert_end = vert_start + f
                    horiz_start = w * s
                    horiz_end = horiz_start + f
                    
                    # Use the corners to define the current slice on the ith training example of A_prev, channel c. (≈1 line)
                    a_prev_slice = A[i, vert_start:vert_end, horiz_start:horiz_end, c]
                    
                    # Compute the pooling operation on the slice. Use an if statment to differentiate the modes. Use np.max/np.mean.
                    if mode == "max":
                        A_pool[i, h, w, c] = np.max(a_prev_slice)
                    elif mode == "average":
                        A_pool[i, h, w, c] = np.mean(a_prev_slice)
    
    ### END CODE HERE ###
    
    # Store the input and hparameters in "cache" for pool_backward()
    cache = (A, f, s)
        
    return A_pool, cache

def ryan_forward_all(X, params):
    A=X
    con_caches={}
    full_caches={}
    
    #l1
    A,cache=ryan_convol_forward(A,params['W' + str(1)],params['b' + str(1)], 2,1)
    A,cache1 = ryan_pool_forward(A, 1, 5, 'max')
    con_caches['con1'] = cache
    con_caches['pool1'] = cache1
    
    #l2
    A,cache=ryan_convol_forward(A,params['W' + str(2)],params['b' + str(2)], 2,0)
    A,cache1 = ryan_pool_forward(A, 1, 5, 'average')
    con_caches['con2'] = cache
    con_caches['pool2'] = cache1
    
    #flatten
    result = np.zeros((1020,1296))
    for i in range(A.shape[0]):
        result[i] = A[i].flatten()
    result = result.T
    
    #l3
    Z, cache = ryan_cal_Z(params['W' + str(3)], params['b' + str(3)], result)
    full_caches['full1'] = cache
    result = ryan_relu(Z)

    #l4
    Z, cache = ryan_cal_Z(params['W' + str(4)], params['b' + str(4)], result)
    full_caches['full2'] = cache
    result = ryan_sigmoid(Z)
    
    return result, con_caches, full_caches

In [4]:
X_train = np.load('ex5_train_x.npy')
X_train = X_train/255 - 0.5
Y_train = np.load('ex5_train_y.npy')

Y_onehot = ryan_onehot(Y_train, 0)

In [5]:
print(X_train.shape)
print(Y_onehot.shape)

(1020, 64, 64, 3)
(1020, 6)


In [6]:
params = ryan_initweight_convol(fil_size=[(4,4,3,8),(4,4,8,16)])
params = ryan_initweight([1296,108,6],params)

In [7]:
print(params['W1'].shape)
print(params['b1'].shape)
print("--------------")
print(params['W2'].shape)
print(params['b2'].shape)
print("---------------")
print(params['W3'].shape)
print(params['b3'].shape)
print("--------------")
print(params['W4'].shape)
print(params['b4'].shape)

(4, 4, 3, 8)
(1, 1, 1, 8)
--------------
(4, 4, 8, 16)
(1, 1, 1, 16)
---------------
(108, 1296)
(108, 1)
--------------
(6, 108)
(6, 1)


In [31]:
def ryan_update_params(params, grads, learning_rate):
    for i in range(int(len(params)//2)):
        params["W" + str(i + 1)] = params["W" + str(i + 1)] - learning_rate * grads["dW" + str(i + 1)]
        params["b" + str(i + 1)] = params["b" + str(i + 1)] - learning_rate * grads["db" + str(i + 1)]
        
    return params

def ryan_cal_grads(dZ, cache):
    m = cache[0].shape[1]
    dW = np.dot(dZ, cache[0].T)/m
    db = np.sum(dZ, axis=1, keepdims=True)/m
    dA = np.dot(cache[1].T, dZ)
    
    return dA, dW, db

def ryan_backward_all(lastA, Y, full_caches, con_caches):
    m = Y.shape[1]
    grads = {}
    temp1, temp2, temp3 = 0, 0, 0
    temp1 = np.divide(1 - Y, 1 - lastA) - np.divide(Y, lastA)
    
    #l4
    current_cache = full_caches['full2']
    dZ = ryan_sigmoid_back(temp1, current_cache[-1])
    temp1, temp2, temp3 = ryan_cal_grads(dZ, current_cache[0:-1])
    grads["dA4"] = temp1
    grads["dW4"] = temp2
    grads["db4"] = temp3

    #l3
    current_cache = full_caches['full1']
    dZ = ryan_relu_back(temp1, current_cache[-1])
    temp1, temp2, temp3 = ryan_cal_grads(dZ, current_cache[0:-1])
    grads["dA3"] = temp1
    grads["dW3"] = temp2
    grads["db3"] = temp3
    
    #l2
    temp1 = temp1.T.reshape((1020,9,9,16))
    dA_prev = ryan_pool_backward(temp1, con_caches['pool2'], 'average')
    dZ = ryan_relu_back(dA_prev, con_caches['con2'][-1])
    dA,dW,db = ryan_conv_backward(dZ, con_caches['con2'])
    grads["dA2"] = dA
    grads["dW2"] = dW
    grads["db2"] = db

    #l1
    print("-----------------------------")
    dA_prev = ryan_pool_backward(dA, con_caches['pool1'], 'max')
    dZ = ryan_relu_back(dA_prev, con_caches['con1'][-1])
    dA,dW,db = ryan_conv_backward(dZ, con_caches['con1'])
    grads["dA1"] = dA
    grads["dW1"] = dW
    grads["db1"] = db
    
    return grads

def ryan_conv_backward(dZ, cache):
    """
    Implement the backward propagation for a convolution function
    
    Arguments:
    dZ -- gradient of the cost with respect to the output of the conv layer (Z), numpy array of shape (m, n_H, n_W, n_C)
    cache -- cache of values needed for the conv_backward(), output of conv_forward()
    
    Returns:
    dA_prev -- gradient of the cost with respect to the input of the conv layer (A_prev),
               numpy array of shape (m, n_H_prev, n_W_prev, n_C_prev)
    dW -- gradient of the cost with respect to the weights of the conv layer (W)
          numpy array of shape (f, f, n_C_prev, n_C)
    db -- gradient of the cost with respect to the biases of the conv layer (b)
          numpy array of shape (1, 1, 1, n_C)
    """
    

    # Retrieve information from "cache"
    (A_prev, W, b, s, p, _) = cache
    
    # Retrieve dimensions from A_prev's shape
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    
    # Retrieve dimensions from W's shape
    (f, f, n_C_prev, n_C) = W.shape
    
    # Retrieve dimensions from dZ's shape
    (m, n_H, n_W, n_C) = dZ.shape
    
    # Initialize dA_prev, dW, db with the correct shapes
    dA_prev = np.zeros((m, n_H_prev, n_W_prev, n_C_prev))                           
    dW = np.zeros((f, f, n_C_prev, n_C))
    db = np.zeros((1, 1, 1, n_C))

    # Pad A_prev and dA_prev
    A_prev_pad = ryan_padding(A_prev, [0,p,p,0])
    dA_prev_pad = ryan_padding(dA_prev, [0,p,p,0])

    
    for i in range(m):                       # loop over the training examples
        
        # select ith training example from A_prev_pad and dA_prev_pad
        a_prev_pad = A_prev_pad[i]
        da_prev_pad = dA_prev_pad[i]
        
        for h in range(n_H):                   # loop over vertical axis of the output volume
            for w in range(n_W):               # loop over horizontal axis of the output volume
                for c in range(n_C):           # loop over the channels of the output volume
                    
                    # Find the corners of the current "slice"
                    vert_start = h
                    vert_end = vert_start + f
                    horiz_start = w
                    horiz_end = horiz_start + f
                    
                    # Use the corners to define the slice from a_prev_pad
                    a_slice = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]

                    # Update gradients for the window and the filter's parameters using the code formulas given above
                    da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] += W[:,:,:,c] * dZ[i, h, w, c]
                    dW[:,:,:,c] += a_slice * dZ[i, h, w, c]
                    db[:,:,:,c] += dZ[i, h, w, c]
                    


        # Set the ith training example's dA_prev to the unpaded da_prev_pad (Hint: use X[pad:-pad, pad:-pad, :])
#         print(da_prev_pad.shape)
        if(p==0):
            dA_prev[i, :, :, :] = da_prev_pad
        else:
            dA_prev[i, :, :, :] = da_prev_pad[p:-p, p:-p, :]
    
    return dA_prev, dW, db

In [13]:
def create_mask_from_window(x):
    """
    Creates a mask from an input matrix x, to identify the max entry of x.
    
    Arguments:
    x -- Array of shape (f, f)
    
    Returns:
    mask -- Array of the same shape as window, contains a True at the position corresponding to the max entry of x.
    """
    
    ### START CODE HERE ### (≈1 line)
    mask = x == np.max(x)
    ### END CODE HERE ###
    
    return mask

def distribute_value(dz, shape):
    """
    Distributes the input value in the matrix of dimension shape
    
    Arguments:
    dz -- input scalar
    shape -- the shape (n_H, n_W) of the output matrix for which we want to distribute the value of dz
    
    Returns:
    a -- Array of size (n_H, n_W) for which we distributed the value of dz
    """
    
    ### START CODE HERE ###
    # Retrieve dimensions from shape (≈1 line)
    (n_H, n_W) = shape
    
    # Compute the value to distribute on the matrix (≈1 line)
    average = dz / (n_H * n_W)
    
    # Create a matrix where every entry is the "average" value (≈1 line)
    a = np.ones(shape) * average
    ### END CODE HERE ###
    
    return a

def ryan_pool_backward(dA, cache, mode = "max"):
    """
    Implements the backward pass of the pooling layer
    
    Arguments:
    dA -- gradient of cost with respect to the output of the pooling layer, same shape as A
    cache -- cache output from the forward pass of the pooling layer, contains the layer's input and hparameters 
    mode -- the pooling mode you would like to use, defined as a string ("max" or "average")
    
    Returns:
    dA_prev -- gradient of cost with respect to the input of the pooling layer, same shape as A_prev
    """
    
    ### START CODE HERE ###
    
    # Retrieve information from cache (≈1 line)
    (A_prev, f, s) = cache
    
    # Retrieve dimensions from A_prev's shape and dA's shape (≈2 lines)
    m, n_H_prev, n_W_prev, n_C_prev = A_prev.shape
    m, n_H, n_W, n_C = dA.shape
    
    # Initialize dA_prev with zeros (≈1 line)
    dA_prev = np.zeros(A_prev.shape)
    
    for i in range(m):                       # loop over the training examples
        # select training example from A_prev (≈1 line)
        a_prev = A_prev[i]
        for h in range(n_H):                   # loop on the vertical axis
            for w in range(n_W):               # loop on the horizontal axis
                for c in range(n_C):           # loop over the channels (depth)
                    # Find the corners of the current "slice" (≈4 lines)
                    vert_start = h
                    vert_end = vert_start + f
                    horiz_start = w
                    horiz_end = horiz_start + f
                    
                    # Compute the backward propagation in both modes.
                    if mode == "max":
                        # Use the corners and "c" to define the current slice from a_prev (≈1 line)
                        a_prev_slice = a_prev[vert_start:vert_end, horiz_start:horiz_end, c]
                        # Create the mask from a_prev_slice (≈1 line)
                        mask = create_mask_from_window(a_prev_slice)
                        # Set dA_prev to be dA_prev + (the mask multiplied by the correct entry of dA) (≈1 line)
                        dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += np.multiply(mask, dA[i, h, w, c])
                        
                    elif mode == "average":
                        # Get the value a from dA (≈1 line)
                        da = dA[i, h, w, c]
                        # Define the shape of the filter as fxf (≈1 line)
                        shape = (f, f)
                        # Distribute it to get the correct slice of dA_prev. i.e. Add the distributed value of da. (≈1 line)
                        dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += distribute_value(da, shape)
                            
    return dA_prev

In [None]:
def ryan_update_params(params, grads, learning_rate):
    for i in range(int(len(params)//2)):
        params["W" + str(i + 1)] = params["W" + str(i + 1)] - learning_rate * grads["dW" + str(i + 1)]
        params["b" + str(i + 1)] = params["b" + str(i + 1)] - learning_rate * grads["db" + str(i + 1)]
        
    return params

In [None]:
A, con_caches, full_caches = ryan_forward_all(X_train, params)
cost = ryan_compute_cost(A, Y_onehot.T)
print(cost)
grads = ryan_backward_all(A, Y_onehot.T, full_caches, con_caches)

print(grads['dA1'].shape)
print(grads['dW1'].shape)
print(grads['db1'].shape)

print(params['W1'].shape)
print(params['b1'].shape)

In [39]:
def ryan_update_params(params, grads, learning_rate):
    for i in range(int(len(params)//2)):
        params["W" + str(i + 1)] = params["W" + str(i + 1)] - learning_rate * grads["dW" + str(i + 1)]
        params["b" + str(i + 1)] = params["b" + str(i + 1)] - learning_rate * grads["db" + str(i + 1)]
            
    return params

In [38]:
params = ryan_update_params(params, grads, 0.1)

4


In [40]:
A, con_caches, full_caches = ryan_forward_all(X_train, params)
cost = ryan_compute_cost(A, Y_onehot.T)
print(cost)

nan




0.001