In [164]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import h5py

In [165]:
x_train = np.load('ex5_train_x.npy')

In [166]:
x_train = (x_train / 255.)

In [167]:
y_train = np.load('ex5_train_y.npy')

In [168]:
type(y_train)

numpy.ndarray

In [169]:
def zero_pad(x, pad):
    x_pad = np.pad(x, ((0,0),(pad, pad), (pad, pad), (0,0)), 'constant', constant_values=0)
    return x_pad

In [170]:
np.random.seed(1)
x_pad = zero_pad(x_train, 2)

In [171]:
x_pad.shape, x_train.shape

((1020, 68, 68, 3), (1020, 64, 64, 3))

In [172]:
def conv_single_step(a_slice_prev, w, b):
    s = np.multiply(a_slice_prev, w) + b
    z = np.sum(s)
    return z

In [173]:
def conv_forward(A_prev, W, b, hparameters):
    
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    
    (f, f, n_C_prev, n_C) = W.shape

    
    stride = hparameters['stride']
    pad = hparameters['pad']
    
    
    n_H = int((n_H_prev - f + 2 * pad) / stride) + 1
    n_W = int((n_W_prev - f + 2 * pad) / stride) + 1
    
   
    Z = np.zeros((m, n_H, n_W, n_C))
    A = np.zeros((m, n_H, n_W, n_C))
    
    A_prev_pad = zero_pad(A_prev, pad)
    
    for i in range(m):                                 
        a_prev_pad = A_prev_pad[i]                     
        for h in range(n_H):                          
            for w in range(n_W):                       
                for c in range(n_C):                  
                   
                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f
                   
                    a_slice_prev = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
                   
                    Z[i, h, w, c] = conv_single_step(a_slice_prev, W[...,c], b[...,c])
                    A[i, h, w, c] = relu(Z[i, h, w, c])
    
    assert(Z.shape == (m, n_H, n_W, n_C))
    
   
    cache = (A_prev, W, b, hparameters)
    
    return A, cache

In [174]:
def pool_forward(A_prev, hparameters, mode="max"):
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    f = hparameters["f"]
    stride = hparameters["stride"]
    n_H = int(1 + (n_H_prev - f) / stride)
    n_W = int(1 + (n_W_prev - f) / stride)
    n_C = n_C_prev
    A = np.zeros((m, n_H, n_W, n_C))
    
    for i in range(m):
        for h in range(n_H):
            for w in range(n_W):
                for c in range (n_C):
                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f
                    a_prev_slice = A_prev[i, vert_start:vert_end, horiz_start:horiz_end, c]
                    if mode == "max":
                        A[i, h, w, c] = np.max(a_prev_slice)
                    elif mode == "average":
                        A[i, h, w, c] = np.mean(a_prev_slice)
    cache = (A_prev, hparameters)
    assert(A.shape == (m, n_H, n_W, n_C))
    return A, cache

In [175]:
def conv_backward(dZ, cache):
    
    (A_prev, W, b, hparameters) = cache
    
   
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    
   
    (f, f, n_C_prev, n_C) = W.shape
    
   
    stride = hparameters["stride"]
    pad = hparameters["pad"]
    
   
    (m, n_H, n_W, n_C) = dZ.shape
    
    
    dA_prev = np.zeros((m, n_H_prev, n_W_prev, n_C_prev))                           
    dW = np.zeros((f, f, n_C_prev, n_C))
    db = np.zeros((1, 1, 1, n_C))

    
    A_prev_pad = zero_pad(A_prev, pad)
    dA_prev_pad = zero_pad(dA_prev, pad)
    
    for i in range(m):                      
        
        
        a_prev_pad = A_prev_pad[i]
        da_prev_pad = dA_prev_pad[i]
        
        for h in range(n_H):                  
            for w in range(n_W):               
                for c in range(n_C):           
                    
                   
                    vert_start = h
                    vert_end = vert_start + f
                    horiz_start = w
                    horiz_end = horiz_start + f
                    
                   
                    a_slice = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]

                    
                    da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] += W[:,:,:,c] * dZ[i, h, w, c]
                    dW[:,:,:,c] += a_slice * dZ[i, h, w, c]
                    db[:,:,:,c] += dZ[i, h, w, c]
                    
        
        dA_prev[i, :, :, :] = da_prev_pad[pad:-pad, pad:-pad, :]
    
    assert(dA_prev.shape == (m, n_H_prev, n_W_prev, n_C_prev))
    
    return dA_prev, dW, db

In [176]:
def create_mask(x):
   
    mask = x == np.max(x)
   
    
    return mask

In [177]:
def distribute_value(dz, shape):
    
    (n_H, n_W) = shape
    
    
    average = dz / (n_H * n_W)
    
    
    a = np.ones(shape) * average
    
    
    return a

In [178]:
def pool_backward(dA, cache, mode = "max"):
    (A_prev, hparameters) = cache
    
    stride = hparameters["stride"]
    f = hparameters["f"]
    
    
    m, n_H_prev, n_W_prev, n_C_prev = A_prev.shape
    m, n_H, n_W, n_C = dA.shape
    
    
    dA_prev = np.zeros(A_prev.shape)
    
    for i in range(m):                       
        a_prev = A_prev[i]
        for h in range(n_H):                  
            for w in range(n_W):               
                for c in range(n_C):           
                    
                    vert_start = h
                    vert_end = vert_start + f
                    horiz_start = w
                    horiz_end = horiz_start + f
                    
                    
                    if mode == "max":
                        a_prev_slice = a_prev[vert_start:vert_end, horiz_start:horiz_end, c]
                       
                        mask = create_mask(a_prev_slice)
                        
                        dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += np.multiply(mask, dA[i, h, w, c])
                        
                    elif mode == "average":
                       
                        da = dA[i, h, w, c]
                        
                        shape = (f, f)
                    
                        dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += distribute_value(da, shape)
                        
    
    assert(dA_prev.shape == A_prev.shape)
    
    return dA_prev

In [179]:
def one_hot_encoding(mat):
    mat = mat.as_matrix()
    temp = []
    for val in mat:
        if val not in temp:
            temp.append(val)
    temp.sort()
    
    result = np.zeros(shape=(len(mat),len(temp)))
    for key, val in enumerate(mat):
        result[key][temp.index(val)] = 1
    return result

In [180]:
y_train_encoded = one_hot_encoding(pd.DataFrame(y_train))

In [181]:
def sigmoid(z):
    A = 1. / (1 + np.exp(-z))
#     z_temp = z
    return A

In [182]:
def relu(x):
    A = np.maximum(0,x)
    return A

In [183]:
def initialize_parameters():
   
    np.random.seed(1) 
    
    

    W1 = np.random.randn(4, 4, 3, 8) * 0.01
    b1 = np.random.randn(1, 1, 1, 8) * 0.01
    W2 = np.random.randn(4, 4, 8, 16) * 0.01
    b2 = np.random.randn(1, 1, 1, 16) * 0.01
    W4 = np.random.randn(108, 64)
    b4 = np.random.randn(108, 1)
    W5 = np.random.randn(6, 108)
    b5 = np.random.randn(6, 1)

    
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2, "W4":W4, "b4":b4, "W5": W5, "b5":b5}
    
    return parameters

In [184]:
def calculate_cost(A5, Y):
    
    m = Y.shape[1]
#     W1 = parameters['W1']
#     W2 = parameters['W2']
    
    #A5 = np.matrix(A5)
    #print A2
#     logprobs = Y.dot(np.log(A2).T) + np.multiply((1 - Y), np.log(1 - A2))
#     cost = - np.sum(logprobs) / m
    logprobs = np.multiply(np.log(A5), Y) + np.multiply((1 - Y), np.log(1 - A5))
    cost = - np.nansum(logprobs) / m
    
    cost = np.squeeze(cost) 
    #print type(cost)
    #print cost
    assert(isinstance(cost, float))
    return cost

In [185]:
def forward_propagation(x, parameters):
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W4 = parameters['W4']
    b4 = parameters['b4']
    W5 = parameters['W5']
    b5 = parameters['b5']
    hparameters = {"pad" : 1,
               "stride": 2, "f":4}
    C1, cache = conv_forward(x, W1, b1, hparameters)
#     print Z1
#     A1 = relu(Z1)
    print "C1 shape: ",C1.shape
    hparameters1 = {"stride" : 1,
                   "f": 5, "pad":0}
    P1, cache1= pool_forward(C1, hparameter, mode='max')
    print "P1 shape: ",P1.shape
    hparameters2 = {"pad" : 1,
                   "stride": 2, "f":4}
    C2, cache2 = conv_forward(P1, W2, b2, hparameters)
    print "C2 shape: ",C2.shape
    hparameters3 = {"stride" : 1,
                   "f": 5, "pad": 0}
    P2, cache3 = pool_forward(C2, hparameters, mode="max")
    print "P2 shape:",P2.shape
    F3 = P2.reshape(P2.shape[0], -1)
    F3 = F3.T
    print 'F3 shape: ',F3.shape
#     W4 = np.random.randn(A3.shape[1], 108) * 0.01
#     b4 = np.random.randn(A3.shape[1], 1) * 0.01
    Z4 = W4.dot(F3) + b4
    F4 = relu(Z4)
    Z5 = W5.dot(F4) + b5
    F5 = sigmoid(Z5)
#     cache4 = {'F3': F3, 'Z4':Z4, 'F4':F4, 'Z5':Z5, 'F5':F5}
    return F5

In [186]:
def update_parameters(parameters, gradients, learning_rate):
    
    dW1 = gradients['dW1']
    db1 = gradients['db1']
    dW2 = gradients['dW2']
    db2 = gradients['db2']
    dW4 = gradients['dW4']
    db4 = gradients['db4']
    dW5 = gradients['dW5']
    db5 = gradients['db5']
    
    parameters["W1"] = parameters["W1"] - learning_rate * dW1
    parameters["b1"] = parameters["b1"] - learning_rate * db1
    parameters["W2"] = parameters["W2"] - learning_rate * dW2
    parameters["b2"] = parameters["b2"] - learning_rate * db2
    parameters["W4"] = parameters["W4"] - learning_rate * dW4
    parameters["b4"] = parameters["b4"] - learning_rate * db4
    parameters["W5"] = parameters["W5"] - learning_rate * dW5
    parameters["b5"] = parameters["b5"] - learning_rate * db5
    return parameters

In [187]:
def model(x, y, learning_rate = 0.01, num_iterations=10, print_cost=False):
    parameters = initialize_parameters()
#     W1 = parameters['W1']
#     b1 = parameters['b1']
#     W2 = parameters['W2']
#     b2 = parameters['b2']
#     W4 = parameters['W4']
#     b4 = parameters['b4']
#     W5 = parameters['W5']
#     b5 = parameters['b5']
    for i in range(0, num_iterations):
        W1 = parameters['W1']
        b1 = parameters['b1']
        W2 = parameters['W2']
        b2 = parameters['b2']
        W4 = parameters['W4']
        b4 = parameters['b4']
        W5 = parameters['W5']
        b5 = parameters['b5']
        hparameters = {"pad" : 1,
               "stride": 2, "f":4}
        C1, cache = conv_forward(x, W1, b1, hparameters)
        print "C1 shape: ",C1.shape
        hparameters1 = {"stride" : 1,
                       "f": 5, "pad":0}
        P1, cache1= pool_forward(C1, hparameters)
        print "P1 shape: ",P1.shape
        hparameters2 = {"pad" : 1,
                       "stride": 2, "f":4}
        C2, cache2 = conv_forward(P1, W2, b2, hparameters)
        print "C2 shape: ",C2.shape
        hparameters3 = {"stride" : 1,
                       "f": 5, "pad": 0}
        P2, cache3 = pool_forward(C2, hparameters, mode="average")
        print "P2 shape:",P2.shape
        F3 = P2.reshape(P2.shape[0], -1)
        print "F3.shape: ",F3.shape
        F3 = F3.T
           
        Z4 = W4.dot(F3) + b4
        F4 = relu(Z4)
        print "F4 shape", F4.shape
        Z5 = W5.dot(F4) + b5
        F5 = sigmoid(Z5)
        print "F5 shape", F5.shape
        cost = calculate_cost(F5, y)
        m = x.shape[-1]
        W4 = parameters['W4']
       
        dZ5 = np.array(F5 - y)
        dW5 = (1.0/m) * np.dot(dZ5, F4.T)
        db5 = (1.0 / m) * np.sum(dZ5, axis=1, keepdims=True)
        dZ4 = np.multiply(np.dot(W5.T, dZ5), F4 * (1 - F4))
        dZ4 = np.array(dZ4)
#         dW4 = (1.0 / m) * np.dot(dZ4, F3.T)
#         db4 = (1.0 / m) * np.sum(dZ4, axis=1, keepdims=True)
        dW4 = (1. / m) * np.dot(dZ4, F3.T)
        dZ4 = np.array(dZ4)
        db4 = 1. / m * np.sum(dZ4, axis=1, keepdims=True)
        dA3 = np.dot(W4.T, dZ4)
        dA3 = dA3.T
        print "dA3 shape: ", dA3.shape
        dP2 = dA3.reshape((1020, 2, 2, 16))
        dA2 = pool_backward(dP2, cache3, mode='average')
        dZ2 = np.multiply(dA2, np.int64(C2 > 0))
        print "dZ2 shape:",dZ2.shape
        dP1, dW2, db2 = conv_backward(C2, cache2)
        dA1 = pool_backward(dP1, cache1, mode='max')
        print "dA1 shape: ",dA1.shape
        dZ1 = np.multiply(dA1, np.int64(C1 > 0))
        x, dW1, db1 = conv_backward(C1, cache)
        print "Done conv_backward 1"
        gradients = {"dW2": dW2, "db2": db2, "dW1": dW1, "db1": db1, "dW4": dW4, "dW5":dW5, "db4":db4, "db5":db5}

        parameters = update_parameters(parameters, gradients, learning_rate)
        if print_cost and i % 1 == 0:
            print ("Cost after %i iterations: %f" % (i, cost))
            
    return parameters

In [188]:
parameters = model(x_train, y_train_encoded.T, learning_rate = 0.1, num_iterations=3, print_cost=True)

C1 shape:  (1020, 32, 32, 8)
P1 shape:  (1020, 15, 15, 8)
C2 shape:  (1020, 7, 7, 16)
P2 shape: (1020, 2, 2, 16)
F3.shape:  (1020, 64)
F4 shape (108, 1020)
F5 shape (6, 1020)
dA3 shape:  (1020, 64)
dZ2 shape: (1020, 7, 7, 16)
dA1 shape:  (1020, 32, 32, 8)
Done conv_backward 1
Cost after 0 iterations: 40.325467
C1 shape:  (1020, 32, 32, 8)
P1 shape:  (1020, 15, 15, 8)
C2 shape:  (1020, 7, 7, 16)
P2 shape: (1020, 2, 2, 16)
F3.shape:  (1020, 64)
F4 shape (108, 1020)
F5 shape (6, 1020)
dA3 shape:  (1020, 64)


  
  # This is added back by InteractiveShellApp.init_path()
  # This is added back by InteractiveShellApp.init_path()


dZ2 shape: (1020, 7, 7, 16)
dA1 shape:  (1020, 32, 32, 8)
Done conv_backward 1
Cost after 1 iterations: inf
C1 shape:  (1020, 32, 32, 8)
P1 shape:  (1020, 15, 15, 8)
C2 shape:  (1020, 7, 7, 16)
P2 shape: (1020, 2, 2, 16)
F3.shape:  (1020, 64)
F4 shape (108, 1020)
F5 shape (6, 1020)
dA3 shape:  (1020, 64)
dZ2 shape: (1020, 7, 7, 16)
dA1 shape:  (1020, 32, 32, 8)
Done conv_backward 1
Cost after 2 iterations: inf


In [189]:
f = h5py.File('./' + "para_Tri_Ninh.hdf5", "w")
dataset = f.create_dataset("W5", data=parameters['W5'])
dataset = f.create_dataset("b5", data=parameters['b5'])
dataset = f.create_dataset("W4", data=parameters['W4'])
dataset = f.create_dataset("b4", data=parameters['b4'])
dataset = f.create_dataset("W2", data=parameters['W2'])
dataset = f.create_dataset("b2", data=parameters['b2'])
dataset = f.create_dataset("W1", data=parameters['W1'])
dataset = f.create_dataset("b1", data=parameters['b1'])
f.close()