In [1]:
import numpy as np
import h5py
import math

In [2]:
#old stuff
def ryan_update_params(params, grads, learning_rate):
    for i in range(int(len(params)//2)):
        params["W" + str(i + 1)] = params["W" + str(i + 1)] - learning_rate * grads["dW" + str(i + 1)]
        params["b" + str(i + 1)] = params["b" + str(i + 1)] - learning_rate * grads["db" + str(i + 1)]
            
    return params

def ryan_cal_grads(dZ, cache):
    m = cache[0].shape[1]
    dW = np.dot(dZ, cache[0].T)/m
    db = np.sum(dZ, axis=1, keepdims=True)/m
    dA = np.dot(cache[1].T, dZ)
    
    return dA, dW, db

def ryan_cal_Z(W,b,A):
    Z = np.dot(W, A) + b
    cache = [A,W,b,Z]
    return Z, cache
        
def ryan_initweight(topo, params):
    np.random.seed(1)
    temp = int(len(params)//2)
    for i in range(1, len(topo)):
        params['W' + str(i+temp)] = 0.01*np.random.randn(topo[i], topo[i-1])
        params['b' + str(i+temp)] = 0.01*np.zeros(shape=(topo[i], 1))
    return params
        
def ryan_onehot(labels, df=1):
    if df == 1:
        labels = labels.as_matrix()
    temp = []
    for val in labels:
        if val not in temp:
            temp.append(val)
    temp.sort()
    
    result = np.zeros(shape=(len(labels),len(temp)))
    for key, val in enumerate(labels):
        result[key][temp.index(val)] = 1
    return result
    
def ryan_compute_cost(lastA, Y):
    t1 = np.multiply(Y, np.log(lastA))
    t2 = np.multiply(1.0 - Y, np.log(1.0 - lastA))
    cost = (-1.0/Y.shape[1]) * np.sum(t1 + t2)
    return cost

# activation functions
def ryan_relu(Z):
    A = np.maximum(0, Z)
    return A

def ryan_relu_back(dA, Z):
    t = Z >= 0
    dZ = np.multiply(dA, t)
    return dZ

def ryan_sigmoid(Z):
    A = 1.0/(1 + np.exp(-1.0*Z))
    return A

def ryan_sigmoid_back(dA, Z):
    temp = 1.0/(1 + np.exp(-1*Z))
    dZ = np.multiply(dA, np.multiply(temp, (np.subtract(np.ones(temp.shape), temp))))
    return dZ

#new stuffs
def ryan_initweight_convol(fil_size):
    np.random.seed(1)
    params ={}
    temp = 2/math.sqrt(1020+6)
    for i in range(1, len(fil_size)+1):
        params['W' + str(i)] = temp*np.random.randn(fil_size[i-1][0],fil_size[i-1][1],fil_size[i-1][2],fil_size[i-1][3])
        params['b' + str(i)] = temp*np.random.randn(1,1,1,fil_size[i-1][3])
        
    return params

def ryan_save_model(params, path):
    f = h5py.File(path,'w')
    for key, value in params.items():
        f.create_dataset(key, data=value)
    f.close()
    return f

def ryan_load_model(path):
    dataset = h5py.File(path,'r')
    params = {}
    for i in dataset.keys():
        params[i] = np.array(dataset[i])
    
    dataset.close()
    return params

def ryan_padding(X, pads):
    result = np.pad(X,((pads[0],pads[0]),(pads[1],pads[1]),(pads[2],pads[2]),(pads[3],pads[3])),'constant');
    return result

def ryan_convol_step(one_slice, W, b):
    Z = np.multiply(one_slice, W) + b
    Z = np.sum(Z)
    return Z

# convol forward
def ryan_init_Z(A, W, s, p):

    (m, dim, _, _) = A.shape
    (f, _, _, c) = W.shape
    # init output dimensions
    dim = int((dim+2*p-f)/s)+1
    Z = np.zeros((m, dim, dim, c))
    A1 = np.zeros((m, dim, dim, c))
    return Z, A1, dim, f
    
def ryan_convol_forward(A, W, b, s, p):

    Z, A1, dim, f = ryan_init_Z(A, W, s, p)
    A_pad = ryan_padding(A, [0,p,p,0])
    
    for i in range(Z.shape[0]):
        a_prev_pad = A_pad[i]
        for h in range(dim):
            for w in range(dim):
                for c in range(Z.shape[3]):

                    vert_start = h * s
                    vert_end = vert_start + f
                    horiz_start = w * s
                    horiz_end = horiz_start + f

                    a_slice_prev = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]

                    Z[i, h, w, c] = ryan_convol_step(a_slice_prev, W[:,:,:,c], b[:,:,:,c])
                    A1[i, h, w, c] = ryan_relu(Z[i, h, w, c])
                    
    cache = [A, W, b, s, p, Z]
    return A1, cache

#pool forward
def ryan_pool_forward(A, s, f, pool = "max"):

    dim = int(1 + (A.shape[1] - f) / s)
    A_pool = np.zeros((A.shape[0], dim, dim, A.shape[3]))
    
    for i in range(A.shape[0]):
        for h in range(dim):
            for w in range(dim):
                for c in range (A.shape[3]):
                    
                    vert_start = h * s
                    vert_end = vert_start + f
                    horiz_start = w * s
                    horiz_end = horiz_start + f
                    
                    a_prev_slice = A[i, vert_start:vert_end, horiz_start:horiz_end, c]
                    
                    if pool == "max":
                        A_pool[i, h, w, c] = np.max(a_prev_slice)
                    elif pool == "average":
                        A_pool[i, h, w, c] = np.mean(a_prev_slice)

    cache = (A, f, s)
    return A_pool, cache

def ryan_forward_all(X, params):
    A=X
    con_caches={}
    full_caches={}
    
    #l1
    A,cache=ryan_convol_forward(A,params['W' + str(1)],params['b' + str(1)], 2,1)
    A,cache1 = ryan_pool_forward(A, 1, 5, 'max')
    con_caches['con1'] = cache
    con_caches['pool1'] = cache1
    
    #l2
    A,cache=ryan_convol_forward(A,params['W' + str(2)],params['b' + str(2)], 2,0)
    A,cache1 = ryan_pool_forward(A, 1, 5, 'average')
    con_caches['con2'] = cache
    con_caches['pool2'] = cache1
    
    #flatten
    result = np.zeros((1020,1296))
    for i in range(A.shape[0]):
        result[i] = A[i].flatten()
    result = result.T
    
    #l3
    Z, cache = ryan_cal_Z(params['W' + str(3)], params['b' + str(3)], result)
    full_caches['full1'] = cache
    result = ryan_relu(Z)

    #l4
    Z, cache = ryan_cal_Z(params['W' + str(4)], params['b' + str(4)], result)
    full_caches['full2'] = cache
    result = ryan_sigmoid(Z)
    
    return result, con_caches, full_caches

#pool backward
def get_da_max(a_slice, da):
    
    mask = a_slice == np.max(a_slice)
    result = np.multiply(mask, da)
    return result

def get_da_avg(dz, shape):
    
    (n_H, n_W) = shape
    result = dz*np.ones(shape)/(n_H * n_W)
    return result

def ryan_pool_backward(dA, cache, pool = "max"):

    (A_prev, f, s) = cache
    m, n_H, n_W, n_C = dA.shape
    
    dA_prev = np.zeros(A_prev.shape)
    
    for i in range(m):

        a_prev = A_prev[i]
        for h in range(n_H):
            for w in range(n_W):
                for c in range(n_C):
 
                    vert_start = h
                    vert_end = vert_start + f
                    horiz_start = w
                    horiz_end = horiz_start + f
                    
                    if pool == "max":
                        a_prev_slice = a_prev[vert_start:vert_end, horiz_start:horiz_end, c]
                        dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += get_da_max(a_prev_slice, dA[i, h, w, c])
                        
                    elif pool == "average":
                        dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += get_da_avg(dA[i, h, w, c], (f,f))
                            
    return dA_prev

#convol backward
def ryan_conv_backward(dZ, cache):
    
    (A_prev, W, _, s, p, _) = cache
    (m, n_H, n_W, n_C) = dZ.shape
    
    dA_prev = np.zeros(A_prev.shape)
    dW = np.zeros(W.shape)
    db = np.zeros((1, 1, 1, W.shape[3]))

    A_prev_pad = ryan_padding(A_prev, [0,p,p,0])
    dA_prev_pad = ryan_padding(dA_prev, [0,p,p,0])
    
    for i in range(m):
        current_a = A_prev_pad[i]
        current_da = dA_prev_pad[i]
        
        for h in range(n_H):
            for w in range(n_W):
                for c in range(n_C):
                    
                    vert_start = h
                    vert_end = vert_start + W.shape[0]
                    horiz_start = w
                    horiz_end = horiz_start + W.shape[0]
                    
                    a_slice = current_a[vert_start:vert_end, horiz_start:horiz_end, :]

                    current_da[vert_start:vert_end, horiz_start:horiz_end, :] += W[:,:,:,c] * dZ[i, h, w, c]
                    dW[:,:,:,c] += a_slice * dZ[i, h, w, c]
                    db[:,:,:,c] += dZ[i, h, w, c]
                    
        if(p==0):
            dA_prev[i, :, :, :] = current_da
        else:
            dA_prev[i, :, :, :] = current_da[p:-p, p:-p, :]
    
    return dA_prev, dW, db

def ryan_backward_all(lastA, Y, full_caches, con_caches):
    m = Y.shape[1]
    grads = {}
    temp1, temp2, temp3 = 0, 0, 0
    temp1 = np.divide(1 - Y, 1 - lastA) - np.divide(Y, lastA)
    
    #l4
    current_cache = full_caches['full2']
    dZ = ryan_sigmoid_back(temp1, current_cache[-1])
    temp1, temp2, temp3 = ryan_cal_grads(dZ, current_cache[0:-1])
    grads["dA4"] = temp1
    grads["dW4"] = temp2
    grads["db4"] = temp3

    #l3
    current_cache = full_caches['full1']
    dZ = ryan_relu_back(temp1, current_cache[-1])
    temp1, temp2, temp3 = ryan_cal_grads(dZ, current_cache[0:-1])
    grads["dA3"] = temp1
    grads["dW3"] = temp2
    grads["db3"] = temp3
    
    #un-flatten
    temp1 = temp1.T.reshape((1020,9,9,16))
    
    #l2
    dA_prev = ryan_pool_backward(temp1, con_caches['pool2'], 'average')
    dZ = ryan_relu_back(dA_prev, con_caches['con2'][-1])
    dA,dW,db = ryan_conv_backward(dZ, con_caches['con2'])
    grads["dA2"] = dA
    grads["dW2"] = dW
    grads["db2"] = db

    #l1
    dA_prev = ryan_pool_backward(dA, con_caches['pool1'], 'max')
    dZ = ryan_relu_back(dA_prev, con_caches['con1'][-1])
    dA,dW,db = ryan_conv_backward(dZ, con_caches['con1'])
    grads["dA1"] = dA
    grads["dW1"] = dW
    grads["db1"] = db
    
    return grads

def ryan_gd(X_train, Y_onehot, learning_rate, iterations, path='none'):
    if path == 'none':
        params = ryan_initweight_convol(fil_size=[(4,4,3,8),(4,4,8,16)])
        params = ryan_initweight([1296,108,6],params)
    else:
        params = ryan_load_model(path)
        
    for i in range(iterations):
        A, con_caches, full_caches = ryan_forward_all(X_train, params)
        cost = ryan_compute_cost(A, Y_onehot.T)
        print("Cost: ", cost)
        grads = ryan_backward_all(A, Y_onehot.T, full_caches, con_caches)
        params = ryan_update_params(params, grads, learning_rate)
    return params, con_caches, full_caches

In [3]:
X_train = np.load('ex5_train_x.npy')
X_train = X_train/255 - 0.5
Y_train = np.load('ex5_train_y.npy')
Y_onehot = ryan_onehot(Y_train, 0)

In [4]:
# input last parameter to 'none' if you wish to train from the begining
params, con_caches, full_caches = ryan_gd(X_train, Y_onehot, 1, 1, './para_Trung_Huynh')

Cost:  2.70327036482


In [5]:
print('______________________________________________________________________________________')
print('|layer|', 'type    |','size    |','channel    |','kernel    |','stride    |','padding    |','function|')
print('|     |', '        |','        |','           |','          |','          |','           |','        |')
print('|  0  |', 'input   |','64x64   |','3          |','          |','          |','           |','        |')
print('|  1  |', 'C1      |','32x32   |','8          |','4x4       |','2         |','1          |','ReLU    |')
print('|  1  |', 'P1      |','28x28   |','8          |','5x5       |','1         |','0          |','Max     |')
print('|  2  |', 'C2      |','13x13   |','16         |','4x4       |','2         |','0          |','ReLU    |')
print('|  2  |', 'P2      |','9x9     |','16         |','5x5       |','1         |','0          |','Avg     |')
print('|  3  |', 'F3      |','1296    |','           |','          |','          |','           |','        |')
print('|  4  |', 'F4      |','108     |','           |','          |','          |','           |','ReLU    |')
print('|  5  |', 'F5      |','6       |','           |','          |','          |','           |','Sigmoid |')
print('______________________________________________________________________________________')

______________________________________________________________________________________
|layer| type    | size    | channel    | kernel    | stride    | padding    | function|
|     |         |         |            |           |           |            |         |
|  0  | input   | 64x64   | 3          |           |           |            |         |
|  1  | C1      | 32x32   | 8          | 4x4       | 2         | 1          | ReLU    |
|  1  | P1      | 28x28   | 8          | 5x5       | 1         | 0          | Max     |
|  2  | C2      | 13x13   | 16         | 4x4       | 2         | 0          | ReLU    |
|  2  | P2      | 9x9     | 16         | 5x5       | 1         | 0          | Avg     |
|  3  | F3      | 1296    |            |           |           |            |         |
|  4  | F4      | 108     |            |           |           |            | ReLU    |
|  5  | F5      | 6       |            |           |           |            | Sigmoid |
_________________________________

In [6]:
print(con_caches['con1'][0].shape)
print(con_caches['pool1'][0].shape)
print(con_caches['con2'][0].shape)
print(con_caches['pool2'][0].shape)
print((1020, 9, 9, 16))
print(full_caches['full1'][0].T.shape)
print(full_caches['full2'][0].T.shape)


(1020, 64, 64, 3)
(1020, 32, 32, 8)
(1020, 28, 28, 8)
(1020, 13, 13, 16)
(1020, 9, 9, 16)
(1020, 1296)
(1020, 108)


In [7]:
print(params['W1'].shape)
print(params['b1'].shape)
print(params['W2'].shape)
print(params['b2'].shape)
print(params['W3'].shape)
print(params['b3'].shape)
print(params['W4'].shape)
print(params['b4'].shape)


(4, 4, 3, 8)
(1, 1, 1, 8)
(4, 4, 8, 16)
(1, 1, 1, 16)
(108, 1296)
(108, 1)
(6, 108)
(6, 1)


In [8]:
# ryan_save_model(params, './para_Trung_Huynh')