# Build my own deep learning functions

## Purpose

Learn by practicing. The notation follows Andrew Ng's Coursera deep learning course.

### Build the forward_prop function
Note: If there is an error importing Python modules while running this notebook in VS Code, make sure that both the VS Code Python interpreter and the IPython kernel are set properly.

### Model structure
The model consists of $L-1$ ReLU layers followed by one sigmoid output layer.

In [1]:
import numpy as np
import h5py
import matplotlib.pyplot as plt

# Render matplotlib figures inside the notebook output.
%matplotlib inline
# Seed NumPy's global random generator so the np.random calls later in the notebook are repeatable.
np.random.seed(1) 
# TODO: with certain seeds, e.g. seed=1, the cost comes out as NaN -- investigate.


In [2]:
# Below function taken from course assignments
def load_data():
    '''
    Load the train and test splits from the two HDF5 files under datasets/.

    Returns a 5-tuple:
        train_set_x_orig: array read from "train_set_x" in train_catvnoncat.h5
        train_set_y_orig: "train_set_y" labels reshaped to a row vector (1, m_train)
        test_set_x_orig:  array read from "test_set_x" in test_catvnoncat.h5
        test_set_y_orig:  "test_set_y" labels reshaped to a row vector (1, m_test)
        classes:          array read from "list_classes" in the test file
    '''
    # The `with` blocks close the HDF5 handles as soon as the data is read.
    # Slicing with [:] and wrapping in np.array copies the data into memory,
    # so the returned arrays stay valid after the files are closed.
    with h5py.File('datasets/train_catvnoncat.h5', "r") as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])

    with h5py.File('datasets/test_catvnoncat.h5', "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])
        classes = np.array(test_dataset["list_classes"][:])

    # Labels are stored as 1-D vectors; turn them into (1, m) row vectors.
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

In [3]:
def relu(Z):
    '''
    Element-wise rectified linear unit.

    Z: Input to the activation function (scalar or array of any shape,
       including arrays with zero elements)

    A: Output of the relu function, max(0, Z) element-wise
    '''
    A = np.maximum(0, Z)
    # Sanity check on the output. It also trips when Z contains NaN, because
    # NaN propagates through np.maximum and `nan >= 0.0` is False.
    # np.min raises ValueError on a zero-size array, so skip the check when
    # there are no elements to inspect.
    assert np.size(A) == 0 or np.min(A) >= 0.0
    return A


In [4]:
def forward_prop(X, Y, parameters):
    '''
    Run a forward pass through L-1 relu layers followed by one sigmoid layer
    and return the mean binary cross-entropy cost.

    X: Input data. (n0, m). n0: # of input features (rows); m: # of examples (columns)
    Y: Labels. (1, m)
    parameters: The model parameters. A dict holding 'W1', 'b1', ..., 'WL', 'bL',
        where L = len(parameters) // 2. Each Wl must be multipliable with the
        previous layer's activations (np.dot(Wl, A_prev)) and bl must have the
        same number of rows as Wl.

    cost: The return. Scalar cross-entropy cost averaged over the m examples
    '''
    L = len(parameters) // 2
    m = Y.shape[1]

    # Hidden layers 1 .. L-1: linear step followed by relu.
    A = X
    for l in range(1, L):
        W = parameters['W'+str(l)]
        b = parameters['b'+str(l)]

        assert(W.shape[0] == b.shape[0])
        A = relu(np.dot(W, A) + b)

    # Output layer: linear step only; the sigmoid is folded into the loss below.
    ZL = np.dot(parameters['W'+str(L)], A) + parameters['b'+str(L)]

    # Binary cross-entropy computed directly from the logits ZL:
    #   -y*log(sigmoid(z)) - (1-y)*log(1-sigmoid(z))
    #     = max(z, 0) - z*y + log(1 + exp(-|z|))
    # Evaluating sigmoid first and then taking the log overflows in exp() for
    # large |z| and produces log(0) = -inf; multiplied by a zero label that
    # becomes NaN. This form never exponentiates a positive number, so the
    # cost stays finite.
    losses = np.maximum(ZL, 0) - ZL * Y + np.log1p(np.exp(-np.abs(ZL)))
    cost = np.sum(losses) / m
    assert(cost.shape == ())

    return cost


In [5]:
# load data and pre-processing
train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes = load_data()

# Flatten every example into a single column: (m, ...) -> (n0, m),
# which is the (features, examples) layout forward_prop multiplies against.
train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T

# Scale the inputs by 255 (presumably 8-bit pixel intensities -- confirm against the dataset).
# The same scaling is applied to both splits so the test inputs are on the
# scale the model is trained on.
train_set_x = train_set_x_flatten / 255.0
test_set_x = test_set_x_flatten / 255.0


### Lesson learned
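When the weights are not initialized to small values (i.e., without the `* 0.01` scaling), the cost computation produces many NaNs because the outputs are either too small or too large: the sigmoid output saturates at 0 or 1, so the `log` terms in the cost receive 0.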

In [None]:
# Layer sizes: input features, two hidden relu layers (8 and 4 units), one sigmoid output unit.
layer_dims = [train_set_x.shape[0], 8, 4, 1]
WEIGHT_SCALE = 0.01  # the down-scaling is important: large initial weights make the cost NaN

# Weights are drawn from a zero-mean normal distribution (randn) rather than
# uniform [0, 1) (rand): all-positive weights on non-negative inputs push
# every relu unit into its active region from the start.
# Biases start at zero.
parameters = {}
for i in range(1, len(layer_dims)):
    parameters['W'+str(i)] = np.random.randn(layer_dims[i], layer_dims[i-1]) * WEIGHT_SCALE
    parameters['b'+str(i)] = np.zeros((layer_dims[i], 1))

# Cost of the untrained model on the training set.
initial_cost = forward_prop(train_set_x, train_set_y_orig, parameters)
print(initial_cost)
