In [6]:
import numpy as np
def sigmoid(x):
    s = 1/(1 + np.exp(-x))
    return s
def tanh(x):
    s = (np.exp(x) - np.exp(-x))/(np.exp(x) + np.exp(-x))
    return s
def sigmoid_grad(x):
    s = sigmoid(x)
    s_grad = s * (1-s)
    return s_grad

In [7]:
import h5py
train_path = 'data/train_catvnoncat.h5'
test_path = 'data/test_catvnoncat.h5'

def load_data(train_path, test_path):
    train_dataset = h5py.File(train_path,'r')
    train_set_X = np.array(train_dataset['train_set_x'][:])
    train_set_Y = np.array(train_dataset['train_set_y'][:])

    test_dataset = h5py.File(test_path,'r')
    test_set_X = np.array(test_dataset['test_set_x'][:])
    test_set_Y = np.array(test_dataset['test_set_y'][:])
    classes = np.array(test_dataset['list_classes'][:])
    return train_set_X, train_set_Y, test_set_X, test_set_Y, classes

train_set_X, train_set_Y, test_set_X, test_set_Y, classes = load_data(train_path, test_path)
print (train_set_X.shape)

# reshape data to vector
def reshape_data(x_dataset, y_dataset):
    x_dataset_reshape = x_dataset.reshape((x_dataset.shape[1] * x_dataset.shape[2] * x_dataset.shape[3],x_dataset.shape[0]))
    y_dataset_reshape = y_dataset.reshape((1,y_dataset.shape[0]))
    return x_dataset_reshape, y_dataset_reshape

# Run the function to check the errors
train_set_X_reshape, train_set_Y_reshape = reshape_data(train_set_X, train_set_Y)
test_set_X_reshape, test_set_Y_reshape = reshape_data(test_set_X,test_set_Y)
print('Train x dataset: ' + (str(train_set_X_reshape.shape)))
print('Train y dataset: ' + (str(train_set_Y_reshape.shape)))
print('Test x dataset: ' + (str(test_set_X_reshape.shape)))
print('Test y dataset: ' + (str(test_set_Y_reshape.shape)))

(209, 64, 64, 3)
Train x dataset: (12288, 209)
Train y dataset: (1, 209)
Test x dataset: (12288, 50)
Test y dataset: (1, 50)


**Exercise 1:** Define three variables: n_x, n_h, n_y are the size of input layer, the size of
hidden layer and the size of the output layer, respective. In this exercise, we hardly set
the hidden layer size to be 4 (n_h=4)

In [9]:
#give the input parameters of model
def layer_sizes(X,Y,hidden_nodes = 4):
    n_x = X.shape[0]
    n_h = hidden_nodes
    n_y = Y.shape[0]
    return (n_x, n_h, n_y) 

# test the function:
input_size, hidden_nodes, output_size = layer_sizes(train_set_X_reshape,train_set_Y_reshape, hidden_nodes = 4)
print('Size of input: %s' %(input_size))
print('Hidden nodes: %s' %(hidden_nodes))
print('Size of output: %s' %(output_size))

Size of input: 12288
Hidden nodes: 4
Size of output: 1


**Exercise 2:** Implement *initialize_parameters()* function. You have to initialize w and
b parameters as random matrix using numpy library (not zeros)

In [10]:
#Initialzie the parameters values by using random value (instead of 0 values)

def initialize_parameters(n_x, n_h,n_y):
    np.random.seed(2)
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h,1))
    W2 = np.random.randn(n_y,n_h) * 0.01
    b2 = np.zeros((n_y,1))
    params = {'W1': W1, 'b1':b1, 'W2':W2, 'b2':b2}
    return params

**Exercise 3:** Implement the *forward_propagation()* function.

In [11]:
def forward_propagation(X,params):
    # Load the parameters
    W1 = params['W1']
    W2 = params['W2']
    b1 = params['b1']
    b2 = params['b2']
    
    # Computing the values for the first layer
    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)
    
    # Computing for the second layer
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)
    
    cache = {'Z1':Z1, 'A1':A1, 'Z2':Z2, 'A2':A2}
    return A2,cache

**Exercise 4:** Implement the *compute_cost()* function to compute the cost value 𝐽.

In [12]:
def compute_cost(A2, Y, params):
    m = Y.shape[1] # number of examples
    log_probs = np.multiply(np.log(A2),Y) + np.multiply(np.log(1-A2), (1-Y))
    cost = - np.sum(log_probs)/m
    cost = np.squeeze(cost) #Remove single-dimensional entries from the shape of an array.
    return cost

**Exercise 5:** Implement the *backward_propagation()* function

In [15]:
def backward_propagation(X,Y, params, cache):
    m = X.shape[1] # number of examples
    W1, W2 = params['W1'], params['W2']
    A1, A2 = cache['A1'], cache['A2']
    
    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T)/m
    db2 = np.sum(dZ2, axis = 1, keepdims = True)/m
    d_gz = 1 - np.power(A1,2)
    dZ1 = np.dot(W2.T, dZ2) * d_gz
    dW1 = np.dot(dZ1, X.T)/m
    db1 = np.sum(dZ1, axis = 1, keepdims = True)/m
    
    grads = {'dW1':dW1, 'db1':db1, 'dW2':dW2, 'db2':db2}
    return grads

**Exercise 6:** Implement the *update_rule()* function to update the parameters of your
network. This functions have to use (dW1, db1, dW2, db2) in order to update the value
of (W1, b1, W2, b2)

In [16]:
def update_rule(params, grads, learning_rate):
    W1, W2 = params['W1'], params['W2']
    b1, b2 = params['b1'], params['b2']
    dW1, dW2 = grads['dW1'], grads['dW2']
    db1, db2 = grads['db1'], grads['db2']
    
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    
    params = {'W1': W1, 'W2':W2, 'b1':b1, 'b2':b2}
    return params

**Exercise 7:** Merge together the functions that you have implemented for your network
into *nn_one_hidden_model()* function.

In [20]:
# Merge all step into a function

def nn_one_hidden_model(X,Y, hidden_nodes, learning_rate, num_iteration = 2000):
    np.random.seed(3)
    # get the number of input size, hidden nodes, output size
    n_x = layer_sizes(X,Y, hidden_nodes)[0]
    n_y = layer_sizes(X,Y,hidden_nodes)[2]
    
    # initialize the parameters of layers by using (input size, hidden nodes, output size)
    params = initialize_parameters(n_x, hidden_nodes, n_y)
    W1, W2 = params['W1'], params['W2']
    b1, b2 = params['b1'], params['b2']
    
    # begin to train in num_iteration round
    for i in range(0,num_iteration):
        # calculatge the forward direction
        A2, cache = forward_propagation(X, params)
        # compute the cost
        cost = compute_cost(A2, Y, params)
        # calculate the backward direction (derivatives of w(i) and b(i))
        grads = backward_propagation(X, Y, params, cache)
        # update the parameters (w1, b1, w2, b2)
        params = update_rule(params, grads, learning_rate)
        
        if i% 500 == 0:
            print('Cost after iteration %i: %f' %(i, cost))
    return params

In [21]:
# Traing the network with dataset
params = nn_one_hidden_model(train_set_X_reshape,train_set_Y_reshape,hidden_nodes = 4, learning_rate = 0.01, num_iteration = 3000)

Cost after iteration 0: 0.697198
Cost after iteration 500: 0.643974
Cost after iteration 1000: 0.643974
Cost after iteration 1500: 0.643974
Cost after iteration 2000: 0.643974
Cost after iteration 2500: 0.643974


**Exercise 8:** Implement the *prediction()* function to use your model to predict the test
examples. Then, comparing the result with the result of Logistic Regression.

In [25]:
def prediction(X_test, parameters):
    A2, cache = forward_propagation(X_test, parameters)
    predictions = A2 > 0.5
    return predictions

predictions = prediction(test_set_X_reshape, params)
print ('Accuracy: %d' % float((np.dot(test_set_Y_reshape,predictions.T) + np.dot(1-test_set_Y_reshape,1-predictions.T))/float(test_set_Y_reshape.size)*100) + '%')

Accuracy: 34%


**Exercise 9:** Change the size of hidden layer and re-run the model (i.e. 5, 10, 20, 50).
Then, comparing the results of model among different number of hidden units.

In [28]:
params = nn_one_hidden_model(train_set_X_reshape,train_set_Y_reshape,hidden_nodes = 100, learning_rate = 0.01, num_iteration = 3000)
predictions = prediction(test_set_X_reshape, params)
print ('Accuracy: %d' % float((np.dot(test_set_Y_reshape,predictions.T) + np.dot(1-test_set_Y_reshape,1-predictions.T))/float(test_set_Y_reshape.size)*100) + '%')

Cost after iteration 0: 0.693367
Cost after iteration 500: 0.626581
Cost after iteration 1000: 0.624817
Cost after iteration 1500: 0.617724
Cost after iteration 2000: 0.612264
Cost after iteration 2500: 0.608308
Accuracy: 34%
