In [5]:
import numpy as np

Given, <br>
    nx = 2, nh = 4, ny = 1 <br>
So, <br>
    W1.shape == (nh, nx) == (4,2) <br>
    b1.shape == (nh, 1) == (4,1) <br>
    W2.shape == (ny, nh) == (1, 4) <br>
    b2.shape == (ny, 1) == (1, 1)<br>
    

    

### 


## Param initialization

In [1]:
def init_params(nx, nh, ny):
    """Create the initial weights and biases for a two-layer network.

    Weights are drawn from a standard normal distribution and scaled by
    0.01; biases start at zero.

    Args:
        nx: number of input features.
        nh: number of hidden units.
        ny: number of output units.

    Returns:
        dict with keys 'W1' (nh, nx), 'b1' (nh, 1), 'W2' (ny, nh) and
        'b2' (ny, 1).
    """
    scale = 0.01
    # Draw W1 before W2 so the sequence of random numbers consumed is fixed.
    hidden_weights = np.random.randn(nh, nx) * scale
    output_weights = np.random.randn(ny, nh) * scale
    hidden_bias = np.zeros((nh, 1))
    output_bias = np.zeros((ny, 1))

    expected_shapes = (
        (hidden_weights, (nh, nx)),
        (hidden_bias, (nh, 1)),
        (output_weights, (ny, nh)),
        (output_bias, (ny, 1)),
    )
    for array, shape in expected_shapes:
        assert array.shape == shape

    return {
        'W1': hidden_weights,
        'b1': hidden_bias,
        'W2': output_weights,
        'b2': output_bias,
    }

## Forward Prop

In [28]:
def sigmoid(Z):
    """Apply the logistic function 1 / (1 + exp(-Z)) element-wise.

    Args:
        Z: pre-activation values.

    Returns:
        (A, cache): the activations and the unchanged input Z, which is
        handed back so the caller can reuse it later.
    """
    exp_neg = np.exp(-Z)
    activation = 1 / (1 + exp_neg)
    return activation, Z
def relu(Z):
    """Apply the rectified linear unit max(0, Z) element-wise.

    Args:
        Z: numpy array of pre-activation values.

    Returns:
        (A, cache): the activations (same shape as Z) and the unchanged
        input Z, which is handed back so the caller can reuse it later.
    """
    A = np.maximum(0, Z)
    cache = Z
    # `shape` is an attribute (a tuple), not a method; calling it raised
    # TypeError on every invocation.
    assert A.shape == Z.shape
    return A, cache
def linear_forward(A, W, b):
    """Compute the linear part of a layer: Z = W . A + b.

    Args:
        A: input activations; the shape check below requires it to have
            at least two dimensions.
        W: weight matrix.
        b: bias, added to the matrix product via broadcasting.

    Returns:
        (Z, cache): Z with shape (W.shape[0], A.shape[1]) and the tuple
        (A, W, b) of the inputs.
    """
    Z = np.dot(W, A) + b
    # `shape` is an attribute (a tuple), not a method; calling it raised
    # TypeError on every invocation.
    assert Z.shape == (W.shape[0], A.shape[1])
    cache = (A, W, b)
    return Z, cache
def linear_forward_activation(A_prev, W, b, activation):
    """Run one full layer forward: linear step followed by an activation.

    Args:
        A_prev: activations fed into this layer.
        W: weight matrix of this layer.
        b: bias of this layer.
        activation: 'sigmoid' or 'relu'.

    Returns:
        (A, cache): the layer output with shape
        (W.shape[0], A_prev.shape[1]) and the pair
        (linear_cache, activation_cache) produced by the two steps.

    Raises:
        ValueError: if `activation` is not one of the supported names.
    """
    if activation == 'sigmoid':
        activation_fn = sigmoid
    elif activation == 'relu':
        activation_fn = relu
    else:
        # Previously an unknown name fell through and failed later with an
        # unrelated "local variable 'A' referenced before assignment".
        raise ValueError(
            "activation must be 'sigmoid' or 'relu', got %r" % (activation,)
        )

    # The linear step is identical for both activations, so do it once.
    Z, linear_cache = linear_forward(A_prev, W, b)
    A, activation_cache = activation_fn(Z)

    assert A.shape == (W.shape[0], A_prev.shape[1])
    cache = (linear_cache, activation_cache)
    return A, cache

## Back Prop

In [27]:
def sigmoid_back(dA, cache):
    """Backpropagate a gradient through the sigmoid activation.

    Args:
        dA: gradient with respect to the sigmoid output.
        cache: the pre-activation Z that was fed to the sigmoid.

    Returns:
        dZ: dA * s * (1 - s), where s is the sigmoid of Z; same shape as Z.
    """
    pre_activation = cache
    sig = 1 / (1 + np.exp(-pre_activation))
    grad = dA * sig * (1 - sig)
    assert grad.shape == pre_activation.shape
    return grad
def relu_back(dA, cache):
    """Backpropagate a gradient through the ReLU activation.

    Args:
        dA: gradient with respect to the ReLU output.
        cache: the pre-activation Z that was fed to the ReLU.

    Returns:
        dZ: a copy of dA with entries zeroed wherever Z <= 0; same shape
        as Z. The input dA is left untouched.
    """
    pre_activation = cache
    grad = np.array(dA, copy=True)
    inactive = pre_activation <= 0
    # The gradient does not flow through units whose input was non-positive.
    grad[inactive] = 0
    assert grad.shape == pre_activation.shape
    return grad
def linear_backward(dZ, cache):
    """Backpropagate a gradient through the linear step Z = W . A_prev + b.

    Args:
        dZ: gradient with respect to the linear output Z.
        cache: tuple (A_prev, W, b) holding the inputs of the linear step.

    Returns:
        (dA_prev, dW, db): gradients with the same shapes as A_prev, W and
        b respectively. dW and db are averaged over the m = A_prev.shape[1]
        columns.
    """
    A_prev, W, b = cache
    inv_m = 1 / A_prev.shape[1]

    grad_W = inv_m * np.dot(dZ, A_prev.T)
    grad_b = inv_m * np.sum(dZ, axis=1, keepdims=True)
    grad_A_prev = np.dot(W.T, dZ)

    for grad, reference in ((grad_A_prev, A_prev), (grad_W, W), (grad_b, b)):
        assert grad.shape == reference.shape
    return grad_A_prev, grad_W, grad_b
def linear_backward_activation(dA, cache, activation):
    """Run one full layer backward: activation gradient, then linear gradient.

    Args:
        dA: gradient with respect to this layer's output activations.
        cache: pair (linear_cache, activation_cache) for this layer.
        activation: 'sigmoid' or 'relu'.

    Returns:
        (dA_prev, dW, db): the values returned by linear_backward for this
        layer.

    Raises:
        ValueError: if `activation` is not one of the supported names.
    """
    linear_cache, activation_cache = cache
    if activation == 'sigmoid':
        # The result was previously bound to `Z` while `dZ` was used below,
        # which raised NameError.
        dZ = sigmoid_back(dA, activation_cache)
    elif activation == 'relu':
        # This branch was missing, so a 'relu' layer could not be
        # backpropagated at all.
        dZ = relu_back(dA, activation_cache)
    else:
        raise ValueError(
            "activation must be 'sigmoid' or 'relu', got %r" % (activation,)
        )
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db

## Cost function

In [2]:
def compute_cost(AL, Y):
    """Compute the binary cross-entropy cost averaged over Y.shape[1] columns.

    Args:
        AL: predicted probabilities.
        Y: target labels, laid out like AL.

    Returns:
        The cost as a zero-dimensional numpy value (shape ()).
    """
    n_examples = Y.shape[1]
    positive_term = np.dot(Y, np.log(AL).T)
    negative_term = np.dot(1 - Y, np.log(1 - AL).T)
    total = -1 / n_examples * (positive_term + negative_term)
    # Collapse the 1x1 result of the dot products to a scalar.
    total = np.squeeze(total)
    assert total.shape == ()
    return total

## Update Params

In [3]:
def update_params(params, grads, lr):
    """Apply one gradient-descent step to every layer's weights and biases.

    Args:
        params: dict with entries 'W1', 'b1', 'W2', 'b2', ...; updated in
            place.
        grads: dict with the matching entries 'dW1', 'db1', ...
        lr: learning rate multiplying each gradient.

    Returns:
        The same `params` dict that was passed in.
    """
    # Each layer contributes one 'W' and one 'b' entry.
    n_layers = len(params) // 2
    for layer in range(1, n_layers + 1):
        suffix = str(layer)
        for prefix in ('W', 'b'):
            params[prefix + suffix] -= lr * grads['d' + prefix + suffix]
    return params

# Model

In [4]:
def model(X, Y, layer_dims, num_iter=500, lr=0.01):
    """Train a two-layer network (ReLU hidden layer, sigmoid output).

    Each iteration runs a forward pass, computes the cost, backpropagates,
    applies one gradient-descent update and prints the cost.

    Args:
        X: input data passed to the first layer.
        Y: target labels compared against the sigmoid output.
        layer_dims: sequence (nx, nh, ny) of input, hidden and output sizes.
        num_iter: number of gradient-descent iterations.
        lr: learning rate.

    Returns:
        The parameter dict with keys 'W1', 'b1', 'W2', 'b2' after training.
    """
    nx, nh, ny = layer_dims
    params = init_params(nx, nh, ny)

    for _ in range(num_iter):
        # Forward pass: read the weights straight from `params` so there is
        # a single source of truth across iterations.
        A1, cache1 = linear_forward_activation(
            X, params['W1'], params['b1'], activation='relu')
        A2, cache2 = linear_forward_activation(
            A1, params['W2'], params['b2'], activation='sigmoid')

        cost = compute_cost(A2, Y)

        # Gradient of the cross-entropy cost with respect to the output A2.
        dA2 = -(np.divide(Y, A2) - np.divide(1 - Y, 1 - A2))

        # Backward pass; the gradient with respect to X is not needed.
        dA1, dW2, db2 = linear_backward_activation(
            dA2, cache2, activation='sigmoid')
        _, dW1, db1 = linear_backward_activation(
            dA1, cache1, activation='relu')

        grads = {'dW1': dW1, 'db1': db1, 'dW2': dW2, 'db2': db2}
        params = update_params(params, grads, lr)
        print(cost)
    return params