In [1]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
from testCases import *
from dnn_utils import sigmoid, sigmoid_backward, relu, relu_backward
from public_tests import *

%matplotlib inline
plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

%load_ext autoreload
%autoreload 2

np.random.seed(1)

In [2]:
from lr_utils import load_dataset
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()

In [3]:
train_set_x_orig.shape

(209, 64, 64, 3)

In [4]:
m_train = train_set_x_orig.shape[0]
m_test = test_set_x_orig.shape[0]
num_px = test_set_x_orig[:,1].shape[1]

In [5]:
train_set_x_flatten = train_set_x_orig.reshape(m_train, -1).T
test_set_x_flatten = test_set_x_orig.reshape(m_test, -1).T

In [6]:
train_set_x = train_set_x_flatten / 255.
test_set_x = test_set_x_flatten / 255.

In [46]:
X = train_set_x
Y = train_set_y

<a name='2'></a>
## 2 - Outline

To build your neural network, you'll be implementing several "helper functions." These helper functions will be used in the next assignment to build a two-layer neural network and an L-layer neural network. 

Each small helper function will have detailed instructions to walk you through the necessary steps. Here's an outline of the steps in this assignment:

- Initialize the parameters for a two-layer network and for an $L$-layer neural network
- Implement the forward propagation module (shown in purple in the figure below)
     - Complete the LINEAR part of a layer's forward propagation step (resulting in $Z^{[l]}$).
     - The ACTIVATION function is provided for you (relu/sigmoid)
     - Combine the previous two steps into a new [LINEAR->ACTIVATION] forward function.
     - Stack the [LINEAR->RELU] forward function L-1 time (for layers 1 through L-1) and add a [LINEAR->SIGMOID] at the end (for the final layer $L$). This gives you a new L_model_forward function.
- Compute the loss
- Implement the backward propagation module (denoted in red in the figure below)
    - Complete the LINEAR part of a layer's backward propagation step
    - The gradient of the ACTIVATE function is provided for you(relu_backward/sigmoid_backward) 
    - Combine the previous two steps into a new [LINEAR->ACTIVATION] backward function
    - Stack [LINEAR->RELU] backward L-1 times and add [LINEAR->SIGMOID] backward in a new L_model_backward function
- Finally, update the parameters

<img src="images/final outline.png" style="width:800px;height:500px;">
<caption><center><b>Figure 1</b></center></caption><br>


**Note**:

For every forward function, there is a corresponding backward function. This is why at every step of your forward module you will be storing some values in a cache. These cached values are useful for computing gradients. 

In the backpropagation module, you can then use the cache to calculate the gradients. Don't worry, this assignment will show you exactly how to carry out each of these steps! 

In [47]:
def relu(Z:np.array):
    A = np.maximum(0, Z)
    assert A.shape == Z.shape
    return(A, Z)

In [48]:
def sigmoid(Z:np.array):
    denom = 1+np.exp(-(Z))
    return(np.divide(1,denom), Z)

In [49]:
def linear_proj(X, W, b):
    return(np.dot(W, X) + b)

In [50]:
n_x = train_set_x.shape[0]
n_h = 1000
n_y = 1

layer_dims = [n_x, n_h, 50, n_y]
def initialize(layer_dims):
    # need to initialize all the weights and biases that you are going to fit in the model.
    # this means you are probably going to have to set those ahead of time, since the structure of the model cannot change once it is created.
    
    L = len(layer_dims)
    params = {}
    for l in range(1,L): # skip l = 0, input
        params['W'+str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) * 0.01
        params['b'+str(l)] = np.random.randn(layer_dims[l],1)
        
        # should be clear, but check anyways
        assert(params['W' + str(l)].shape == (layer_dims[l], layer_dims[l - 1]))
        assert(params['b' + str(l)].shape == (layer_dims[l], 1))
    return(params)

In [51]:
def linear_forward(A,W,b):
    # needs to calculate the linear transform given the weights and biases put into it
    # calculate actvation in first layer
    Z = np.dot(W, A) + b
    cache = (A, W, b)
    return(Z, cache)

In [52]:
def activation(Z, t:str):
    if t == 'relu':
        A, act_cache = relu(Z)
    if t == 'sigmoid':
        A, act_cache = sigmoid(Z)
    cache = (Z, act_cache)
    return(A, Z)

In [53]:
def linear_forward_activation(A_prev, W, b, t:str):
    Z, cache = linear_forward(A_prev, W, b)
    act_cache = []
    A = None
    if t == 'relu':
        A, act_cache = relu(Z)
    elif t == 'sigmoid':
        A, act_cache = sigmoid(Z)
    return(A,act_cache)

In [54]:
def propagation(X, layer_dims):
    # initialize weights and biases
    params = initialize(layer_dims) # get the weights and biases
    
    # for each layer calculate weights and biases by calling linear_forward_activation
    L = len(layer_dims)
    act_caches = []
    A_prev = X
    for i in range(1, L-1): # the last layer is a sigmoid
        A_prev, cache = linear_forward_activation(A_prev, params['W'+str(i)], params['b'+str(i)], 'relu')
        
    # once you hit the last year, you need to output a set of sigmoids to squash the signal into 0,1 for a binary problem.
    return(linear_forward_activation(A_prev, params['W'+str(L-1)], params['b'+str(L-1)], 'sigmoid'))

Compute the cross-entropy cost $J$, using the following formula: $$-\frac{1}{m} \sum\limits_{i = 1}^{m} (y^{(i)}\log\left(a^{[L] (i)}\right) + (1-y^{(i)})\log\left(1- a^{[L](i)}\right)) \tag{7}$$

In [55]:
Aout = propagation(X, layer_dims)

In [78]:
# GRADED FUNCTION: compute_cost
def compute_cost(AL, Y):
    """
    Implement the cost function defined by equation (7).

    Arguments:
    AL -- probability vector corresponding to your label predictions, shape (1, number of examples)
    Y -- true "label" vector (for example: containing 0 if non-cat, 1 if cat), shape (1, number of examples)

    Returns:
    cost -- cross-entropy cost
    """
    
    m = Y.shape[1]
    cost = -(1/m)*np.sum((Y*np.log(AL)+(1-Y)*np.log(1-AL)))
    cost = np.squeeze(cost)      # To make sure your cost's shape is what we expect (e.g. this turns [[17]] into 17).

    return cost

In [79]:
compute_cost(Aout[0], Y)

1.407241403857709

### forward code is complete, but now we need to go backwards

### Driver code

In [127]:
out = initialize(layer_dims)

In [129]:
linear_forward(X, out['W1'], out['b1']);

In [152]:
Aout = propagation(X, layer_dims)

In [149]:
compute_cost(Aout[0], np.array([1]))

IndexError: tuple index out of range