In [1]:
import numpy as np

In [3]:
def sigmoid(Z):
    """
    Compute the sigmoid of Z

    Arguments:
    Z -- A scalar or numpy array of any size.

    Return:
    A -- sigmoid of Z
    backprop_store -- returns Z for backpropagation
    """

    A = 1/(1 + np.exp(-Z))
    backprop_store = Z

    return A, backprop_store

def leaky_relu(Z):
    """
    Compute leaky_ReLU of Z

    arguments:
    Z -- A scalar of numpy array of any size

    return:
    A -- post-activation of leaky ReLU of Z, same shape as Z
    backprop_store -- returns Z for backpropagation
    """

    A = np.maximum(0.01 * Z, Z)
    assert(A.shape == Z.shape)

    backprop_store = Z

    return A, backprop_store

def relu(Z):
    """
    Compute regular ReLU of Z

    arguments:
    Z -- A scalar of numpy array of any size

    return:
    A -- post-activation of ReLU of Z, same shape as Z
    backprop_store -- returns Z for backpropagation
    """

    A = np.maximum(0, Z)
    assert(A.shape == Z.shape)

    backprop_store = Z

    return A, backprop_store


def tanh(Z):
    """
    Compute tanh of Z

    arguments:
    Z -- A scalar of numpy array of any size

    return:
    A -- post-activation of tanh of Z, same shape as Z
    backprop_store -- returns Z for backpropagation
    """

    A = (np.exp(Z)-np.exp(-Z))  /  (np.exp(Z)+np.exp(-Z))
    assert(A.shape == Z.shape)

    backprop_store = Z

    return A, backprop_store


def softmax(Z):
    """
    Compute softmax of Z

    arguments:
    Z -- A scalar of numpy array of any size

    return:
    A -- post-activation of softmax of Z, same shape as Z
    backprop_store -- returns Z for backpropagation
    """

    e_Z = np.exp(Z - np.max(Z))
    A = e_Z / e_Z.sum()
    assert(A.shape == Z.shape)

    backprop_store = Z

    return A, backprop_store

In [72]:
def initialize_parameters(initialize, dimension_of_layers):
    """
    Arguments:
    initialization -- activation used in this layer. 
        Stored as text string: "He", "Xavier", "Yoshua" "random"
    dimensions_of_layers -- array (list) of size in each layer

    Returns:
    parameters -- dictionary containing parameters "W1", "b1", "W2", "b2",...
                W[layer] -- shape (dimension_of_layers[layer], (dimension_of_layers[layer-1])
                b[layer] -- bias vector shape (dimension_of_layers[layer], 1) 
    """
    
    # np.random.seed(1)  # Use when you need to test that the different initializations are giving different numbers
    parameters = {}
    num_layers = len(dimension_of_layers)

    for layer in range(1, num_layers):  # this will loop through first hidden layer to final output layer

        if initialize == "He":
            parameters["W" + str(layer)] = np.random.randn(dimension_of_layers[layer], 
                dimension_of_layers[layer - 1]) * np.sqrt(2. / dimension_of_layers[layer - 1])
            parameters["b" + str(layer)] = np.zeros( (dimension_of_layers[layer], 1) )

        elif initialize == "Yoshua":
            parameters["W" + str(layer)] = np.random.randn(dimension_of_layers[layer], 
                dimension_of_layers[layer - 1]) * np.sqrt(2. / (dimension_of_layers[layer - 1] + dimension_of_layers[layer]))
            parameters["b" + str(layer)] = np.zeros( (dimension_of_layers[layer], 1) )

        elif initialize == "Xavier":
            parameters["W" + str(layer)] = np.random.randn(dimension_of_layers[layer], 
                dimension_of_layers[layer - 1]) * np.sqrt(1. / (dimension_of_layers[layer - 1]))
            parameters["b" + str(layer)] = np.zeros( (dimension_of_layers[layer], 1) )

        elif initialize == "random":
            parameters["W" + str(layer)] = np.random.randn(dimension_of_layers[layer], dimension_of_layers[layer - 1]) * 0.01
            parameters["b" + str(layer)] = np.zeros( (dimension_of_layers[layer], 1) )

        else:
            print("ERROR: YOU MUST CHOOSE AN INITIALIZATION TYPE: \"He\", \"Yoshua\", \"Xavier\", or \"random\"")

            assert parameters["W" + str(layer)].shape == (dimension_of_layers[layer], dimension_of_layers[layer - 1])
            assert parameters["b" + str(layer)].shape == (dimension_of_layers[layer], 1)

    return parameters

In [13]:
def linear_forward(A_prev, W, b):
    """
    Implement the linear part of a layer's forward propagation.

    Arguments:
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)

    Returns:
    A_post -- the input of the activation function, also called pre-activation parameter 
    backprop_store -- a python dictionary containing "W", "b", and "A_prev" ; stored for computing the backward pass efficiently
    """

    Z = np.dot(W, A_prev) + b
    
    assert(Z.shape == (W.shape[0], A_prev.shape[1]))
    backprop_store = (W, b, A_prev)
    
    return Z, backprop_store

In [76]:
def linear_activation_forward(A_prev, W, b, activation_type):
    """
    Implement the forward propagation for the LINEAR->ACTIVATION layer

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation_type -- the activation to be used in this layer, stored as a text string: "sigmoid", "relu",
                         "leaky relu", "tanh", "softmax"

    Returns:
    A -- the output of the activation function, also called the post-activation value 
    cache -- a python dictionary containing "linear_cache" and "activation_cache";
             stored for computing the backward pass efficiently
             "linear_cache" and "activation_cache" are caching, storing, exactly what's being passed in it's function.
    """
    
    if activation_type == "sigmoid":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    
    elif activation_type == "relu":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)

    elif activation_type == "leaky relu":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = leaky_relu(Z)

    elif activation_type == "tanh":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = tanh(Z)

    elif activation_type == "softmax":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = softmax(Z)
    
    else:
        print("ERROR: YOU MUST CHOOSE AN ACTIVATION TYPE: \"sigmoid\", \"relu\", \"leaky relu\", \"tanh\", or \"softmax\"")

    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)

    return A, cache

In [None]:
# GRADED FUNCTION: L_model_forward

def L_model_forward(X, parameters, dimension_of_layers, activation_type):
    """
    Implement forward propagation for the [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID computation
    
    Arguments:
    X -- data, numpy array of shape (input size, number of examples)
    parameters -- output of initialize_parameters_deep()
    dimension_of_layers -- array (list) of size in each layer
    
    Returns:
    AL -- last post-activation value
    caches -- list of caches containing:
                every cache of linear_activation_forward() (there are L-1 of them, indexed from 0 to L-1)
    """

    caches = []
    A = X
    L = len(dimension_of_layers)
    
    # Implement [LINEAR -> RELU]*(L-1). Add "cache" to the "caches" list.
    for layer in range(1, L-1):    # This will loop through first hidden layer to last hidden layer (before output layer L)
        A_prev = A 
        A, cache = linear_activation_forward(A_prev, parameters["W" + str(l)], parameters["b" + str(l)], "relu")
        caches.append(cache)
        ### END CODE HERE ###
    
    # Implement LINEAR -> SIGMOID. Add "cache" to the "caches" list.
    ### START CODE HERE ### (≈ 2 lines of code)
    AL, cache = linear_activation_forward(A, parameters["W" + str(L)], parameters["b" + str(L)], "sigmoid")
    caches.append(cache)
    ### END CODE HERE ###
    
    assert(AL.shape == (1,X.shape[1]))
            
    return AL, caches

In [14]:
dimension_of_layers = [3,6,1]

In [58]:
np.random.seed(1)
X = np.random.randn(3,10)
W = np.random.randn(6,3)
b = np.zeros( (6,1) )

In [78]:
linear_activation_forward(X, W, b, "sigmoid")

(array([[ 0.27943278,  0.61155971,  0.46846374,  0.63395108,  0.15876591,
          0.92400914,  0.25846549,  0.82021806,  0.48667068,  0.39575125],
        [ 0.08783176,  0.8682329 ,  0.65732966,  0.76102554,  0.18184536,
          0.93656662,  0.20465886,  0.776344  ,  0.42687888,  0.45334612],
        [ 0.03560665,  0.8909622 ,  0.88194867,  0.87466511,  0.68870824,
          0.76474065,  0.10030055,  0.2873127 ,  0.31190863,  0.78508178],
        [ 0.87009434,  0.25448526,  0.24409583,  0.23711838,  0.40734488,
          0.29112047,  0.80797199,  0.60686931,  0.61453802,  0.3169969 ],
        [ 0.76941097,  0.04873223,  0.47371406,  0.54418778,  0.78885331,
          0.45595422,  0.16774118,  0.27597057,  0.45498753,  0.76846316],
        [ 0.04448311,  0.91687192,  0.89738116,  0.8409777 ,  0.82595416,
          0.45514117,  0.19742643,  0.16137053,  0.31912398,  0.79963234]]),
 ((array([[-0.69166075, -0.39675353, -0.6871727 ],
          [-0.84520564, -0.67124613, -0.0126646 ],
  

In [73]:
parameters = initialize_parameters("e", dimension_of_layers)

ERROR: YOU MUST CHOOSE AN INITIALIZATION TYPE: "He", "Yoshua", "Xavier", or "random"


KeyError: 'W1'

In [16]:
parameters

{'W1': array([[-0.48076113, -0.64897312, -0.72554173],
        [ 0.18495424, -0.32140178, -1.18464144],
        [ 0.4291821 ,  0.78888323, -0.31393061],
        [ 0.34485952,  1.33616299,  1.50199522],
        [ 0.50671875, -1.03611296, -0.7281231 ],
        [-0.04424519, -0.25345354, -0.29557688]]),
 'W2': array([[ 0.94738169, -0.00459634,  1.24610501, -0.11114517,  0.75611052,
          0.34187951]]),
 'b1': array([[ 0.],
        [ 0.],
        [ 0.],
        [ 0.],
        [ 0.],
        [ 0.]]),
 'b2': array([[ 0.]])}