In [7]:
import numpy as np

In [5]:
# Number of units per layer, input layer first: 3 inputs -> 6 hidden units -> 1 output unit.
dimension_of_layers = [3, 6, 1]

In [46]:
# Toy inputs for exercising a single layer by hand:
# A_prev has 4 rows and 3 identical columns, W is a (2, 4) matrix of ones and b a (2, 1) column of zeros.
A_prev = np.array([
    [2, 2, 2],
    [.2, .2, .2],
    [-.2, -.2, -.2],
    [-10, -10, -10],
])
W = np.ones(shape=(2, 4))
b = np.zeros(shape=(2, 1))

In [None]:
# Build the weight/bias dictionary with "He" scaling.
# NOTE: initialize_parameters is defined in a later cell; run that cell before this one.
parameters = initialize_parameters(initialize="He", dimension_of_layers=dimension_of_layers)

In [None]:
# Smoke-test one LINEAR -> SIGMOID layer on the toy inputs and display the activations.
# NOTE: linear_activation_forward is defined in a later cell; run that cell before this one.
A, cache = linear_activation_forward(A_prev=A_prev, W=W, b=b, activation_type="sigmoid")
A

In [3]:
def sigmoid(Z):
    """
    Apply the logistic sigmoid, 1 / (1 + e^(-Z)), element-wise.

    Arguments:
    Z -- A scalar or numpy array of any size.

    Return:
    A -- sigmoid of Z
    backprop_store -- Z itself, handed back unchanged for use during backpropagation
    """

    denominator = 1 + np.exp(-Z)
    A = 1 / denominator

    # The pre-activation input doubles as the backprop cache.
    return A, Z

def leaky_relu(Z):
    """
    Compute leaky ReLU of Z: Z where Z is positive, 0.01 * Z otherwise.

    arguments:
    Z -- A scalar or numpy array of any size

    return:
    A -- post-activation of leaky ReLU of Z, same shape as Z
    backprop_store -- returns Z for backpropagation
    """

    # For Z >= 0 we have Z >= 0.01 * Z, and for Z < 0 we have 0.01 * Z > Z,
    # so the element-wise maximum selects the correct branch everywhere.
    A = np.maximum(0.01 * Z, Z)

    # np.shape() also handles plain Python scalars, which have no .shape attribute.
    assert np.shape(A) == np.shape(Z)

    backprop_store = Z

    return A, backprop_store

def relu(Z):
    """
    Compute regular ReLU of Z: max(0, Z), element-wise.

    arguments:
    Z -- A scalar or numpy array of any size

    return:
    A -- post-activation of ReLU of Z, same shape as Z
    backprop_store -- returns Z for backpropagation
    """

    A = np.maximum(0, Z)

    # np.shape() also handles plain Python scalars, which have no .shape attribute.
    assert np.shape(A) == np.shape(Z)

    backprop_store = Z

    return A, backprop_store


def tanh(Z):
    """
    Compute the hyperbolic tangent of Z, element-wise.

    arguments:
    Z -- A scalar or numpy array of any size

    return:
    A -- post-activation of tanh of Z, same shape as Z
    backprop_store -- returns Z for backpropagation
    """

    # np.tanh saturates cleanly to +/-1 for large |Z|. The explicit
    # (e^Z - e^-Z) / (e^Z + e^-Z) ratio overflows to inf/inf = NaN there.
    A = np.tanh(Z)

    # np.shape() also handles plain Python scalars, which have no .shape attribute.
    assert np.shape(A) == np.shape(Z)

    backprop_store = Z

    return A, backprop_store


def softmax(Z):
    """
    Compute softmax of Z.

    For inputs with two or more dimensions the normalisation runs along axis 0,
    so every column (one example, in the (units, examples) layout produced by
    linear_forward) sums to 1 independently. Scalars and 1-D inputs are
    normalised over all of their elements.

    arguments:
    Z -- A scalar or numpy array of any size

    return:
    A -- post-activation of softmax of Z, same shape as Z
    backprop_store -- returns Z for backpropagation
    """

    Z_arr = np.asarray(Z)

    if Z_arr.ndim >= 2:
        # Subtract the per-column maximum before exponentiating so np.exp cannot overflow;
        # softmax is unchanged by adding a constant to every entry of a column.
        e_Z = np.exp(Z_arr - np.max(Z_arr, axis=0, keepdims=True))
        A = e_Z / np.sum(e_Z, axis=0, keepdims=True)
    else:
        e_Z = np.exp(Z_arr - np.max(Z_arr))
        A = e_Z / e_Z.sum()

    assert A.shape == Z_arr.shape

    backprop_store = Z

    return A, backprop_store

In [8]:
def initialize_parameters(initialize, dimension_of_layers):
    """
    Create randomly initialised weights and zero biases for every layer after the input layer.

    Arguments:
    initialize -- name of the weight-scaling scheme, stored as a text string:
        "He"     -- standard normal * sqrt(2 / size of previous layer)
        "Yoshua" -- standard normal * sqrt(2 / (size of previous layer + size of current layer))
        "Xavier" -- standard normal * sqrt(1 / size of previous layer)
        "random" -- standard normal * 0.01
    dimension_of_layers -- array (list) of the size of each layer, input layer first

    Returns:
    parameters -- dictionary containing parameters "W1", "b1", "W2", "b2",...
                W[layer] -- shape (dimension_of_layers[layer], dimension_of_layers[layer-1])
                b[layer] -- bias vector of zeros, shape (dimension_of_layers[layer], 1)

    Raises:
    ValueError -- if `initialize` is not one of the supported scheme names
    """

    supported = ("He", "Yoshua", "Xavier", "random")
    if initialize not in supported:
        raise ValueError(
            "ERROR: YOU MUST CHOOSE AN INITIALIZATION TYPE: expected one of {}, got {!r}".format(
                supported, initialize)
        )

    # np.random.seed(1)  # Use when you need to test that the different initializations are giving different numbers
    parameters = {}
    num_layers = len(dimension_of_layers)

    for layer in range(1, num_layers):  # first hidden layer through the final output layer
        size_current = dimension_of_layers[layer]
        size_previous = dimension_of_layers[layer - 1]

        # Only the scale factor differs between schemes; the random draw is shared.
        if initialize == "He":
            scale = np.sqrt(2. / size_previous)
        elif initialize == "Yoshua":
            scale = np.sqrt(2. / (size_previous + size_current))
        elif initialize == "Xavier":
            scale = np.sqrt(1. / size_previous)
        else:  # "random"
            scale = 0.01

        weights = np.random.randn(size_current, size_previous) * scale
        bias = np.zeros((size_current, 1))

        assert weights.shape == (size_current, size_previous)
        assert bias.shape == (size_current, 1)

        parameters["W" + str(layer)] = weights
        parameters["b" + str(layer)] = bias

    return parameters

In [5]:
def linear_forward(A_prev, W, b):
    """
    Compute the affine (pre-activation) step of one layer: Z = W . A_prev + b.

    Arguments:
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)

    Returns:
    Z -- the input of the activation function, also called the pre-activation value
    backprop_store -- the tuple (W, b, A_prev), kept so the backward pass can reuse the inputs
    """

    weighted_sum = np.dot(W, A_prev)
    Z = weighted_sum + b

    # One row per unit of the current layer, one column per example.
    expected_shape = (W.shape[0], A_prev.shape[1])
    assert Z.shape == expected_shape

    return Z, (W, b, A_prev)

In [70]:
def linear_activation_forward(A_prev, W, b, activation_type):
    """
    Implement the forward propagation for the LINEAR->ACTIVATION layer

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation_type -- the activation to be used in this layer, stored as a text string: "sigmoid", "relu",
                         "leaky relu", "tanh", "softmax"

    Returns:
    A -- the output of the activation function, also called the post-activation value
    cache -- the tuple (linear_cache, activation_cache), stored for computing the backward pass efficiently.
             linear_cache is what linear_forward returns as its cache;
             activation_cache is what the chosen activation function returns as its cache.

    Raises:
    ValueError -- if activation_type is not one of the supported names
    """

    # Resolve the activation first so an unsupported name fails fast with a clear
    # error instead of printing and then crashing on an unbound variable.
    if activation_type == "sigmoid":
        activation = sigmoid
    elif activation_type == "relu":
        activation = relu
    elif activation_type == "leaky relu":
        activation = leaky_relu
    elif activation_type == "tanh":
        activation = tanh
    elif activation_type == "softmax":
        activation = softmax
    else:
        raise ValueError(
            "ERROR: YOU MUST CHOOSE AN ACTIVATION TYPE: expected one of "
            "'sigmoid', 'relu', 'leaky relu', 'tanh', 'softmax', got {!r}".format(activation_type)
        )

    # The LINEAR step is identical for every activation, so it is done once here.
    Z, linear_cache = linear_forward(A_prev, W, b)
    A, activation_cache = activation(Z)

    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)

    return A, cache

In [9]:
# Build the weight/bias dictionary for the network using the "Xavier" scaling option.
parameters = initialize_parameters(initialize="Xavier", dimension_of_layers=dimension_of_layers)

In [14]:
# Each layer contributes two entries ("W<k>" and "b<k>") to `parameters`,
# so halving the entry count gives the number of layers excluding the input layer.
len(parameters)//2

2

In [74]:
# Smoke-test one LINEAR -> LEAKY RELU layer on the toy inputs A_prev, W and b.
A, cache = linear_activation_forward(A_prev=A_prev, W=W, b=b, activation_type="leaky relu")

In [69]:
# Inspect the first layer's weight matrix stored under the "W1" key.
parameters["W1"]

array([[ 0.3196312 ,  0.99876919,  0.15412092],
       [ 0.82931795, -1.22734271, -0.05869624],
       [-0.49346818,  0.41789699, -0.68197933],
       [ 0.29509439,  0.01623048,  0.15418425],
       [ 0.25720141,  0.22962094, -0.15783222],
       [ 0.88814178, -0.00657929,  0.52418399]])

In [3]:
# Sanity check of range() semantics for layer indexing.
# With layers numbered input=0, hidden=1, hidden=2, output=3, range(4) yields 0, 1, 2, 3:
# the stop value itself is never produced.
for i in range(4):
    print(i)

0
1
2
3


In [None]:
# GRADED FUNCTION: L_model_forward

def L_model_forward(X, parameters, dimension_of_layers):
    """
    Run a full forward pass: LINEAR->RELU for layers 1..L-1, then LINEAR->SIGMOID for layer L.

    Arguments:
    X -- data, numpy array of shape (input size, number of examples)
    parameters -- dictionary holding "W1", "b1", ..., "WL", "bL"; L is taken as len(parameters) // 2
    dimension_of_layers -- not read by this function; kept in the signature for callers that pass it

    Returns:
    AL -- last post-activation value; asserted to have shape (1, number of examples)
    caches -- list with one cache from linear_activation_forward() per layer
                (L entries in total, at list indices 0 to L-1)
    """

    num_layers = len(parameters) // 2   # layers in the network, not counting the input layer
    caches = []
    activation = X

    # Hidden layers: range(1, num_layers) visits layers 1 .. num_layers - 1.
    for layer in range(1, num_layers):
        activation, layer_cache = linear_activation_forward(
            activation,
            parameters["W" + str(layer)],
            parameters["b" + str(layer)],
            "relu",
        )
        caches.append(layer_cache)

    # Output layer: LINEAR -> SIGMOID on the last hidden activation.
    last = str(num_layers)
    AL, layer_cache = linear_activation_forward(
        activation,
        parameters["W" + last],
        parameters["b" + last],
        "sigmoid",
    )
    caches.append(layer_cache)

    assert AL.shape == (1, X.shape[1])

    return AL, caches