In [4]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
from testCases_v4 import *
from dnn_utils_v2 import sigmoid, sigmoid_backward, relu, relu_backward
%matplotlib inline

  from ._conv import register_converters as _register_converters


In [5]:
plt.rcParams['figure.figsize']=(5.0, 4.0) 
plt.rcParams['image.interpolation']='nearest'
plt.rcParams['image.cmap']='gray'

%load_ext autoreload
%autoreload 2
np.random.seed(1)

## Initialization 

In [16]:
def initilize_parameters(n_x,n_h,n_y):
    """
    number of units in:
    n_x: input layer
    n_h: hidden layer
    n_y: output layer
    Returns: 
    dictionary of parameters: W1, b1, W2, b2
    """
    
    W1=np.random.randn(n_h, n_x)*0.01
    b1=np.zeros((n_h,1))
    W2=np.random.randn(n_y, n_h)*0.01
    b2=np.zeros((n_y, 1))
    
    assert(W1.shape==(n_h, n_x))
    assert(b1.shape==(n_h,1))
    assert(W2.shape==(n_y, n_h))
    assert(b2.shape==(n_y,1))
    
    parameters={'W1':W1, 'b1':b1,'W2':W2, 'b2':b2}
    
    return parameters

In [20]:
# test: 
parameters=initilize_parameters(5,3,1)
print('Parameter W1:\n{0}\nParameter b1:\n{1}\nParameter W2:\n{2}\nParameter b2\n:{3}'.format(parameters['W1'], parameters['b1'], 
                                                  parameters['W2'], parameters['b2']))


Parameter W1:
[[ 0.00698032 -0.00447129  0.01224508  0.00403492  0.00593579]
 [-0.01094912  0.00169382  0.00740556 -0.00953701 -0.00266219]
 [ 0.00032615 -0.01373117  0.00315159  0.00846161 -0.00859516]]
Parameter b1:
[[0.]
 [0.]
 [0.]]
Parameter W2:
[[ 0.00350546 -0.01312283 -0.00038696]]
Parameter b2
:[[0.]]


#### for other layers

In [26]:
def init_param_deep(layer_dims):
    '''
    input: size of each layer
    output: dictionary of parameters for each layer
    '''
    
    parameters={}
    L=len(layer_dims)
    
    for i in range(1,L):
        parameters['W{}'.format(i)]=np.random.randn(layer_dims[i], layer_dims[i-1])*0.01
        parameters['b{}'.format(i)]=np.zeros((layer_dims[i], 1))
        
        assert(parameters['W{}'.format(i)].shape==(layer_dims[i], layer_dims[i-1]))
        assert(parameters['b{}'.format(i)].shape==(layer_dims[i],1))
        
    return parameters       


In [27]:
#test
parameters=init_param_deep([5,4,1])
print('Parameter W1:\n{0}\nParameter b1:\n{1}\nParameter W2:\n{2}\nParameter b2\n:{3}'.format(parameters['W1'], parameters['b1'], 
                                                  parameters['W2'], parameters['b2']))

Parameter W1:
[[-0.00997027  0.00248799 -0.00296641  0.00495211 -0.00174703]
 [ 0.00986335  0.00213534  0.021907   -0.01896361 -0.00646917]
 [ 0.00901487  0.02528326 -0.00248635  0.00043669 -0.00226314]
 [ 0.01331457 -0.00287308  0.0068007  -0.00319802 -0.01272559]]
Parameter b1:
[[0.]
 [0.]
 [0.]
 [0.]]
Parameter W2:
[[ 0.00313548  0.00503185  0.01293226 -0.00110447]]
Parameter b2
:[[0.]]


## Forward propagation 

In [31]:
def linear_forward (A,W,b):
    ''' input: 
       A - activation from previous layer, shape: (size of prev. layer, number of examples)
       W - weights matrix, shape: (size of current layer, size of previous layer)
       b - bias vector, shape: (size of current layer, 1)
       returns:
       Z - pre-activation parameter
       cache - dictionary, stores A, W, b for backpropagation  
   '''
    Z=np.dot(W,A)+b
    
    assert(Z.shape==(W.shape[0], A.shape[1]))
    cache=(A,W,b)
    
    return Z, cache

In [32]:
#test
A = np.random.randn(3,2)
W = np.random.randn(1,3)
b = np.random.randn(1,1) 

a,b=linear_forward(A,W,b)
print('parameter Z:\n{0}\ncache: {1}'.format(a,b))

parameter Z:
[[0.80822865 2.69075905]]
cache: (array([[-0.61736206,  0.5627611 ],
       [ 0.24073709,  0.28066508],
       [-0.0731127 ,  1.16033857]]), array([[0.36949272, 1.90465871, 1.1110567 ]]), array([[0.6590498]]))


#### activation functions 

In [59]:
def sigmoid (Z):
    '''
    inputs: Z - activation function
    returns: 
    A - matrix, shape same as Z
    cache - Z (for backpropagation)
    '''
    A=1/(1+np.exp(-Z))
    cache = Z
    assert (A.shape==Z.shape)
    return A, cache


def relu (Z):
    A = np.maximum(0,Z)
    cache=Z
    assert(A.shape==Z.shape)
    return A, cache


#### activation 

In [77]:
def linear_activation_forward(A_prev, W,b,activation):
    '''
    inputs: 
    A_prev: activation from previous layer, shape: (size of prev. layer, number of examples)
    W - weights, shape: (size of current layer, size of previous layer)
    b - bias vector, shape: (size of current layer, 1)
    activation: string 'sigmoid' or 'relu'
    returns: 
    A - activation function matrix,
    cache - dictionary, stores 'linear_cache' and 'activation_cache' for backpropagation
    '''
    
    Z,linear_cache=linear_forward(A_prev, W, b)
    
    if activation=='sigmoid': 
        A, activation_cache = sigmoid(Z)
        
    elif activation=='relu':
        A, activation_cache=relu (Z)
        
    assert(A.shape==(W.shape[0], A_prev.shape[1]))
    cache=(linear_cache, activation_cache)
    
    return A, cache
        
    

In [78]:
#test
a = np.random.randn(3,2)
b = np.random.randn(1,3)
c = np.random.randn(1,1) 

A, linear_activation_cache=linear_activation_forward(a,b,c, activation='sigmoid')
print ('With sigmoid A={0}'.format(A))
A,linear_activation_cache=linear_activation_forward(a,b,c,activation='relu')
print ('With ReLu A={0}'.format(A))


With sigmoid A=[[0.58224635 0.73673706]]
With ReLu A=[[0.33200174 1.02907776]]


#### model 

In [80]:
def L_model_forward(X, parameters):
    '''
    input: 
    X - data, shape(input size, number of examples)
    parameters: from init_param_deep()
    return:
    AL - last activation function
    caches - list of caches (from linear_activation_forward())
    '''
    
    caches=[]
    A=X
    L=len(parameters)//2
    
    #relu first
    for i in range (1,L):
        A_prev=A
        A,cache=linear_activation_forward(A_prev, parameters['W{}'.format(i)],
                                          parameters['b{}'.format(i)], 
                                          activation='relu')
        caches.append(cache)
        
    AL, cache=linear_activation_forward(A, parameters['W{}'.format(i)],
                                          parameters['b{}'.format(i)], 
                                          activation='sigmoid')
    caches.append(cache)
    assert(AL.shape==(1,X.shape[1]))
        
    return AL, caches

In [81]:
# test
def L_model_forward_test_case_2hidden():
    np.random.seed(6)
    X = np.random.randn(5,4)
    W1 = np.random.randn(4,5)
    b1 = np.random.randn(4,1)
    W2 = np.random.randn(3,4)
    b2 = np.random.randn(3,1)
    W3 = np.random.randn(1,3)
    b3 = np.random.randn(1,1)
  
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2,
                  "W3": W3,
                  "b3": b3} 
    return X, parameters

X, parameters = L_model_forward_test_case_2hidden()
AL, caches = L_model_forward(X, parameters)
print("AL = " + str(AL))
print("Length of caches list = " + str(len(caches)))

ValueError: shapes (3,4) and (3,4) not aligned: 4 (dim 1) != 3 (dim 0)