In [2]:
import numpy as np
import h5py
import matplotlib.pyplot as plt

# Load custom utility functions
from testCases import *
from dnn_utils import *

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Plot defaults: figure size (width, height), no smoothing when showing
# images, and a grayscale colormap.
plt.rcParams['figure.figsize'] = (5.0, 4.0)
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Seed NumPy's global RNG so the np.random.randn draws made later in this
# notebook are reproducible on a fresh Restart & Run All.
np.random.seed(1)

In [3]:
def initialize_parameters_deep(layer_dims):
    """Create the initial weight matrices and bias vectors for a dense network.

    Arguments:
    layer_dims -- sequence of layer sizes; entry l-1 is the fan-in and entry l
                  is the fan-out of layer l (so entry 0 is the input size)

    Returns:
    parameters -- dict mapping "W1", "b1", ..., "W{L-1}", "b{L-1}" to arrays:
                  Wl has shape (layer_dims[l], layer_dims[l-1]) and is drawn
                  from np.random.randn (standard normal, no scaling);
                  bl is a zero column vector of shape (layer_dims[l], 1)

    Note: draws come from NumPy's global RNG, so the result depends on the
    current seed/state of np.random.
    """
    parameters = {}

    # Pair each layer size with the size of the layer feeding into it.
    fan_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for layer_idx, (n_in, n_out) in enumerate(fan_pairs, start=1):
        w_key = 'W' + str(layer_idx)
        b_key = 'b' + str(layer_idx)

        parameters[w_key] = np.random.randn(n_out, n_in)
        parameters[b_key] = np.zeros((n_out, 1))

        assert parameters[w_key].shape == (n_out, n_in)

    return parameters

In [4]:
# Sanity check: build parameters for layer sizes [5, 4, 3] and print them.
# The values depend on the global NumPy seed set in the setup cell.
parameters = initialize_parameters_deep([5,4,3])
print("W1 = ",parameters['W1'],"\nb1 = ",parameters['b1'],"\nW2 = ",parameters['W2'],"\nb2 = ",parameters['b2'])

W1 =  [[ 1.62434536 -0.61175641 -0.52817175 -1.07296862  0.86540763]
 [-2.3015387   1.74481176 -0.7612069   0.3190391  -0.24937038]
 [ 1.46210794 -2.06014071 -0.3224172  -0.38405435  1.13376944]
 [-1.09989127 -0.17242821 -0.87785842  0.04221375  0.58281521]] 
b1 =  [[0.]
 [0.]
 [0.]
 [0.]] 
W2 =  [[-1.10061918  1.14472371  0.90159072  0.50249434]
 [ 0.90085595 -0.68372786 -0.12289023 -0.93576943]
 [-0.26788808  0.53035547 -0.69166075 -0.39675353]] 
b2 =  [[0.]
 [0.]
 [0.]]


In [5]:
def linear_forward(A, W, b):
    """Compute the linear (pre-activation) part of one layer: Z = W . A + b.

    Arguments:
    A -- input to the layer; the shape check below requires a 2-D array
         whose second axis has length A.shape[1]
    W -- weight matrix; W.shape[0] is the number of output rows
    b -- bias, added to the product via NumPy broadcasting

    Returns:
    Z -- the pre-activation values, shape (W.shape[0], A.shape[1])
    cache -- the tuple (A, W, b), kept for use in the backward pass
    """
    weighted_input = np.dot(W, A)
    Z = weighted_input + b

    expected_shape = (W.shape[0], A.shape[1])
    assert Z.shape == expected_shape

    cache = (A, W, b)
    return Z, cache

In [6]:
# linear_forward_test_case is not defined in this notebook; presumably it
# comes from the `testCases` star import and returns a fixed (A, W, b) fixture.
A,W,b = linear_forward_test_case()

Z, linear_cache = linear_forward(A,W,b)
print("Z = ", Z)

Z =  [[ 3.26295337 -1.23429987]]


In [7]:
def linear_activation_forward(A_prev, W, b, activation):
    """Run one full layer forward: linear step followed by an activation.

    Arguments:
    A_prev -- activations from the previous layer (or the input data)
    W -- weight matrix of this layer
    b -- bias of this layer
    activation -- name of the activation to apply: 'sigmoid' or 'relu'

    Returns:
    A -- post-activation output, shape (W.shape[0], A_prev.shape[1])
    cache -- tuple (linear_cache, Z) where linear_cache is the (A_prev, W, b)
             tuple returned by linear_forward and Z is the pre-activation

    Raises:
    ValueError -- if `activation` is not one of the supported names
    """
    # Fail fast with a clear message. Previously an unknown activation left
    # `A` unbound and surfaced as an UnboundLocalError at the shape assert.
    if activation not in ('sigmoid', 'relu'):
        raise ValueError(
            "Unsupported activation %r; expected 'sigmoid' or 'relu'" % (activation,)
        )

    Z, linear_cache = linear_forward(A_prev, W, b)

    # NOTE(review): sigmoid/relu are not defined in this notebook; presumably
    # they come from the `dnn_utils` star import and return an array shaped
    # like Z (the shape assert below relies on that) -- confirm.
    if activation == 'sigmoid':
        A = sigmoid(Z)
    else:
        A = relu(Z)

    assert A.shape == (W.shape[0], A_prev.shape[1])

    return A, (linear_cache, Z)

In [8]:
# linear_activation_forward_test_case is not defined in this notebook;
# presumably it comes from the `testCases` star import.
# The same fixture is run through both supported activations.
A_prev, W, b = linear_activation_forward_test_case()
A, linear_activation_cache = linear_activation_forward(A_prev, W, b, activation = 'sigmoid')
print("With sigmoid: A = ", A)

A, linear_activation_cache = linear_activation_forward(A_prev, W, b, activation = 'relu')
print("With Relu: A = ", A)

With sigmoid: A =  [[0.96890023 0.11013289]]
With Relu: A =  [[3.43896131 0.        ]]


In [9]:
def L_model_forward(X, parameters):
    """Forward pass through the whole network: relu layers, then a sigmoid layer.

    Arguments:
    X -- input data; X.shape[1] is used as the number of examples
    parameters -- dict holding "W1", "b1", ..., "WL", "bL"; the number of
                  layers L is taken to be len(parameters) // 2

    Returns:
    AL -- output of the final (sigmoid) layer, asserted to have shape
          (1, X.shape[1])
    caches -- list with one cache per layer, in layer order, as returned by
              linear_activation_forward
    """
    num_layers = len(parameters) // 2

    # Layers 1..L-1 use relu; the last layer L uses sigmoid.
    layer_plan = [(idx, 'relu') for idx in range(1, num_layers)]
    layer_plan.append((num_layers, 'sigmoid'))

    caches = []
    current = X
    for idx, act_name in layer_plan:
        suffix = str(idx)
        current, layer_cache = linear_activation_forward(
            current,
            parameters['W' + suffix],
            parameters['b' + suffix],
            activation=act_name,
        )
        caches.append(layer_cache)

    AL = current
    assert AL.shape == (1, X.shape[1])

    return AL, caches

In [10]:
# L_model_forward_test_case is not defined in this notebook; presumably it
# comes from the `testCases` star import. Note this rebinds `parameters`.
X, parameters = L_model_forward_test_case()
AL, caches = L_model_forward(X, parameters)
print("AL = ", AL)
# One cache is appended per layer, so this equals the number of layers.
print("Length of caches list = ", len(caches))

AL =  [[0.17007265 0.2524272 ]]
Length of caches list =  2


In [11]:
def compute_cost(AL, Y):
    """Compute the mean binary cross-entropy cost.

    cost = -(1/m) * sum( Y * log(AL) + (1 - Y) * log(1 - AL) )

    Arguments:
    AL -- predicted probabilities, same shape as Y
    Y -- true labels; Y.shape[1] is used as the number of examples m

    Returns:
    cost -- the cross-entropy cost as a 0-d value (shape ())
    """
    m = Y.shape[1]

    # Keep predictions strictly inside (0, 1). A saturated prediction of
    # exactly 0 or 1 would otherwise produce log(0) = -inf and 0 * -inf = nan.
    # Values already inside [eps, 1 - eps] are left unchanged, so ordinary
    # inputs give the same cost as before.
    eps = 1e-15
    AL = np.clip(np.asarray(AL, dtype=np.float64), eps, 1 - eps)

    cost = (-1 / m) * np.sum(np.multiply(Y, np.log(AL)) + np.multiply(1 - Y, np.log(1 - AL)))

    # Collapse any leftover singleton dimensions so callers get a scalar.
    cost = np.squeeze(cost)
    assert cost.shape == ()

    return cost

In [12]:
# compute_cost_test_case is not defined in this notebook; presumably it comes
# from the `testCases` star import. It returns (Y, AL), while compute_cost
# takes (AL, Y) -- note the swapped order.
Y, AL = compute_cost_test_case()

print("cost = ", compute_cost(AL, Y))

cost =  0.41493159961539694


In [13]:
def linear_backward(dZ, cache):
    """Backward pass for the linear part of one layer.

    Arguments:
    dZ -- gradient of the cost with respect to this layer's linear output
    cache -- tuple (A_prev, W, b) saved by the forward pass

    Returns:
    dA_prev -- W.T . dZ, same shape as A_prev
    dW -- (dZ . A_prev.T) / m, same shape as W
    db -- row-wise sum of dZ divided by m, same shape as b

    Here m is A_prev.shape[1].
    """
    A_prev, W, b = cache
    num_examples = A_prev.shape[1]

    # Gradient flowing back to the previous layer's activations.
    dA_prev = np.dot(W.T, dZ)

    # Parameter gradients, averaged over the examples.
    dW = np.dot(dZ, A_prev.T) / num_examples
    db = np.sum(dZ, axis=1, keepdims=True) / num_examples

    assert dA_prev.shape == A_prev.shape
    assert dW.shape == W.shape
    assert db.shape == b.shape

    return dA_prev, dW, db

In [14]:
# linear_backward_test_case is not defined in this notebook; presumably it
# comes from the `testCases` star import. Note this rebinds `linear_cache`.
dZ, linear_cache = linear_backward_test_case()

dA_prev, dW, db = linear_backward(dZ, linear_cache)
print("dA_prev = ",dA_prev,"\ndW = ",dW,"\ndb = ",db)

dA_prev =  [[ 0.51822968 -0.19517421]
 [-0.40506361  0.15255393]
 [ 2.37496825 -0.89445391]] 
dW =  [[-0.10076895  1.40685096  1.64992505]] 
db =  [[0.50629448]]
