In [2]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
import testCases
from dnn_utils import sigmoid, sigmoid_backward, relu, relu_backward
import lr_utils

np.random.seed(1)

In [4]:
#初始化参数(2 layers nn)

def initialize_parameters(n_x, n_h, n_y):
    
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))
    
    parameters = {
        "W1": W1,
        "b1": b1,
        "W2": W2,
        "b2": b2
    }
    
    return parameters

#测试
parameters = initialize_parameters(3,2,1)
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))

W1 = [[ 0.01744812 -0.00761207  0.00319039]
 [-0.0024937   0.01462108 -0.02060141]]
b1 = [[0.]
 [0.]]
W2 = [[-0.00322417 -0.00384054]]
b2 = [[0.]]


In [5]:
#初始化参数 （deep nn）

def initialize_parameters_deep(layers_dims):
    """
    parameters:
        layers_dims: 包含网络中每个图层的节点数量的列表
    return：
        parameters: 包含"W1","b1","W2","b2"..的字典
                    Wl -  (layers_dims[l],layers_dims[l-1])
                    bl -  (layers_dims[l], 1)
    """
    
    np.random.seed(3)
    parameters = {}
    L = len(layers_dims)
    
    for l in range(1, L):
        parameters["W" + str(l)] = np.random.randn(layers_dims[l], layers_dims[l - 1])
        parameters["b" + str(l)] = np.random.randn(layers_dims[l], 1)
    
    return parameters

#测试
layers_dims = [5,4,3]
parameters = initialize_parameters_deep(layers_dims)
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))

W1 = [[ 1.78862847  0.43650985  0.09649747 -1.8634927  -0.2773882 ]
 [-0.35475898 -0.08274148 -0.62700068 -0.04381817 -0.47721803]
 [-1.31386475  0.88462238  0.88131804  1.70957306  0.05003364]
 [-0.40467741 -0.54535995 -1.54647732  0.98236743 -1.10106763]]
b1 = [[-1.18504653]
 [-0.2056499 ]
 [ 1.48614836]
 [ 0.23671627]]
W2 = [[-1.02378514 -0.7129932   0.62524497 -0.16051336]
 [-0.76883635 -0.23003072  0.74505627  1.97611078]
 [-1.24412333 -0.62641691 -0.80376609 -2.41908317]]
b2 = [[-0.92379202]
 [-1.02387576]
 [ 1.12397796]]


In [9]:
# 前向传播 (2 layers nn)

def linear_forward(A,W,b):
    
    Z = np.dot(W,A) + b
    
    cache = (A,W,b)
    
    return Z, cache

def linear_activation_forward(A_prev,W,b,activation):
    """
    parameters: 
        A_prev: 来自上一层的激活
        W：     偏重矩阵
        b:      偏向量
        activation: 本层使用的激活函数名
    return：
        A： 激活函数的输出
        cache：一个包含“linear_cache”和“activation_cache”的字典
    """
    
    if activation == "sigmoid":
        Z, linear_cache = linear_forward(A_prev,W,b)
        A, activation_cache = sigmoid(Z)
    elif activation == "relu":
        Z, linear_cache = linear_forward(A_prev,W,b)
        A, activation_cache = relu(Z)
    
    cache = (linear_cache,activation_cache)
    
    return A, cache

#测试
A_prev, W,b = testCases.linear_activation_forward_test_case()

A, linear_activation_cache = linear_activation_forward(A_prev, W, b, activation = "sigmoid")
print("sigmoid，A = " + str(A))

A, linear_activation_cache = linear_activation_forward(A_prev, W, b, activation = "relu")
print("ReLU，A = " + str(A))


sigmoid，A = [[0.03109977 0.88986711]]
ReLU，A = [[3.43896131 0.        ]]


In [13]:
# 前向传播 （deep layers nn）
def L_model_forward(X, parameters):
    """
    parameter：
        X - 数据，numpy数组，维度为（输入节点数量，示例数）
        parameters - initialize_parameters_deep（）的输出
    
    返回：
        AL - 最后的激活值
        caches - 包含以下内容的缓存列表：
                 linear_relu_forward（）的每个cache（有L-1个，索引为从0到L-2）
                 linear_sigmoid_forward（）的cache（只有一个，索引为L-1）
    

    """
    
    caches = []
    A = X
    L = len(parameters) // 2
    for l in range(1, L):
        A_prev = A
        A, cache = linear_activation_forward(A_prev,parameters["W"+str(l)],parameters["b"+str(l)],"relu")
        caches.append(cache)
    
    AL, cache = linear_activation_forward(A, parameters["W"+str(L)],parameters["b"+str(l)],"sigmoid")
    caches.append(cache)
    
    return AL,caches
#测试
X,parameters = testCases.L_model_forward_test_case()
AL,caches = L_model_forward(X,parameters)
print("AL = " + str(AL))
print("caches 的长度为 = " + str(len(caches)))

AL = [[0.92843149 0.8872984 ]
 [0.57871451 0.4546487 ]
 [0.63659865 0.51530219]]
caches 的长度为 = 2


In [14]:

# 计算成本

def compute_cost(AL, Y):
    
    m = Y.shape[1]
    cost = -np.sum(np.multiply(np.log(AL),Y) + np.multiply(np.log(1 - AL), 1 - Y)) / m
    
    cost = np.squeeze(cost)
    
    return cost

#测试
Y,AL = testCases.compute_cost_test_case()
print("cost = " + str(compute_cost(AL, Y)))

cost = 0.414931599615397


In [None]:
# 反向传播

def linear_backward(dZ,cache):
    
    A_prev,W,b = cache
    m = A_pre.shape[1]
    dW = np.dot(dZ, A_pre.T) / m
    db = np.sum(dZ,axis=1,keepdims=True) / m
    dA_pre = np.dot(W.T, dZ)
    
    return dA_prev,dW,db


    