In [4]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
import testCases #参见资料包，或者在文章底部copy
from dnn_utils import sigmoid, sigmoid_backward, relu, relu_backward #参见资料包
import lr_utils #参见资料包，或者在文章底部copy

In [5]:
def initialize_parameters(n_x, n_h, n_y):
    """
    Create the starting weights and biases for a two-layer network.

    Weights are sampled from a standard normal distribution and scaled by
    0.01; biases start at zero.

    Arguments:
        n_x -- number of input features
        n_h -- number of hidden units
        n_y -- number of output units

    Returns:
        dict with keys "W1" (n_h, n_x), "b1" (n_h, 1),
        "W2" (n_y, n_h) and "b2" (n_y, 1)
    """
    # W1 is drawn before W2 so the global random stream is consumed in a
    # fixed order (matters when the caller seeds np.random).
    hidden_weights = np.random.randn(n_h, n_x) * 0.01
    output_weights = np.random.randn(n_y, n_h) * 0.01
    return {
        "W1": hidden_weights,
        "b1": np.zeros((n_h, 1)),
        "W2": output_weights,
        "b2": np.zeros((n_y, 1)),
    }

def initialize_parameters_deep(layers_dims):
    """
    Create the starting weights and biases for a network of arbitrary depth.

    Each weight matrix is sampled from a standard normal distribution and
    divided by the square root of the previous layer's size; biases are zero.

    Arguments:
        layers_dims -- sequence of layer sizes, input layer first

    Returns:
        dict with "W<l>" of shape (layers_dims[l], layers_dims[l-1]) and
        "b<l>" of shape (layers_dims[l], 1) for l = 1 .. len(layers_dims)-1
    """
    parameters = {}
    # Walk consecutive (previous size, current size) pairs; layers are numbered from 1.
    size_pairs = zip(layers_dims[:-1], layers_dims[1:])
    for layer, (n_prev, n_curr) in enumerate(size_pairs, start=1):
        suffix = str(layer)
        parameters["W" + suffix] = np.random.randn(n_curr, n_prev) / np.sqrt(n_prev)
        parameters["b" + suffix] = np.zeros((n_curr, 1))
    return parameters
def linear_forward(A, W, b):
    """
    Compute the linear part of one layer's forward pass, Z = W.A + b.

    Arguments:
        A -- activations fed into the layer
        W -- weight matrix of the layer
        b -- bias of the layer (broadcast over the columns of W.A)

    Returns:
        Z     -- the pre-activation values
        cache -- the tuple (A, W, b), kept for the backward pass
    """
    pre_activation = np.dot(W, A) + b
    return pre_activation, (A, W, b)
def linear_activation_forward(A_prev, W, b, activation):
    """
    Run one full layer forward: the linear step followed by its activation.

    Arguments:
        A_prev     -- activations from the previous layer (or the input data)
        W          -- weight matrix of this layer
        b          -- bias of this layer
        activation -- "sigmoid" or "relu"

    Returns:
        A     -- the layer's output activations
        cache -- tuple (linear_cache, activation_cache) for the backward pass

    Raises:
        ValueError -- if `activation` is not "sigmoid" or "relu"
    """
    # Validate up front: previously an unknown name skipped both branches and
    # failed later with an UnboundLocalError on `linear_cache`.
    if activation == "sigmoid":
        activation_fn = sigmoid
    elif activation == "relu":
        activation_fn = relu
    else:
        raise ValueError(
            "activation must be 'sigmoid' or 'relu', got " + repr(activation))

    Z, linear_cache = linear_forward(A_prev, W, b)
    # Both helpers are used as returning (activations, activation_cache).
    A, activation_cache = activation_fn(Z)
    cache = (linear_cache, activation_cache)
    return A, cache
def L_model_forward(X, parameters):
    """
    Forward-propagate X through the whole network.

    Layers 1 .. L-1 use ReLU and the final layer L uses sigmoid, where L is
    half the number of entries in `parameters` (one W and one b per layer).

    Arguments:
        X          -- input data fed to the first layer
        parameters -- dict holding "W1", "b1", ..., "WL", "bL"

    Returns:
        AL     -- output of the final sigmoid layer
        caches -- list of per-layer caches, index 0 for layer 1 up to
                  index L-1 for layer L
    """
    num_layers = len(parameters) // 2
    caches = []
    activation = X

    # Hidden layers.
    for layer in range(1, num_layers):
        suffix = str(layer)
        activation, layer_cache = linear_activation_forward(
            activation, parameters["W" + suffix], parameters["b" + suffix], "relu")
        caches.append(layer_cache)

    # Output layer.
    last = str(num_layers)
    AL, output_cache = linear_activation_forward(
        activation, parameters["W" + last], parameters["b" + last], "sigmoid")
    caches.append(output_cache)
    return AL, caches
def compute_cost(AL, Y):
    """
    Compute the mean binary cross-entropy between predictions and labels.

    Arguments:
        AL -- predicted probabilities, same shape as Y
        Y  -- true labels (0 or 1); Y.shape[1] is taken as the number of examples

    Returns:
        cost -- the cross-entropy cost, squeezed to a 0-d value
    """
    m = Y.shape[1]
    # Keep predictions strictly inside (0, 1). A saturated output of exactly
    # 0.0 or 1.0 would otherwise give log(0) = -inf, and -inf * 0 = nan even
    # when the prediction is correct. Values already inside the interval are
    # left untouched by the clip.
    eps = 1e-15
    AL_safe = np.clip(AL, eps, 1 - eps)
    cost = -np.sum(np.multiply(np.log(AL_safe), Y)
                   + np.multiply(np.log(1 - AL_safe), 1 - Y)) / m
    return np.squeeze(cost)
def linear_backward(dZ, cache):
    """
    Backward pass for the linear part of one layer.

    Arguments:
        dZ    -- gradient of the cost with respect to this layer's linear output
        cache -- tuple (A_prev, W, b) stored during the forward pass

    Returns:
        dA_prev -- gradient with respect to the previous layer's activations
        dW      -- gradient with respect to W, averaged over the examples
        db      -- gradient with respect to b, averaged over the examples
    """
    A_prev, W, _ = cache          # the bias itself is not needed for the gradients
    m = A_prev.shape[1]           # number of examples (columns)

    dA_prev = np.dot(W.T, dZ)
    dW = np.dot(dZ, A_prev.T) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation="relu"):
    """
    Backward pass for one full layer (activation followed by linear step).

    Arguments:
        dA         -- gradient of the cost with respect to this layer's output
        cache      -- tuple (linear_cache, activation_cache) from the forward pass
        activation -- "sigmoid" or "relu" (default "relu")

    Returns:
        dA_prev -- gradient with respect to the previous layer's activations
        dW      -- gradient with respect to this layer's weights
        db      -- gradient with respect to this layer's bias

    Raises:
        ValueError -- if `activation` is not "sigmoid" or "relu"
    """
    # Validate up front: previously an unknown name skipped both branches and
    # failed at the return statement with an UnboundLocalError.
    if activation == "sigmoid":
        backward_fn = sigmoid_backward
    elif activation == "relu":
        backward_fn = relu_backward
    else:
        raise ValueError(
            "activation must be 'sigmoid' or 'relu', got " + repr(activation))

    linear_cache, activation_cache = cache
    dZ = backward_fn(dA, activation_cache)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
def L_model_backward(AL, Y, caches):
    """
    Back-propagate through the whole network (ReLU hidden layers, sigmoid output).

    Arguments:
        AL     -- output of the forward pass
        Y      -- true labels; reshaped to AL's shape before use
        caches -- list of per-layer caches produced by the forward pass

    Returns:
        grads -- dict with, for every layer l = 1 .. L:
                 "dW<l>", "db<l>" -- gradients of layer l's weights and bias
                 "dA<l>"          -- gradient with respect to the *input* of
                                     layer l (the value returned as dA_prev
                                     by that layer's backward step)
    """
    grads = {}
    L = len(caches)

    # Align the labels with the predictions so the element-wise divisions
    # below cannot broadcast into a wrong shape. The previous
    # `AL.reshape(Y.shape)` discarded its result and therefore did nothing.
    Y = Y.reshape(AL.shape)

    # Derivative of the cross-entropy cost with respect to AL.
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Output (sigmoid) layer.
    cache = caches[L - 1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = \
        linear_activation_backward(dAL, cache, "sigmoid")

    # Hidden (ReLU) layers, from layer L-1 down to layer 1. The gradient
    # flowing into layer l+1 was stored under "dA<l+2>" by the layer above it.
    for l in reversed(range(L - 1)):
        cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(
            grads["dA" + str(l + 2)], cache, "relu")
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
    return grads
def update_parameters(parameters, grads, learning_rate):
    """
    Apply one gradient-descent step to every layer's weights and biases.

    Arguments:
        parameters    -- dict holding "W1", "b1", ..., "WL", "bL"
        grads         -- dict holding the matching "dW<l>" and "db<l>" entries
        learning_rate -- step size

    Returns:
        parameters -- the same dict object, with each entry rebound to a new
                      updated array (the old arrays are not modified)
    """
    num_layers = len(parameters) // 2
    for layer in range(1, num_layers + 1):
        suffix = str(layer)
        w_key, b_key = "W" + suffix, "b" + suffix
        parameters[w_key] = parameters[w_key] - learning_rate * grads["d" + w_key]
        parameters[b_key] = parameters[b_key] - learning_rate * grads["d" + b_key]
    return parameters
def two_layer_model(X,Y,layers_dims,learning_rate=0.0075,num_iterations=3000,print_cost=False,isPlot=True):
    """
    Train a two-layer network (linear -> ReLU -> linear -> sigmoid) with
    batch gradient descent.

    Arguments:
        X              -- input data, one example per column
        Y              -- true labels; reshaped to (n_y, number of examples)
        layers_dims    -- (n_x, n_h, n_y): input, hidden and output sizes
        learning_rate  -- gradient-descent step size
        num_iterations -- number of gradient-descent iterations
        print_cost     -- if True, print the cost every 100 iterations
        isPlot         -- if True, plot the recorded costs after training

    Returns:
        parameters -- dict with the learned "W1", "b1", "W2", "b2"
    """
    (n_x, n_h, n_y) = layers_dims
    costs = []
    parameters = initialize_parameters(n_x, n_h, n_y)

    # The output A2 always has shape (n_y, number of examples), so the labels
    # are aligned once here instead of on every iteration.
    Y = Y.reshape((n_y, X.shape[1]))

    for i in range(num_iterations):
        W1 = parameters["W1"]
        b1 = parameters["b1"]
        W2 = parameters["W2"]
        b2 = parameters["b2"]

        # Forward pass.
        A1, cache1 = linear_activation_forward(X, W1, b1, "relu")
        A2, cache2 = linear_activation_forward(A1, W2, b2, "sigmoid")
        cost = compute_cost(A2, Y)

        # Backward pass, starting from the derivative of the cost w.r.t. A2.
        dA2 = - (np.divide(Y, A2) - np.divide(1 - Y, 1 - A2))
        dA1, dW2, db2 = linear_activation_backward(dA2, cache2, "sigmoid")
        _, dW1, db1 = linear_activation_backward(dA1, cache1, "relu")

        grads = {"dW2": dW2,
                 "dW1": dW1,
                 "db2": db2,
                 "db1": db1}
        parameters = update_parameters(parameters, grads, learning_rate)

        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("第", i ,"次迭代，成本值为：" ,np.squeeze(cost))

    # Honour the isPlot flag: it was previously accepted but ignored, so the
    # recorded costs were silently thrown away.
    if isPlot:
        plt.plot(np.squeeze(costs))
        plt.ylabel("cost")
        plt.xlabel("iterations (per hundreds)")
        plt.title("Learning rate = " + str(learning_rate))
        plt.show()

    return parameters
def predict(X, y, parameters):
    """
    Predict binary labels for X with a trained network and print the accuracy.

    Arguments:
        X          -- input data, one example per column
        y          -- true labels, compared element-wise with the predictions
        parameters -- trained parameters accepted by L_model_forward

    Returns:
        p -- array of shape (1, number of examples) holding 1.0 where the
             predicted probability is greater than 0.5 and 0.0 elsewhere
    """
    m = X.shape[1]
    probas, caches = L_model_forward(X, parameters)

    # Threshold the first (only) output row in one vectorised step instead of
    # looping over the examples in Python.
    p = np.zeros((1, m))
    p[0, :] = probas[0, :] > 0.5

    print("准确度为: "  + str(float(np.sum((p == y))/m)))

    return p
def L_layer_model(X, Y, layers_dims, learning_rate=0.0075, num_iterations=3000, print_cost=False,isPlot=True):
    """
    Train an L-layer network (ReLU hidden layers, sigmoid output) with batch
    gradient descent.

    Arguments:
        X              -- input data, one example per column
        Y              -- true labels
        layers_dims    -- sequence of layer sizes, input layer first
        learning_rate  -- gradient-descent step size
        num_iterations -- number of gradient-descent iterations
        print_cost     -- if True, print the cost every 100 iterations
        isPlot         -- if True, plot the recorded costs after training

    Returns:
        parameters -- dict with the learned "W<l>" and "b<l>" for every layer
    """
    parameters = initialize_parameters_deep(layers_dims)
    costs = []
    for i in range(num_iterations):
        AL, caches = L_model_forward(X, parameters)
        cost = compute_cost(AL, Y)
        grads = L_model_backward(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("第", i ,"次迭代，成本值为：" ,np.squeeze(cost))

    # Honour the isPlot flag: it was previously accepted but ignored, so the
    # recorded costs were silently thrown away.
    if isPlot:
        plt.plot(np.squeeze(costs))
        plt.ylabel("cost")
        plt.xlabel("iterations (per hundreds)")
        plt.title("Learning rate = " + str(learning_rate))
        plt.show()

    return parameters

In [None]:
# Two-layer experiment: load the data, train, then report test accuracy.
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = lr_utils.load_dataset()

# Flatten every example into one column: (examples, ...) -> (features, examples).
train_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T

# Rescale the inputs (presumably 8-bit pixel intensities - TODO confirm) to [0, 1].
train_x, test_x = train_x_flatten / 255, test_x_flatten / 255
train_y, test_y = train_set_y, test_set_y

# Layer sizes: input features, hidden units, output units.
# NOTE(review): n_x is hard-coded and must equal train_x.shape[0].
n_x, n_h, n_y = 12288, 7, 1
layers_dims = (n_x, n_h, n_y)

parameters = two_layer_model(train_x, train_y, layers_dims=layers_dims,
                             num_iterations=2500, print_cost=True, isPlot=True)
print(predict(test_x, test_y, parameters))

In [6]:
# Deep-network experiment: load the data and train the L-layer model.
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = lr_utils.load_dataset()

# Flatten every example into one column: (examples, ...) -> (features, examples).
train_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T

# Rescale the inputs (presumably 8-bit pixel intensities - TODO confirm) to [0, 1].
train_x, test_x = train_x_flatten / 255, test_x_flatten / 255
train_y, test_y = train_set_y, test_set_y

# Input size followed by four parameterised layers (three hidden + one output).
layers_dims = [12288, 20, 7, 5, 1]
parameters = L_layer_model(train_x, train_y, layers_dims,
                           num_iterations=2500, print_cost=True, isPlot=True)


第 0 次迭代，成本值为： 0.6931472742228205
第 100 次迭代，成本值为： 0.6780103488766552
第 200 次迭代，成本值为： 0.6675993040306714
第 300 次迭代，成本值为： 0.6604214282401469
第 400 次迭代，成本值为： 0.6554574039916994
第 500 次迭代，成本值为： 0.6520130925489744
第 600 次迭代，成本值为： 0.6496154961263259
第 700 次迭代，成本值为： 0.6479414595763608
第 800 次迭代，成本值为： 0.6467693997424374
第 900 次迭代，成本值为： 0.6459467841145865
第 1000 次迭代，成本值为： 0.6453681781174686
第 1100 次迭代，成本值为： 0.6449604393320985
第 1200 次迭代，成本值为： 0.6446726427574252
第 1300 次迭代，成本值为： 0.6444692214156432


KeyboardInterrupt: 

In [100]:
# Evaluate on the test set: predict() prints the accuracy, then the raw
# predictions are shown.
# NOTE(review): relies on `parameters` from a training cell that ran to
# completion - re-run training first on a fresh kernel.
test_predictions = predict(test_x, test_y, parameters)
print(test_predictions)

准确度为: 0.74
[[1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 0. 1. 0. 1. 0. 0. 1. 0. 0. 1.
  1. 1. 0. 0. 0. 1. 1. 1. 1. 1. 1. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1.
  1. 0.]]


In [7]:
# Sanity check: shape of the flattened training matrix, (features, examples).
train_x.shape

(12288, 209)