In [16]:
import numpy as np

In [26]:
def sigmoid(Z):
    A = 1/(1+np.exp(-Z))
    cache = Z
    return A,cache

def Relu(Z):
    A = np.maximum(0,Z)
    cache = Z
    
    assert(A.shape == Z.shape)
    return A,cache

def sigmoid_backward(dA,cache):
    Z = cache
    s = 1/1+(np.exp(-Z))
    dZ = dA*s*(1-s)
    
    assert (dZ.shape == Z.shape)
    return dZ

def Relu_backward(dA,cache):
    Z = cache
    dZ = np.array(dA, copy=True)
    
    dZ[Z<=0]=0
    assert (dZ.shape == Z.shape)
    return dZ

In [27]:
def initialize_params(layers_dims):
    L = len(layers_dims)
    params = {}
    for l in range(1,L):
        params["W"+str(l)] = np.random.randn(layers_dims[l],layers_dims[l-1])/np.sqrt(layers_dims[l-1])
        params["b"+str(l)] = np.zeros((layers_dims[l],1))
        
        assert(params["W" + str(l)].shape == (layers_dims[l], layers_dims[l-1]))
        assert(params["b" + str(l)].shape == (layers_dims[l], 1))
    return params

def compute_linear_forward(A,W,b):
    Z = np.dot(W,A) + b
    assert(Z.shape == (W.shape[0],A.shape[1]))
    cache = (A,W,b)
    return Z,cache

def compute_forward(A_pre,W,b,activation):
    if activation == 'sigmoid':
        Z, linear_cache = compute_linear_forward(A_pre,W,b)
        A, activation_cache = sigmoid(Z)
    elif activation == 'Relu':
        Z, linear_cache = compute_linear_forward(A_pre,W,b)
        A, activation_cache = Relu(Z)
        
    assert(A.shape == (W.shape[0],A_pre.shape[1]))
    cache = (linear_cache,activation_cache)
    return A,cache

def L_Layer_forward(x,params):
    L = len(params)//2
    caches = []
    A = x
    for l in range(1,L):
        A_pre = A
        A, cache = compute_forward(A_pre,params["W"+str(l)],params["b"+str(l)],'Relu')
        caches.append(cache)
    
    AL,cache = compute_forward(A,params["W"+str(L)],params["b"+str(L)],'sigmoid')
    caches.append(cache)
    
    assert(AL.shape == (1,x.shape[1]))
    return AL,caches


In [28]:
def compute_cost(AL,y):
    m = y.shape[1]
    cost = -np.sum(np.multiply(np.log(AL),y) + np.multiply(np.log(1 - AL), 1 - y)) / m
        
    cost = np.squeeze(cost)
    assert(cost.shape == ())

    return cost

In [29]:
def linear_backward(dZ,cache):
    A_pre,W,b = cache
    m = A_pre.shape[1]
    dW = np.dot(dZ,A_pre.T)/m
    db = np.sum(dZ,axis=1,keepdims=True)/m
    dA_pre = np.dot(W.T,dZ)
    
    assert (dA_pre.shape == A_pre.shape)
    assert (dW.shape == W.shape)
    assert (db.shape == b.shape)
    return dA_pre,dW,db

def linear_activation_backward(dA,cache,activation="Relu"):
    linear_cache,activation_cache = cache
    if activation == "Relu":
        dZ = Relu_backward(dA,activation_cache)
        dA_pre,dW,db = linear_backward(dZ,linear_cache)
    if activation == "sigmoid":
        dZ = sigmoid_backward(dA,activation_cache)
        dA_pre,dW,db = linear_backward(dZ,linear_cache)
    
    
    return dA_pre,dW,db

def L_Layer_backward(AL,y,caches):
    grads = {}
    L = len(caches)
    m = AL.shape[1]
    y = y.reshape(AL.shape)
    dAL = - (np.divide(y, AL) - np.divide(1 - y, 1 - AL))
    current_cache = caches[L-1]
    grads["dA"+str(L-1)],grads["dW"+str(L)],grads["db"+str(L)] = linear_activation_backward(dAL,current_cache,activation="sigmoid")
    
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_t,dW_t,db_t = current_cache = linear_activation_backward(grads["dA"+str(l+1)],current_cache,activation="Relu")
        grads["dA"+str(l)] = dA_prev_t
        grads["dW"+str(l+1)] = dW_t
        grads["db"+str(l+1)] = db_t
    
    return grads

In [30]:
def ungrade_params(params,grads,Learning_Rate):
    L = len(params)//2
    
    for l in range(L):
        params["W"+str(l+1)] = params["W"+str(l+1)] - Learning_Rate*grads["dW"+str(l+1)]
        params["b"+str(l+1)] = params["b"+str(l+1)] - Learning_Rate*grads["db"+str(l+1)]
    
    return params    

In [31]:
def nn_model(x,y,layers_dims,Learning_Rate=0.0075,num_iterations=3000,print_cost=False):
    
    params = initialize_params(layers_dims)
    costs=[]
    
    for i in range(num_iterations):
        AL,caches = L_Layer_forward(x,params)
        cost = compute_cost(AL,y)
        grads = L_layer_backward(AL,y,caches)
        params = ungrade_params(params,grads,Learning_Rate)
        
        if print_cost:
            if i%100 == 0:
                costs.append(cost)
                
                print("The ",i," time's iteration，the cost is ：" + str(cost))
    
    return params

In [32]:
import numpy as np
import h5py
    
    
def load_dataset():
    train_dataset = h5py.File('F:/datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:]) # your train set features
    train_set_y_orig = np.array(train_dataset["train_set_y"][:]) # your train set labels

    test_dataset = h5py.File('F:/datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:]) # your test set features
    test_set_y_orig = np.array(test_dataset["test_set_y"][:]) # your test set labels

    classes = np.array(test_dataset["list_classes"][:]) # the list of classes
    
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

In [33]:
train_set_x_orig , train_set_y , test_set_x_orig , test_set_y , classes = load_dataset()

train_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T 
test_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T

train_x = train_x_flatten / 255
train_y = train_set_y
test_x = test_x_flatten / 255
test_y = test_set_y

In [None]:
layers_dims = [12288, 20, 7, 5, 1]
parameters = nn_model(train_x, train_y, layers_dims, num_iterations = 2500, print_cost = True)

The  0  time's iteration，the cost is ：0.69844023733373


  cost = -np.sum(np.multiply(np.log(AL),y) + np.multiply(np.log(1 - AL), 1 - y)) / m
  cost = -np.sum(np.multiply(np.log(AL),y) + np.multiply(np.log(1 - AL), 1 - y)) / m
  dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
  dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
  dZ = dA*s*(1-s)
  dZ[Z<=0]=0


The  100  time's iteration，the cost is ：nan
The  200  time's iteration，the cost is ：nan
The  300  time's iteration，the cost is ：nan
The  400  time's iteration，the cost is ：nan
The  500  time's iteration，the cost is ：nan
The  600  time's iteration，the cost is ：nan
The  700  time's iteration，the cost is ：nan
The  800  time's iteration，the cost is ：nan
The  900  time's iteration，the cost is ：nan
The  1000  time's iteration，the cost is ：nan
The  1100  time's iteration，the cost is ：nan
The  1200  time's iteration，the cost is ：nan


In [11]:
def linear_backward(dZ,cache):
    A_pre,W,b = cache
    m = A_pre.shape[1]
    dW = np.dot(dZ,A_pre.T)/m
    db = np.sum(dZ,axis=1,keepdims=True)/m
    dA_pre = np.dot(W.T,dZ)
    
    assert (dA_pre.shape == A_pre.shape)
    assert (dW.shape == W.shape)
    assert (db.shape == b.shape)
    return dA_pre,dW,db

def linear_activation_backward(dA,cache,activation="Relu"):
    linear_cache,activation_cache = cache
    if activation == "Relu":
        dZ = Relu_backward(dA,activation_cache)
        dA_pre,dW,db = linear_backward(dZ,linear_cache)
    if activation == "sigmoid":
        dZ = sigmoid_backward(dA,activation_cache)
        dA_pre,dW,db = linear_backward(dZ,linear_cache)
    
    
    return dA_pre,dW,db

def L_Layer_backward(AL,y,caches):
    grads = {}
    L = len(caches)
    m = AL.shape[1]
    y = y.reshape(AL.shape)
    dAL = - (np.divide(y, AL) - np.divide(1 - y, 1 - AL))
    current_cache = caches[L-1]
    grads["dA"+str(L-1)],grads["dW"+str(L)],grads["db"+str(L)] = linear_activation_backward(dAL,current_cache,activation="sigmoid")
    
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_t,dW_t,db_t = current_cache = linear_activation_backward(grads["dA"+str(l+1)],current_cache,activation="Relu")
        grads["dA"+str(l)] = dA_prev_t
        grads["dW"+str(l+1)] = dW_t
        grads["db"+str(l+1)] = db_t
    
    return grads