In [1]:
import numpy as np

In [2]:
def define_structure(X, Y):
    input_unit = X.shape[0] 
    hidden_unit = 2 
    output_unit = Y.shape[0]
    #print(output_unit)
    return (input_unit, hidden_unit, output_unit)

In [3]:
def parameters_initialization(input_unit, hidden_unit, output_unit):
    W1 = np.ones([hidden_unit, input_unit])
    b1 = np.zeros((hidden_unit, 1))
    W2 = np.ones([output_unit, hidden_unit])
    b2 = np.zeros((output_unit, 1))
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    print('W1' + str(W1))
    
    return parameters

In [4]:
def forward_propagation(X, parameters):
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    
    Z1 = np.dot(W1, X) + b1
    A1 = Z1
    #print("Z1")
    #print()
    Z2 = np.dot(W2, A1) + b2
    A2 = 1/(1 + np.exp(-Z2))
    cache = {"Z1": Z1,"A1": A1,"Z2": Z2,"A2": A2}
    #print("Z2")
    #print(np.dot(W1,A1))
    return A2, cache

In [5]:
def cross_entropy_cost(A2, Y, parameters):
    # number of training example
    m = Y.shape[1] 
    #print(m)
    # Compute the cross-entropy cost
    #print(A2)
    logprobs = np.multiply(np.log(A2), Y) + np.multiply((1-Y), np.log(1 - A2))
    #print(logprobs)
    cost = - np.sum(logprobs) / m
    cost = float(np.squeeze(cost))
                                    
    return cost

In [6]:
def backward_propagation(parameters, cache, X, Y):
    #number of training example
    m = X.shape[1]
    
    W1 = parameters['W1']
    W2 = parameters['W2']
    A1 = cache['A1']
    A2 = cache['A2']
   
    dZ2 = A2-Y
    dW2 = (1/m) * np.dot(dZ2, A1.T)
    db2 = (1/m) * np.sum(dZ2, axis=1, keepdims=True)
    dZ1 = np.multiply(np.dot(W2.T, dZ2), 1 - np.power(A1, 2))
    dW1 = (1/m) * np.dot(dZ1, X.T) 
    db1 = (1/m)*np.sum(dZ1, axis=1, keepdims=True)
    
    grads = {"dW1": dW1, "db1": db1, "dW2": dW2,"db2": db2}
    
    return grads

In [7]:
def gradient_descent(parameters, grads, learning_rate = 0.001):
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
   
    dW1 = grads['dW1']
    db1 = grads['db1']
    dW2 = grads['dW2']
    db2 = grads['db2']
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    
    parameters = {"W1": W1, "b1": b1,"W2": W2,"b2": b2}
    
    return parameters

In [8]:
def neural_network_model(X, Y, hidden_unit, num_iterations = 1000):
    np.random.seed(3)
    input_unit = define_structure(X, Y)[0]
    output_unit = define_structure(X, Y)[2]
    
    parameters = parameters_initialization(input_unit, hidden_unit, output_unit)
    #print(parameters)
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    
    for i in range(num_iterations):
        A2, cache = forward_propagation(X, parameters)
        cost = cross_entropy_cost(A2, Y, parameters)
        grads = backward_propagation(parameters, cache, X, Y)
        parameters = gradient_descent(parameters, grads)
        if i % 5 == 0:
            print ("Cost after iteration %i: %f" %(i, cost))
    return parameters
X = np.array([[1],[0]])
#print(X)
#print(X)
Y = np.array([1])
Y = Y.reshape(1, Y.shape[0])
#Y = Y.T
#print(Y.shape)

parameters = neural_network_model(X, Y, 2, num_iterations=100)
print(parameters)

W1[[1. 1.]
 [1. 1.]]
Cost after iteration 0: 0.126928
Cost after iteration 5: 0.126715
Cost after iteration 10: 0.126503
Cost after iteration 15: 0.126291
Cost after iteration 20: 0.126081
Cost after iteration 25: 0.125870
Cost after iteration 30: 0.125661
Cost after iteration 35: 0.125452
Cost after iteration 40: 0.125244
Cost after iteration 45: 0.125036
Cost after iteration 50: 0.124829
Cost after iteration 55: 0.124623
Cost after iteration 60: 0.124417
Cost after iteration 65: 0.124212
Cost after iteration 70: 0.124008
Cost after iteration 75: 0.123804
Cost after iteration 80: 0.123601
Cost after iteration 85: 0.123399
Cost after iteration 90: 0.123197
Cost after iteration 95: 0.122996
{'W1': array([[1., 1.],
       [1., 1.]]), 'b1': array([[0.],
       [0.]]), 'W2': array([[1.01173796, 1.01173796]]), 'b2': array([[0.01173796]])}
