In [1]:
import numpy as np

In [7]:
def sigmoid(X):
    return 1 / (1 + np.exp(-X))

In [3]:
def layer_sizes(input_nodes, hidden_nodes, output_nodes):
    input_layer = input_nodes
    hidden_layer = hidden_nodes
    output_layer = output_nodes
    
    return (input_layer, hidden_layer, output_layer) 

In [20]:
def initialize(I, H, O):
    # I: size of input layer
    # H: size of hidden layer
    # O: size of output layer
    W1 = np.random.randn(H, I) * 0.01
    b1 = np.zeros(shape=(H, 1))
    W2 = np.random.randn(I, H) * 0.01
    b2 = np.zeros(shape=(O, 1))
    
    parameters = {
        'W1': W1,
        'b1': b1,
        'W2': W2,
        'b2': b2,
    }
    
    return parameters
    

In [21]:
def forward_propagation(X, parameters):
    # X is input data size (inputs, m)
    
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    
    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)
    
    cache = {
        'Z1': Z1,
        'A1': A1,
        'Z2': Z2,
        'A2': A2,
    }
    
    return A2, cache

In [22]:
def compute_cost(A2, Y, parameters):
    # A2: sigmoid output of activation
    # Y: expected outputs
    
    m = Y.shape[1]
    
    logprobs = np.multiply(np.log(A2), Y) + np.multiply((1 - Y), np.log(1 - A2))
    cost = -np.sum(logprobs) / m
    
    cost = np.squeeze(cost) # turns [[x]] into x
    
    return cost

In [23]:
def backward_propogation(parameters, cache, X, Y):
    # X: input data of shape
    # Y: expected outputs
    
    m = X.shape[1]
    
    W1 = parameters['W1']
    W2 = parameters['W2']
    
    A1 = cache['A1']
    A2 = cache['A2']
    
    dZ2 = A2 - Y
    dW2 = (1 / m) * np.dot(dZ2, A1.T)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)
    dZ1 = np.multiply(np.dot(W2.T, dZ2), 1 - np.power(A1, 2))
    dW1 = (1 / m) * np.dot(dZ1, X.T)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)
    
    grads = {
        'dW1': dW1,
        'db1': db1,
        'dW2': dW2,
        'db2': db2,
    }
    
    return grads

In [24]:
def update_parameters(parameters, grads, learning_rate=1.2):
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    
    dW1 = grads['dW1']
    db1 = grads['db1']
    dW1 = grads['dW2']
    db1 = grads['db2']
    
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    
    parameters = {
        'W1': W1,
        'b1': b1,
        'W2': W2,
        'b2': b2,
    }
    
    return parameters

In [28]:
def model(X, Y, hidden_nodes, iterations=100, print_cost=True):
    
    np.random.seed(3)
#     input_nodes = layer_sizes(X, 4, Y)[0]
#     output_nodes = layer_sizes(X, 4, Y)[2]
    input_nodes = 3
    output_nodes = 1
    parameters = initialize(input_nodes, hidden_nodes, output_nodes)
    
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    
    for i in range(0, iterations):
        A2, cache = forward_propagation(X, parameters)
        
        cost = compute_cost(A2, Y, parameters)
        
        grads = backward_propagation(parameters, cache, X, Y)
        
        parameters = update_parameters(parameters, grads)
        
        if print_cost and i % 100 == 0:
            print(f'Cost after iteration {i}: {cost}')
    
    return parameters

In [29]:
def predict(parameters, X):
    A2, cache = forward_propagation(X, parameters)
    predictions = A2 > 0.5
    return predictions

In [31]:
X = np.array([0, 1, 2])
Y = np.array([3, 3, 3])
model(X, Y, 4)

TypeError: only integer scalar arrays can be converted to a scalar index