In [1]:
import numpy as np


def sigmoid(x, derivative=False):

    if (derivative == True):
        return sigmoid(x, derivative=False) * (1 - sigmoid(x, derivative=False))
    else:
        return 1/(1+np.exp(-x))


In [2]:
np.random.seed(1)

alpha = 0.1

num_hidden = 3

X = np.array([
    [0, 0, 1],
    [0, 1, 1],
    [1, 0, 0],
    [1, 1, 0],
    [1, 0, 1],
    [1, 1, 1],
])

y = np.array([[0, 1, 0, 1, 1, 0]]).T

hidden_weights = 2*np.random.random((X.shape[1] + 1, num_hidden)) - 1
output_weights = 2*np.random.random((num_hidden + 1, y.shape[1])) - 1


In [3]:
num_iterations = 10000


# for each iteration of the gradient descent
for i in range(num_iterations):

    # forward phase
    input_layer_outputs = np.hstack((np.ones((X.shape[0], 1)), X))

    hidden_layer_outputs = np.hstack((np.ones((X.shape[0], 1)), sigmoid(
        np.dot(input_layer_outputs, hidden_weights))))

    output_layer_outputs = np.dot(hidden_layer_outputs, output_weights)

    # backward phase
    output_error = output_layer_outputs - y

    # hidden layer error term and [:, 1:] removes the bias term from the backpropagation
    hidden_error = hidden_layer_outputs[:, 1:] * (
        1 - hidden_layer_outputs[:, 1:] * np.dot(output_error, output_weights.T[:, 1:]))

    # partial derivatives

    hidden_pd = input_layer_outputs[:, :,
                                    np.newaxis] * hidden_error[:, np.newaxis, :]
    output_pd = hidden_layer_outputs[:, :,
                                     np.newaxis] * output_error[:, np.newaxis, :]

    total_hidden_gradient = np.average(hidden_pd, axis=0)
    total_output_gradient = np.average(output_pd, axis=0)

    hidden_weights += - alpha * total_hidden_gradient
    output_weights += - alpha * total_output_gradient

print("Output after Training: \n{}".format(output_layer_outputs))


Output after Training: 
[[0.49898049]
 [0.50028935]
 [0.49913911]
 [0.5003266 ]
 [0.50059768]
 [0.50066887]]
