## Settings

In [1]:
import numpy as np

def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, applied element-wise.

    The value is evaluated as ``exp(-log(1 + exp(-x)))`` through
    ``np.logaddexp`` so that large negative inputs do not overflow inside
    ``np.exp`` (the direct formula emits a RuntimeWarning there). The result
    saturates to 0 or 1 for extreme inputs.

    Args:
        x: Scalar or NumPy array of values to squash.

    Returns:
        Sigmoid of ``x``; for array input the result has the same shape.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)), computed stably.
    return np.exp(-np.logaddexp(0, -x))

def sigmoid_derivative(x):
    """Slope of the sigmoid, expressed through the sigmoid's own output.

    ``x`` is expected to already be a sigmoid activation ``s`` (not the raw
    pre-activation value); the function evaluates ``s * (1 - s)``.

    Args:
        x: Scalar or NumPy array of activations.

    Returns:
        ``x * (1 - x)``, element-wise for array input.
    """
    complement = 1 - x
    return x * complement

# Network architecture: number of neurons in each layer.
inputs = 2
hiddens = 2
outputs = 1

# Learning rate (scale factor applied to each parameter update).
learning_rate = 0.2

In [2]:
# XOR truth table: each row of X is one sample, the same row of T is its target.
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])  # input
T = np.array([
    [0],
    [1],
    [1],
    [0],
])  # output

# Parameters start as uniform random draws from [0, 1). No seed is set here,
# so every fresh run begins from a different initialisation.
W1 = np.random.uniform(0.0, 1.0, size=(inputs, hiddens))   # input -> hidden weights
W2 = np.random.uniform(0.0, 1.0, size=(hiddens, outputs))  # hidden -> output weights
B1 = np.random.uniform(0.0, 1.0, size=(1, hiddens))        # hidden layer bias (row vector)
B2 = np.random.uniform(0.0, 1.0, size=(1, outputs))        # output layer bias (row vector)

def predict(x):
    """Run one forward pass through the network.

    Uses the module-level parameters ``W1``, ``B1``, ``W2`` and ``B2``.

    Args:
        x: Input sample(s) whose last axis lines up with the rows of ``W1``.

    Returns:
        A tuple ``(x, hidden, output)``: the input as given, the hidden-layer
        activations, and the output-layer activations.
    """
    hidden_in = np.dot(x, W1) + B1        # weighted sum entering the hidden layer
    hidden = sigmoid(hidden_in)
    output_in = np.dot(hidden, W2) + B2   # weighted sum entering the output layer
    output = sigmoid(output_in)
    return x, hidden, output

## Back Propagation

In [3]:
def fit(epochs=100000):
    """Train the network in place with per-sample backpropagation.

    Each epoch visits every (input, target) pair from ``X``/``T`` once and
    immediately applies the resulting update to the module-level parameters
    ``W1``, ``W2``, ``B1`` and ``B2``.

    Args:
        epochs: Number of full passes over the training samples. Defaults to
            100000, the value that used to be hard-coded.

    Returns:
        None. The parameters are modified as a side effect.
    """
    global W1, W2, B1, B2

    # The samples do not change during training, so shape them into row
    # matrices once instead of on every iteration:
    # [0, 0] becomes [[0, 0]] and [0] becomes [[0]].
    samples = [
        (np.reshape(x, (1, -1)), np.reshape(y, (1, -1)))
        for x, y in zip(X, T)
    ]

    for _ in range(epochs):
        for x, y in samples:
            # Feed forward.
            layer0, layer1, layer2 = predict(x)

            # Backpropagate. Both deltas are computed before any parameter is
            # touched, so the hidden-layer error uses the same W2 that
            # produced this prediction.
            layer2_error = y - layer2  # output error
            layer2_delta = layer2_error * sigmoid_derivative(layer2)  # output layer delta
            layer1_error = np.dot(layer2_delta, W2.T)  # error attributed to the hidden layer
            layer1_delta = layer1_error * sigmoid_derivative(layer1)  # hidden layer delta

            # The error is (target - output), hence the updates are added.
            W2 += learning_rate * np.dot(layer1.T, layer2_delta)  # hidden -> output weights
            W1 += learning_rate * np.dot(layer0.T, layer1_delta)  # input -> hidden weights
            B2 += learning_rate * layer2_delta  # output layer bias
            B1 += learning_rate * layer1_delta  # hidden layer bias

In [4]:
def test():
    """Print the network's prediction next to the expected target for every sample.

    All report lines are built first and only then printed, so nothing is
    written if a prediction fails part-way through.
    """
    report = []
    for sample, expected in zip(X, T):
        row = np.reshape(sample, (1, -1))  # single sample as a one-row matrix
        _inp, _hidden, prediction = predict(row)
        report.append("Input: {}, Output: {}, Expected: {}".format(row, prediction, expected))

    for line in report:
        print(line)


In [5]:
# Train the network on the samples in X/T, then print its prediction for each sample.
fit()
test()

Input: [[0 0]], Output: [[0.00898517]], Expected: [0]
Input: [[0 1]], Output: [[0.99238253]], Expected: [1]
Input: [[1 0]], Output: [[0.99237592]], Expected: [1]
Input: [[1 1]], Output: [[0.00780033]], Expected: [0]
