In [1]:
import numpy as np

In [2]:
class FCLayer:
    """Fully connected layer computing ``input @ weights + bias``.

    Attributes:
        input_size: Number of features in each input row.
        output_size: Number of features produced for each input row.
        weights: Array of shape (input_size, output_size).
        bias: Array of shape (1, output_size), broadcast over input rows.
        input: The most recent input given to ``forward`` (as a 2-D array),
            kept so ``backward`` can compute the weight gradient.
    """

    def __init__(self, input_size, output_size):
        """Create the layer with randomly initialised parameters.

        Weights and bias are drawn from NumPy's global standard-normal
        generator and divided by ``sqrt(input_size + output_size)``.
        """
        self.input_size = input_size
        self.output_size = output_size
        scale = np.sqrt(input_size + output_size)
        # Draw order (weights first, then bias) is kept so a seeded run
        # yields the same parameters as before.
        self.weights = np.random.randn(input_size, output_size) / scale
        self.bias = np.random.randn(1, output_size) / scale

    def forward(self, input):
        """Return ``input @ weights + bias`` and cache the input.

        Args:
            input: Array-like of shape (n, input_size); lists and 1-D
                vectors are accepted and promoted to a 2-D array.

        Returns:
            Array of shape (n, output_size).
        """
        # Normalise to a 2-D ndarray so backward() can always transpose it,
        # even when the caller passed a plain Python list.
        self.input = np.atleast_2d(input)
        return np.dot(self.input, self.weights) + self.bias

    def backward(self, output_error, learning_rate):
        """Apply one gradient-descent step and return the input gradient.

        Args:
            output_error: Gradient of the loss with respect to this layer's
                output, shape (n, output_size).
            learning_rate: Step size used for the parameter update.

        Returns:
            Gradient of the loss with respect to the layer's input,
            shape (n, input_size), computed with the pre-update weights.
        """
        output_error = np.atleast_2d(output_error)
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        # The bias is shared by every row of the batch, so its gradient is
        # the sum over rows. For a single row this equals output_error, but
        # for n > 1 subtracting output_error directly cannot be broadcast
        # into the (1, output_size) bias in place.
        bias_error = np.sum(output_error, axis=0, keepdims=True)

        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * bias_error
        return input_error

In [3]:
class ActivationLayer:
    """Parameter-free layer that applies a supplied activation callable.

    Attributes:
        activation: Callable applied to the input in ``forward``.
        activation_prime: Callable applied to the cached input in
            ``backward`` to obtain the local derivative.
        input: The most recent value passed to ``forward``.
    """

    def __init__(self, activation, activation_prime):
        """Store the activation callable and its derivative callable."""
        self.activation, self.activation_prime = activation, activation_prime

    def forward(self, input):
        """Cache ``input`` and return ``activation(input)``."""
        self.input = input
        activated = self.activation(input)
        return activated

    def backward(self, output_error, learning_rate):
        """Return ``output_error`` scaled by the derivative at the cached input.

        ``learning_rate`` is accepted for interface parity with other layers
        but is not used here, as this layer updates no parameters.
        """
        local_gradient = self.activation_prime(self.input)
        return output_error * local_gradient

In [4]:
def tanh(x):
    """Hyperbolic tangent of ``x``, delegating to ``np.tanh``."""
    activated = np.tanh(x)
    return activated

def tanh_prime(x):
    """Derivative of tanh at ``x``, computed as ``1 - tanh(x)**2``."""
    tanh_x = np.tanh(x)
    return 1 - tanh_x ** 2

In [5]:
def mse(y_true, y_pred):
    """Mean squared error between targets and predictions.

    Args:
        y_true: Array-like of target values.
        y_pred: Array-like of predicted values, broadcastable with
            ``y_true``.

    Returns:
        The mean of the squared element-wise differences.
    """
    # Coerce to ndarrays so plain Python lists can be subtracted too;
    # ndarray inputs pass through unchanged.
    residual = np.asarray(y_true) - np.asarray(y_pred)
    return np.mean(np.power(residual, 2))

def mse_prime(y_true, y_pred):
    """Gradient of the mean squared error with respect to ``y_pred``.

    Args:
        y_true: Array-like of target values.
        y_pred: Array-like of predicted values.

    Returns:
        ``2 * (y_pred - y_true) / y_pred.size``, element-wise.
    """
    # Coerce to ndarrays so lists and plain floats (which have no `.size`)
    # are accepted; ndarray inputs pass through unchanged.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return 2 * (y_pred - y_true) / y_pred.size


In [6]:
# Seed NumPy's global RNG, which FCLayer draws its initial weights from,
# so that Restart & Run All reproduces the same initialisation and error
# curve. Choose another value if this initialisation trains poorly.
SEED = 42
np.random.seed(SEED)

# training data: the XOR truth table, one (1, 2) input row and one (1, 1)
# target per sample
x_train = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]]])
y_train = np.array([[[0]], [[1]], [[1]], [[0]]])

# network: 2 inputs -> 3 hidden units -> 1 output, tanh after each dense layer
network = [
    FCLayer(2, 3),
    ActivationLayer(tanh, tanh_prime),
    FCLayer(3, 1),
    ActivationLayer(tanh, tanh_prime)
]

epochs = 1000
learning_rate = 0.1

# training: per-sample gradient descent, parameters updated after every sample
for epoch in range(epochs):
    error = 0
    for x, y_true in zip(x_train, y_train):
        # forward
        output = x
        for layer in network:
            output = layer.forward(output)

        # error (display purpose only)
        error += mse(y_true, output)

        # backward: propagate the loss gradient from the last layer to the
        # first; each layer updates its own parameters as it goes
        output_error = mse_prime(y_true, output)
        for layer in reversed(network):
            output_error = layer.backward(output_error, learning_rate)

    # average loss over the samples seen in this epoch
    error /= len(x_train)
    print('%d/%d, error=%f' % (epoch + 1, epochs, error))


1/1000, error=0.703819
2/1000, error=0.341610
3/1000, error=0.306824
4/1000, error=0.300306
5/1000, error=0.298148
6/1000, error=0.297107
7/1000, error=0.296442
8/1000, error=0.295927
9/1000, error=0.295477
10/1000, error=0.295058
11/1000, error=0.294652
12/1000, error=0.294253
13/1000, error=0.293857
14/1000, error=0.293462
15/1000, error=0.293066
16/1000, error=0.292669
17/1000, error=0.292269
18/1000, error=0.291866
19/1000, error=0.291458
20/1000, error=0.291046
21/1000, error=0.290627
22/1000, error=0.290202
23/1000, error=0.289770
24/1000, error=0.289329
25/1000, error=0.288879
26/1000, error=0.288419
27/1000, error=0.287947
28/1000, error=0.287463
29/1000, error=0.286965
30/1000, error=0.286453
31/1000, error=0.285925
32/1000, error=0.285380
33/1000, error=0.284818
34/1000, error=0.284235
35/1000, error=0.283632
36/1000, error=0.283007
37/1000, error=0.282358
38/1000, error=0.281685
39/1000, error=0.280984
40/1000, error=0.280256
41/1000, error=0.279499
42/1000, error=0.278710
4

In [7]:
def predict(network, input):
    """Run ``input`` through every layer of ``network`` in order.

    Each layer's ``forward`` receives the previous layer's result; the value
    returned by the final layer is the prediction. An empty network returns
    ``input`` unchanged.
    """
    signal = input
    for stage in network:
        signal = stage.forward(signal)
    return signal

# Evaluate the trained network on all four input pairs in a single call.
print(
    predict(
        network,
        [[0,0], [0,1], [1,0], [1,1]],
    )
)

[[ 8.58962325e-04]
 [ 9.78481308e-01]
 [ 9.76418412e-01]
 [-1.54543035e-03]]
