In [1]:
# This code is heavily influenced by Valerio Velardo's machine learning course.

In [2]:
import numpy as np
from random import random

In [3]:
class MLP(object):
    """A small fully connected multilayer perceptron with sigmoid activations.

    Every layer (including the output layer) applies the sigmoid function,
    so network outputs always lie strictly between 0 and 1. There are no bias
    terms: each layer is a single weight matrix.

    Attributes:
        num_inputs: number of input units.
        hidden_layers: list with the number of units in each hidden layer.
        num_outputs: number of output units.
        weights: list of weight matrices; ``weights[i]`` has shape
            ``(layers[i], layers[i + 1])``.
        derivatives: list of arrays shaped like ``weights``, overwritten by
            each call to ``back_propagate``.
        activations: list with the most recent activation of every layer,
            overwritten by each call to ``forward_propagate``.
    """

    def __init__(self, num_inputs=3, hidden_layers=[3, 3], num_outputs=2):
        """Build the network and initialise its weights.

        Args:
            num_inputs: number of input units.
            hidden_layers: sequence with the size of each hidden layer.
            num_outputs: number of output units.
        """
        self.num_inputs = num_inputs
        # Copy the sequence so the instance never aliases the shared default
        # list (or the caller's list); this also lets callers pass a tuple.
        self.hidden_layers = list(hidden_layers)
        self.num_outputs = num_outputs

        # Layer sizes from input to output
        layers = [num_inputs] + self.hidden_layers + [num_outputs]
        num_connections = len(layers) - 1

        # One weight matrix per pair of consecutive layers, drawn uniformly
        # from [0, 1)
        self.weights = [
            np.random.rand(layers[i], layers[i + 1]) for i in range(num_connections)
        ]

        # One derivative matrix per weight matrix, filled in by back_propagate
        self.derivatives = [
            np.zeros((layers[i], layers[i + 1])) for i in range(num_connections)
        ]

        # One activation vector per layer, filled in by forward_propagate
        self.activations = [np.zeros(size) for size in layers]


    def forward_propagate(self, inputs):
        """Compute the network output for one sample.

        The activation of every layer is stored in ``self.activations`` so
        that ``back_propagate`` can use it afterwards.

        Args:
            inputs: the input values for a single sample (array-like).

        Returns:
            The activations of the output layer.
        """
        # The input layer activation is just the input itself. Converting to
        # an ndarray lets plain lists work too: back_propagate calls
        # ``.reshape`` on the stored activations.
        activations = np.asarray(inputs)
        self.activations[0] = activations

        for i, w in enumerate(self.weights):
            # Weighted sum of the previous layer's activations
            net_inputs = np.dot(activations, w)
            # Squash through the sigmoid and remember it for backpropagation
            activations = self._sigmoid(net_inputs)
            self.activations[i + 1] = activations

        return activations


    def back_propagate(self, error):
        """Fill ``self.derivatives`` by propagating ``error`` backwards.

        Must be called after ``forward_propagate``, because it reads the
        activations stored by that call. The reshapes below are written for a
        single sample whose activations are 1-D vectors.

        Args:
            error: error signal at the output layer. ``train`` passes
                ``target - output``; with that sign convention the stored
                derivatives point in the direction that reduces the error,
                which is why ``gradient_descent`` adds them to the weights.
        """
        # Walk the weight matrices from the last one to the first
        for i in reversed(range(len(self.derivatives))):
            # Activations of the layer that weights[i] feeds into
            layer_output = self.activations[i + 1]
            # Scale the error by the sigmoid slope at those activations
            delta = error * self._sigmoid_derivative(layer_output)
            # Turn delta into a 2-D row vector
            delta_row = delta.reshape(delta.shape[0], -1).T
            # Activations of the layer that feeds weights[i], as a 2-D column
            layer_input = self.activations[i]
            layer_input_col = layer_input.reshape(layer_input.shape[0], -1)
            # Outer product: one derivative per connection in weights[i]
            self.derivatives[i] = np.dot(layer_input_col, delta_row)
            # Push the error back through the weights to the preceding layer
            error = np.dot(delta, self.weights[i].T)


    def train(self, inputs, targets, epochs, learning_rate):
        """Train the network one sample at a time and print the error per epoch.

        Args:
            inputs: sequence of training samples.
            targets: sequence of expected outputs, aligned with ``inputs``.
            epochs: number of passes over the training data.
            learning_rate: step size passed to ``gradient_descent``.

        Raises:
            ValueError: if ``inputs`` is empty (the mean error is undefined).
        """
        # Use the size of the data actually passed in, not a global variable,
        # when averaging the error.
        num_samples = len(inputs)
        if num_samples == 0:
            raise ValueError("Cannot train on an empty dataset")

        for epoch in range(epochs):
            sum_errors = 0
            # Iterate through all the training data
            for j, sample in enumerate(inputs):
                target = targets[j]

                # Forward pass, then propagate the output error backwards
                output = self.forward_propagate(sample)
                error = target - output
                self.back_propagate(error)

                # Update the weights from the derivatives just computed
                self.gradient_descent(learning_rate)

                # Keep track of the MSE for reporting later
                sum_errors += self._mse(target, output)

            # Epoch complete, report the mean training error
            print(f"Error: {sum_errors / num_samples} at epoch {epoch+1}")

        print("Training complete!")
        print("=====")


    def gradient_descent(self, learningRate=1):
        """Update every weight matrix in place from ``self.derivatives``.

        The derivatives are added (not subtracted) because ``train`` computes
        the error as ``target - output``.

        Args:
            learningRate: factor applied to the derivatives before the update.
        """
        for i in range(len(self.weights)):
            weights = self.weights[i]
            derivatives = self.derivatives[i]
            # In-place update so self.weights[i] itself is modified
            weights += derivatives * learningRate


    def _sigmoid(self, x):
        """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``."""
        y = 1.0 / (1 + np.exp(-x))
        return y


    def _sigmoid_derivative(self, x):
        """Return the sigmoid slope given ``x``, an already-computed sigmoid output."""
        return x * (1.0 - x)


    def _mse(self, target, output):
        """Return the mean squared difference between ``target`` and ``output``."""
        return np.average((target - output) ** 2)

In [4]:
# Build a toy dataset for learning addition: 1000 samples, each holding two
# random numbers halved so that their sum stays below 1 (the network's
# sigmoid output cannot exceed 1).
items = np.array([[random() / 2, random() / 2] for _ in range(1000)])
# The target for each sample is the sum of its two numbers, kept as a column
targets = items[:, :1] + items[:, 1:]

# Two inputs, a single hidden layer of five units, one output
mlp = MLP(num_inputs=2, hidden_layers=[5], num_outputs=1)

# Fit the network: 100 passes over the data with a learning rate of 0.1
mlp.train(items, targets, epochs=100, learning_rate=0.1)

Error: 0.05333976149860325 at epoch 1
Error: 0.041223595480222724 at epoch 2
Error: 0.040862046803050604 at epoch 3
Error: 0.04053365903969467 at epoch 4
Error: 0.04020691907306425 at epoch 5
Error: 0.03985137991845316 at epoch 6
Error: 0.03943511537935198 at epoch 7
Error: 0.038922593941165366 at epoch 8
Error: 0.03827311904681521 at epoch 9
Error: 0.03744030394135217 at epoch 10
Error: 0.036373450782118615 at epoch 11
Error: 0.03502205406903589 at epoch 12
Error: 0.03334454644669547 at epoch 13
Error: 0.03132116806853059 at epoch 14
Error: 0.028968020011979314 at epoch 15
Error: 0.026345992476765362 at epoch 16
Error: 0.02355756130321417 at epoch 17
Error: 0.020729338989667948 at epoch 18
Error: 0.01798658283839708 at epoch 19
Error: 0.015430583629316787 at epoch 20
Error: 0.013126882529086803 at epoch 21
Error: 0.011105169045539555 at epoch 22
Error: 0.009366629487659943 at epoch 23
Error: 0.007893666421957832 at epoch 24
Error: 0.0066586205334461844 at epoch 25
Error: 0.00563015739

In [18]:
# Ask the trained network to add two hand-picked pairs of numbers.
# `input` and `output` remain bound to the last pair and its prediction
# once the loop finishes.
for input in (np.array([0.3, 0.1]), np.array([0.4, 0.2])):
    # Run a forward pass to get the prediction for this pair
    output = mlp.forward_propagate(input)
    print(f"Our network believes that {input[0]} + {input[1]} is equal to {output[0]}")

Our network believes that 0.3 + 0.1 is equal to 0.39741702677622653
Our network believes that 0.4 + 0.2 is equal to 0.6202472984095848
