In [1]:
import numpy as np
from random import random

In [39]:
class MLP:
    """A minimal multilayer perceptron with sigmoid activations.

    The network is fully connected, has no bias terms, and is trained one
    sample at a time (online gradient descent). `forward_propagate` caches
    the activations of every layer so that `back_propagate` can compute the
    per-layer weight derivatives, which `gradient_descent` then applies.

    Attributes:
        num_inputs: number of input features.
        hidden_layers: list with the neuron count of each hidden layer.
        num_outputs: number of output neurons.
        weights: list of arrays; weights[i] has shape
            (layers[i], layers[i + 1]).
        derivatives: list of arrays shaped like `weights`, filled in by
            `back_propagate`.
        activations: list of arrays, one per layer (input layer included),
            filled in by `forward_propagate`.
    """

    def __init__(self, num_inputs=3, hidden_layers=None, num_outputs=2):
        """Build the network and initialise its weights randomly.

        Args:
            num_inputs: number of input features.
            hidden_layers: iterable with the size of each hidden layer;
                defaults to two hidden layers of 3 neurons each.
            num_outputs: number of output neurons.
        """
        # A list literal as a default argument is shared by every call, so a
        # mutation through one instance would leak into the next; use a None
        # sentinel and keep a private copy instead.
        if hidden_layers is None:
            hidden_layers = [3, 3]

        self.num_inputs = num_inputs
        self.hidden_layers = list(hidden_layers)
        self.num_outputs = num_outputs

        # generic representation of the layer sizes, input to output
        layers = [num_inputs] + self.hidden_layers + [num_outputs]
        num_connections = len(layers) - 1

        # random connection weights, uniform in [0, 1), one matrix per pair
        # of consecutive layers
        self.weights = [
            np.random.rand(layers[i], layers[i + 1])
            for i in range(num_connections)
        ]

        # storage for the derivative of the error w.r.t. each weight matrix
        self.derivatives = [
            np.zeros((layers[i], layers[i + 1]))
            for i in range(num_connections)
        ]

        # storage for the activations of every layer
        self.activations = [np.zeros(size) for size in layers]

    def forward_propagate(self, inputs):
        """Compute the network output for a single sample.

        Args:
            inputs: 1-D array-like with `num_inputs` values.

        Returns:
            ndarray with the activations of the output layer.
        """
        # Convert up front so list/tuple inputs work too: back_propagate
        # calls ndarray methods on the cached input activations.
        activations = np.asarray(inputs, dtype=float)

        # the input layer activation is just the input itself; cache it for
        # backpropagation
        self.activations[0] = activations

        for i, w in enumerate(self.weights):
            # weighted sum of the previous layer's activations
            net_inputs = np.dot(activations, w)
            # squash through the sigmoid and cache for backpropagation
            activations = self.sigmoid(net_inputs)
            self.activations[i + 1] = activations

        return activations

    def back_propagate(self, error):
        """Fill `self.derivatives` from the output error of the last forward pass.

        Must be called after `forward_propagate`, since it reads the cached
        activations. Only single-sample (1-D) activations are supported.

        With delta_[i+1] = error_[i+1] * s'(h_[i+1]):
            derivatives[i] = a_i (outer) delta_[i+1]
            error_i        = delta_[i+1] . W_i^T

        Args:
            error: 1-D array `target - output` for the output layer.
        """
        for i in reversed(range(len(self.derivatives))):
            # activations of the layer that weights[i] feeds INTO
            next_activations = self.activations[i + 1]
            # sigmoid_derivative expects the sigmoid output, which is exactly
            # what the cached activations are
            delta = error * self.sigmoid_derivative(next_activations)

            # activations of the layer that weights[i] reads FROM
            current_activations = self.activations[i]

            # outer product: derivatives[i][j, k] = a_i[j] * delta[k], which
            # has the same shape as weights[i]
            self.derivatives[i] = np.outer(current_activations, delta)

            # propagate the error one layer back through the weights
            error = np.dot(delta, self.weights[i].T)

    def train(self, inputs, targets, epochs, learning_rate):
        """Train with per-sample gradient descent and print the mean MSE per epoch.

        Args:
            inputs: sequence of samples, each a 1-D array-like.
            targets: sequence of targets, indexed in step with `inputs`.
            epochs: number of passes over the training data.
            learning_rate: step size handed to `gradient_descent`.

        Raises:
            ValueError: if `inputs` is empty.
        """
        num_samples = len(inputs)
        if num_samples == 0:
            raise ValueError("inputs must contain at least one training sample")

        for epoch in range(epochs):
            sum_errors = 0
            # iterate through all the training data
            for j, sample in enumerate(inputs):
                target = targets[j]
                # activate the network
                output = self.forward_propagate(sample)
                error = target - output
                self.back_propagate(error)
                # update the weights using the derivatives just computed
                self.gradient_descent(learning_rate)
                # keep track of the MSE for reporting
                sum_errors += self.MSE(target, output)
            # Average over the samples actually trained on, not over a
            # notebook-level global that may be missing or a different size.
            print("Error: {} at epoch {}".format(sum_errors / num_samples, epoch + 1))

    def gradient_descent(self, learningRate=1):
        """Update the weights in place from the stored derivatives.

        Args:
            learningRate: step size.
        """
        # The error is defined as (target - output), so the stored
        # derivatives already point downhill; ADDING them reduces the error.
        for weights, derivatives in zip(self.weights, self.derivatives):
            weights += derivatives * learningRate

    def sigmoid(self, x):
        """Return the logistic sigmoid 1 / (1 + e^-x), element-wise."""
        return 1.0 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        """Return the sigmoid derivative given x = sigmoid(h), i.e. x * (1 - x)."""
        return x * (1.0 - x)

    def MSE(self, target, output):
        """Return the mean squared error between `target` and `output`."""
        return np.average((target - output) ** 2)

In [41]:
# Toy dataset for the sum operation: 1000 pairs of operands, each operand
# drawn from [0, 0.5) so that every target sum stays below 1.
items = np.array([[random() / 2, random() / 2] for _ in range(1000)])
# the target of each row is the sum of its two operands, kept as a column
targets = items[:, [0]] + items[:, [1]]

# Multilayer perceptron: 2 inputs, one hidden layer of 5 neurons, 1 output
mlp = MLP(num_inputs=2, hidden_layers=[5], num_outputs=1)

# fit the network: 20 epochs with a learning rate of 0.1
mlp.train(items, targets, epochs=20, learning_rate=0.1)

Error: 0.043105932286494536 at epoch 1
Error: 0.04031586935110494 at epoch 2
Error: 0.040170498600483294 at epoch 3
Error: 0.03998592189168139 at epoch 4
Error: 0.03974662305986018 at epoch 5
Error: 0.03943293851684384 at epoch 6
Error: 0.03901999093694101 at epoch 7
Error: 0.03847681265406439 at epoch 8
Error: 0.0377660888335759 at epoch 9
Error: 0.036845297180482024 at epoch 10
Error: 0.035670369815816526 at epoch 11
Error: 0.034203036014616356 at epoch 12
Error: 0.032422091753967625 at epoch 13
Error: 0.030336421250265786 at epoch 14
Error: 0.02799431664364863 at epoch 15
Error: 0.025482349992743254 at epoch 16
Error: 0.022910940849942055 at epoch 17
Error: 0.020391591126220614 at epoch 18
Error: 0.01801592924608311 at epoch 19
Error: 0.01584446536057823 at epoch 20


Prediction

In [38]:
# One pair of operands to query the trained network with, and the true sum.
# NOTE: the name `input` shadows Python's built-in `input()` from here on.
input, target = np.array([0.3, 0.1]), np.array([0.4])

# run the forward pass to get the network's prediction
output = mlp.forward_propagate(input)

# blank separator line, then the two operands followed by the predicted sum
print()
print("Our network believes that {} + {} is equal to {}".format(*input, output[0]))


Our network believes that 0.3 + 0.1 is equal to 0.4423588287292325
