In [16]:
import numpy as np
from random import random
# What this cell implements:
#   - storage for per-layer activations and per-weight derivatives
#   - backpropagation
#   - gradient descent
#   - a training method
# The following cell trains the network on dummy data.

class MLP:
    """Minimal multilayer perceptron trained with per-sample gradient descent.

    The network has no bias terms and applies a sigmoid after every layer,
    including the output layer, so outputs always lie in (0, 1).

    Attributes:
        num_inputs: size of the input layer.
        num_hidden: list with the size of each hidden layer.
        num_outputs: size of the output layer.
        weights: list of arrays; weights[i] has shape (layers[i], layers[i+1]).
        activations: list of arrays; activations[i] is the output of layer i
            recorded by the most recent forward pass.
        derivatives: list of arrays with the same shapes as ``weights``,
            filled in by ``back_propogate``.
    """

    def __init__(self, num_inputs=3, num_hidden=[3,5], num_outputs=2):
        """Build the layers and initialise the weights randomly.

        Args:
            num_inputs: number of input features.
            num_hidden: sequence with the number of units in each hidden layer.
            num_outputs: number of output units.

        Side effects:
            Prints the layer sizes and the initial weight matrices.
        """
        self.num_inputs = num_inputs
        # Copy the sequence: storing it by reference would alias the shared
        # default list (or the caller's list), so mutating the attribute on
        # one instance would leak into every later MLP().
        self.num_hidden = list(num_hidden)
        self.num_outputs = num_outputs

        layers = [self.num_inputs] + self.num_hidden + [self.num_outputs]
        print("layers: {}".format(layers))

        # One weight matrix per pair of consecutive layers, uniform in [0, 1).
        self.weights = [
            np.random.rand(layers[i], layers[i + 1])
            for i in range(len(layers) - 1)
        ]
        print("weights: {}".format(self.weights))

        # Per-layer activations, overwritten on every forward pass.
        self.activations = [np.zeros(size) for size in layers]

        # Per-weight derivatives, overwritten on every backward pass.
        self.derivatives = [
            np.zeros((layers[i], layers[i + 1]))
            for i in range(len(layers) - 1)
        ]

    def sigmoid(self, x):
        """Return the logistic function 1 / (1 + e^-x), applied element-wise."""
        return 1 / (1 + np.exp(-x))

    def forward_propogate(self, inputs):
        """Run a forward pass and remember the activations of every layer.

        Args:
            inputs: one sample as an array-like of ``num_inputs`` numbers.

        Returns:
            The activations of the output layer.
        """
        # Convert up front: back_propogate needs ndarray activations, so a
        # plain list stored here would only fail later, during the backward pass.
        activations = np.asarray(inputs, dtype=float)
        self.activations[0] = activations

        for i, w in enumerate(self.weights):
            net_inputs = np.dot(activations, w)
            activations = self.sigmoid(net_inputs)
            self.activations[i + 1] = activations
        return activations

    def back_propogate(self, error, verbose=False):
        """Propagate ``error`` backwards and fill ``self.derivatives``.

        Must be called after ``forward_propogate``, because it reads the
        activations stored by the forward pass. For each weight matrix W_i,
        going from the last layer to the first:

            delta          = error * s'(h_[i+1])
            derivatives[i] = outer(a_i, delta)
            error          = delta . W_i^T        (error passed to layer i)

        where s'(h_[i+1]) = a_[i+1] * (1 - a_[i+1]) because a_[i+1] = s(h_[i+1]).

        Args:
            error: error at the output layer; ``train`` passes target - output.
            verbose: when True, print the derivatives of every weight matrix.

        Returns:
            The error propagated back to the input layer.
        """
        for i in reversed(range(len(self.derivatives))):
            layer_output = self.activations[i + 1]
            delta = error * self._sigmoid_derivative(layer_output)

            # Column vector a_i times row vector delta: one entry per weight,
            # giving the same (layers[i], layers[i+1]) shape as weights[i].
            self.derivatives[i] = np.outer(self.activations[i], delta)

            # Error attributed to the previous layer.
            error = np.dot(delta, self.weights[i].T)

            if verbose:
                print("Derivatives for W{}: {}".format(i, self.derivatives[i]))
        return error

    def gradient_descent(self, learning_rate):
        """Update every weight matrix in place from the stored derivatives.

        The update is additive. With error = target - output, as used by
        ``train``, the stored derivatives already point in the direction that
        reduces the squared error, so adding them is the descent step.

        Args:
            learning_rate: step size applied to the derivatives.
        """
        for i, derivatives in enumerate(self.derivatives):
            self.weights[i] += derivatives * learning_rate

    def train(self, inputs, targets, epochs, learning_rate):
        """Train with one weight update per sample and report the error per epoch.

        Args:
            inputs: sequence of samples, each array-like of ``num_inputs`` numbers.
            targets: sequence where targets[j] is the target for inputs[j].
            epochs: number of passes over ``inputs``.
            learning_rate: step size passed to ``gradient_descent``.

        Raises:
            ZeroDivisionError: if ``inputs`` is empty and ``epochs`` > 0,
                because the mean error per sample cannot be computed.

        Side effects:
            Prints the mean of the per-sample MSE after every epoch.
        """
        for epoch in range(epochs):
            sum_error = 0
            for j, sample in enumerate(inputs):
                target = np.asarray(targets[j], dtype=float)

                output = self.forward_propogate(sample)
                error = target - output

                self.back_propogate(error)
                self.gradient_descent(learning_rate)

                # The error is measured on the prediction made before this update.
                sum_error += self.mse(target, output)

            print("Error: {} at epoch {}".format(sum_error / len(inputs), epoch))

    def mse(self, target, output):
        """Return the mean squared error between ``target`` and ``output``."""
        return np.average((target - output)**2)

    def _sigmoid_derivative(self, x):
        """Return the sigmoid derivative, given the sigmoid output ``x``.

        ``x`` must already be s(h), not h: the derivative is s(h) * (1 - s(h)).
        """
        return x * (1.0 - x)

In [19]:
# Seed NumPy's global generator so that the dataset below and the MLP's
# initial weights (drawn with np.random.rand) are identical on every run.
SEED = 42
np.random.seed(SEED)

# create dataset: 1000 pairs of numbers in [0, 0.5) with their sum as target,
# so every target lies in [0, 1) and is reachable by a sigmoid output
items = np.random.rand(1000, 2) / 2
targets = items.sum(axis=1, keepdims=True)

# create an MLP
mlp = MLP(2, [5], 1)

# train
mlp.train(items, targets, 50, 0.1)

# create dummy data
# (named `sample`, not `input`, so the builtin input() stays usable in later cells)
sample = np.array([0.3, 0.1])
target = np.array([0.4])  # expected sum, kept for reference

# get a prediction
output = mlp.forward_propogate(sample)

print()
print("Our network believes that {} + {} is equal to {}".format(sample[0], sample[1], output[0]))

layers: [2, 5, 1]
weights: [array([[0.15184358, 0.18056597, 0.95689458, 0.60730177, 0.83672168],
       [0.97754456, 0.76610433, 0.45501204, 0.72714731, 0.01648435]]), array([[0.25459764],
       [0.54976649],
       [0.15821119],
       [0.78592048],
       [0.15251895]])]
Error: 0.04422337418290172 at epoch 0
Error: 0.040444808973632174 at epoch 1
Error: 0.04023678871251103 at epoch 2
Error: 0.03999182510650238 at epoch 3
Error: 0.039692096594333606 at epoch 4
Error: 0.039316985070717826 at epoch 5
Error: 0.03884235697613675 at epoch 6
Error: 0.03824009789629594 at epoch 7
Error: 0.03747813164324307 at epoch 8
Error: 0.03652129566585495 at epoch 9
Error: 0.03533360728062457 at epoch 10
Error: 0.033882546618159565 at epoch 11
Error: 0.032145783633849505 at epoch 12
Error: 0.030119952153569725 at epoch 13
Error: 0.02782944346567982 at epoch 14
Error: 0.025331347101543847 at epoch 15
Error: 0.022712325496581177 at epoch 16
Error: 0.020076032590426618 at epoch 17
Error: 0.017524761226599