In [21]:
import numpy as np
from random import random

In [22]:
class MLP:
    """A small fully connected multilayer perceptron with sigmoid activations.

    Every layer except the output layer feeds a constant bias unit (value 1.0)
    into the next layer.  The weight matrix between layer ``i`` and layer
    ``i + 1`` therefore has shape ``(layers[i] + 1, layers[i + 1])`` and its
    last row holds the bias weights.

    Attributes:
        weights: list of weight matrices, one per pair of consecutive layers.
        derivatives: list of arrays shaped like ``weights`` holding the most
            recently computed loss gradients.
        activations: list of 1-D arrays holding the unit outputs of every
            layer (without the bias entry) from the latest forward pass.
    """

    def __init__(self, num_inputs=3, hidden_layers=[2], num_outputs=2):
        """Allocate weights, gradient buffers and activation buffers.

        Args:
            num_inputs: number of input units.
            hidden_layers: sizes of the hidden layers, in order.
            num_outputs: number of output units.
        """
        self.num_inputs = num_inputs
        # Copy so the instance never aliases the shared mutable default (or
        # the caller's list).
        self.hidden_layers = list(hidden_layers)
        self.num_outputs = num_outputs

        # create a generic representation of the layers
        layers = [num_inputs] + self.hidden_layers + [num_outputs]

        # random connection weights; the extra row (+1) holds the bias weights
        self.weights = [np.random.rand(layers[i] + 1, layers[i + 1]) for i in range(len(layers) - 1)]

        # gradient buffers, one per weight matrix and of the same shape
        self.derivatives = [np.zeros((layers[i] + 1, layers[i + 1])) for i in range(len(layers) - 1)]

        # unit outputs per layer; the bias unit is appended on the fly when a
        # layer is used as input to the next one, so it is not stored here
        self.activations = [np.zeros(layers[i]) for i in range(len(layers))]

    def back_propagate(self, error, debug = False):
        """Compute the loss gradient for every weight matrix.

        Must be called after ``forward_propagate`` because it reads the
        activations saved by the latest forward pass.

        Args:
            error: derivative of the loss with respect to the network output
                (1-D array with one entry per output unit).
            debug: when True, print each gradient matrix as it is computed.
        """
        for i in reversed(range(len(self.derivatives))):
            # delta = dE/da_(i+1) * s'(h_(i+1)); the sigmoid derivative is
            # expressed through the saved sigmoid outputs
            delta = error * self._sigmoid_derivative(self.activations[i + 1])

            # inputs that fed W_i in the forward pass, bias unit included
            layer_inputs = self._with_bias(self.activations[i])

            # dE/dW_i[j, k] = input_j * delta_k
            self.derivatives[i] = np.outer(layer_inputs, delta)

            # error for the previous layer: delta * W_i^T.  The bias row is
            # dropped because the bias unit has no incoming connections.
            error = np.dot(delta, self.weights[i][:-1].T)

            if debug:
                print(f"Derivatives for W{i}: {self.derivatives[i]}")

    def forward_propagate(self, inputs):
        """Run one sample through the network and cache every activation.

        Args:
            inputs: 1-D array-like with ``num_inputs`` entries.

        Returns:
            1-D array with the activations of the output layer.
        """
        # Store the input layer activations
        activations = inputs
        self.activations[0] = activations

        for i, w in enumerate(self.weights):
            # net input h = [a_i, 1] . W_i  (the trailing 1 is the bias unit)
            net_inputs = np.dot(self._with_bias(activations), w)

            # Calculate the activations
            activations = self._sigmoid(net_inputs)

            # Save the activation for the current layer (if weights at W1, store a2)
            self.activations[i + 1] = activations

        return activations

    def gradient_descent(self, learning_rate, debug = False):
        """Update every weight matrix in place using the stored gradients.

        Args:
            learning_rate: step size multiplied into the gradients.
            debug: accepted for signature compatibility; currently unused.
        """
        for i in range(len(self.weights)):
            self.weights[i] -= learning_rate * self.derivatives[i]

    def train(self, inputs, targets, epochs = 10, learning_rate = 0.1, batch_size = 0):
        """Train with per-sample gradient descent and print the error per epoch.

        Args:
            inputs: sequence of 1-D input vectors.
            targets: sequence of 1-D target vectors, aligned with ``inputs``.
            epochs: number of passes over the whole data set.
            learning_rate: step size used for every weight update.
            batch_size: accepted for signature compatibility; currently unused.
        """
        for i in range(epochs):
            sum_error = 0
            for j in range(len(inputs)):

                # Forward propagate
                output = self.forward_propagate(inputs[j])

                # Derivative of the loss with respect to the output layer
                loss_derivative = output - targets[j]

                # Back propagate to find the derivatives
                self.back_propagate(loss_derivative)

                # Gradient descent to update weights
                self.gradient_descent(learning_rate)

                # Accumulate the error measured before this sample's update
                sum_error += self._mse(targets[j], output)

            # report error
            print(f"Epoch {i}, Mean Squared Error: {sum_error/len(inputs)}")

    @staticmethod
    def _with_bias(values):
        """Return ``values`` (1-D) with a constant bias entry of 1.0 appended."""
        return np.append(values, 1.0)

    @staticmethod
    def _mse(target, output):
        """Return the mean of the squared differences between target and output."""
        return np.average((target - output) ** 2)

    @staticmethod
    def _sigmoid_derivative(x):
        """Return the sigmoid derivative given ``x``, the sigmoid's *output*."""
        return x * (1.0 - x)

    @staticmethod
    def _sigmoid(x):
        """Return the element-wise logistic sigmoid of ``x``."""
        return 1.0 / (1 + np.exp(-x))


In [23]:
# Training set: 1000 input pairs, each component drawn uniformly from
# [0, 0.5), labelled with the product of the pair's two components.
items = np.array([[random() / 2, random() / 2] for _ in range(1000)])
targets = (items[:, 0] * items[:, 1]).reshape(-1, 1)

items[0]

array([0.15617142, 0.12616124])

In [24]:
# Build a network with 2 inputs, one hidden layer of 2 units and 1 output
mlp = MLP(num_inputs = 2, hidden_layers=[2], num_outputs = 1)
# Show the per-layer activation buffers; they are zero-filled until a forward pass runs
mlp.activations

[array([0., 0.]), array([0., 0.]), array([0.])]

In [25]:
# Fit the network to the product data for 50 epochs, updating the weights after
# every sample; train() prints the mean squared error once per epoch
mlp.train(items, targets, epochs=50, learning_rate=0.1)

Epoch 0, Mean Squared Error: 0.05705631649368361
Epoch 1, Mean Squared Error: 0.007070253526140477
Epoch 2, Mean Squared Error: 0.004745998727957247
Epoch 3, Mean Squared Error: 0.004110631768882561
Epoch 4, Mean Squared Error: 0.0038501056274325473
Epoch 5, Mean Squared Error: 0.0037179745766159544
Epoch 6, Mean Squared Error: 0.0036401280860459333
Epoch 7, Mean Squared Error: 0.0035882623997721443
Epoch 8, Mean Squared Error: 0.003549853053061957
Epoch 9, Mean Squared Error: 0.003518776657805114
Epoch 10, Mean Squared Error: 0.0034918177773068195
Epoch 11, Mean Squared Error: 0.003467196114032446
Epoch 12, Mean Squared Error: 0.0034438855356632037
Epoch 13, Mean Squared Error: 0.0034212760151423076
Epoch 14, Mean Squared Error: 0.0033989961116732475
Epoch 15, Mean Squared Error: 0.0033768154571243852
Epoch 16, Mean Squared Error: 0.003354589198193887
Epoch 17, Mean Squared Error: 0.003332225386643308
Epoch 18, Mean Squared Error: 0.0033096653685644934
Epoch 19, Mean Squared Error: 0.

In [26]:
# Query the trained network on one input pair.  The targets were the product of
# the two inputs, so a perfect fit would give about 0.2853 * 0.623 = 0.178.
# NOTE: 0.623 lies outside the [0, 0.5) range the training inputs were drawn
# from, so this is an extrapolation.
mlp.forward_propagate(np.array([0.2853, 0.623]))

array([0.07460465])