In [525]:
import numpy as np
from random import random

In [526]:
class MLP:

    def __init__(self, num_inputs=3, hidden_layers =[2], num_outputs=2):

        # Initiate array of layers size
        self.num_inputs = num_inputs
        self.hidden_layers = hidden_layers
        self.num_outputs = num_outputs

        # create a generic representation of the layers
        layers = [num_inputs] + hidden_layers + [num_outputs]

        # create random connection weights for the layers
        self.weights = [np.random.rand(layers[i] + 1, layers[i + 1]) for i in range(len(layers) - 1)]

        # save derivatives per layer
        self.derivatives = [np.zeros((layers[i] + 1, layers[i + 1])) for i in range(len(layers) - 1)]

        # save activations per layer

        #print(layers)
        #self.activations = [np.zeros(layers[i] + 1) if i!= len(layers)-1 else np.zeros(layers[i]) for i in range(len(layers))]
        self.activations = [np.zeros(layers[i] + 1) for i in range(len(layers))]

    def forward_propagate(self, inputs):

        # Store the input layer activations
        activations = inputs
        self.activations[0][:-1] = activations
        self.activations[0][-1] = 1
        activations = np.append(activations, 1)

        #print(self.activations)

        for i, w in enumerate(self.weights):
            # Calculate inputs h
            #print(activations, i)
            net_inputs = np.dot(activations, w)

            # Calculate the activations
            activations = self._sigmoid(net_inputs)

            # Save the activation for the current layer (if weights at W1, store a2)
            self.activations[i+1][:-1] = activations
            self.activations[i+1][-1] = 1
            activations = np.append(activations, 1)

            
            # if i != len(self.weights) - 1:
            #     self.activations[i+1][:-1] = activations
            #     self.activations[i+1][-1] = 1
            #     activations = np.append(activations, 1)
            # else:
            #     self.activations[i+1] = activations
            
        return self.activations[-1][:-1]


    def gradient_descent(self, learning_rate, debug = False):
        for i in range(len(self.weights)):
            self.weights[i] -= learning_rate * self.derivatives[i]


    def train(self, inputs, targets, epochs = 10, learning_rate = 0.1, batch_size = 0):

        for i in range(epochs):
            sum_error = 0
            for j in range(len(inputs)):

                # Forward propagate
                output = self.forward_propagate(inputs[j])

                # Find derivative of loss function with respect to output layer
                loss_derivative = output - targets[j]

                # Back propagate to find the derivatives
                self.back_propagate(loss_derivative)

                # Gradient descent to update weights
                self.gradient_descent(learning_rate)

                # Calculate the error at this iteration
                sum_error += self._mse(targets[j], output)
            
            # report error
            print(f"Epoch {i}, Mean Squared Error: {sum_error/len(inputs)}")


    @staticmethod
    def _mse(target, output):
        return np.average((target - output) ** 2)

    @staticmethod
    def _sigmoid_derivative(x):
        return x * (1.0 - x)

    @staticmethod
    def _sigmoid(x):
        return 1.0 / (1 + np.exp(-x))



    def back_propagate(self, error, debug = False):

        for i in reversed(range(len(self.derivatives))):
            # If w2, take derivative of a3
            # define delta as: dE/da_(i+1) * s'(a_i+1)
            #print("At index:", i)
            #print("Error", error)

            #if i == len(self.derivatives) - 1:
            delta = error * self._sigmoid_derivative(self.activations[i+1][:-1])
            #else:
                #delta = error * self._sigmoid_derivative(self.activations[i+1])

            # Converts delta from array [0.1, 0.2] to 1x2 matrix [[0.1, 0.2]]
            delta_reshaped = delta.reshape(delta.shape[0],-1).T

            # get activations for current layer
            #current_activations = self.activations[i]

            #print("delta r: ", delta_reshaped)

            # Converts a_i from array [0.1, 0.2] to 2x1 matrix [[0.1], [0.2]]
            #if i == len(self.derivatives) -1:
            current_activations = self.activations[i].reshape(self.activations[i].shape[0], -1)
            #else:
                #current_activations = self.activations[i][:-1].reshape(self.activations[i][:-1].shape[0], -1)

            #print("currenct: ", current_activations)

            # Get the matrix of derivatives for W_i
            self.derivatives[i] = np.dot(current_activations, delta_reshaped)

            # error for prev layer as: dE/da_(i+1) * s'(a_i+1) * W_i or delta * W_i
            error = np.dot(delta, self.weights[i][:-1].T)

            if debug:
                print(f"Derivatives for W{i}: {self.derivatives[i]}")


In [527]:
# Define data
items = np.array([[random()/2 for _ in range(2)] for _ in range(1000)])
targets = np.array([[i[0] + i[1]] for i in items])

items[0]

array([0.04666414, 0.21413907])

In [528]:
targets[0]

array([0.2608032])

In [529]:
mlp = MLP(num_inputs = 2, hidden_layers = [2], num_outputs = 1)
mlp.activations

[array([0., 0., 0.]), array([0., 0., 0.]), array([0., 0.])]

In [530]:
mlp.forward_propagate(np.array([3, 4]))

array([0.77562787])

In [531]:
mlp.derivatives

[array([[0., 0.],
        [0., 0.],
        [0., 0.]]),
 array([[0.],
        [0.],
        [0.]])]

In [532]:
mlp.train(items, targets, epochs=50, learning_rate=0.1)

Epoch 0, Mean Squared Error: 0.04483743491999746
Epoch 1, Mean Squared Error: 0.041473861304714296
Epoch 2, Mean Squared Error: 0.041006684268530644
Epoch 3, Mean Squared Error: 0.0404720658201423
Epoch 4, Mean Squared Error: 0.0398426606453356
Epoch 5, Mean Squared Error: 0.039085531115876294
Epoch 6, Mean Squared Error: 0.03816068736705501
Epoch 7, Mean Squared Error: 0.03702040364806856
Epoch 8, Mean Squared Error: 0.03561065668091661
Epoch 9, Mean Squared Error: 0.0338768140821938
Epoch 10, Mean Squared Error: 0.03177567119538672
Epoch 11, Mean Squared Error: 0.029293480005309132
Epoch 12, Mean Squared Error: 0.026464291176186393
Epoch 13, Mean Squared Error: 0.023378546490676378
Epoch 14, Mean Squared Error: 0.020174322883443063
Epoch 15, Mean Squared Error: 0.017012434859381483
Epoch 16, Mean Squared Error: 0.014044480751582173
Epoch 17, Mean Squared Error: 0.01138542539164104
Epoch 18, Mean Squared Error: 0.009099514913805616
Epoch 19, Mean Squared Error: 0.007201623635081255
Ep

In [535]:
mlp.forward_propagate(np.array([0.123, 0.321]))

array([0.43067377])