In [23]:
%matplotlib widget
import matplotlib.pyplot as plt
import numpy as np

### 1. Preparation
Implement a function **sigmoid(x)** and a function **sigmoidprime(x)**.

In [24]:
def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``.

    :param x: input value(s); NumPy arrays are processed element-wise
    :type x: float or numpy.array
    :return: sigmoid of x
    :rtype: float or numpy.array
    """
    return 1 / (1 + np.exp(-x))

def sigmoidprime(x):
    """Derivative of :func:`sigmoid` with respect to its input, evaluated at ``x``.

    Uses the identity ``sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))``. Note that
    ``x`` is the *input* of the sigmoid (the weighted sum), not its output.

    :param x: input value(s) at which the derivative is evaluated
    :type x: float or numpy.array
    :return: derivative of the sigmoid at x
    :rtype: float or numpy.array
    """
    # Evaluate the sigmoid once and reuse it for both factors.
    activation = sigmoid(x)
    return activation * (1 - activation)

In [25]:
def mean_squared_error(x, y):
    """Mean squared error between two sequences.

    The elements of ``x`` and ``y`` are paired position by position; surplus
    elements of the longer sequence are ignored. The sum of the squared
    differences is divided by ``len(x)``.

    :param x: first sequence of values
    :param y: second sequence of values
    :return: sum of squared pairwise differences divided by ``len(x)``
    """
    squared_diffs = [(x_i - y_i) ** 2 for x_i, y_i in zip(x, y)]
    return sum(squared_diffs) / len(x)


### 2.  Data Sets
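We train the network on the logical gates **AND**, **OR**, **NAND** (not and), **NOR** (not or) and **XOR** (exclusive or). Each gate maps the four possible binary input pairs to a single binary label.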

In [26]:
# All four combinations of two binary inputs, in the order (0,0), (0,1), (1,0), (1,1).
possible_inputs = [(a, b) for a in (0, 1) for b in (0, 1)]

# Target output of every gate for each entry of possible_inputs (same order),
# derived directly from the gate's truth function.
labels = {
    'AND': [a & b for a, b in possible_inputs],
    'OR': [a | b for a, b in possible_inputs],
    'NAND': [1 - (a & b) for a, b in possible_inputs],
    'NOR': [1 - (a | b) for a, b in possible_inputs],
    'XOR': [a ^ b for a, b in possible_inputs],
}

### 3. Perceptron

In [27]:
class Perceptron:
    """A single neuron: weighted sum of its inputs plus a bias, passed
    through an activation function.

    :param input_units: number of incoming connections; the extra weight for
        the bias is created internally and must not be counted here
    :type input_units: integer
    :param alpha: learning rate, i.e. the step size used in :meth:`update`
    :type alpha: float
    :param activation_func: function applied to the weighted sum of the
        inputs, defaults to sigmoid
    :type activation_func: function, optional
    :param activation_func_prime: derivative of the activation function,
        defaults to sigmoidprime; it is stored on the instance but not used
        by any method of this class
    :type activation_func_prime: function, optional
    """
    def __init__(self, input_units, alpha, activation_func=sigmoid,
                 activation_func_prime=sigmoidprime):
        """Draws the initial weights and stores the hyperparameters."""
        # One weight per input plus a trailing one for the bias, each sampled
        # from the standard normal distribution.
        n_weights = input_units + 1
        self.weights = np.random.normal(size=n_weights)
        self.alpha = alpha
        self.act_func = activation_func
        self.act_func_prime = activation_func_prime

    def forward_pass(self, x):
        """Computes the activation of this perceptron for the input ``x``.

        The extended input (with the bias term) and the resulting activation
        are stored in ``self.inputs`` and ``self.out``.

        :param x: inputs for the perceptron, one value per incoming connection
        :type x: numpy.array
        :return: activation function applied to the weighted sum of the inputs
        """
        # A constant 1 is appended so that the last weight acts as the bias.
        self.inputs = np.append(x, [1])
        weighted_sum = self.weights @ self.inputs
        self.out = self.act_func(weighted_sum)
        return self.out

    def update(self, delta):
        """Performs one gradient-descent step on the weights.

        Uses the inputs stored by the most recent :meth:`forward_pass`, so
        that method has to be called first.

        :param delta: error term of this perceptron; the weights are changed
            by ``-alpha * delta * inputs``
        :type delta: float
        """
        step = self.inputs * self.alpha * delta
        self.weights -= step

### 4. Multi-Layer Perceptron

In [28]:
class MLP:
    """This is a Multi-Layer Perceptron Class.

    :param dim: sizes of the layers, starting with the number of network
        inputs; defaults to (2,4,1), i.e. 2 inputs, 4 hidden perceptrons and
        1 output perceptron
    :type dim: list / tuple, optional
    :param alpha: specifies the learning rate of the MLP, defaults to 1
    :type alpha: float, optional
    """
    def __init__(self, dim=(2,4,1), alpha=1):
        """Constructor function"""
        self.layers = []
        # Every layer holds perceptron_amount perceptrons, and each of them
        # gets one weight per unit of the preceding layer (weights_in).
        for weights_in, perceptron_amount in zip(dim[:-1], dim[1:]):
            layer = [Perceptron(weights_in, alpha) for _ in range(perceptron_amount)]
            self.layers.append(layer)
        # Activations of every layer from the most recent forward pass
        self.net_activations = []

    def forward_step(self, X):
        """Feeds the input X into the network and returns the final prediction.

        The activations of all layers are stored in ``self.net_activations``
        (replacing those of any earlier forward pass) for use in
        :meth:`backprop_step`.

        :param X: Input for which the MLP should create a prediction
        :type X: np.array
        :return: predicted label for X
        :rtype: np.array
        """
        # Drop the activations of the previous pass; otherwise the list would
        # grow by one entry per layer on every call.
        self.net_activations = []
        layer_activations = X
        for layer in self.layers:
            # compute activations of the perceptrons in the current layer
            layer_activations = np.array([perceptron.forward_pass(layer_activations) for perceptron in layer])
            # save the activations for the backpropagation
            self.net_activations.append(layer_activations)
        return layer_activations

    def backprop_step(self, target):
        """Update weights using gradient descent to minimize loss. This function should be called after running the forward step.

        The derivative ``a * (1 - a)`` of the sigmoid is hard-coded here, so
        the perceptrons are expected to use the sigmoid activation.

        :param target: target label for the input provided in the forward step.
            A scalar is accepted for networks with a single output.
        :type target: np.array / list / float
        :raises RuntimeError: if no forward step has been performed yet
        :raises ValueError: if the target shape differs from the output shape
        """
        if not self.net_activations:
            raise RuntimeError("forward_step has to be called before backprop_step")
        output = self.net_activations[-1]
        # Promote scalar targets to 1-d so they are comparable with the output.
        target = np.atleast_1d(np.asarray(target))
        # check that target has the right shape
        if target.shape != output.shape:
            raise ValueError("The provided target has a different shape than the network output")
        # partial derivative of the total error with respect to the network output
        error = -(target - output)
        # propagate error backwards through the network, last layer first
        for layer, activations in zip(reversed(self.layers), reversed(self.net_activations)):
            # multiply error by the derivative of the activation with respect
            # to the perceptron's total input
            delta = error * activations * (1 - activations)
            # Error for the preceding layer. It must be computed before the
            # weights are updated; the bias weight (last entry) is left out.
            error = delta @ np.array([perceptron.weights[:-1] for perceptron in layer])
            # update weights using delta
            for perceptron, perceptron_delta in zip(layer, delta):
                perceptron.update(perceptron_delta)

### 5. Training

In [29]:
# Seed NumPy's global random number generator so that the weight
# initialisation and the sample order are identical on every run.
# Change SEED to explore other initialisations.
SEED = 42
np.random.seed(SEED)

# Initialize the multi-layer perceptron
net = MLP()
# choose logical gate that the network should be trained to estimate
logical_gate = 'XOR'

EPOCHS = 1000
losses = []
accuracies = []

# number of training samples seen per epoch
n_samples = len(possible_inputs)

for epoch in range(EPOCHS):
    loss = 0
    correct_predictions = 0
    # choose a random order to feed the inputs to the net
    order = np.random.permutation(n_samples)
    for i in order:
        # one of the four inputs
        X = possible_inputs[i]
        # corresponding output
        y = labels[logical_gate][i]
        # perform forward propagation
        y_hat = net.forward_step(X)
        # perform backward propagation
        net.backprop_step(y)
        # performance assessment: the prediction (taken before this step's
        # weight update) counts as correct if it is closer to the label
        # than to the opposite label
        if abs(y - y_hat[0]) < 0.5:
            correct_predictions += 1
        loss += mean_squared_error([y], y_hat)

    # average loss and accuracy over the epoch
    losses.append(loss / n_samples)
    accuracies.append(correct_predictions / n_samples)

### 6. Visualisation

In [30]:
# Loss (top) and accuracy (bottom) per epoch on a shared x-axis.
fig, axs = plt.subplots(2, sharex=True, figsize=(10, 5))
fig.suptitle(f'Training Progress for {logical_gate}')
axs[0].plot(losses, color='orange')
axs[0].set(ylabel='Losses')
axs[1].plot(accuracies, color='green')
# The trailing semicolon keeps the list of Text objects returned by set()
# out of the cell output.
axs[1].set(xlabel='Epochs', ylabel='Accuracies');

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[Text(0.5, 0, 'Epochs'), Text(0, 0.5, 'Accuracies')]