In [1]:
import numpy as np
from random import random

In [2]:
#TODO:
#save activations and derivatives
#implement backpropagation
#implement gradient descent
#implement train
#train
#make predictions!

In [4]:
class MLP:
    """Fully connected multilayer perceptron with sigmoid activations.

    The network stores one weight matrix per pair of consecutive layers (there
    are no bias terms), the activations produced by the most recent forward
    pass, and the weight derivatives produced by the most recent backward pass.

    Attributes:
        num_inputs: size of the input layer.
        hidden: list with the size of each hidden layer.
        num_outputs: size of the output layer.
        weights: list of arrays; ``weights[i]`` has shape
            ``(layers[i], layers[i + 1])``.
        activations: list of arrays; ``activations[i]`` is the output of
            layer ``i`` from the last call to ``forward_propagate``.
        derivatives: list of arrays shaped like ``weights``, filled by
            ``back_propagate``.
    """

    def __init__(self, num_inputs=3, hidden=[3, 3], num_outputs=2):
        """Build the network and initialise its weights.

        Args:
            num_inputs: number of input units.
            hidden: sequence with the number of units in each hidden layer.
            num_outputs: number of output units.
        """
        self.num_inputs = num_inputs
        # Copy the sequence: storing the argument itself would alias the shared
        # default list, so mutating one instance's `hidden` would silently
        # change the layer sizes of every MLP created afterwards.
        self.hidden = list(hidden)
        self.num_outputs = num_outputs

        layers = [num_inputs] + self.hidden + [num_outputs]
        # Sizes of each pair of consecutive layers, e.g. (3, 3), (3, 5), (5, 2).
        layer_pairs = list(zip(layers[:-1], layers[1:]))

        # Weights start as uniform random values in [0, 1).
        self.weights = [np.random.rand(n_in, n_out) for n_in, n_out in layer_pairs]

        # Placeholders, overwritten by forward_propagate / back_propagate.
        self.activations = [np.zeros(size) for size in layers]
        self.derivatives = [np.zeros((n_in, n_out)) for n_in, n_out in layer_pairs]

    def forward_propagate(self, inputs):
        """Run a forward pass and remember every layer's activations.

        For each layer ``i``: ``h[i+1] = a[i] . W[i]`` and
        ``a[i+1] = sigmoid(h[i+1])``, with ``a[0] = inputs``.

        Args:
            inputs: a single sample of length ``num_inputs`` or a 2-D batch of
                such samples; any array-like (lists included) is accepted.

        Returns:
            The activations of the output layer as an ndarray.
        """
        # Coerce to an ndarray so back_propagate can rely on array methods even
        # when the caller passes a plain Python list.
        activations = np.asarray(inputs, dtype=float)
        self.activations[0] = activations

        for i, w in enumerate(self.weights):
            # Dot product for a single sample, matrix product for a batch.
            net_inputs = np.dot(activations, w)
            activations = self._sigmoid(net_inputs)
            self.activations[i + 1] = activations

        return activations

    def back_propagate(self, error):
        """Propagate ``error`` backwards and fill ``self.derivatives``.

        Must be called after ``forward_propagate`` since it reads the stored
        activations. Walking the layers from last to first:

            delta          = error * sigmoid'(h[i+1])
            derivatives[i] = a[i]^T . delta
            error          = delta . W[i]^T      (error passed to layer i)

        where ``sigmoid'(h[i+1]) = a[i+1] * (1 - a[i+1])``. With
        ``error = target - output`` (as ``train`` passes it) the stored
        derivatives are the negative gradient of the squared error, which is
        why ``gradient_descent`` adds them to the weights.

        Args:
            error: array matching the shape of the last output of
                ``forward_propagate`` (1-D for a single sample, 2-D for a
                batch; batch derivatives are summed over the samples).

        Returns:
            The error propagated back to the input layer.
        """
        for i in reversed(range(len(self.derivatives))):
            layer_output = self.activations[i + 1]
            delta = error * self._sigmoid_derivative(layer_output)

            # Promote 1-D vectors to row matrices so that a single sample
            # yields the outer product a[i] (column) x delta (row), while a
            # batch yields the sum of those outer products over its rows.
            layer_input = np.atleast_2d(self.activations[i])
            self.derivatives[i] = np.dot(layer_input.T, np.atleast_2d(delta))

            error = np.dot(delta, self.weights[i].T)

        return error

    def gradient_descent(self, learning_rate):
        """Update every weight matrix in place using the stored derivatives.

        Args:
            learning_rate: step size applied to each derivative.
        """
        for weights, derivatives in zip(self.weights, self.derivatives):
            # In-place update; the derivatives already point downhill (see
            # back_propagate), hence the addition.
            weights += derivatives * learning_rate

    def train(self, inputs, targets, epochs, learning_rate):
        """Train with per-sample (stochastic) gradient descent.

        After every epoch the mean of the per-sample MSE is printed. Each
        sample's MSE is measured on the output computed before that sample's
        weight update.

        Args:
            inputs: sequence of samples.
            targets: sequence of target vectors, one per sample.
            epochs: number of passes over the dataset.
            learning_rate: step size handed to ``gradient_descent``.

        Raises:
            ValueError: if ``inputs`` and ``targets`` differ in length or the
                dataset is empty.
        """
        num_samples = len(inputs)
        if num_samples != len(targets):
            # zip() would otherwise drop the surplus silently while the mean
            # below still divided by len(inputs).
            raise ValueError(
                "inputs and targets must have the same length "
                "(got {} and {})".format(num_samples, len(targets))
            )
        if num_samples == 0:
            raise ValueError("cannot train on an empty dataset")

        for epoch in range(epochs):
            sum_error = 0
            for sample, target in zip(inputs, targets):
                target = np.asarray(target, dtype=float)
                output = self.forward_propagate(sample)
                error = target - output
                self.back_propagate(error)
                self.gradient_descent(learning_rate)

                sum_error += self._mse(target, output)

            print("Error: {} at epoch: {}".format(sum_error / num_samples, epoch))

    def _mse(self, target, output):
        """Return the mean squared difference between target and output."""
        return np.average((target - output)**2)

    def _sigmoid_derivative(self, x):
        """Return the sigmoid derivative given ``x``, the sigmoid's *output*.

        Uses ``s'(h) = s(h) * (1 - s(h))``; callers pass the stored
        activations, i.e. ``s(h)``, not the net input ``h``.
        """
        return x * (1 - x)

    def _sigmoid(self, x):
        """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise."""
        return 1/(1 + np.exp(-x))

In [44]:
if __name__ == "__main__":

    # Toy dataset for learning addition: 1000 pairs of operands, each drawn
    # from [0, 0.5) so that every sum stays below 1.
    items = np.array([[random() / 2, random() / 2] for _ in range(1000)])
    # Column vector of targets: first operand + second operand, one row per pair.
    targets = items[:, :1] + items[:, 1:]

    # Network with 2 inputs, a single hidden layer of 5 units and 1 output.
    mlp = MLP(num_inputs=2, hidden=[5], num_outputs=1)

    # Fit for 50 epochs with a learning rate of 0.1.
    mlp.train(items, targets, epochs=50, learning_rate=0.1)

    # Hand-picked sample and its true sum. The first operand (0.7) lies outside
    # the [0, 0.5) range the training operands were drawn from.
    # NOTE(review): `input` shadows the builtin input() for the rest of the
    # kernel session; the name is kept because later cells may read it.
    input = np.array([.7, .1])
    target = np.array([.8])

    # Ask the trained network for its estimate of the sum.
    output = mlp.forward_propagate(input)

    print()
    print("Our network believes that {} + {} is equal to {}".format(*input, output[0]))

Error: 0.16945334653763067 at epoch: 0
Error: 0.16170981203447435 at epoch: 1
Error: 0.16108268337811438 at epoch: 2
Error: 0.16071730683742064 at epoch: 3
Error: 0.16041793032694296 at epoch: 4
Error: 0.16015484152045076 at epoch: 5
Error: 0.15992089427748377 at epoch: 6
Error: 0.15971272056035396 at epoch: 7
Error: 0.159527587728769 at epoch: 8
Error: 0.1593629658739354 at epoch: 9
Error: 0.15921652258840452 at epoch: 10
Error: 0.159086145161129 at epoch: 11
Error: 0.15896994430075945 at epoch: 12
Error: 0.15886624469555655 at epoch: 13
Error: 0.158773569021031 at epoch: 14
Error: 0.1586906192351259 at epoch: 15
Error: 0.15861625731445822 at epoch: 16
Error: 0.15854948665521468 at epoch: 17
Error: 0.1584894347982106 at epoch: 18
Error: 0.15843533778055932 at epoch: 19
Error: 0.15838652619554477 at epoch: 20
Error: 0.15834241291330922 at epoch: 21
Error: 0.15830248234620495 at epoch: 22
Error: 0.15826628111155847 at epoch: 23
Error: 0.1582334099359961 at epoch: 24
Error: 0.15820351664