In [5]:
import numpy as np

In [77]:
class MLP:
    """Small fully connected network with sigmoid activations on every layer.

    The network is trained one sample at a time: ``forward_pass`` caches the
    pre-activations and activations of the sample, and ``backward_pass`` uses
    that cache to take a single gradient-descent step on the binary
    cross-entropy loss.

    Attributes:
        layers: layer sizes, input size first (e.g. ``[2, 3, 1]``).
        weights: one array per non-input layer, shape ``(layers[i+1], layers[i])``.
        bias: one array per non-input layer, shape ``(layers[i+1], 1)``.
        activations: cache from the latest ``forward_pass``; index 0 is the input.
        z_list: pre-activation values from the latest ``forward_pass``.
    """

    # Keeps log() and the division in cross_entropy_loss finite when the
    # sigmoid output saturates to exactly 0 or 1.
    _EPS = 1e-12

    def __init__(self, layers):
        """Create the network and draw all parameters uniformly from [0, 1).

        Args:
            layers: sequence of layer sizes; the first entry is the input size.
        """
        # Save everything to analyse later
        self.layers = layers
        self.weights = []
        self.bias = []
        self.activations = []
        self.z_list = []

        # Weight shape = [l, l-1]; `layers` also contains the input size, so
        # consecutive pairs give (fan_in, fan_out) of each weight matrix.
        for fan_in, fan_out in zip(layers[:-1], layers[1:]):
            self.weights.append(np.random.random([fan_out, fan_in]))
            self.bias.append(np.random.random([fan_out, 1]))

    def forward_pass(self, a):
        """Propagate one sample through the network and cache the intermediates.

        Args:
            a: input column vector of shape ``(layers[0], 1)``.

        Returns:
            List of activations, starting with the input itself and ending with
            the network output.
        """
        z_list = []
        activations = [a]  # the input is kept too; backward_pass needs it
        for i in range(len(self.layers) - 1):
            # z(l,1) = W(l,l-1) * a(l-1,1) + b(l,1)
            z = np.matmul(self.weights[i], a) + self.bias[i]
            a = self.sigmoid(z)  # a(l,1)
            z_list.append(z)
            activations.append(a)
        self.z_list = z_list
        self.activations = activations
        return activations

    def backward_pass(self, target, learning_rate):
        """Take one gradient-descent step using the cached forward pass.

        ``forward_pass`` must have been called for the sample beforehand,
        because this method reads ``self.activations`` and ``self.z_list``.

        Args:
            target: expected output; a scalar or an array broadcastable to the
                output shape.
            learning_rate: step size applied to every weight and bias gradient.
        """
        # Output layer: dLoss/dz = dLoss/da * da/dz
        output = self.activations[-1]
        _, del_loss_out = self.cross_entropy_loss(target, output)
        del_l = del_loss_out * self.sigmoid_derivative(self.z_list[-1])

        # Walk from the last weight layer back to the first.
        for layer in range(len(self.layers) - 1, 0, -1):
            # del_w(l,l-1) = del_l(l,1) * a(l-1,1).T ; del_b(l,1) = del_l(l,1)
            del_w = np.matmul(del_l, self.activations[layer - 1].T)
            del_b = del_l

            # Propagate the error through the weights *before* they are
            # updated, so every gradient in this step refers to the same
            # parameters. There is nothing to propagate to below the first
            # weight layer (the input has no pre-activation).
            if layer > 1:
                del_prev = (
                    np.matmul(self.weights[layer - 1].T, del_l)
                    * self.sigmoid_derivative(self.z_list[layer - 2])
                )
            else:
                del_prev = None

            self.weights[layer - 1] -= learning_rate * del_w
            self.bias[layer - 1] -= learning_rate * del_b
            del_l = del_prev

    # Print functions
    def get_weights(self):
        """Print every layer's weight matrix and bias vector with their shapes."""
        for i in range(len(self.weights)):
            print(f'=============Layer {i+1} Weights================')
            print(f'Weights: {self.weights[i]} {self.weights[i].shape}')
            print(f'Bias: {self.bias[i]} {self.bias[i].shape}')

    def get_activations(self):
        """Print the cached input and per-layer activations with their shapes."""
        print('=============Input ================')
        print(f'Input: {self.activations[0]} {self.activations[0].shape}')
        for i in range(1, len(self.activations)):
            print(f'=============Layers {i} activations================')
            print(f'Activations: {self.activations[i]} {self.activations[i].shape}')

    def get_z_values(self):
        """Print the cached per-layer pre-activation values with their shapes."""
        for i in range(len(self.z_list)):
            print(f'=============Layers {i+1} z values================')
            print(f'Z values: {self.z_list[i]} {self.z_list[i].shape}')

    # Utility functions
    def sigmoid(self, z):
        """Element-wise logistic function 1 / (1 + exp(-z))."""
        return 1 / (1 + np.exp(-z))

    def sigmoid_derivative(self, z):
        """Element-wise derivative of the sigmoid, evaluated at ``z``."""
        s = self.sigmoid(z)  # evaluate once instead of twice
        return s * (1 - s)

    def cross_entropy_loss(self, target, output):
        """Binary cross-entropy and its derivative with respect to ``output``.

        ``output`` is clipped to ``[_EPS, 1 - _EPS]`` first so that a saturated
        prediction yields large but finite values instead of inf/nan.

        Returns:
            Tuple ``(loss, d_loss_out)``, both with the broadcast shape of
            ``target`` and ``output``.
        """
        output = np.clip(output, self._EPS, 1 - self._EPS)
        loss = -target * np.log(output) - (1 - target) * np.log(1 - output)
        d_loss_out = (output - target) / (output - output ** 2)
        return loss, d_loss_out

In [82]:
# Hyperparameters of the AND-gate demo, gathered in one place for easy tuning.
SEED = 0
EPOCHS = 1000
LEARNING_RATE = 0.1

# MLP draws its initial parameters from NumPy's global RNG; seeding it makes
# the trained weights (and the predictions printed below) reproducible.
np.random.seed(SEED)

mlp = MLP([2, 3, 1])
inputs = np.array([[0, 0], [1, 0], [0, 1], [1, 1]])
targets = np.array([0, 0, 0, 1])  # logical AND of the two input bits

# Per-sample gradient descent: one forward and one backward pass per example.
for _ in range(EPOCHS):
    for inp, target in zip(inputs, targets):
        inp = inp.reshape(-1, 1)  # forward_pass expects a column vector
        activations = mlp.forward_pass(inp)  # fills the cache backward_pass reads
        mlp.backward_pass(target, LEARNING_RATE)

# mlp.get_weights()
# mlp.get_activations()
# mlp.get_z_values()

In [83]:
# Print the trained network's prediction for each training input.
for inp in inputs:
    inp = inp.reshape(-1, 1)  # forward_pass expects a column vector
    # One forward pass is enough: it involves no randomness, so repeating it
    # would return the same activations.
    activations = mlp.forward_pass(inp)
    output = activations[-1]
    print(f'For input {inp.reshape(1,-1)[0]}: {output[0][0]:.2f}')

For input [0 0]: 0.00
For input [1 0]: 0.01
For input [0 1]: 0.01
For input [1 1]: 0.98
