In [2]:
from __future__ import annotations
from dataclasses import dataclass
from typing import Optional, List

import numpy as np

In [3]:
def sigmoid(x):
    """Return the logistic function ``1 / (1 + e**-x)`` of ``x``.

    Works on scalars and, elementwise, on NumPy arrays.
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)

def sigmoid_derivative(z: float) -> float:
    """Return the sigmoid's slope given its *output* ``z``.

    ``z`` is a value already produced by the sigmoid, not the pre-activation
    input. The arithmetic is elementwise, so an array of outputs yields an
    array of slopes.
    """
    complement = 1 - z
    return z * complement

In [4]:
@dataclass
class Layer:
    """Parameters and per-pass working arrays of one fully connected layer.

    The shapes noted below are the ones ``Network.create`` builds for a layer
    of ``n`` neurons that receives ``m`` values from the layer before it.
    """

    # Weight matrix with one row per neuron: shape (n, m).
    weights: np.ndarray
    # One bias per neuron: shape (n,).
    bias: np.ndarray
    # Activations written by the forward pass; initialised to zeros, shape (n,).
    outputs: np.ndarray
    # Error terms written by the backward pass; initialised to zeros, shape (n,).
    deltas: np.ndarray

In [6]:
@dataclass
class Network:
    """A feed-forward network held as an ordered list of layers.

    Attributes are documented inline; use ``Network.create`` to build a
    randomly initialised instance from a list of layer sizes.
    """

    # Non-input layers, ordered from the first hidden layer to the output layer.
    layers: List[Layer]
    # Step size applied to every weight and bias update.
    learning_rate: float = 0.5

    @property
    def length(self) -> int:
        """Number of layers stored in the network."""
        return len(self.layers)

    @property
    def outputs(self) -> np.ndarray:
        """Outputs currently stored on the last layer."""
        return self.layers[-1].outputs

    @classmethod
    def create(cls, layers: List[int]) -> Network:
        """Build a network with random weights and biases.

        Args:
            layers: Neuron count of every layer, input layer first. The input
                layer only fixes the width of the first weight matrix; no
                ``Layer`` object is created for it.

        Returns:
            A network with ``len(layers) - 1`` layers. Weights and biases are
            drawn from ``np.random.rand``; outputs and deltas start at zero.

        Raises:
            ValueError: If fewer than two sizes are given, since no layer
                could be built.
        """
        if len(layers) < 2:
            raise ValueError(
                "at least two layer sizes (input and output) are required"
            )

        built = [
            Layer(
                # One row per neuron of this layer, one column per value
                # arriving from the previous layer.
                weights=np.random.rand(n_out, n_in),
                bias=np.random.rand(n_out),
                outputs=np.zeros(n_out),
                deltas=np.zeros(n_out),
            )
            for n_in, n_out in zip(layers, layers[1:])
        ]
        return cls(layers=built)
    
def feed_forward(self, inputs: np.array) -> np.array:
    """Propagate ``inputs`` through every layer and return the final activations.

    Each layer computes ``sigmoid(weights @ signal + bias)`` and stores the
    result on ``layer.outputs``, which then becomes the signal for the next
    layer.
    """
    signal = inputs
    for current in self.layers:
        # (neurons, n_in) @ (n_in,) -> (neurons,), then the bias is added.
        pre_activation = current.weights @ signal + current.bias
        current.outputs = sigmoid(pre_activation)
        signal = current.outputs
    return self.layers[-1].outputs

def back_propagate(self, inputs: np.ndarray, expected: np.ndarray) -> None:
    """Compute every layer's deltas from the last forward pass, then update weights.

    Layers are visited from output to input. The output layer's error is
    ``outputs - expected``; a hidden layer's error is the next layer's deltas
    projected back through that layer's weights. In both cases the error is
    scaled exactly once by the sigmoid slope at the layer's own outputs.

    Args:
        inputs: The sample that produced the stored layer outputs; forwarded
            to ``update_weights``.
        expected: Target values for the output layer.
    """
    last_idx = self.length - 1
    for idx in reversed(range(self.length)):
        layer = self.layers[idx]
        if idx == last_idx:
            error = layer.outputs - expected
        else:
            next_layer = self.layers[idx + 1]
            # next_layer.weights is (n_next, n_here); transposing gives
            # (n_here, n_next), which maps the (n_next,) deltas to (n_here,).
            error = next_layer.weights.T @ next_layer.deltas
        layer.deltas = error * sigmoid_derivative(layer.outputs)

    # Weights change only after all deltas are known, so every hidden layer's
    # delta above was computed with the pre-update weights.
    self.update_weights(inputs)

def update_weights(self, inputs: np.array) -> None:
    """Apply one gradient step to every layer's weights and biases, in place.

    A layer's weight gradient is the outer product of its deltas with the
    activations that fed it: ``inputs`` for the first layer, the previous
    layer's outputs otherwise. Both the weight and bias steps are scaled by
    ``self.learning_rate``.
    """
    rate = self.learning_rate
    upstream = inputs
    for position in range(self.length):
        layer = self.layers[position]
        # (n,) deltas -> (n, 1) column; (m,) activations -> (1, m) row;
        # their product is (n, m), the same shape as the weights.
        column = layer.deltas[:, np.newaxis]
        row = upstream[np.newaxis, :]
        layer.weights -= (column @ row) * rate
        layer.bias -= layer.deltas * rate
        upstream = layer.outputs

def train(self, inputs: np.array, expected: np.array, epochs: int) -> None:
    """Train sample by sample for ``epochs`` passes over ``inputs``.

    For every row the network is fed forward, back-propagated against the
    matching entry of ``expected``, and its per-sample MSE is accumulated.
    After each pass the accumulated value and the epoch index are printed;
    note that the figure labelled "Mean squared error" is the *sum* of the
    per-sample MSEs for that epoch.
    """
    for epoch in range(epochs):
        epoch_error = 0.0
        for position, sample in enumerate(inputs):
            prediction = self.feed_forward(sample)
            target = expected[position]
            self.back_propagate(sample, target)
            epoch_error += self.mse(prediction, target)
        print(f"Mean squared error: {epoch_error}")
        print(f"epoch={epoch}")

def mse(self, actual: np.array, expected: np.array) -> float:
    """Return the mean of the squared elementwise differences of the two arrays."""
    residual = actual - expected
    squared = np.power(residual, 2)
    return squared.mean()

def predict(self, inputs: np.ndarray) -> int:
    """Return the index of the strongest output for ``inputs``.

    Runs a forward pass and picks the position of the largest activation;
    when several outputs tie, the first one wins.

    Args:
        inputs: One sample, passed unchanged to ``feed_forward``.

    Returns:
        The winning output index as a built-in ``int``.
    """
    outputs = self.feed_forward(inputs)
    # argmax finds the first maximum in a single pass; int() makes the result
    # match the annotated return type instead of a NumPy integer scalar.
    return int(np.argmax(outputs))