In [42]:
import numpy as np
from pprint import pprint

np.random.seed(42)
np.set_printoptions(suppress=True)

In [43]:
activation_functions = {
    "sigmoid": lambda x: 1 / (1 + np.exp(-x)),
    "relu": lambda x: np.maximum(0, x),
    "softmax": lambda x: np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True),
}

activation_derivatives = {
    "sigmoid": lambda x: activation_functions["sigmoid"](x)
    * (1 - activation_functions["sigmoid"](x)),
    "relu": lambda x: np.where(x > 0, 1, 0),
    "softmax": lambda x: activation_functions["softmax"](x)
    * (1 - activation_functions["softmax"](x)),
}

In [44]:
class Layer():
    def __init__(self, input_s: int, output_s: int, activation: str):
        self.weights = np.random.randn(input_s, output_s) * np.sqrt(1. / input_s)
        self.biases = np.zeros((1, output_s))
        self.activation = activation

    def forward(self, x):
        x = x.reshape(1, self.weights.shape[0])
        self.input = x
        self.linear_output = np.dot(x, self.weights) + self.biases
        self.layer_output = activation_functions[self.activation](self.linear_output)
        return self.layer_output

    def backward(self, dA):
        activation_derivative = activation_derivatives[self.activation]
        dZ = dA * activation_derivative(self.layer_output)
        dW = np.dot(self.input.T, dZ) / self.input.shape[0]
        db = np.sum(dZ, axis=0, keepdims=True) / self.input.shape[0]
        dA_prev = np.dot(dZ, self.weights.T)

        self.dW = dW
        self.db = db

        return dA_prev

    def update(self, learning_rate):
        self.weights -= learning_rate * self.dW
        self.biases -= learning_rate * self.db

In [45]:
class NN():
    def __init__(self, layers: list, lr: float = 0.01):
        self.layers = layers
        self.learning_rate = lr

    def forward(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def backward(self, dA):
        for layer in reversed(self.layers):
            dA = layer.backward(dA)

    def update(self):
        for layer in self.layers:
            layer.update(self.learning_rate)

    def __call__(self, x):
        return self.forward(x)
    
    def train(self, X, epochs):
        for epoch in range(epochs):
            loss = []
            for x in X:
                y_hat = self.forward(x)
                error = y_hat - x
                loss.append(np.sum((error) ** 2))
                
                self.backward(error)
                self.update()
                
            loss = np.mean(loss)

            if epoch % (epochs / 10) == 0:
                print(f"Epoch {epoch} - Loss: {loss}")


In [46]:
# Data
X = np.eye(8)

hidden_layer_size = 3

layers = [
    Layer(8, hidden_layer_size, "sigmoid"),
    Layer(hidden_layer_size, 8, "sigmoid"),
]

nn = NN(layers = layers, lr = 0.2)
nn.train(X, 10000)
# nn(X[0])

Epoch 0 - Loss: 1.716961070071439
Epoch 1000 - Loss: 0.14427103033022026
Epoch 2000 - Loss: 0.09796534340643209
Epoch 3000 - Loss: 0.07302802801901134
Epoch 4000 - Loss: 0.05310799243427378
Epoch 5000 - Loss: 0.03820422270295609
Epoch 6000 - Loss: 0.027820842281425832
Epoch 7000 - Loss: 0.020709993425841563
Epoch 8000 - Loss: 0.015855199935970777
Epoch 9000 - Loss: 0.012449082191792782


In [47]:
for i in range(8):
    pprint(np.round(nn(X[i])[0]))   

array([1., 0., 0., 0., 0., 0., 0., 0.])
array([0., 1., 0., 0., 0., 0., 0., 0.])
array([0., 0., 1., 0., 0., 0., 0., 0.])
array([0., 0., 0., 1., 0., 0., 0., 0.])
array([0., 0., 0., 0., 1., 0., 0., 0.])
array([0., 0., 0., 0., 0., 1., 0., 0.])
array([0., 0., 0., 0., 0., 0., 1., 0.])
array([0., 0., 0., 0., 0., 0., 0., 1.])


In [48]:
i = 3
pprint(X[i])
pprint(np.round(nn(X[i])[0], 2))

array([0., 0., 0., 1., 0., 0., 0., 0.])
array([0.  , 0.02, 0.  , 0.95, 0.01, 0.01, 0.  , 0.02])
