In [12]:
import numpy as np
import matplotlib.pyplot as plt

In [13]:
class MSE:
    def __init__(self):
        pass

    def get_loss(self, y_pred, y_true):
        errors = y_pred - y_true
        return np.mean(errors**2)

    def get_coeff_gradient(self, y_pred, y_true, X):
        errors = y_pred - y_true
        return (2 / len(X)) * np.dot(errors, X)

    def get_bias_gradient(self, y_pred, y_true, X):
        errors = y_pred - y_true
        return (2 / len(X)) * np.sum(errors, axis=0)

In [14]:
class Adam:
    def __init__(
        self,
        param,
        lr=0.01,
        beta_1=0.9,
        beta_2=0.999,
    ):
        self.param = param
        self.beta_m = beta_1
        self.beta_v = beta_2
        self.lr = lr

        # other parameters
        self.momentum = 0
        self.prev_momentum = 0
        self.v = 0
        self.prev_v = 0

    def step(self, gradient, epoch):
        self.prev_momentum = self.momentum
        self.prev_v = self.v

        # calculate the moving average of the momentum and the squared gradients
        self.momentum = (self.beta_m * self.prev_momentum) + (
            1 - self.beta_m
        ) * gradient
        self.v = (self.beta_v * self.prev_v) + (1 - self.beta_v) * (gradient**2)

        # bias correction
        momentum_hat = self.momentum / (1 - self.beta_m ** (epoch + 1))
        v_hat = self.v / (1 - self.beta_v ** (epoch + 1))

        learning_rate = self.lr / (np.sqrt(v_hat) + 1e-8)
        self.param -= learning_rate * momentum_hat

    def get_param(self):
        return self.param



In [15]:
class DenseLayer:
    def __init__(self, verbose=True):
        self.verbose = verbose
        self.weights = None
        self.bias = None

        # add bool to check if the layer has been initialized
        self.initialized = False

    def forward(self, X):
        if not self.initialized:
            raise Exception("Layer not initialized")
        return np.dot(self.weights, X.T) + self.bias

    def initialize(self, input_dim, output_dim=1):
        self.input_dim = input_dim
        self.output_dim = 1

        self.weights = np.random.randn(self.input_dim)
        self.bias = np.random.randn(1)
        if self.verbose:
            print("Weights: ", self.weights, "Shape: ", self.weights.shape)
            print("Bias: ", self.bias, "Shape: ", self.bias.shape)
        
        self.initialized = True

In [16]:
class Model:
    def __init__(self, layer, loss, otimizer):
        self.layer = layer(verbose=False)
        self.loss = loss()
        self.optimizer = otimizer
        self.initialized = False

    def forward(self, X):
        if not self.initialized:
            raise Exception("Model not initialized. Please call the fit method")
        return self.layer.forward(X)

    def backward(self, y_pred, y_true, X):
        if not self.initialized:
            raise Exception("Model not initialized. Please call the fit method")
        # Get the gradient
        coeff_gradient = self.loss.get_coeff_gradient(y_pred, y_true, X)
        bias_gradient = self.loss.get_bias_gradient(y_pred, y_true, X)
        return coeff_gradient, bias_gradient

    def fit(self, X, y, epochs=100):
        # Initialize the layer
        input_dim = X.shape[-1]
        self.layer.initialize(input_dim)

        # initialize the optimizers
        self.optimizers = [
            self.optimizer(param) for param in [*self.layer.weights, self.layer.bias]
        ]
        self.initialized = True

        for epoch in range(epochs):
            for idx, x_i in enumerate(X):

                self.layer.weights = [
                    optimizer.get_param() for optimizer in self.optimizers[:-1]
                ]
                self.layer.bias = self.optimizers[-1].get_param()

                y_i = y[idx]
                # Forward pass
                # [0] IS BECAUSE of bias = [b] the float turns into [float] so we extract float by [float][0]
                y_i_pred = self.forward(x_i)[0]

                loss = self.loss.get_loss(y_i_pred, y[idx])
                print(f"\nEpoch: {epoch}.{idx} | Loss: {loss}")

                # Backward pass
                coeff_gradients, bias_gradient = self.backward(y_i_pred, y_i, x_i)

                # Update the weights
                for idx, optimizer in enumerate(self.optimizers[:-1]):
                    optimizer.step(coeff_gradients[idx], epoch)

                self.optimizers[-1].step(bias_gradient, epoch)

In [None]:
X = np.array([[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]])
y = np.array([0.5 * x1 + 1.5 * x2 + 3 for x1, x2 in X])

model = Model(DenseLayer, MSE, Adam)
model.fit(X, y, epochs=100)


Epoch: 0.0 | Loss: 7.963379690707535

Epoch: 0.1 | Loss: 5.508233636045702

Epoch: 0.2 | Loss: 3.2590931017218865

Epoch: 0.3 | Loss: 1.411517304048891

Epoch: 0.4 | Loss: 0.2409884110619407

Epoch: 1.0 | Loss: 6.409892370527004

Epoch: 1.1 | Loss: 3.573567114555549

Epoch: 1.2 | Loss: 1.4184831558821376

Epoch: 1.3 | Loss: 0.18768315905070324

Epoch: 1.4 | Loss: 0.14092918741545174

Epoch: 2.0 | Loss: 5.15207637664007

Epoch: 2.1 | Loss: 2.297517694477029

Epoch: 2.2 | Loss: 0.5177376230225498

Epoch: 2.3 | Loss: 0.013571754423542628

Epoch: 2.4 | Loss: 0.9570414957185491

Epoch: 3.0 | Loss: 4.364239400205943

Epoch: 3.1 | Loss: 1.6160840140648713

Epoch: 3.2 | Loss: 0.1834371262832343

Epoch: 3.3 | Loss: 0.19094305089282873

Epoch: 3.4 | Loss: 1.7157835645923747

Epoch: 4.0 | Loss: 3.945187732538418

Epoch: 4.1 | Loss: 1.303830304815382

Epoch: 4.2 | Loss: 0.08119467194866739

Epoch: 4.3 | Loss: 0.3390324114169334

Epoch: 4.4 | Loss: 2.0951000159211555

Epoch: 5.0 | Loss: 3.75617408