In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
class MSE:
    def __init__(self):
        pass

    def get_loss(self, y_pred, y_true):
        errors = y_pred - y_true
        return np.mean(errors**2)

    def get_coeff_gradient(self, y_pred, y_true, X):
        errors = y_pred - y_true
        return (2 / len(X)) * np.dot(errors, X)

    def get_bias_gradient(self, y_pred, y_true, X):
        errors = y_pred - y_true
        return (2 / len(X)) * np.sum(errors, axis=0)

In [3]:
class Adam:
    def __init__(
        self,
        param,
        lr=0.01,
        beta_1=0.9,
        beta_2=0.999,
    ):
        self.param = param
        self.beta_m = beta_1
        self.beta_v = beta_2
        self.lr = lr

        # other parameters
        self.momentum = 0
        self.prev_momentum = 0
        self.v = 0
        self.prev_v = 0

    def step(self, gradient, epoch):
        self.prev_momentum = self.momentum
        self.prev_v = self.v

        # calculate the moving average of the momentum and the squared gradients
        self.momentum = (self.beta_m * self.prev_momentum) + (
            1 - self.beta_m
        ) * gradient
        self.v = (self.beta_v * self.prev_v) + (1 - self.beta_v) * (gradient**2)

        # bias correction
        momentum_hat = self.momentum / (1 - self.beta_m ** (epoch + 1))
        v_hat = self.v / (1 - self.beta_v ** (epoch + 1))

        learning_rate = self.lr / (np.sqrt(v_hat) + 1e-8)
        self.param -= learning_rate * momentum_hat

    def get_param(self):
        return self.param



In [4]:
class DenseLayer:
    def __init__(self, verbose=True):
        self.verbose = verbose
        self.weights = None
        self.bias = None

        # add bool to check if the layer has been initialized
        self.initialized = False

    def forward(self, X):
        if not self.initialized:
            raise Exception("Layer not initialized")
        return np.dot(self.weights, X.T) + self.bias

    def initialize(self, input_dim, output_dim=1):
        self.input_dim = input_dim
        self.output_dim = 1

        self.weights = np.random.randn(self.input_dim)
        self.bias = np.random.randn(1)
        if self.verbose:
            print("Weights: ", self.weights, "Shape: ", self.weights.shape)
            print("Bias: ", self.bias, "Shape: ", self.bias.shape)
        
        self.initialized = True

In [5]:
class Model:
    def __init__(self, layer, loss, otimizer):
        self.layer = layer(verbose=False)
        self.loss = loss()
        self.optimizer = otimizer
        self.initialized = False

    def forward(self, X):
        if not self.initialized:
            raise Exception("Model not initialized. Please call the fit method")
        return self.layer.forward(X)

    def backward(self, y_pred, y_true, X):
        if not self.initialized:
            raise Exception("Model not initialized. Please call the fit method")
        # Get the gradient
        coeff_gradient = self.loss.get_coeff_gradient(y_pred, y_true, X)
        bias_gradient = self.loss.get_bias_gradient(y_pred, y_true, X)
        return coeff_gradient, bias_gradient

    def fit(self, X, y, epochs=100):
        # Initialize the layer
        input_dim = X.shape[-1]
        self.layer.initialize(input_dim)

        # initialize the optimizers
        self.optimizers = [
            self.optimizer(param) for param in [*self.layer.weights, self.layer.bias]
        ]
        self.initialized = True

        for epoch in range(epochs):
            for idx, x_i in enumerate(X):

                self.layer.weights = [
                    optimizer.get_param() for optimizer in self.optimizers[:-1]
                ]
                self.layer.bias = self.optimizers[-1].get_param()

                y_i = y[idx]
                # Forward pass
                # [0] IS BECAUSE of bias = [b] the float turns into [float] so we extract float by [float][0]
                y_i_pred = self.forward(x_i)[0]

                loss = self.loss.get_loss(y_i_pred, y[idx])
                print(f"\nEpoch: {epoch}.{idx} | Loss: {loss}")

                # Backward pass
                coeff_gradients, bias_gradient = self.backward(y_i_pred, y_i, x_i)

                # Update the weights
                for optimizer in self.optimizers[:-1]:
                    optimizer.step(coeff_gradients, epoch)

                self.optimizers[-1].step(bias_gradient, epoch)

In [6]:
X = np.array([[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]])
y = np.array([0.5 * x1 + 1.5 * x2 + 3 for x1, x2 in X])

model = Model(DenseLayer, MSE, Adam)
model.fit(X, y, epochs=100)


Epoch: 0.0 | Loss: 25.122801957282302

Epoch: 0.1 | Loss: 64.69233191048336

Epoch: 0.2 | Loss: 101.48389780516958

Epoch: 0.3 | Loss: 144.85553436419826

Epoch: 0.4 | Loss: 193.8395391758756

Epoch: 1.0 | Loss: 32.1291277087665

Epoch: 1.1 | Loss: 57.871981181077594

Epoch: 1.2 | Loss: 90.22501436115556

Epoch: 1.3 | Loss: 128.5280016120291

Epoch: 1.4 | Loss: 172.02443560247963

Epoch: 2.0 | Loss: 29.20247219715777

Epoch: 2.1 | Loss: 52.13107982056416

Epoch: 2.2 | Loss: 80.89901649320863

Epoch: 2.3 | Loss: 115.01683325057843

Epoch: 2.4 | Loss: 153.91788200155085

Epoch: 3.0 | Loss: 26.734683176024312

Epoch: 3.1 | Loss: 47.360217031219356

Epoch: 3.2 | Loss: 73.2143788116336

Epoch: 3.3 | Loss: 103.91084396712614

Epoch: 3.4 | Loss: 139.00048140855125

Epoch: 4.0 | Loss: 24.6706061577018

Epoch: 4.1 | Loss: 43.39076143940077

Epoch: 4.2 | Loss: 66.83463888879184

Epoch: 4.3 | Loss: 94.68677738264314

Epoch: 4.4 | Loss: 126.57830702517076

Epoch: 5.0 | Loss: 22.927141253028186

E

In [7]:
w = np.array([0.5, 1.5])
x = np.array([[1,2], [2,3], [4,5]])
y = np.array([0.5 * x1 + 1.5 * x2 + 3 for x1, x2 in x])
b =np.array([1])

(np.dot(w, x.T) + b)[0], np.dot(w, x[0].T) +b

(np.float64(4.5), array([4.5]))