In [80]:
import numpy as np

In [81]:
class Dense_SingleOutput:
    def __init__(self, input_dim, output_dim=1, verbose=False):

        self.input_dim = input_dim
        self.output_dim = 1

        # 0.1 ~ 1.1 | 0.1 is to avoid 0 weights and biases
        self.bias = np.round(np.random.random() + 0.1, 2)
        self.weights = np.round(np.random.random(input_dim) + 0.1, 2)

        if verbose:
            print(
                f"Dense_SingleOutput layer with input_dim: {self.input_dim} and output_dim: 1"
            )
            print(f"Initial weights: {self.weights}")
            print(f"Initial bias: {self.bias}")

    def forward(self, x):
        return np.dot(self.weights, x.T) + self.bias

In [82]:
# X = np.array([[1, 4], [2, 3], [3, 5]])
# y = np.array([10, 9, 14])

# model = Dense_SingleOutput(input_dim=2, verbose=True)


# print(model.forward(X))

In [83]:
class Adam:
    def __init__(self, learning_rate=0.01, beta_1=0.85, beta_2=0.99):
        self.lr = learning_rate
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        
        self.m = 0 
        self.v = 0
        
        self.m_prev = 0
        self.v_prev = 0
        
    def adam(self, gradients:list, epoch):
        self.prev_m = self.m
        self.prev_v = self.v
        
        self.m = self.beta_1 * self.prev_m + (1 - self.beta_1) * gradients
        self.v = self.beta_2 * self.prev_v + (1 - self.beta_2) * (gradients**2)
        
        m_hat = self.m / (1 - self.beta_1 ** (epoch+1))
        v_hat = self.v / (1 - self.beta_2 ** (epoch+1))
        
        
        learning_rate = self.lr / (np.sqrt(v_hat) + 1e-8)
        return learning_rate * m_hat      
        
        
    def step(self, parameters:list, gradients:list, epoch):
        parameters = np.array(parameters).flatten()
        gradients = np.array(gradients).flatten()
        
        new_parameters = []
        for param, gradient in zip(parameters, gradients):
            update = self.adam(gradient, epoch)
            param -= update
            new_parameters.append(param)
        return np.array(new_parameters)
        
        

In [84]:
class MSELoss:
    def __init__(self):
        pass

    def calculate_loss(self, y_pred, y_true):
        return np.mean((y_pred - y_true) ** 2)

    def calculate_gradient_coeff(self, y_pred, y_true, X):
        errors = y_pred - y_true
        return (2 / len(y_true)) * np.dot(errors, X)

    def calculate_gradient_bias(self, y_pred, y_true):
        errors = y_pred - y_true
        return (2 / len(y_true)) * np.sum(errors)

In [85]:
class Model:
    def __init__(self, layer):
        self.layer = layer  # not initialized
        self.initiated = False
        self.compiled = False

    def compile(self, loss, optimizer):
        self.loss = loss
        self.optimizer = optimizer
        self.compiled = True

    def _forward(self, x):
        if not self.initiated:
            raise Exception("Model not initiated - call `fit()` method first")

        return self.layer.forward(x)

    def _backward(self, y_pred, y_real, X):
        if not self.initiated:
            raise Exception("Model not initiated - call `fit()` method first")

        loss = self.loss.calculate_loss(y_pred, y_real)
        gradient_coeff = self.loss.calculate_gradient_coeff(y_pred, y_real, X)
        gradient_bias = self.loss.calculate_gradient_bias(y_pred, y_real)

        return loss, gradient_coeff, gradient_bias

    def fit(self, X, y, epochs=100):
        if not self.compiled:
            raise Exception("Model not compiled - call `compile()` method first")
        input_dim = X.shape[-1]
        self.layer = self.layer(input_dim=input_dim, verbose=True)
        self.initiated = True

        ### ------------------- ###

        for epoch in range(epochs):
            # ---Forward pass---------
            y_pred = self._forward(X)

            # ---------Backward pass---------
            loss, gradient_coeff, gradient_bias = self._backward(y_pred, y, X)

            # Update weights and biases
            self.layer.weights = self.optimizer.step(
                self.layer.weights, gradient_coeff, epoch
            )
            self.layer.bias = self.optimizer.step(
                [self.layer.bias], [gradient_bias], epoch
            )

            print(f"Epoch: {epoch} | Loss: {loss:.4f}")


model = Model(layer=Dense_SingleOutput)
model.compile(loss=MSELoss(), optimizer=Adam(learning_rate=0.01))

X_train = np.array([[1, 2], [3, 4], [5, 6]])
y_train = np.array([4, 8, 12])

model.fit(X_train, y_train, epochs=100)

Dense_SingleOutput layer with input_dim: 2 and output_dim: 1
Initial weights: [0.71 0.42]
Initial bias: 0.74
Epoch: 0 | Loss: 13.9209
Epoch: 1 | Loss: 13.1665
Epoch: 2 | Loss: 12.4514
Epoch: 3 | Loss: 11.8172
Epoch: 4 | Loss: 11.2551
Epoch: 5 | Loss: 10.7524
Epoch: 6 | Loss: 10.2979
Epoch: 7 | Loss: 9.8825
Epoch: 8 | Loss: 9.4988
Epoch: 9 | Loss: 9.1413
Epoch: 10 | Loss: 8.8056
Epoch: 11 | Loss: 8.4882
Epoch: 12 | Loss: 8.1863
Epoch: 13 | Loss: 7.8979
Epoch: 14 | Loss: 7.6213
Epoch: 15 | Loss: 7.3550
Epoch: 16 | Loss: 7.0980
Epoch: 17 | Loss: 6.8494
Epoch: 18 | Loss: 6.6085
Epoch: 19 | Loss: 6.3747
Epoch: 20 | Loss: 6.1476
Epoch: 21 | Loss: 5.9268
Epoch: 22 | Loss: 5.7119
Epoch: 23 | Loss: 5.5028
Epoch: 24 | Loss: 5.2991
Epoch: 25 | Loss: 5.1007
Epoch: 26 | Loss: 4.9075
Epoch: 27 | Loss: 4.7192
Epoch: 28 | Loss: 4.5359
Epoch: 29 | Loss: 4.3574
Epoch: 30 | Loss: 4.1835
Epoch: 31 | Loss: 4.0143
Epoch: 32 | Loss: 3.8497
Epoch: 33 | Loss: 3.6896
Epoch: 34 | Loss: 3.5339
Epoch: 35 | Loss: 3