This version is not dynamic at all. It was created purely for testing and learning purposes.
### NO ACTIVATION - so it is just a linear transformation

In [None]:
import numpy as np
from utils import MSE, Adam

np.random.seed(42)

In [19]:
class Dense:
    def __init__(self, units: int):
        self.units: int = units
        self.w = None
        self.b = None
        self.initialized = False

    def initialize_params(self, input_dim):
        self.w = np.random.rand(self.units, input_dim)
        self.b = np.random.rand(self.units)
        self.initialized = True

    def forward(self, X):
        if not self.initialized:
            raise ValueError("Initiate params first")
        X = np.asarray(X)
        return np.dot(X, self.w.T) + self.b


class Input:
    def __init__(self, units):
        self.units = units

In [20]:
# Test the dense layer
input_dim = 2
X = [[1, 2], [2, 3], [3, 4]]
dense_1 = Dense(4)  # Output dim is 4
dense_1.initialize_params(input_dim=input_dim)


print("Shape Check:", "+" if dense_1.w.shape == (dense_1.units, input_dim) and dense_1.b.shape == (dense_1.units,) else "-")
print("Output dim Check:", "+" if dense_1.forward(X).shape == (len(X), dense_1.units) else "-")



del input_dim, dense_1, X

Shape Check: +
Output dim Check: +


In [21]:

class Sequential:
    def __init__(self, layers: list, verbose=False):
        self.layers = layers

        self.verbose = verbose
        self.cost_function = None
        self.optimizer = None
        self.lr = 0.01

        self._validate_input()

        self.is_compiled = False

    def __str__(self):
        output = "Sequential Model:\n\n"
        for idx, layer in enumerate(self.layers):
            output += f"[{idx}] {layer.__class__.__name__} ({layer.units})\n"

        return output

    def _validate_input(self):
        if not isinstance(self.layers[0], Input):
            raise TypeError(
                f"The first layer must be an object of class `Input`, got object of class `{self.layers[0].__class__.__name__}`"
            )

    def compile(self, cost_function, optimizer):

        self.cost_function = cost_function
        self.optimizer = optimizer
        if self.verbose:
            print(self)
            print(f"Loss: {self.cost_function.__class__.__name__}")
            print(f"Optimizer: {self.optimizer.__class__.__name__}\n")

        self.is_compiled = True

    def initialize_layers(self):
        i = 1  # start from first layer (excluding Input)
        while i < len(self.layers):

            layer = self.layers[i]

            input_dim = self.layers[i - 1].units
            layer.initialize_params(input_dim)
            print(
                f"{layer.__class__.__name__} ({layer.units}) | w: {layer.w.shape} | b: {layer.b.shape}"
            )
            i += 1

    def fit(self, X, y, epochs=1):
        if not self.is_compiled:
            raise ValueError("Call `compile()` method first.")

        self.initialize_layers()
        for epoch in range(epochs):
            print(f"\nEpoch: {epoch}")

            # Forward pass (full_batch)
            y_pred = X
            layer_outputs = [None]  # to pad for Input layer
            for layer in self.layers[1:]:  # eclude Input layer
                y_pred = layer.forward(y_pred)
                layer_outputs.append(y_pred)
            print(f"Y pred: {y_pred.shape}")  # [500, 2]


            w1, w2, w3 = [layer.w for layer in self.layers[1:]]
            a1, a2, a3 = [out for out in layer_outputs[1:]]
            n_samples = len(X)
            errors = y_pred - y # (500, 2)
            
            ### third layer
            # g3 = dL/dw2
            g3 = 2/n_samples * np.dot(errors.T, a2) # (2, 5)
            bg3 = 2/n_samples * np.sum(errors, axis=0) # (2,)
            
            
            ### second layer 
            # g2 = dL/dw2
            # dL/dw2 = dL/da2 @ da2/dw2
            # dL/da2 = dL/da3 @ da3/da2
            
            # dL/da2
            dLoss_da3 = 2/n_samples * errors # (500, 2)
            da3_da2 = w3 #(2, 5)
            dLoss_da2 = dLoss_da3 @ da3_da2 # (500, 5)
            
            # da2/dw2
            da2_dw2 = a1 # (500, 4)
            
            g2 = dLoss_da2.T @ da2_dw2 # (5, 4)
            bg2 = (2 / n_samples) * np.sum(dLoss_da2, axis=0)  # (5,)

            
            ### first layer
            # g1 = dL/dw1
            # dL/dw1 = dL/da1 @ da1/dw1
            # dL/da1 = dL/da2 @ da2/da1
            
            # dL/da1
            da2_da1 = w2 # (5, 4)
            dLoss_da1 = dLoss_da2 @ da2_da1 # (500, 4)
            
            # da1/dw1
            da1_dw1 = X # (500, 2)
            
            g1 = dLoss_da1.T @ da1_dw1 # (4, 2)
            bg1 = (2 / n_samples) * np.sum(dLoss_da1, axis=0)  # (4,)
        
            self.layers[1].w -= g1 * self.lr
            self.layers[1].b -= bg1 * self.lr

            self.layers[2].w -= g2 * self.lr
            self.layers[2].b -= bg2 * self.lr

            self.layers[3].w -= g3 * self.lr
            self.layers[3].b -= bg3 * self.lr
            
            loss = np.mean((y_pred - y)**2)
            print(loss)

    def predict(self, X):
        y_pred = X
        for layer in self.layers[1:]:  # eclude Input layer
            y_pred = layer.forward(y_pred)
        return y_pred
        
            
            
        


In [22]:
# X got 2 features [n, 2]
# Y got 2 features [n, 2]

n_samples = 500
X_data = np.random.normal(0, 1, size=(n_samples, 2))
y_data = np.array([[x1 * 0.3 + 4, x2 * 1.9 - 8] for x1, x2 in X_data])

print(f"X shape: {X_data.shape}\nY shape: {y_data.shape}")

X shape: (500, 2)
Y shape: (500, 2)


In [23]:
model = Sequential([Input(2), 
                    Dense(4), 
                    Dense(5), 
                    Dense(2)])
print(model)

Sequential Model:

[0] Input (2)
[1] Dense (4)
[2] Dense (5)
[3] Dense (2)



In [24]:
model.compile(cost_function=MSE(), optimizer=Adam(0.01))
model.fit(X_data, y_data, epochs=1000)

Dense (4) | w: (4, 2) | b: (4,)
Dense (5) | w: (5, 4) | b: (5,)
Dense (2) | w: (2, 5) | b: (2,)

Epoch: 0
Y pred: (500, 2)
99.15637054899187

Epoch: 1
Y pred: (500, 2)
42.5510062324608

Epoch: 2
Y pred: (500, 2)
33.132259481854895

Epoch: 3
Y pred: (500, 2)
27.331096770303468

Epoch: 4
Y pred: (500, 2)
23.140559720210664

Epoch: 5
Y pred: (500, 2)
19.60095544489116

Epoch: 6
Y pred: (500, 2)
16.267270893910034

Epoch: 7
Y pred: (500, 2)
12.988651145075615

Epoch: 8
Y pred: (500, 2)
9.780055890659165

Epoch: 9
Y pred: (500, 2)
6.776001082751717

Epoch: 10
Y pred: (500, 2)
4.20259590495544

Epoch: 11
Y pred: (500, 2)
2.2895916305685557

Epoch: 12
Y pred: (500, 2)
1.1162496216304776

Epoch: 13
Y pred: (500, 2)
0.534902944165106

Epoch: 14
Y pred: (500, 2)
0.29201321625967036

Epoch: 15
Y pred: (500, 2)
0.19594594619928743

Epoch: 16
Y pred: (500, 2)
0.15513276454908975

Epoch: 17
Y pred: (500, 2)
0.1353706683245967

Epoch: 18
Y pred: (500, 2)
0.1243744558446086

Epoch: 19
Y pred: (500, 2)

In [27]:
model.predict(X_data[:2])

array([[ 3.85915768, -6.96913592],
       [ 3.86097469, -8.88488653]])

In [28]:
y_data[:2]

array([[ 3.85915768, -6.96913592],
       [ 3.86097469, -8.88488653]])