In [1]:
import os

import numpy as np

from torchic import Sigmoid, TanH, ReLU, LeakyReLU, Linear, Softmax
from torchic import CrossEntropyLoss
from torchic import SGD, Adam
from torchic import Activation, Cost, Optimizer
from torchic import Layer
from torchic import Dataloader

from utils.datasets import Datasets
from utils.visualization import (
    plot_train_val_curve, 
    plot_activations_histogram,
    plot_gradients_histogram,
    plot_weights_histogram
)

In [2]:
class MLP:
    """Fully connected classifier with layer sizes 784 -> 64 -> 64 -> 10.

    Bundles the layer stack, the cross-entropy criterion and the Adam
    optimizer, and provides the training / evaluation loops as well as
    persistence of the layer parameters to ``.npz`` archives.

    Every dataloader passed to this class is expected to yield
    ``(batch_index, (X, y_true))`` pairs when iterated.
    """

    def __init__(self):
        # Two ReLU hidden layers followed by a softmax output layer.
        self.layers = [
            Layer(input_size=784, output_size=64, activation=ReLU(), initialization='normal'),
            Layer(input_size=64, output_size=64, activation=ReLU(), initialization='normal'),
            Layer(input_size=64, output_size=10, activation=Softmax(), initialization='normal')
        ]
        self.criterion = CrossEntropyLoss()
        self.optimizer = Adam(parameters=self.layers, lr=1e-3, weight_decay=1e-5)

    def forward(self, X):
        """Feed ``X`` through every layer in order and return the last output."""
        z = X
        for layer in self.layers:
            z = layer(z)
        return z

    def backward(self):
        """Propagate the criterion's gradient through the layers in reverse order.

        ``criterion.backward()`` takes no arguments, so it presumably relies on
        state cached by the preceding ``self.criterion(y_pred, y_true)`` call.
        """
        dJ_dA = self.criterion.backward()
        for layer in reversed(self.layers):
            dJ_dA = layer.backward(dJ_dA)

    def train(self, dataloader: "Dataloader"):
        """Run one optimisation pass over ``dataloader``.

        Progress is printed whenever the batch index is a multiple of 100.

        Returns:
            The mean of the per-batch losses for this pass.
        """
        epoch_losses = []
        samples_seen = 0
        for batch, (X, y_true) in dataloader:
            self.optimizer.zero_grad()
            y_pred = self.forward(X)
            loss = self.criterion(y_pred, y_true)
            epoch_losses.append(loss)
            self.backward()
            self.optimizer.step()
            # Running count instead of (batch + 1) * len(X): the latter
            # under-reports when the reporting batch is a short final batch.
            samples_seen += X.shape[0]
            if batch % 100 == 0:
                total_samples = dataloader.num_samples
                print(f'Loss: {loss:>7f}, [{samples_seen:>5d}/{total_samples}]')
        avg_loss = np.mean(epoch_losses)
        return avg_loss

    def _evaluate(self, dataloader: "Dataloader"):
        """Forward-only pass shared by ``val`` and ``test``.

        Returns:
            ``(avg_loss, accuracy)`` where ``avg_loss`` is the mean per-batch
            loss and ``accuracy`` is the fraction of correct predictions.

        Raises:
            ValueError: if the dataloader yields no samples.
        """
        batch_losses = []
        correct = 0
        total = 0
        for _, (X, y_true) in dataloader:
            y_pred = self.forward(X)
            batch_losses.append(self.criterion(y_pred, y_true))
            # Compared element-wise with the argmax, so y_true is assumed to
            # hold class indices rather than one-hot rows - TODO confirm.
            correct += np.sum(y_pred.argmax(axis=1) == y_true)
            total += X.shape[0]
        if total == 0:
            raise ValueError('Cannot evaluate on an empty dataloader.')
        return np.mean(batch_losses), correct / total

    def val(self, dataloader: "Dataloader"):
        """Evaluate on the validation data, print the metrics, return the mean loss."""
        avg_loss, accuracy = self._evaluate(dataloader)
        print(f'Validation Error: \n Accuracy: {100*accuracy:>0.1f}, Avg loss: {avg_loss:>0.8f}\n')

        return avg_loss

    def test(self, dataloader: "Dataloader"):
        """Evaluate on the test data and print the metrics (returns nothing)."""
        avg_loss, accuracy = self._evaluate(dataloader)
        print(f'Test Error: \n Accuracy: {100*accuracy:>0.1f}, Avg loss: {avg_loss:>0.8f}\n')

    def fit(self, train_dataloader: "Dataloader", val_dataloader: "Dataloader", test_dataloader: "Dataloader", epochs: int):
        """Train for ``epochs`` passes, validating and plotting after each one.

        After the last epoch the train/validation loss curves are plotted and
        the model is evaluated once on ``test_dataloader``.
        """
        train_loss_per_epoch = []
        val_loss_per_epoch = []

        print('Training MLP...\n')
        for epoch in range(epochs):
            print(f'Epoch: {epoch+1}')
            train_loss_per_epoch.append(self.train(train_dataloader))
            val_loss_per_epoch.append(self.val(val_dataloader))
            plot_activations_histogram(self.layers)
            plot_gradients_histogram(self.layers)
        plot_train_val_curve(train_loss_per_epoch, val_loss_per_epoch)
        self.test(test_dataloader)

    def get_topology(self) -> str:
        """Return a multi-line description of the layers, loss and optimizer."""
        layer_blocks = [
            f'Layer: {i+1}\n'
            f'    Input: {layer.input_size} | Output: {layer.output_size}\n'
            f'    Theta.shape: {layer.theta.shape} | Bias.shape: {layer.bias.shape}\n'
            f'    Activation: {layer.activation}\n\n'
            for i, layer in enumerate(self.layers)
        ]
        footer = f'Loss function: {self.criterion} | Optimizer: {self.optimizer}'
        return ''.join(layer_blocks) + footer

    @staticmethod
    def _resolve_npz_path(file_path):
        """Return the path ``np.savez`` actually wrote for ``file_path``.

        ``np.savez`` appends ``.npz`` to names that lack it, so a path that
        does not exist as given falls back to its ``.npz``-suffixed form.
        Existing paths and non-path arguments (e.g. open file objects) are
        returned unchanged.
        """
        if isinstance(file_path, (str, os.PathLike)):
            path = os.fspath(file_path)
            if isinstance(path, str) and not path.endswith('.npz') and not os.path.exists(path):
                return path + '.npz'
        return file_path

    def save_model(self, file_path: str):
        """Write every layer's ``theta`` and ``bias`` to an ``.npz`` archive.

        Missing parent directories are created first. ``np.savez`` appends
        ``.npz`` to ``file_path`` when the name does not already end with it.
        """
        parameters_to_save = {}
        for i, layer in enumerate(self.layers):
            parameters_to_save[f'layer_{i}_theta'] = layer.theta
            parameters_to_save[f'layer_{i}_bias'] = layer.bias
        if isinstance(file_path, (str, os.PathLike)):
            parent_dir = os.path.dirname(os.fspath(file_path))
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
        np.savez(file_path, **parameters_to_save)
        print('Model saved.')

    def load_model(self, file_path):
        """Replace every layer's ``theta`` and ``bias`` with the saved arrays.

        Raises:
            KeyError: if the archive lacks an entry for one of the layers; in
                that case no layer is modified.
        """
        # The context manager closes the archive's file handle; all arrays are
        # read before any layer is touched so a bad archive cannot leave the
        # model half-loaded.
        with np.load(self._resolve_npz_path(file_path)) as parameters:
            loaded = [
                (parameters[f'layer_{i}_theta'], parameters[f'layer_{i}_bias'])
                for i in range(len(self.layers))
            ]
        for layer, (theta, bias) in zip(self.layers, loaded):
            layer.theta = theta
            layer.bias = bias
        print(f"Model weights loaded from {file_path}")

    def __str__(self) -> str:
        return self.get_topology()

In [3]:
# Fetch the MNIST train/validation/test splits and wrap each one in a
# Dataloader that shares a single batch size.
batch_size = 32
datasets = Datasets()

X_train, X_val, X_test, y_train, y_val, y_test = datasets('mnist')

train_dataloader, val_dataloader, test_dataloader = (
    Dataloader(features, labels, batch_size=batch_size)
    for features, labels in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

In [4]:
# Build a fresh model and print its summary (MLP.__str__ delegates to get_topology).
mlp = MLP()
print(mlp)

Layer: 1
    Input: 784 | Output: 64
    Theta.shape: (64, 784) | Bias.shape: (1, 64)
    Activation: ReLU

Layer: 2
    Input: 64 | Output: 64
    Theta.shape: (64, 64) | Bias.shape: (1, 64)
    Activation: ReLU

Layer: 3
    Input: 64 | Output: 10
    Theta.shape: (10, 64) | Bias.shape: (1, 10)
    Activation: Softmax

Loss function: Cross Entropy | Optimizer: Adam(learning_rate=0.001, beta=0.9, gamma=0.999, epsilon=1e-15, weight_decay=1e-05)


In [5]:
# Plot the layers' weights before any training has happened
# (helper imported from utils.visualization).
plot_weights_histogram(mlp.layers)

In [6]:
# Train for 5 epochs; fit() validates and plots after every epoch and
# finishes with a single evaluation on the test split.
mlp.fit(
    train_dataloader=train_dataloader, 
    val_dataloader=val_dataloader,
    test_dataloader=test_dataloader,
    epochs=5
)

Training MLP...

Epoch: 1
Loss: 2.385992, [   32/45000]
Loss: 0.366114, [ 3232/45000]
Loss: 0.244146, [ 6432/45000]
Loss: 0.475439, [ 9632/45000]
Loss: 0.341249, [12832/45000]
Loss: 0.226139, [16032/45000]
Loss: 0.296850, [19232/45000]
Loss: 0.161109, [22432/45000]
Loss: 0.176029, [25632/45000]
Loss: 0.144457, [28832/45000]
Loss: 0.075032, [32032/45000]
Loss: 0.309227, [35232/45000]
Loss: 0.415420, [38432/45000]
Loss: 0.229582, [41632/45000]
Loss: 0.104528, [44832/45000]
Validation Error: 
 Accuracy: 94.6, Avg loss: 0.17464248



Epoch: 2
Loss: 0.106334, [   32/45000]
Loss: 0.177440, [ 3232/45000]
Loss: 0.075468, [ 6432/45000]
Loss: 0.214905, [ 9632/45000]
Loss: 0.278401, [12832/45000]
Loss: 0.161202, [16032/45000]
Loss: 0.178009, [19232/45000]
Loss: 0.052669, [22432/45000]
Loss: 0.077425, [25632/45000]
Loss: 0.092284, [28832/45000]
Loss: 0.062753, [32032/45000]
Loss: 0.204276, [35232/45000]
Loss: 0.396114, [38432/45000]
Loss: 0.134300, [41632/45000]
Loss: 0.048792, [44832/45000]
Validation Error: 
 Accuracy: 95.7, Avg loss: 0.14169698



Epoch: 3
Loss: 0.066074, [   32/45000]
Loss: 0.103745, [ 3232/45000]
Loss: 0.046408, [ 6432/45000]
Loss: 0.080323, [ 9632/45000]
Loss: 0.304225, [12832/45000]
Loss: 0.105228, [16032/45000]
Loss: 0.121834, [19232/45000]
Loss: 0.036498, [22432/45000]
Loss: 0.039466, [25632/45000]
Loss: 0.052905, [28832/45000]
Loss: 0.052930, [32032/45000]
Loss: 0.150679, [35232/45000]
Loss: 0.343425, [38432/45000]
Loss: 0.102265, [41632/45000]
Loss: 0.084914, [44832/45000]
Validation Error: 
 Accuracy: 96.2, Avg loss: 0.12668369



Epoch: 4
Loss: 0.041368, [   32/45000]
Loss: 0.059615, [ 3232/45000]
Loss: 0.023017, [ 6432/45000]
Loss: 0.044279, [ 9632/45000]
Loss: 0.323723, [12832/45000]
Loss: 0.057305, [16032/45000]
Loss: 0.124257, [19232/45000]
Loss: 0.049210, [22432/45000]
Loss: 0.027255, [25632/45000]
Loss: 0.022095, [28832/45000]
Loss: 0.025189, [32032/45000]
Loss: 0.085929, [35232/45000]
Loss: 0.300708, [38432/45000]
Loss: 0.048289, [41632/45000]
Loss: 0.062764, [44832/45000]
Validation Error: 
 Accuracy: 96.5, Avg loss: 0.11707556



Epoch: 5
Loss: 0.081336, [   32/45000]
Loss: 0.018002, [ 3232/45000]
Loss: 0.023812, [ 6432/45000]
Loss: 0.041516, [ 9632/45000]
Loss: 0.259808, [12832/45000]
Loss: 0.039799, [16032/45000]
Loss: 0.113941, [19232/45000]
Loss: 0.051037, [22432/45000]
Loss: 0.011266, [25632/45000]
Loss: 0.010633, [28832/45000]
Loss: 0.020894, [32032/45000]
Loss: 0.038663, [35232/45000]
Loss: 0.233787, [38432/45000]
Loss: 0.050680, [41632/45000]
Loss: 0.021515, [44832/45000]
Validation Error: 
 Accuracy: 96.7, Avg loss: 0.11852698



Test Error: 
 Accuracy: 96.6, Avg loss: 0.11361117



In [7]:
# Persist every layer's theta/bias arrays to an .npz archive.
file_path = './models/mnist_model.npz'
mlp.save_model(file_path)

Model saved.


In [8]:
# Baseline: evaluate a newly constructed model before any saved weights are loaded.
mlp_2 = MLP()
mlp_2.test(test_dataloader)

Test Error: 
 Accuracy: 11.4, Avg loss: 2.34974642



In [9]:
# Load the saved parameters into the fresh model and re-run the test evaluation;
# the metrics should match those of the trained model.
mlp_2.load_model(file_path)
mlp_2.test(test_dataloader)

Model weights loaded from ./models/mnist_model.npz
Test Error: 
 Accuracy: 96.6, Avg loss: 0.11361117

