In [7]:
import sys

# Add the parent directory to the module search path exactly once, presumably so
# the project-local `models` package imported further down resolves — TODO confirm.
principal_path = '../'
if principal_path not in sys.path:
    sys.path.append(principal_path)

In [18]:
import os
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
from torchvision.datasets import MNIST
import pytorch_lightning as pl
import torchmetrics
import pandas as pd
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

from pytorch_lightning.loggers import WandbLogger
from lightning.pytorch.loggers import CSVLogger

from models.MNISTModel import MNISTModel
from models.MNISTModelWithBottelNeck import MNISTModelWithBottelNeck

In [9]:
# One seed for the whole notebook. seed_everything logs "Seed set to 2024" and
# returns the seed, which is why the cell output shows 2024.
SEED = 2024
pl.seed_everything(SEED)

Seed set to 2024


2024

In [10]:
# Hyper-parameters shared by both models trained in this notebook.
# NOTE(review): LEARING_RATE is a misspelling of LEARNING_RATE; the name is kept
# because the model-construction cells reference it exactly as written.
LEARING_RATE = 1e-3
# Default batch size of load_data().
BATCH_SIZE = 64
# Passed as max_epochs to each Trainer.
EPOCHS = 5
# 'cuda' when a CUDA device is available, otherwise 'cpu'.
# NOTE(review): DEVICE is only printed here; no other visible cell reads it.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {DEVICE}')

Using cuda


## Logger

In [23]:
def get_number_of_parameters(model):
    """Return the trainable-parameter count of ``model`` as a short string.

    Only parameters whose ``requires_grad`` is truthy are counted. Counts of
    one thousand or more are divided by the largest unit that fits (K, M or
    B), rounded to one decimal and suffixed with that unit, e.g. ``'136.1K'``.
    Smaller counts are returned as the exact integer, e.g. ``'500'`` (not
    ``'500.0'``).

    Args:
        model: Object exposing ``parameters()`` that yields items providing
            ``numel()`` and ``requires_grad``.

    Returns:
        str: The formatted parameter count.
    """
    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    # Largest unit first, so the first match gives the most compact form.
    for unit, scale in (('B', 10 ** 9), ('M', 10 ** 6), ('K', 10 ** 3)):
        if num_params >= scale:
            return f'{round(num_params / scale, 1)}{unit}'
    # Below one thousand there is nothing to scale: keep the plain integer.
    return str(num_params)

def log_csv(model, model_name, folder = 'lightning_logs'):
    """Create a CSVLogger whose run name embeds the model's parameter count.

    Args:
        model: Model handed to ``get_number_of_parameters``.
        model_name: Prefix of the run name.
        folder: Passed to the logger as ``save_dir``.

    Returns:
        CSVLogger: Logger named ``"<model_name>_<parameter count>"``.
    """
    run_name = f"{model_name}_{get_number_of_parameters(model)}"
    return CSVLogger(save_dir=folder, name=run_name)

## Download Dataset

In [11]:
def load_data(batch_size=BATCH_SIZE, num_workers=4):
    """Build the MNIST train / validation / test DataLoaders.

    The MNIST training set is split 80% / 20% into training and validation
    subsets with ``random_split``; the MNIST test set is used as-is. Both
    datasets are requested with ``download=True`` and stored under the
    current working directory.

    Args:
        batch_size: Batch size used by all three loaders.
        num_workers: Number of worker processes per loader. ``0`` is
            supported; persistent workers are then simply not requested.

    Returns:
        tuple: ``(train_loader, val_loader, test_loader)``.
    """
    # Data transformation: convert the images to tensors.
    transform = transforms.ToTensor()

    # DataLoader raises ValueError for persistent_workers=True when
    # num_workers == 0, so only keep workers alive when there are any.
    loader_options = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        persistent_workers=num_workers > 0,
    )

    # Load the training data.
    mnist_train = MNIST(os.getcwd(), train=True, download=True, transform=transform)

    # Split into training and validation subsets.
    train_size = int(0.8 * len(mnist_train))
    val_size = len(mnist_train) - train_size
    mnist_train, mnist_val = random_split(mnist_train, [train_size, val_size])

    # DataLoaders for training (shuffled) and validation (fixed order).
    train_loader = DataLoader(mnist_train, shuffle=True, **loader_options)
    val_loader = DataLoader(mnist_val, shuffle=False, **loader_options)

    # Load the test data.
    mnist_test = MNIST(os.getcwd(), train=False, download=True, transform=transform)
    test_loader = DataLoader(mnist_test, **loader_options)

    return train_loader, val_loader, test_loader

# Build the three loaders once with the default batch size and worker count;
# both training runs below reuse these same loader objects.
train_loader, val_loader, test_loader = load_data()

## Training

### MNISTModel

In [20]:
# Baseline model (class imported from models.MNISTModel), built with the shared learning rate.
model = MNISTModel(lr=LEARING_RATE)

In [24]:
# Fit the baseline model for EPOCHS epochs on the train/validation loaders.
# Metrics are written by a CSVLogger whose run name is 'MLP' plus the model's
# parameter count (see log_csv).
trainer = pl.Trainer(max_epochs=EPOCHS, logger=log_csv(model, 'MLP'))
trainer.fit(model, train_loader, val_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type                | Params
--------------------------------------------------
0 | train_acc | MulticlassAccuracy  | 0     
1 | val_acc   | MulticlassAccuracy  | 0     
2 | test_acc  | MulticlassAccuracy  | 0     
3 | precision | MulticlassPrecision | 0     
4 | recall    | MulticlassRecall    | 0     
5 | f1        | MulticlassF1Score   | 0     
6 | layer_1   | Linear              | 100 K 
7 | layer_2   | Linear              | 33.0 K
8 | layer_3   | Linear              | 2.6 K 
--------------------------------------------------
136 K     Trainable params
0         Non-trainable params
136 K     Total params
0.544     Total estimated model params size (MB)


Epoch 4: 100%|██████████| 750/750 [00:03<00:00, 192.21it/s, v_num=0, train_loss=0.0845, train_acc=0.969, val_loss=0.0918, val_acc=0.975]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 750/750 [00:03<00:00, 191.91it/s, v_num=0, train_loss=0.0845, train_acc=0.969, val_loss=0.0918, val_acc=0.975]


In [25]:
# Evaluate the fitted baseline model on the test loader; the returned list of
# metric dicts is displayed as the cell output.
trainer.test(model, test_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 157/157 [00:00<00:00, 189.15it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
           f1               0.9676509499549866
        precision           0.9702486395835876
         recall             0.9705873727798462
        test_acc            0.9722999930381775
        test_loss           0.08604538440704346
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 0.08604538440704346,
  'test_acc': 0.9722999930381775,
  'precision': 0.9702486395835876,
  'recall': 0.9705873727798462,
  'f1': 0.9676509499549866}]

### Bottleneck

In [26]:
# Bottleneck variant (class imported from models.MNISTModelWithBottelNeck), built
# with the same learning rate as the baseline.
bottel_neck_model = MNISTModelWithBottelNeck(lr=LEARING_RATE)

In [28]:
# Fit the bottleneck model with the same epoch budget and loaders as the baseline.
# NOTE: this rebinds `trainer`, so the baseline's Trainer is no longer reachable
# under that name after this cell runs.
trainer = pl.Trainer(max_epochs=EPOCHS, logger=log_csv(bottel_neck_model, 'MLP_BottelNeck'))
trainer.fit(bottel_neck_model, train_loader, val_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type                | Params
--------------------------------------------------
0 | train_acc | MulticlassAccuracy  | 0     
1 | val_acc   | MulticlassAccuracy  | 0     
2 | test_acc  | MulticlassAccuracy  | 0     
3 | precision | MulticlassPrecision | 0     
4 | recall    | MulticlassRecall    | 0     
5 | f1        | MulticlassF1Score   | 0     
6 | layer_1   | Linear              | 200 K 
7 | layer_2   | Linear              | 32.9 K
8 | layer_3   | Linear              | 33.0 K
9 | layer_4   | Linear              | 2.6 K 
--------------------------------------------------
269 K     Trainable params
0         Non-trainable params
269 K     Total params
1.078     Total estimated model params size (MB)


Epoch 4: 100%|██████████| 750/750 [00:03<00:00, 227.10it/s, v_num=0, train_loss=0.295, train_acc=0.953, val_loss=0.124, val_acc=0.965]  

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 750/750 [00:03<00:00, 226.42it/s, v_num=0, train_loss=0.295, train_acc=0.953, val_loss=0.124, val_acc=0.965]


In [29]:
# Evaluate the fitted bottleneck model on the same test loader used for the baseline.
trainer.test(bottel_neck_model, test_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 157/157 [00:00<00:00, 210.93it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
           f1               0.9616830348968506
        precision           0.9655238389968872
         recall             0.9645422697067261
        test_acc             0.965499997138977
        test_loss           0.11361905187368393
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 0.11361905187368393,
  'test_acc': 0.965499997138977,
  'precision': 0.9655238389968872,
  'recall': 0.9645422697067261,
  'f1': 0.9616830348968506}]