In [1]:
import sys

# Make the parent directory importable so the project-local `models` package
# can be imported by the next cell. The guard and the append use the same
# variable, so changing the path in one place cannot leave them out of sync.
principal_path = '../'
if principal_path not in sys.path:
    sys.path.append(principal_path)

In [2]:
import os
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
from torchvision.datasets import MNIST
import pytorch_lightning as pl
import torchmetrics
import pandas as pd
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

from pytorch_lightning.loggers import WandbLogger
from lightning.pytorch.loggers import CSVLogger
from pytorch_lightning.loggers import TensorBoardLogger

from models.MNISTModel import MNISTModel
from models.MNISTModelWithBottelNeck import MNISTModelWithBottelNeck

In [3]:
# Seed the run through Lightning's seed_everything helper so repeated runs
# start from the same random state; the cell displays the seed it returns.
SEED = 2024
pl.seed_everything(SEED)

Global seed set to 2024


2024

In [4]:
# Hyperparameters and output locations shared by the cells below.
LEARING_RATE = 1e-3  # passed as `lr` to both model constructors
BATCH_SIZE = 64  # default batch size of load_data()
EPOCHS = 5  # passed as `max_epochs` to the trainers
# In the cells visible here DEVICE is only used for the printout below.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
FOLDER_CHEKPOINTS = 'checkpoints'  # directory the .ckpt files are written to / read from
FOLDER_ACTIVATIONS = 'activations'  # directory the activations CSV is written to
print(f'Using {DEVICE}')

Using cuda


## Loggers

In [5]:
def get_number_of_parameters(model):
    """Return the number of trainable parameters of ``model`` as a short string.

    Only parameters with ``requires_grad`` set are counted. The count is
    divided by the largest unit it reaches (B = 10**9, M = 10**6, K = 10**3,
    or 1 with no suffix) and rounded to one decimal, e.g. ``'100.5K'`` or
    ``'110.0'``. A count of zero is returned as ``'0'``.
    """
    trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable += param.numel()

    # Largest unit first, so the first threshold that is reached wins.
    for scale, suffix in ((10 ** 9, 'B'), (10 ** 6, 'M'), (10 ** 3, 'K'), (1, '')):
        if trainable >= scale:
            return f'{round(trainable / scale, 1)}{suffix}'
    return str(trainable)

def log_csv(model, model_name, folder='csv_logs'):
    """Build a CSVLogger writing under ``folder``.

    The run is named ``<model_name>_<count>``, where ``<count>`` is the string
    returned by ``get_number_of_parameters(model)``.
    """
    run_name = f"{model_name}_{get_number_of_parameters(model)}"
    return CSVLogger(save_dir=folder, name=run_name)

def log_tensorboard(model, model_name, folder='tb_logs'):
    """Build a TensorBoardLogger writing under ``folder``.

    The run is named ``<model_name>_<count>``, where ``<count>`` is the string
    returned by ``get_number_of_parameters(model)``.
    """
    run_name = f"{model_name}_{get_number_of_parameters(model)}"
    return TensorBoardLogger(save_dir=folder, name=run_name)

def log_wandb(model, model_name, folder='wandb_logs'):
    """Build a WandbLogger that saves under ``folder``.

    The run is named ``<model_name>_<count>``, where ``<count>`` is the string
    returned by ``get_number_of_parameters(model)``.
    """
    run_name = f"{model_name}_{get_number_of_parameters(model)}"
    return WandbLogger(save_dir=folder, name=run_name)

## Download Dataset

In [6]:
def load_data(batch_size=BATCH_SIZE, num_workers=4):
    """Download MNIST into the current working directory and build the loaders.

    The official training split is divided 80% / 20% into training and
    validation subsets with ``random_split``; the official test split is used
    unchanged.

    Args:
        batch_size: Batch size used by all three loaders.
        num_workers: Worker processes per loader. ``0`` loads data in the main
            process.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``. Only the training
        loader shuffles.
    """
    # Images are only converted to tensors; no further preprocessing is applied.
    transform = transforms.ToTensor()

    # Training data, split into training and validation subsets.
    mnist_train = MNIST(os.getcwd(), train=True, download=True, transform=transform)
    train_size = int(0.8 * len(mnist_train))
    val_size = len(mnist_train) - train_size
    mnist_train, mnist_val = random_split(mnist_train, [train_size, val_size])

    # DataLoader raises ValueError for persistent_workers=True when
    # num_workers == 0, so keep workers alive only when there are workers.
    loader_kwargs = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        persistent_workers=num_workers > 0,
    )

    train_loader = DataLoader(mnist_train, shuffle=True, **loader_kwargs)
    val_loader = DataLoader(mnist_val, shuffle=False, **loader_kwargs)

    # Test data.
    mnist_test = MNIST(os.getcwd(), train=False, download=True, transform=transform)
    test_loader = DataLoader(mnist_test, **loader_kwargs)

    return train_loader, val_loader, test_loader

# Build the loaders once with the default batch size and worker count; the
# same three loaders are reused for both models trained below.
train_loader, val_loader, test_loader = load_data()

## Training

### MNISTModel

In [7]:
# Instantiate the baseline MNISTModel with the shared learning rate.
model = MNISTModel(lr=LEARING_RATE)

In [8]:
# Log the baseline run to both TensorBoard and CSV; the run names start with
# 'MLP' followed by the model's trainable-parameter count.
loggers = [
    log_tensorboard(model, 'MLP'),
    log_csv(model, 'MLP'),
]

In [9]:
# Train the baseline model for EPOCHS epochs, validating on val_loader and
# reporting to the `loggers` list built in the previous cell.
trainer = pl.Trainer(max_epochs=EPOCHS, logger=loggers)
trainer.fit(model, train_loader, val_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type                | Params
--------------------------------------------------
0 | train_acc | MulticlassAccuracy  | 0     
1 | val_acc   | MulticlassAccuracy  | 0     
2 | test_acc  | MulticlassAccuracy  | 0     
3 | precision | MulticlassPrecision | 0     
4 | recall    | MulticlassRecall    | 0     
5 | f1        | MulticlassF1Score   | 0     
6 | layer_1   | Linear              | 100 K 
7 | layer_2   | Lin

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.


In [10]:
# Evaluate the trained baseline model on the MNIST test loader; the cell
# displays the list of metric dicts returned by trainer.test.
trainer.test(model, test_loader)

You are using a CUDA device ('NVIDIA GeForce RTX 3060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

[{'test_loss': 0.08122822642326355,
  'test_acc': 0.9753000140190125,
  'precision': 0.9733837842941284,
  'recall': 0.9732210040092468,
  'f1': 0.9707237482070923}]

In [11]:
# Save the baseline trainer state as a checkpoint; the Activations section
# reloads the model from this file.
file = os.path.join(FOLDER_CHEKPOINTS, 'mnist_model.ckpt')
trainer.save_checkpoint(file)

### Bottleneck

In [12]:
# Instantiate the bottleneck variant with the same learning rate as the baseline.
bottel_neck_model = MNISTModelWithBottelNeck(lr=LEARING_RATE)

In [13]:
# TensorBoard and CSV loggers for the bottleneck model; the run names start
# with 'BottelNeck' followed by the model's trainable-parameter count.
# Note: this rebinds the `loggers` name used for the baseline run.
loggers = [
    log_tensorboard(bottel_neck_model, 'BottelNeck'),
    log_csv(bottel_neck_model, 'BottelNeck'),
]

In [14]:
# Train the bottleneck model for EPOCHS epochs. Pass the `loggers` list built
# in the previous cell (TensorBoard + CSV) so this run is logged the same way
# as the baseline run, instead of creating a second TensorBoard-only logger.
trainer = pl.Trainer(max_epochs=EPOCHS, logger=loggers)
trainer.fit(bottel_neck_model, train_loader, val_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type                | Params
--------------------------------------------------
0 | train_acc | MulticlassAccuracy  | 0     
1 | val_acc   | MulticlassAccuracy  | 0     
2 | test_acc  | MulticlassAccuracy  | 0     
3 | precision | MulticlassPrecision | 0     
4 | recall    | MulticlassRecall    | 0     
5 | f1        | MulticlassF1Score   | 0     
6 | layer_1   | Linear              | 200 K 
7 | layer_2   | Lin

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.


In [15]:
# Evaluate the trained bottleneck model on the MNIST test loader; the cell
# displays the list of metric dicts returned by trainer.test.
trainer.test(bottel_neck_model, test_loader)

You are using a CUDA device ('NVIDIA GeForce RTX 3060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

[{'test_loss': 0.10720314830541611,
  'test_acc': 0.9684000015258789,
  'precision': 0.9670910835266113,
  'recall': 0.9658780097961426,
  'f1': 0.9632332921028137}]

In [16]:
# Save the bottleneck trainer state as a checkpoint next to the baseline one.
file = os.path.join(FOLDER_CHEKPOINTS, 'mnist_model_bottel_neck.ckpt')
trainer.save_checkpoint(file)

## Activations

In [17]:
def load_data_for_activations():
    """Return a DataLoader over the whole MNIST test split, one image per batch.

    The data is downloaded into the current working directory if needed,
    converted with ``ToTensor`` and served unshuffled, so batches follow the
    dataset order.
    """
    return DataLoader(
        MNIST(os.getcwd(), train=False, download=True, transform=transforms.ToTensor()),
        batch_size=1,
        shuffle=False,
    )

# Single-image, unshuffled test loader used to record activations per sample.
test_loader_act = load_data_for_activations()

In [18]:
# Reload the baseline model from the checkpoint saved in the Training section
# and switch it to evaluation mode; the cell displays the module it returns.
minst_model_file = os.path.join(FOLDER_CHEKPOINTS, 'mnist_model.ckpt')
model_for_act = MNISTModel.load_from_checkpoint(minst_model_file)
model_for_act.eval()

MNISTModel(
  (train_acc): MulticlassAccuracy()
  (val_acc): MulticlassAccuracy()
  (test_acc): MulticlassAccuracy()
  (precision): MulticlassPrecision()
  (recall): MulticlassRecall()
  (f1): MulticlassF1Score()
  (layer_1): Linear(in_features=784, out_features=128, bias=True)
  (layer_2): Linear(in_features=128, out_features=256, bias=True)
  (layer_3): Linear(in_features=256, out_features=10, bias=True)
)

In [19]:
def create_neuron_dataframe(layer_neurons, num_rows, test_loader_act, model_for_act):
    """Run the model over a loader and tabulate the recorded activations.

    Args:
        layer_neurons: Number of neurons to keep from each recorded layer, in
            layer order. Only the first ``n`` activations of a layer are kept.
        num_rows: Minimum number of rows of the result. If the loader yields
            fewer samples, the remaining rows are filled with NaN.
        test_loader_act: Iterable of ``(x, y)`` batches. Any batch size works;
            each sample becomes one row.
        model_for_act: Callable invoked as ``model_for_act(x,
            record_activations=True)`` and expected to return a pair whose
            second item is indexable by layer position.
            NOTE(review): each recorded layer is assumed to be a 2-D
            ``(batch, neurons)`` tensor — confirm against the model.

    Returns:
        DataFrame with a ``Number`` column (the label) followed by one
        ``Layer{i}_Neuron{j}`` column per kept neuron. All columns are float64.

    Raises:
        IndexError: If a recorded layer has fewer neurons than requested.
    """
    # Column names: the label first, then one column per recorded neuron.
    columns = ['Number']
    for layer_index, neurons in enumerate(layer_neurons):
        columns += [f'Layer{layer_index+1}_Neuron{i+1}' for i in range(neurons)]

    # Collect one numeric block per batch and build the frame once at the end,
    # instead of assigning rows one by one into a pre-created object frame.
    blocks = []
    # Gradients are never used here, so skip building the autograd graph.
    with torch.no_grad():
        for x, y in tqdm(test_loader_act, desc='Running activations'):
            _, recorded = model_for_act(x, record_activations=True)

            parts = [y.detach().cpu().numpy().reshape(-1, 1)]
            for layer_index, neurons in enumerate(layer_neurons):
                layer_activation = recorded[layer_index].detach().cpu().numpy()
                if layer_activation.shape[1] < neurons:
                    raise IndexError(
                        f'Layer {layer_index + 1} recorded {layer_activation.shape[1]} '
                        f'activations but {neurons} were requested'
                    )
                parts.append(layer_activation[:, :neurons])

            blocks.append(np.hstack(parts).astype(np.float64))

    width = len(columns)
    data = np.vstack(blocks) if blocks else np.empty((0, width), dtype=np.float64)

    # Keep the original contract of at least `num_rows` rows, NaN where unfilled.
    if data.shape[0] < num_rows:
        padding = np.full((num_rows - data.shape[0], width), np.nan)
        data = np.vstack([data, padding])

    return pd.DataFrame(data, columns=columns)

In [20]:
# Example usage: record the activations of the reloaded baseline model for
# every sample served by test_loader_act.
layer_neurons = [128, 256, 10]  # number of neurons in each layer
num_rows = len(test_loader_act)  # number of rows in the DataFrame
df = create_neuron_dataframe(layer_neurons, num_rows, test_loader_act, model_for_act)

Running activations: 100%|██████████| 10000/10000 [00:08<00:00, 1141.48it/s]


In [21]:
# Preview the first rows of the activations table.
df.head()

Unnamed: 0,Number,Layer1_Neuron1,Layer1_Neuron2,Layer1_Neuron3,Layer1_Neuron4,Layer1_Neuron5,Layer1_Neuron6,Layer1_Neuron7,Layer1_Neuron8,Layer1_Neuron9,...,Layer3_Neuron1,Layer3_Neuron2,Layer3_Neuron3,Layer3_Neuron4,Layer3_Neuron5,Layer3_Neuron6,Layer3_Neuron7,Layer3_Neuron8,Layer3_Neuron9,Layer3_Neuron10
0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.92348,0.0,...,-16.595833,-14.188923,-8.905098,-10.092788,-22.101274,-15.535633,-29.975979,-0.00018,-14.806216,-13.204277
1,2.0,0.0,0.0,2.26403,2.359124,0.0,1.33489,0.0,0.821211,0.531313,...,-21.110266,-12.353317,-6e-06,-15.05283,-29.732016,-22.506277,-20.308481,-23.745291,-13.633943,-34.262554
2,1.0,0.0,0.59751,0.420798,0.433036,0.0,0.574497,1.608488,0.0,0.053121,...,-15.868408,-0.001101,-9.372239,-12.341898,-9.200323,-9.77616,-9.750698,-8.316074,-7.506509,-14.401055
3,0.0,0.0,0.0,0.270113,0.0,1.198138,0.0,0.0,2.564893,0.0,...,-0.000514,-13.230165,-9.013961,-16.788416,-12.716939,-12.95721,-9.12406,-12.553201,-12.449683,-8.221807
4,4.0,0.0,0.0,0.984886,0.0,0.0,0.0,0.0,1.628678,0.627652,...,-14.683038,-15.717505,-15.271838,-20.522961,-0.000891,-14.758453,-13.264597,-13.595921,-12.924339,-7.03157


In [24]:
# Save the activations as CSV. DataFrame.to_csv does not create missing
# directories, so make sure the target folder exists before writing.
os.makedirs(FOLDER_ACTIVATIONS, exist_ok=True)
activation_file = os.path.join(FOLDER_ACTIVATIONS, 'activations_minist_model.csv')
df.to_csv(activation_file, index=False)