In [47]:
import os
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
from torchvision.datasets import MNIST
import pytorch_lightning as pl
import torchmetrics

In [49]:
import wandb
from pytorch_lightning.loggers import WandbLogger

wandb.login()

True

In [50]:
# set seed for experiment reproducibility
pl.seed_everything(42)

Global seed set to 42


42

In [64]:
wandb_logger = WandbLogger(name='MNIST_MLP_traning', project='Pruning neurons')

In [52]:
LEARING_RATE = 1e-3
BATCH_SIZE = 64
EPOCHS = 5

In [None]:
activations_on_epochs = []

In [62]:
class MNISTModel(pl.LightningModule):
    def __init__(self):
        super(MNISTModel, self).__init__()
        self.layer_1 = nn.Linear(28 * 28, 128)
        self.layer_2 = nn.Linear(128, 256)
        self.layer_3 = nn.Linear(256, 10)

        self.activations = {'layer_1': [], 'layer_2': [], 'layer_3': []}
        
        # metrics
        self.train_acc = torchmetrics.Accuracy(num_classes=10, task='multiclass')
        self.val_acc = torchmetrics.Accuracy(num_classes=10, task='multiclass')
        self.test_acc = torchmetrics.Accuracy(num_classes=10, task='multiclass')
        self.precision = torchmetrics.Precision(num_classes=10, average='macro', task='multiclass')
        self.recall = torchmetrics.Recall(num_classes=10, average='macro', task='multiclass')
        self.f1 = torchmetrics.F1Score(num_classes=10, average='macro', task='multiclass')

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.layer_1(x)
        self.activations['layer_1'].append(x.cpu().detach())
        x = F.relu(x)

        x = self.layer_2(x)
        self.activations['layer_2'].append(x.cpu().detach())
        x = F.relu(x)

        x = self.layer_3(x)
        self.activations['layer_3'].append(x.cpu().detach())
        return F.log_softmax(x, dim=1)
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        self.log('train_loss', loss, prog_bar=True)
        self.log('train_acc', self.train_acc(logits, y), prog_bar=True)
        return loss
    
    def on_train_epoch_end(self) -> None:
        for name, activations in self.activations.items():
            all_activations = torch.cat(activations).numpy()
            wandb.log({f'{name}_activations': wandb.Histogram(all_activations)}, commit=False)
            self.activations[name] = []

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', self.val_acc(logits, y), prog_bar=True)

    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        self.log('test_loss', loss, prog_bar=True)
        self.log('test_acc', self.test_acc(logits, y), prog_bar=True)
        self.log('precision', self.precision(logits, y), prog_bar=True)
        self.log('recall', self.recall(logits, y), prog_bar=True)
        self.log('f1', self.f1(logits, y), prog_bar=True)
    
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=LEARING_RATE)
    
model = MNISTModel()

In [61]:
def load_data(batch_size=BATCH_SIZE, num_workers=4):
    # Transformaciones para los datos
    transform = transforms.ToTensor()

    # Carga de datos de entrenamiento
    mnist_train = MNIST(os.getcwd(), train=True, download=True, transform=transform)
    
    # División entre entrenamiento y validación
    train_size = int(0.8 * len(mnist_train))
    val_size = len(mnist_train) - train_size
    mnist_train, mnist_val = random_split(mnist_train, [train_size, val_size])

    # DataLoader para entrenamiento y validación
    train_loader = DataLoader(mnist_train, batch_size=batch_size, num_workers=num_workers)
    val_loader = DataLoader(mnist_val, batch_size=batch_size, num_workers=num_workers)

    # Carga de datos de test
    mnist_test = MNIST(os.getcwd(), train=False, download=True, transform=transform)
    test_loader = DataLoader(mnist_test, batch_size=batch_size, num_workers=num_workers)

    return train_loader, val_loader, test_loader

train_loader, val_loader, test_loader = load_data()

In [65]:
trainer = pl.Trainer(max_epochs=EPOCHS, logger=wandb_logger)
trainer.fit(model, train_loader, val_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type                | Params
--------------------------------------------------
0 | layer_1   | Linear              | 100 K 
1 | layer_2   | Linear              | 33.0 K
2 | layer_3   | Linear              | 2.6 K 
3 | train_acc | MulticlassAccuracy  | 0     
4 | val_acc   | MulticlassAccuracy  | 0     
5 | test_acc  | MulticlassAccuracy  | 0     
6 | precision | MulticlassPrecision | 0     
7 | recall    | Mul

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.


In [66]:
# Test
trainer.test(model, test_loader)

You are using a CUDA device ('NVIDIA GeForce RTX 3060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

[{'test_loss': 0.11035357415676117,
  'test_acc': 0.9690999984741211,
  'precision': 0.9692661166191101,
  'recall': 0.9678186774253845,
  'f1': 0.9652238488197327}]

In [67]:
wandb.finish()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▇▇▇▇▇▇▇█
f1,▁
precision,▁
recall,▁
test_acc,▁
test_loss,▁
train_acc,▁▅▂▂▅▅▇▇█▅▅▅▆▆█▇█▇▇▇▇▇█▇█▇▇▇▇▇█▇█▇▇▇▇▇▇▇
train_loss,█▄▆▆▆▄▂▂▁▄▃▃▄▃▁▂▁▃▂▂▃▂▁▂▁▃▂▂▄▂▁▂▁▁▁▁▄▁▂▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
val_acc,▁▅▇██

0,1
epoch,5.0
f1,0.96522
precision,0.96927
recall,0.96782
test_acc,0.9691
test_loss,0.11035
train_acc,0.98438
train_loss,0.02397
trainer/global_step,3750.0
val_acc,0.96933


In [68]:
model.activations

{'layer_1': [tensor([[ 0.9532,  0.6329, -0.9275,  ..., -0.0329, -1.7398,  0.8258],
          [-0.3106, -0.8216, -1.5208,  ..., -1.2881,  2.7661, -3.3714],
          [-0.3607, -0.4606,  0.1928,  ...,  0.2142,  0.4088,  0.1486],
          ...,
          [-1.4793,  2.0238, -0.4642,  ..., -1.9591, -0.8758,  3.9882],
          [-0.2231,  0.0967, -2.5163,  ...,  1.4518,  0.1059,  0.0323],
          [-1.8436,  0.0740,  0.6975,  ...,  1.9928,  2.9864,  0.8945]]),
  tensor([[ 0.5848,  1.4589, -1.3963,  ...,  2.5345, -0.6893,  4.1540],
          [-0.3384,  0.6508,  0.1686,  ...,  1.8318, -0.0638, -0.0683],
          [ 1.7734,  1.6116, -0.5179,  ..., -1.5879,  0.5780,  0.0450],
          ...,
          [ 0.0205, -0.2325, -2.1902,  ...,  0.0490, -0.8773,  0.1507],
          [ 0.6590, -1.4727, -0.6184,  ..., -0.6293, -1.2655, -1.6474],
          [-0.3523,  0.3966, -1.7331,  ...,  4.7514, -2.8576, -0.1103]]),
  tensor([[-0.1099,  1.9653, -0.3581,  ...,  2.2563, -0.5979,  0.7215],
          [-1.0060,