# Multi Layer Perceptron (MLP)

> Simple feedforward Multilayer perceptron model

In [1]:
#| default_exp models.mlp

In [2]:
#| hide
%load_ext autoreload
%autoreload 2
from nbdev.showdoc import *

In [3]:
#| export
import torch.nn as nn
import torch
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST


from pytorch_lightning import LightningModule, Trainer
from torchmetrics import Accuracy
from hydra.utils import instantiate
from omegaconf import OmegaConf

from nimrod.data.datasets import MNISTDataModule
from nimrod.utils import get_device

from IPython.core.debugger import set_trace

  from .autonotebook import tqdm as notebook_tqdm


## Basic model

In [4]:
#| export
class MLP(nn.Module):
    def __init__(
                self, n_in:int=32*32*3, # input dimension e.g. (H,W) for image
                n_h:int=64, # hidden dimension
                n_out:int=10 # output dimension (= number of classes for classification)
                ):
        super().__init__()
        l1 = nn.Linear(n_in, n_h)
        l2 = nn.Linear(n_h, n_out)
        dropout = nn.Dropout(0.2)
        self.layers = nn.Sequential(l1,l2, dropout)
        
    def forward(self, x: torch.FloatTensor # dim (B, H*W)
                ) -> torch.FloatTensor:
        return self.layers(x)

### Usage

In [5]:
image = torch.rand((5, 28*28))
mlp = MLP(n_in=28*28, n_h=64, n_out=10)
out = mlp(image)
print(out.shape)

torch.Size([5, 10])


### Basic training
#### Data Module
Data module
c.f. recipes/image/mnist

In [6]:
!cat ../config/data/image/mnist.yaml

dataset:
  _target_: nimrod.image.datasets.MNISTDataset
  data_dir: "../data/image"
  train: False
  transform: 
    _target_: torchvision.transforms.ToTensor

datamodule:
  _target_: nimrod.image.datasets.MNISTDataModule
  data_dir: "../data/image"
  train_val_test_split: [0.8, 0.1, 0.1]
  batch_size: 64
  num_workers: 0
  pin_memory: False


In [7]:
# load from config file
cfg = OmegaConf.load('../config/data/image/mnist.yaml')
datamodule = instantiate(cfg.datamodule)
datamodule.prepare_data()
datamodule.setup()
x = datamodule.data_test[0][0] # (C, H, W)
print(len(datamodule.data_test))
label = datamodule.data_test[0][1] #(int)
print("original shape (C,H,W): ", x.shape)
print("reshape (C,HxW): ", x.view(x.size(0), -1).shape)
print(x[0][1])

7000
original shape (C,H,W):  torch.Size([1, 28, 28])
reshape (C,HxW):  torch.Size([1, 784])
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.])


In [8]:
# using default Pytorch datasets
train_dataset = MNIST("../data/image", train=True, download=True, transform=ToTensor())
test_dataset = MNIST("../data/image", train=False, download=True, transform=ToTensor())
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# using nimrod datamodule
train_loader = datamodule.train_dataloader()
val_loader = datamodule.val_dataloader()
test_loader = datamodule.test_dataloader()

#### Hardware acceleration

In [9]:
device = "mps" if torch.backends.mps.is_available() else "cpu"
device = torch.device(device)
model = mlp.to(device)

#### Loss & optimizer setup

In [10]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

#### Training loop

In [11]:
%%time
n_epochs = 1
for epoch in range(n_epochs):
    model.train()
    for images, labels in train_loader:
        images = images.view(-1, 28*28)
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        # print(loss.item())
        optimizer.step()

    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            # model expects input (B,H*W)
            images = images.view(-1, 28*28).to(device)
            images = images.to(device)
            labels = labels.to(device)
            # Pass the input through the model
            outputs = model(images)
            # Get the predicted labels
            _, predicted = torch.max(outputs.data, 1)

            # Update the total and correct counts
            total += labels.size(0)
            correct += (predicted == labels).sum()

        # Print the accuracy
        print(f"Epoch {epoch + 1}: Accuracy = {100 * correct / total:.2f}%")

Epoch 1: Accuracy = 71.26%
CPU times: user 5.58 s, sys: 543 ms, total: 6.13 s
Wall time: 8.74 s


## Integrated model + training settings

In [2]:
#| export
class MLP_PL(LightningModule):
    def __init__(self,
                mlp:MLP # pure pytorch MLP model
                ):
        super().__init__()
        self.save_hyperparameters(ignore=['mlp'])
        self.mlp = mlp
        self.loss = nn.CrossEntropyLoss()
        self.accuracy = Accuracy(task="multiclass", num_classes=10)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer
    
    def forward(self,
                x: torch.Tensor # X input images dim(B, H*W)
                ) -> torch.Tensor: # y class probabilities (B, n_classes)
        return(self.mlp(x))

    def training_step(self, batch, batch_idx):
        x, y = batch
        x = x.view(x.size(0), -1)
        y_hat = self.mlp(x)
        loss = self.loss(y_hat, y)
        return loss
    
    def _step(self, batch, batch_idx):
        x, y = batch
        x = x.view(x.size(0), -1)
        y_hat = self.mlp(x)
        loss = self.loss(y_hat, y)
        acc = self.accuracy(y_hat, y)
        return loss, acc
    
    def validation_step(self, batch, batch_idx, prog_bar=True, on_step=False, on_epoch=True, sync_dist=True):
        loss, acc = self._step(batch, batch_idx)
        metrics = {"val/loss":loss, "val/acc": acc}
        self.log_dict(metrics, on_step=on_step, on_epoch=on_epoch, sync_dist=sync_dist)
    
    def test_step(self, batch, batch_idx, prog_bar=True, on_step=False, on_epoch=True, sync_dist=True):
        loss, acc = self._step(batch, batch_idx)
        metrics = {"test/loss":loss, "test/acc": acc}
        self.log_dict(metrics, on_step=on_step, on_epoch=on_epoch, sync_dist=sync_dist)

    def predict_step(self, batch, batch_idx, dataloader_idx=0):
        x, y = batch
        y_hat = self.mlp(x)
        return y_hat


NameError: name 'LightningModule' is not defined

### Usage

In [42]:
# wrap simple model in modularized model
mlp_pl = MLP_PL(mlp)
# fake input
b = torch.rand((5,1, 28*28))

# move model and data to hardware
model = mlp_pl.to(device)
b = b.to(device)

y = mlp_pl(b)
print(y.shape)

torch.Size([5, 10])


## Integrated trainer

In [45]:
# trainer = Trainer(accelerator='cpu', fast_dev_run=False, max_epochs=3) #mps', devices=1)
# trainer.fit(mlp_pl, datamodule.data_train)
# trainer.fit(mlp_pl, datamodule.data_train)

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(

  | Name     | Type               | Params
------------------------------------------------
0 | mlp      | MLP                | 50.9 K
1 | loss     | CrossEntropyLoss   | 0     
2 | accuracy | MulticlassAccuracy | 0     
------------------------------------------------
50.9 K    Trainable params
0         Non-trainable params
50.9 K    Total params
0.204     Total estimated model params size (MB)


Epoch 0:   0%|          | 0/56000 [00:00<?, ?it/s] training step
Y: torch.Size([1, 784]) 6
Epoch 0:   0%|          | 1/56000 [00:00<02:48, 332.67it/s, v_num=14]training step
Y: torch.Size([1, 784]) 2
Epoch 0:   0%|          | 2/56000 [00:00<02:19, 402.27it/s, v_num=14]training step
Y: torch.Size([1, 784]) 5
Epoch 0:   0%|          | 3/56000 [00:00<02:10, 429.07it/s, v_num=14]training step
Y: torch.Size([1, 784]) 6
Epoch 0:   0%|          | 4/56000 [00:00<03:29, 267.20it/s, v_num=14]training step
Y: torch.Size([1, 784]) 9
Epoch 0:   0%|          | 5/56000 [00:00<04:05, 227.71it/s, v_num=14]training step
Y: torch.Size([1, 784]) 4
Epoch 0:   0%|          | 6/56000 [00:00<04:37, 201.96it/s, v_num=14]training step
Y: torch.Size([1, 784]) 2
Epoch 0:   0%|          | 7/56000 [00:00<04:16, 218.05it/s, v_num=14]training step
Y: torch.Size([1, 784]) 3
Epoch 0:   0%|          | 8/56000 [00:00<04:01, 232.18it/s, v_num=14]training step
Y: torch.Size([1, 784]) 4
Epoch 0:   0%|          | 9/56000 [00

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [None]:
#| hide
import nbdev; nbdev.nbdev_export()