# Multi Layer Perceptron (MLP)

> Simple feedforward Multilayer perceptron model

In [None]:
#| default_exp models.mlp

In [None]:
#| hide
%load_ext autoreload
%autoreload 2
from nbdev.showdoc import *

In [None]:
#| export
import torch.nn as nn
import torch
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST


from pytorch_lightning import LightningModule, Trainer
from torchmetrics import Accuracy
from hydra.utils import instantiate
from omegaconf import OmegaConf
from matplotlib import pyplot as plt

from nimrod.data.datasets import MNISTDataModule
from nimrod.utils import get_device
from nimrod.image.datasets import ImageDataset

# from IPython.core.debugger import set_trace

  from .autonotebook import tqdm as notebook_tqdm


## Basic model

In [None]:
#| export
class MLP(nn.Module):
    """Two-layer feedforward network: Linear -> ReLU -> Dropout -> Linear.

    The output of the last linear layer is returned untouched (raw class
    scores), which is what `nn.CrossEntropyLoss` expects.
    """
    def __init__(
                self,
                n_in:int, # input dimension e.g. H*W for a flattened image
                n_h:int, # hidden dimension
                n_out:int, # output dimension (= number of classes for classification)
                dropout:float=0.2 # dropout probability applied to the hidden activations
                ) -> None:
        super().__init__()
        # The non-linearity and dropout must sit BETWEEN the two linear layers.
        # Placing them after the output layer would clamp the class scores to be
        # non-negative, randomly zero some of them, and collapse the two linear
        # layers into a single affine map.
        # NOTE: the output layer is now `layers.3` (previously `layers.1`) in the
        # state_dict, so checkpoints saved with the old ordering need key remapping.
        self.layers = nn.Sequential(
            nn.Linear(n_in, n_h),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(n_h, n_out),
        )

    def forward(self, x: torch.Tensor # dim (B, H*W)
                ) -> torch.Tensor: # unnormalized class scores, dim (B, n_out)
        return self.layers(x)

### Usage

In [None]:
# Smoke test: a random batch of 5 flattened 28x28 images goes through the
# network and should come out as 5 rows of 10 class scores.
n_pixels = 28 * 28
image = torch.rand(5, n_pixels)
mlp = MLP(n_in=n_pixels, n_h=64, n_out=10)
out = mlp(image)
print(out.shape)

torch.Size([5, 10])


### Basic training
#### Data Module
The data module is configured from a YAML file, c.f. `recipes/image/mnist`:

```bash
cat ../config/data/image/mnist.yaml
```

In [None]:
# load from config file
cfg = OmegaConf.load('../config/data/image/mnist.yaml')
datamodule = instantiate(cfg.datamodule)
datamodule.prepare_data()
datamodule.setup()
x = datamodule.data_test[0][0] # (C, H, W)
print(len(datamodule.data_test))
label = datamodule.data_test[0][1] #(int)
print("original shape (C,H,W): ", x.shape)
print("reshape (C,HxW): ", x.view(x.size(0), -1).shape)
print(x[0][1])

7000
original shape (C,H,W):  torch.Size([1, 28, 28])
reshape (C,HxW):  torch.Size([1, 784])
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.])


In [None]:
# using default Pytorch datasets
train_dataset = MNIST("../data/image", train=True, download=True, transform=ToTensor())
test_dataset = MNIST("../data/image", train=False, download=True, transform=ToTensor())

# train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# using nimrod datamodule
train_loader = datamodule.train_dataloader()
val_loader = datamodule.val_dataloader()
test_loader = datamodule.test_dataloader()

In [None]:
# inspect which container type backs the test split
type(datamodule.data_test)

torch.utils.data.dataset.Subset

#### Hardware acceleration

In [None]:
# CI runs on a CPU-only instance, so the device is pinned to "cpu" here.
# Locally, "mps" can be used instead when torch.backends.mps.is_available().
device = torch.device("cpu")
model = mlp.to(device)

#### Loss & optimizer setup

In [None]:
# Adam over all parameters of the model, with a fixed learning rate
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
# cross-entropy between the model outputs and the integer class labels
criterion = nn.CrossEntropyLoss()

#### Training loop

In [None]:

%%time
n_epochs = 1
for epoch in range(n_epochs):
    # --- training pass ---
    model.train()
    for images, labels in train_loader:
        # model expects flattened input (B, H*W)
        images = images.view(-1, 28*28).to(device)
        labels = labels.to(device)
        # PyTorch accumulates gradients across backward() calls, so they must be
        # cleared before each batch; otherwise every step uses the running sum
        # of all previous gradients.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # --- evaluation pass on the test split ---
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            # model expects input (B,H*W)
            images = images.view(-1, 28*28).to(device)
            labels = labels.to(device)
            # Pass the input through the model
            outputs = model(images)
            # Predicted label = index of the highest class score
            predicted = outputs.argmax(dim=1)

            # Update the total and correct counts (as plain Python ints)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Print the accuracy
        print(f"Epoch {epoch + 1}: Accuracy = {100 * correct / total:.2f}%")


## Integrated model + training settings

In [None]:
#| export
class MLP_PL(LightningModule):
    """LightningModule wrapping `MLP` with a cross-entropy loss, an accuracy
    metric and an Adam optimizer.

    Batches are expected as `(x, y)` pairs; in the step methods `x` is flattened
    to `(B, -1)` before being fed to the MLP.
    """
    def __init__(self,
                n_in:int, # input dimension e.g. H*W for a flattened image
                n_h:int, # hidden dimension
                n_out:int, # output dimension (= number of classes for classification)
                dropout:float=0.2, # dropout factor
                lr:float=1e-3 # learning rate
                ):
        super().__init__()

        self.save_hyperparameters()
        self.mlp = MLP(n_in, n_h, n_out, dropout)
        self.loss = nn.CrossEntropyLoss()
        # the metric must agree with the model's output dimension rather than
        # assume 10 classes
        self.accuracy = Accuracy(task="multiclass", num_classes=n_out)
        self.lr = lr

    def configure_optimizers(self):
        # Adam over all module parameters with the configured learning rate
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        return optimizer
    
    def forward(self,
                x: torch.Tensor, # X input images dim(B, H*W)
                ) -> torch.Tensor: # y unnormalized class scores (B, n_classes); no softmax is applied
        return(self.mlp(x))

    def training_step(self, batch, batch_idx):
        # returns the loss only; the accuracy metric is not updated during training
        x, y = batch
        x = x.view(x.size(0), -1)
        y_hat = self.mlp(x)
        loss = self.loss(y_hat, y)
        return loss
    
    def _step(self, batch, batch_idx):
        # shared evaluation logic for validation and test: loss + accuracy on one batch
        x, y = batch
        x = x.view(x.size(0), -1)
        y_hat = self.mlp(x)
        loss = self.loss(y_hat, y)
        acc = self.accuracy(y_hat, y)
        return loss, acc
    
    def validation_step(self, batch, batch_idx, prog_bar=True, on_step=False, on_epoch=True, sync_dist=True):
        loss, acc = self._step(batch, batch_idx)
        metrics = {"val/loss":loss, "val/acc": acc}
        # forward prog_bar as well: it was accepted by the signature but never used
        self.log_dict(metrics, prog_bar=prog_bar, on_step=on_step, on_epoch=on_epoch, sync_dist=sync_dist)
    
    def test_step(self, batch, batch_idx, prog_bar=True, on_step=False, on_epoch=True, sync_dist=True):
        loss, acc = self._step(batch, batch_idx)
        metrics = {"test/loss":loss, "test/acc": acc}
        # forward prog_bar as well: it was accepted by the signature but never used
        self.log_dict(metrics, prog_bar=prog_bar, on_step=on_step, on_epoch=on_epoch, sync_dist=sync_dist)

    def predict_step(self, batch, batch_idx, dataloader_idx=0):
        # labels in the batch are ignored; returns the predicted class index per sample
        x, _ = batch
        x = x.view(x.size(0), -1)
        y_hat = self.mlp(x)
        return y_hat.argmax(dim=1)


### Usage

In [None]:
# wrap simple model in modularized model
mlp_pl = MLP_PL(28*28, 64, n_out=10, dropout=0.2, lr=1e-3)

# fake input
b = torch.rand((5,1, 28*28))

# move model and data to hardware
model = mlp_pl.to(device)

b = b.to(device)
y_hat = mlp_pl(b)
print(y_hat.shape)

# real data
batch = next(iter(test_loader))
print(batch[0].shape, batch[1].shape)
print(model.predict_step(batch, 0))

torch.Size([5, 1, 10])
torch.Size([64, 1, 28, 28]) torch.Size([64])
tensor([6, 8, 8, 8, 8, 8, 6, 4, 8, 8, 5, 5, 9, 5, 9, 8, 8, 2, 4, 4, 5, 2, 4, 5,
        9, 8, 6, 0, 6, 9, 9, 4, 9, 6, 8, 6, 5, 9, 4, 9, 6, 8, 2, 5, 8, 5, 9, 6,
        2, 9, 4, 0, 2, 0, 9, 6, 2, 2, 8, 9, 8, 9, 5, 6])


In [None]:
# learning rate stored on the Lightning module (the `lr` constructor argument)
print(model.lr)

NameError: name 'model' is not defined

In [None]:
# print(bb)

## Integrated trainer

```python
trainer = Trainer(accelerator='mps', devices = 1, max_epochs=1)
trainer.fit(mlp_pl, datamodule=datamodule)
trainer.test(mlp_pl, datamodule=datamodule)
```

## Training scripts with config file 

For an example script that trains the model from configurable YAML files, see the `recipes` folder:

```bash
cd recipes/image/mnist
python train.py trainer.max_epochs=20 trainer.accelerator='mps' datamodule.num_workers=0
```

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()