# Pytorch Lightning Tutorial
This tutorial demonstrates integration of ModelBox with PyTorch Lightning.

In [1]:
import os

import pandas as pd
import seaborn as sn
import torch
import random
from IPython.core.display import display
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.loggers import CSVLogger
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy
from torchvision import transforms
from torchvision.datasets import MNIST
from modelbox.lightning_logger import ModelBoxLogger

PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")
BATCH_SIZE = 256 if torch.cuda.is_available() else 64

  from IPython.core.display import display


In [2]:
class MNISTModel(LightningModule):
    def __init__(self, data_dir=PATH_DATASETS, hidden_size=64, learning_rate=2e-4):
        super().__init__()
        self.l1 = torch.nn.Linear(28 * 28, 10)
        
        self.data_dir = data_dir
        self.hidden_size = hidden_size
        self.learning_rate = learning_rate
        self.transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,)),
            ]
        )
        self.val_accuracy = Accuracy()
        self.test_accuracy = Accuracy()
        self.save_hyperparameters()

    def forward(self, x):
        return torch.relu(self.l1(x.view(x.size(0), -1)))

    def training_step(self, batch, batch_nb):
        x, y = batch
        loss = F.cross_entropy(self(x), y)
        return loss
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        self.val_accuracy.update(preds, y)

        self.log("val_loss", loss, prog_bar=True)
        self.log("val_acc", self.val_accuracy, prog_bar=True)

    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        self.test_accuracy.update(preds, y)

        self.log("test_loss", loss, prog_bar=True)
        self.log("test_acc", self.test_accuracy, prog_bar=True)

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.02)

    def prepare_data(self):
        MNIST(self.data_dir, train=True, download=True)
        MNIST(self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        if stage == "fit" or stage is None:
            mnist_full = MNIST(self.data_dir, train=True, transform=self.transform)
            self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])

        # Assign test dataset for use in dataloader(s)
        if stage == "test" or stage is None:
            self.mnist_test = MNIST(self.data_dir, train=False, transform=self.transform)

    def train_dataloader(self):
        return DataLoader(self.mnist_train, batch_size=BATCH_SIZE)

    def val_dataloader(self):
        return DataLoader(self.mnist_val, batch_size=BATCH_SIZE)

    def test_dataloader(self):
        return DataLoader(self.mnist_test, batch_size=BATCH_SIZE)



In [3]:
mnist_model = MNISTModel()

train_ds = MNIST(PATH_DATASETS, train=True, download=True, transform=transforms.ToTensor())
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE)

# Intialize ModelBoxLogger
experiment_name = f"lid_quartznet-{random.randint(1, 10000)}"
mbox_logger = ModelBoxLogger("langtech", experiment_name, "owner@pytorch.com")

trainer = Trainer(
    accelerator="auto",
    devices=1 if torch.cuda.is_available() else None, 
    max_epochs=3,
    logger=[mbox_logger],
    callbacks=[TQDMProgressBar(refresh_rate=20)],
)

trainer.fit(mnist_model, train_loader)
trainer.test()


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
modelbox - attempting to create a project
modelbox - created experiment with id: fc5b0a49a66a58141a350fef06827bf845545b59
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type     | Params
-------------------------------------------
0 | l1            | Linear   | 7.9 K 
1 | val_accuracy  | Accuracy | 0     
2 | test_accuracy | Accuracy | 0     
-------------------------------------------
7.9 K     Trainable params
0         Non-trainable params
7.9 K     Total params
0.031     Total estimated model params size (MB)
modelbox - attempting to create a project
modelbox - created experiment with id: fc5b0a49a66a58141a350fef06827bf845545b59
modelbox - log hpraams params "data_dir":      .
"hidden_size":   64
"learning_rate": 0.0002
modelbox - log hpraams metrics None


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

logging metrics {'val_loss': 12.475750923156738, 'val_acc': 0.551800012588501, 'epoch': 0}
modelbox - log metrics, step: 234 metrics: {'val_loss': 12.475750923156738, 'val_acc': 0.551800012588501, 'epoch': 0}


Validation: 0it [00:00, ?it/s]

logging metrics {'val_loss': 17.128355026245117, 'val_acc': 0.5260000228881836, 'epoch': 1}
modelbox - log metrics, step: 469 metrics: {'val_loss': 17.128355026245117, 'val_acc': 0.5260000228881836, 'epoch': 1}


Validation: 0it [00:00, ?it/s]

logging metrics {'val_loss': 20.481245040893555, 'val_acc': 0.5109999775886536, 'epoch': 2}
modelbox - log metrics, step: 704 metrics: {'val_loss': 20.481245040893555, 'val_acc': 0.5109999775886536, 'epoch': 2}
  rank_zero_warn(
Restoring states from the checkpoint path at /home/diptanuc/Projects/modelbox/tutorials/lid_quartznet-9729/0.1/checkpoints/epoch=2-step=705.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
modelbox - attempting to create a project
modelbox - created experiment with id: fc5b0a49a66a58141a350fef06827bf845545b59
modelbox - log hpraams params "data_dir":      .
"hidden_size":   64
"learning_rate": 0.0002
modelbox - log hpraams metrics None
Loaded model weights from checkpoint at /home/diptanuc/Projects/modelbox/tutorials/lid_quartznet-9729/0.1/checkpoints/epoch=2-step=705.ckpt
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

logging metrics {'test_loss': 20.991371154785156, 'test_acc': 0.5055999755859375, 'epoch': 2}
modelbox - log metrics, step: 705 metrics: {'test_loss': 20.991371154785156, 'test_acc': 0.5055999755859375, 'epoch': 2}


[{'test_loss': 20.991371154785156, 'test_acc': 0.5055999755859375}]

In [4]:
from modelbox.modelbox import ModelBoxClient

In [5]:
client = ModelBoxClient(addr="localhost:8085")


In [7]:
client.list_metadata('fc5b0a49a66a58141a350fef06827bf845545b59').metadata

{'hyperparams': fields {
   key: "data_dir"
   value {
     string_value: "."
   }
 }
 fields {
   key: "hidden_size"
   value {
     number_value: 64.0
   }
 }
 fields {
   key: "learning_rate"
   value {
     number_value: 0.0002
   }
 }}

In [8]:
mbox_logger.experiment

modelbox - attempting to create a project
modelbox - created experiment with id: fc5b0a49a66a58141a350fef06827bf845545b59


CreateExperimentResult(experiment_id='fc5b0a49a66a58141a350fef06827bf845545b59', exists=False)