In [1]:
import pytorch_lightning as pl
import torch
import torch.nn as nn

from torchmetrics import Accuracy

In [2]:
class MultiLayerPerceptron(pl.LightningModule):
	def __init__(self, image_shape=(1,28,28), hidden_units=(32,16)):
		super().__init__()
		# new pl attributes
		self.train_acc = Accuracy(task="multiclass", num_classes=10)
		self.valid_acc = Accuracy(task="multiclass", num_classes=10)
		self.test_acc = Accuracy(task="multiclass", num_classes=10)

		# Model Similar to Previous section
		input_size = image_shape[0] * image_shape[1] * image_shape[2]
		all_layers = [nn.Flatten()]
		for hidden_unit in hidden_units:
			layer = nn.Linear(input_size, hidden_unit)
			all_layers.append(layer)
			all_layers.append(nn.ReLU())
			input_size = hidden_unit
		all_layers.append(nn.Linear(hidden_units[-1], 10))
		self.model = nn.Sequential(*all_layers)

	def forward(self, x):
		x = self.model(x)
		return x
	
	def training_step(self, batch, batch_idx):
		x,y = batch
		logits = self(x)
		loss = nn.functional.cross_entropy(self(x), y)
		preds = torch.argmax(logits, dim=1)
		self.train_acc.update(preds, y)
		self.log("train_loss", loss, prog_bar=True)
		return loss
	
	def on_train_epoch_end(self):
		self.log("Training Acc :", self.train_acc.compute())

	def validation_step(self, batch, batch_idx):
		x,y = batch
		logits = self(x)
		loss = nn.functional.cross_entropy(self(x), y)
		preds = torch.argmax(logits, dim=1)
		self.valid_acc.update(preds, y)
		self.log("Validation Loss:", loss, prog_bar=True)
		self.log("Validation Accuracy:", self.valid_acc.compute(), prog_bar=True)
		return loss

	def test_step(self, batch, batch_idx):
		x,y = batch
		logits = self(x)
		loss = nn.functional.cross_entropy(self(x), y)
		preds = torch.argmax(logits, dim=1)
		self.test_acc.update(preds, y)
		self.log("Testing Loss:", loss, prog_bar=True)
		self.log("Testing Accuracy:", self.test_acc.compute(), prog_bar=True)
		return loss
	
	def configure_optimizers(self):
		optimizer = torch.optim.Adam(self.parameters(), lr = 0.001)
		return optimizer

### Setting up the data loaders for Lightning
There are three main ways in which we can prepare the dataset for Lightning.
* Make the dataset part of the model
* Set up the data loaders as usual and feed them to the fit method of a lightning Trainer
* Create a LightningDataModule

In [3]:
# using LightningDataModue approach

from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torchvision.datasets import MNIST
from torchvision import transforms


In [4]:
class MNISTDataModule(pl.LightningDataModule):
	def __init__(self, data_path="./mnist_images/"):
		super().__init__()
		self.data_path = data_path
		self.transform = transforms.Compose([transforms.ToTensor()])

	def prepare_data(self) :
		MNIST(root=self.data_path, download=True)

	def setup(self, stage=None):
		# stage is either fit, validate, test or predict
		mnist_all = MNIST(root=self.data_path, train=True, transform=self.transform, download=False)
		self.train, self.val = random_split(
			mnist_all, [55000, 5000], generator=torch.Generator().manual_seed(1)
		)
		self.test = MNIST(root=self.data_path, train=False, transform=self.transform, download=True)

	def train_dataloader(self) :
		return DataLoader(self.train, batch_size=64, num_workers=4)
	
	def val_dataloader(self) :
		return DataLoader(self.val, batch_size=64, num_workers=4)
	
	def test_dataloader(self) :
		return DataLoader(self.test, batch_size=64, num_workers=4)

 

In [5]:
torch.manual_seed(1)
mnist_dm = MNISTDataModule()

In [6]:
mnistclassifier = MultiLayerPerceptron()

In [7]:
if torch.cuda.is_available():
	trainer = pl.Trainer(max_epochs=20,devices=1)
else:
	trainer = pl.Trainer(max_epochs=20)

trainer.fit(model=mnistclassifier, datamodule=mnist_dm)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Missing logger folder: /home/santosh/ML-DL_codeBook/Chapter_13/lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type               | Params | Mode 
---------------------------------------------------------
0 | train_acc | MulticlassAccuracy | 0      | train
1 | valid_acc | MulticlassAccuracy | 0      | train
2 | test_acc  | MulticlassAccuracy | 0      | train
3 | model     | Sequential         | 25.8 K | train
---------------------------------------------------------
25.8 K    Trainable params
0         Non-trainable params
25.8 K    Total params
0.103     Total estimated model params size (MB)


Epoch 19: 100%|██████████| 860/860 [00:10<00:00, 81.75it/s, v_num=0, train_loss=0.084, Validation Loss:=0.177, Validation Accuracy:=0.944]  

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 860/860 [00:10<00:00, 81.67it/s, v_num=0, train_loss=0.084, Validation Loss:=0.177, Validation Accuracy:=0.944]


In [8]:
%load_ext tensorboard
%tensorboard --logdir lightning_logs/

In [9]:
# Loading model from saved checkpoint and retraining for an additional 10 epochs
if torch.cuda.is_available():
    trainer = pl.Trainer(
        max_epochs=30,
        devices=1,
        accelerator="gpu"
    )
else:
    trainer = pl.Trainer(
        max_epochs=30,
        devices=1,
        accelerator="cpu"
    )

# Assuming you have a model and datamodule already defined
# model = mnistclassifier (already defined)
# mnist_dm = your_data_module (already defined)

trainer.fit(model=mnistclassifier, datamodule=mnist_dm,ckpt_path="lightning_logs/version_0/checkpoints/epoch=19-step=17200.ckpt"
)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Restoring states from the checkpoint path at lightning_logs/version_0/checkpoints/epoch=19-step=17200.ckpt
/home/santosh/ML-DL_codeBook/.venv/lib/python3.10/site-packages/lightning_fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you sta

Epoch 29: 100%|██████████| 860/860 [00:12<00:00, 70.75it/s, v_num=1, train_loss=0.028, Validation Loss:=0.213, Validation Accuracy:=0.947]    

`Trainer.fit` stopped: `max_epochs=30` reached.


Epoch 29: 100%|██████████| 860/860 [00:12<00:00, 70.66it/s, v_num=1, train_loss=0.028, Validation Loss:=0.213, Validation Accuracy:=0.947]


In [10]:
# testing 
trainer.test(model=mnistclassifier, datamodule=mnist_dm)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 157/157 [00:01<00:00, 125.96it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
    Testing Accuracy:       0.9513083100318909
      Testing Loss:         0.1878414899110794
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'Testing Loss:': 0.1878414899110794,
  'Testing Accuracy:': 0.9513083100318909}]