In [94]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data.dataset import random_split
from torchvision import datasets
from torchvision import transforms
import torchmetrics
import lightning as L

In [14]:
class DataModule(L.LightningDataModule):
  def __init__(self, data_dir='./mnistdata', batch_size = 64, num_workers=0, train_transform=None, test_transform=None):
    super().__init__()

    self.data_dir = data_dir
    self.batch_size = batch_size
    self.num_workers = num_workers
    self.train_transform = train_transform
    self.test_transform = test_transform

  def prepare_data(self):
    datasets.MNIST(self.data_dir, train=True, download=True)
    datasets.MNIST(self.data_dir, train=False, download=True)

  def setup(self, stage=None):
    self.test_data = datasets.MNIST(
      root=self.data_dir, 
      train=False,
      download=False, 
      transform=self.test_transform
    )
    mnist_full = datasets.MNIST(
      root=self.data_dir, 
      train=True, 
      download=False,
      transform=self.train_transform,
    )
    self.train_data, self.val_data = random_split(mnist_full, [55000, 5000])

  def train_dataloader(self):
    return DataLoader(
      dataset=self.train_data, 
      batch_size=self.batch_size, 
      shuffle=True, 
      drop_last=True,
      num_workers=self.num_workers, 
    )

  def val_dataloader(self):
    return DataLoader(
      dataset=self.val_data, 
      batch_size=self.batch_size, 
      shuffle=False,
      drop_last=False,
      num_workers=self.num_workers,
    )

  def test_dataloader(self):
    return DataLoader(
      dataset=self.test_data, 
      batch_size=self.batch_size, 
      shuffle=False,
      drop_last=False,
      num_workers=self.num_workers,
    )

In [32]:
class BaseModel(nn.Module):
  def __init__(self, num_features, num_classes):
    super().__init__()

    self.flatten = nn.Flatten()
    self.layers = nn.Sequential(
        nn.Linear(num_features, 512),
        nn.ReLU(),
        nn.Linear(512, 512),
        nn.ReLU(),
        nn.Linear(512, num_classes)
    )

  def forward(self, x):
    x = self.flatten(x)
    return self.layers(x)

In [54]:
class ImprovedModel(nn.Module):
  def __init__(self, num_classes):
    super().__init__()

    self.conv_layers = nn.Sequential(
      nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2),
      nn.BatchNorm2d(6),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2, stride=2),

      nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0),
      nn.BatchNorm2d(16),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2, stride=2),
    )
    self.fc_layers = nn.Sequential(
        nn.Flatten(),
        nn.BatchNorm1d(5*5*16),
        nn.Linear(5*5*16, 120),
        nn.BatchNorm1d(120),
        nn.ReLU(),
        nn.Linear(120, 84),
        nn.BatchNorm1d(84),
        nn.ReLU(),
        nn.Linear(84, num_classes)
    )

  def forward(self, x):
    x = self.conv_layers(x)
    return self.fc_layers(x)

In [55]:
class LeNet(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()

        self.conv_layer = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.fc_layer = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=5*5*16, out_features=120),
            nn.Tanh(),
            nn.Linear(in_features=120, out_features=84),
            nn.Tanh(),
            nn.Linear(in_features=84, out_features=num_classes),
        )

    def forward(self, x):
        x = self.conv_layer(x)
        logits = self.fc_layer(x)
        return logits

In [65]:
class LightningModel(L.LightningModule):
  def __init__(self, model, lr):
    super().__init__()

    self.model = model
    self.lr = lr

    self.save_hyperparameters(ignore=["model"])

    self.train_acc = torchmetrics.Accuracy(task='multiclass', num_classes=10)
    self.val_acc = torchmetrics.Accuracy(task='multiclass', num_classes=10)
    self.test_acc = torchmetrics.Accuracy(task='multiclass', num_classes=10)

  def forward(self, x):
    return self.model(x)

  def _shared_steps(self, batch):
    X, y = batch
    logits = self(X)
    loss = F.cross_entropy(logits, y)
    pred = torch.argmax(logits, dim=1)

    return loss, y, pred

  def training_step(self, batch, batch_idx):
    loss, y, pred = self._shared_steps(batch)
    self.log("train_loss", loss)
    self.train_acc(pred, y)
    self.log("train_acc", self.train_acc, prog_bar=True, on_epoch=True, on_step=False)

    return loss

  def validation_step(self, batch, batch_idx):
    loss, y, pred = self._shared_steps(batch)
    self.log("val_loss", loss)
    self.val_acc(pred, y)
    self.log("val_acc", self.val_acc, prog_bar=True)

  def test_step(self, batch, batch_idx):
    loss, y, pred = self._shared_steps(batch)
    self.test_acc(pred, y)
    self.log("test_acc", self.test_acc)

  def configure_optimizers(self):
    return torch.optim.Adam(self.parameters(), lr=self.lr)

In [66]:
train_transform = transforms.Compose([
    # transforms.Resize(64),
    # transforms.RandomCrop(),
    transforms.ToTensor(),
])

test_transform = transforms.Compose([
    # transforms.Resize(64),
    # transforms.CenterCrop(),
    transforms.ToTensor(),
])

In [67]:
torch.manual_seed(111)

dm = DataModule(train_transform=train_transform, test_transform=test_transform)
bm = BaseModel(784, 10)
lenet = LeNet(10)
imp_mod = ImprovedModel(10)
lm = LightningModel(imp_mod, lr=0.001)

In [103]:
from lightning.pytorch.callbacks import ModelCheckpoint
callbacks = [
    ModelCheckpoint(dirpath='./model/',filename='best_model' , save_top_k=1, mode='max', monitor='val_acc', save_last=True)
    ]

In [104]:
trainer = L.Trainer(
    max_epochs = 5,
    accelerator = 'gpu',
    devices = 'auto',
    callbacks=callbacks,
    precision='16-mixed',
    deterministic = True,
)

Using 16bit Automatic Mixed Precision (AMP)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [105]:
trainer.fit(model = lm, datamodule = dm)

c:\Users\LENOVO\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:639: Checkpoint directory ./ exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type               | Params
-------------------------------------------------
0 | model     | ImprovedModel      | 63.0 K
1 | train_acc | MulticlassAccuracy | 0     
2 | val_acc   | MulticlassAccuracy | 0     
3 | test_acc  | MulticlassAccuracy | 0     
-------------------------------------------------
63.0 K    Trainable params
0         Non-trainable params
63.0 K    Total params
0.252     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\LENOVO\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
c:\Users\LENOVO\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.


In [106]:
train_acc = trainer.validate(dataloaders=dm.train_dataloader(), ckpt_path="best")[0]['val_acc']
print(train_acc)

Restoring states from the checkpoint path at ./best_model.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at ./best_model.ckpt
c:\Users\LENOVO\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:492: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.


Validation: |          | 0/? [00:00<?, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
         val_acc             0.999199628829956
        val_loss           0.0030940952710807323
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
0.999199628829956


In [107]:
val_acc = trainer.validate(datamodule=dm, ckpt_path="best")[0]['val_acc']

Restoring states from the checkpoint path at ./best_model.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at ./best_model.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
         val_acc            0.9990000128746033
        val_loss           0.0029711921233683825
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [108]:
test_acc = trainer.test(datamodule=dm, ckpt_path="best")[0]['test_acc']

Restoring states from the checkpoint path at ./best_model.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at ./best_model.ckpt
c:\Users\LENOVO\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.9909999966621399
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
