Task: Pytorch lightning demo (MNIST digit classification)

Dataset: MNIST

Model: Feedforward NN

Libraries: Pytorch

In [1]:
import torch
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch import nn, optim
from torch.utils.data import DataLoader
from tqdm import tqdm
from torch.utils.data import random_split
import lightning as L
import torchmetrics
from torchmetrics import Metric

In [2]:
# Hyperparameters
input_size = 784
num_classes = 10
learning_rate = 0.001
batch_size = 64
num_epochs = 3

In [3]:
from torch.utils.data import DataLoader
class MyAccuracy(Metric):
    def __init__(self):
        super().__init__()
        self.add_state("correct", default=torch.tensor(0), dist_reduce_fx="sum")
        self.add_state("total", default=torch.tensor(0), dist_reduce_fx="sum")
    
    def update(self, preds, target):
        preds = torch.argmax(preds, dim=1)
        assert preds.shape == target.shape
        self.correct += torch.sum(preds == target)
        self.total += target.numel()
    
    def compute(self):
        return self.correct.float() / self.total.float()

    
class NNlightning(L.LightningModule):
    def __init__(self, input_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 50)
        self.fc2 = nn.Linear(50, num_classes)
        self.loss_function = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy(task="multiclass", num_classes=num_classes)
        self.myaccuracy = MyAccuracy()
        self.f1_score = torchmetrics.F1Score(task="multiclass", num_classes=num_classes)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
    
    def _common_step(self, batch, batch_idx, step_type: str):
        x, y = batch
        x = x.reshape(x.size(0), -1)
        y_hat = self(x)
        loss = self.loss_function(y_hat, y)
        self.log(f'{step_type}_loss', loss)
        return loss, y_hat, y
    
    def training_step(self, batch, batch_idx):
        loss, y_hat, y = self._common_step(batch, batch_idx, "train")
        accuracy = self.accuracy(y_hat, y)
        f1_score = self.f1_score(y_hat, y)
        myaccuracy = self.myaccuracy(y_hat, y)
        self.log_dict({'train_accuracy': accuracy, 'train_f1_score': f1_score, "train_myaccuracy": myaccuracy}, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss
    
    def validation_step(self, batch, batch_idx):
        loss, y_hat, y = self._common_step(batch, batch_idx, "val")
        return loss
    
    def test_step(self, batch, batch_idx):
        loss, y_hat, y = self._common_step(batch, batch_idx, "test")
        return loss
    
    def predict_step(self, batch, batch_idx):
        loss, y_hat, y = self._common_step(batch, batch_idx, "test")
        return torch.argmax(y_hat)
    
    def on_train_epoch_end(self) -> None:
        return super().on_train_epoch_end()

    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=learning_rate)

# class MNISTDataModule(pl.LightningDataModule):
#     def __init__(self, data_dir: str = "/path/to/data", batch_size: int = 64):
#         super().__init__()
#         self.data_dir = data_dir
#         self.batch_size = batch_size
#         self.transform = transforms.Compose([
#             transforms.ToTensor(),
#             transforms.Normalize((0.5,), (0.5,))
#         ])

#     def prepare_data(self):
#         MNIST(self.data_dir, train=True, download=True, transform=self.transform)
#         MNIST(self.data_dir, train=False, download=True, transform=self.transform)

#     def setup(self, stage=None):
#         self.mnist_train = MNIST(self.data_dir, train=True, transform=self.transform)
#         self.mnist_val = MNIST(self.data_dir, train=False, transform=self.transform)

#     def train_dataloader(self):
#         return DataLoader(self.mnist_train, batch_size=self.batch_size, shuffle=True)

#     def val_dataloader(self):
#         return DataLoader(self.mnist_val, batch_size=self.batch_size)

In [4]:
# Set device cuda for GPU if it's available otherwise run on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [5]:
# Load Data
entire_dataset = datasets.MNIST(root="./MNIST_data", train=True, transform=transforms.ToTensor(), download=True)
train_ds, val_ds = random_split(entire_dataset, [50000, 10000])
test_ds = datasets.MNIST(root="./MNIST_data", train=False, transform=transforms.ToTensor(), download=True)
train_loader = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_ds, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_ds, batch_size=batch_size, shuffle=False)


In [6]:
# Initialize network
model = NNlightning(input_size=input_size, num_classes=num_classes).to(device)

In [None]:
# # Loss and optimizer
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# # Train Network
# for epoch in range(num_epochs):
#     for batch_idx, (data, targets) in enumerate(tqdm(train_loader)):
#         # Get data to cuda if possible
#         data = data.to(device=device)
#         targets = targets.to(device=device)

#         # Get to correct shape
#         data = data.reshape(data.shape[0], -1)

#         # Forward
#         scores = model(data)
#         loss = criterion(scores, targets)

#         # Backward
#         optimizer.zero_grad()
#         loss.backward()

#         # Gradient descent or adam step
#         optimizer.step()

In [7]:
trainer = L.Trainer(max_epochs=1)
trainer.fit(model, train_loader, val_loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/Users/soumyapm/Scratchpad/env/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default

  | Name          | Type               | Params | Mode 
-------------------------------------------------------------
0 | fc1           | Linear             | 39.2 K | train
1 | fc2           | Linear             | 510    | train
2 | loss_function | CrossEntropyLoss   | 0      | train
3 | accuracy      | MulticlassAccuracy | 0      | train
4 

                                                                           

/Users/soumyapm/Scratchpad/env/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:475: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
/Users/soumyapm/Scratchpad/env/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/Users/soumyapm/Scratchpad/env/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 782/782 [00:12<00:00, 64.19it/s, v_num=13, train_accuracy_step=1.000, train_f1_score_step=1.000, train_myaccuracy_step=1.000, train_accuracy_epoch=0.883, train_f1_score_epoch=0.883, train_myaccuracy_epoch=0.883]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 782/782 [00:12<00:00, 64.12it/s, v_num=13, train_accuracy_step=1.000, train_f1_score_step=1.000, train_myaccuracy_step=1.000, train_accuracy_epoch=0.883, train_f1_score_epoch=0.883, train_myaccuracy_epoch=0.883]


In [None]:
# Check accuracy on training & test to see how good our model
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()

    # We don't need to keep track of gradients here so we wrap it in torch.no_grad()
    with torch.no_grad():
        # Loop through the data
        for x, y in loader:

            # Move data to device
            x = x.to(device=device)
            y = y.to(device=device)

            # Get to correct shape
            x = x.reshape(x.shape[0], -1)

            # Forward pass
            scores = model(x)
            _, predictions = scores.max(1)

            # Check how many we got correct
            num_correct += (predictions == y).sum()

            # Keep track of number of samples
            num_samples += predictions.size(0)

    model.train()
    return num_correct / num_samples


# Check accuracy on training & test to see how good our model
model.to(device)
print(f"Accuracy on training set: {check_accuracy(train_loader, model)*100:.2f}")
print(f"Accuracy on validation set: {check_accuracy(val_loader, model)*100:.2f}")
print(f"Accuracy on test set: {check_accuracy(test_loader, model)*100:.2f}")