<a href="https://colab.research.google.com/github/rahiakela/deep-learning-research-and-practice/blob/main/pytorch-lightning-in-practice/pytorch-lightning/episode_3_pytorch_lightning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Episode 3: PyTorch Lightning

**Reference:**

https://www.youtube.com/watch?v=DbESHcCoWbM&list=PLKMvSHzKWRgLTDtAl3txcULbHiKvQTx6U&index=3

In [None]:
!pip install pytorch-lightning

In [4]:
import torch

from torch import nn
from torch import optim
from torchvision import datasets, transforms 
from torch.utils.data import random_split, DataLoader

import pytorch_lightning as pl

import pdb

In [3]:
# Sanity check for the GPU runtime: build a small random tensor and move it to
# the CUDA device (the call fails if no GPU is available).
torch.randn(5).cuda()

tensor([ 0.9580, -1.5952,  0.4923, -0.6948,  0.3215], device='cuda:0')

## Lightning

1. model
2. optimizer
3. data
4. training loop "the training magic"
5. validation loop "the validation magic"

In [9]:
class ResNet(pl.LightningModule):
  """Small MNIST classifier: three linear layers with one residual connection.

  The module bundles the model, the optimizer, the training step and the
  training data loader so that `pl.Trainer.fit(model)` can run the loop.
  """

  def __init__(self):
    super().__init__()
    self.l1 = nn.Linear(28 * 28, 64)
    self.l2 = nn.Linear(64, 64)
    self.l3 = nn.Linear(64, 10)
    self.do = nn.Dropout(0.1)

    self.loss = nn.CrossEntropyLoss()

  def forward(self, x):
    """Return the class logits for a batch of inputs.

    Args:
      x: either an image batch (b x 1 x 28 x 28) or an already flattened
        batch (b x 784).

    Returns:
      Tensor with 10 unnormalized class scores per sample.
    """
    # Flatten image-shaped batches here so callers may pass raw images;
    # inputs with at most two dimensions are passed through unchanged.
    if x.dim() > 2:
      x = x.view(x.size(0), -1)

    h1 = nn.functional.relu(self.l1(x))
    h2 = nn.functional.relu(self.l2(h1))
    do = self.do(h2 + h1)  # residual connection
    logits = self.l3(do)
    return logits

  def configure_optimizers(self):
    """Return the SGD optimizer (lr=1e-2) over all parameters of the module."""
    optimizer = optim.SGD(self.parameters(), lr=1e-2)
    return optimizer

  def training_step(self, batch, batch_idx):
    """Compute the cross-entropy loss for one training batch.

    Args:
      batch: pair `(x, y)` of inputs and integer class labels.
      batch_idx: index of the batch within the epoch (unused).

    Returns:
      Dict with the scalar training loss under the key "loss".
    """
    x, y = batch

    # Step 1: forward (x: b x 1 x 28 x 28, flattened inside forward)
    logits = self(x)

    # Step 2: compute the objective function
    loss = self.loss(logits, y)
    return {"loss": loss}

  def train_dataloader(self):
    """Download MNIST (if needed) and return the training data loader."""
    train_data = datasets.MNIST("data", train=True, download=True, transform=transforms.ToTensor())

    # Reshuffle every epoch so SGD does not see the samples in a fixed order.
    train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
    return train_loader

# Instantiate the Lightning module defined above.
model = ResNet()

In [None]:
# Train for ten epochs on a single GPU; batches come from the module's own
# train_dataloader() because no loaders are passed to fit().
trainer = pl.Trainer(
    accelerator='gpu',
    devices=1,
    max_epochs=10,
)
trainer.fit(model)

In [None]:
class ImageClassifier(nn.Module):
  """Empty `nn.Module` skeleton: registers no layers and defines no forward pass."""

  def __init__(self):
    super(ImageClassifier, self).__init__()

In [None]:
# define optimizer: plain SGD over every parameter of the model
params = model.parameters()
optimizer = optim.SGD(params=params, lr=1e-2)

In [None]:
# define loss: cross-entropy, applied below to the raw logits and the integer labels
loss = nn.CrossEntropyLoss()

In [None]:
# define data loader
train_data = datasets.MNIST("data", train=True, download=True, transform=transforms.ToTensor())

# Seed the split so the same 55,000 / 5,000 partition is produced on every run.
train, val = random_split(train_data, [55000, 5000], generator=torch.Generator().manual_seed(42))

# Reshuffle the training batches each epoch; the validation order does not matter.
train_loader = DataLoader(train, batch_size=8, shuffle=True)
val_loader = DataLoader(val, batch_size=8)

In [None]:
# define training and validation step
epochs = 10

# Move the model to the GPU once up front instead of on every batch.
model.cuda()

for epoch in range(epochs):
  # training loop
  losses = list()
  accuracies = list()
  model.train()  # set training mode because I use dropout
  for batch in train_loader:
    x, y = batch

    # x: b x 1 x 28 x 28 (B*C*W*H)
    b = x.size(0)
    x = x.view(b, -1).cuda()

    # Step 1: forward
    l = model(x)  # l: logits

    # Step 2: compute the objective function
    J = loss(l, y.cuda())

    # Step 3: cleaning the gradients
    model.zero_grad()
    # optimizer.zero_grad()
    # params.grad._zero()

    # Step 4: accumulate the partial derivative of loss wrt params
    J.backward()
    # params.grad.sum_(dL/dparams)

    # Step 5: step in the opposite direction of the gradient
    optimizer.step()
    # with torch.no_grad(): params = params - eta * params.grad

    losses.append(J.item())
    # Fraction of correct predictions in this batch (labels stay on the CPU).
    accuracies.append(y.eq(l.detach().argmax(dim=1).cpu()).float().mean())

  print(f"Epoch: {epoch + 1}", end=", ")
  print(f"training loss: \t{torch.tensor(losses).mean():.2f}", end=", ")
  print(f"training accuracy: \t{torch.tensor(accuracies).mean():.2f}")

  # validation loop
  losses = list()
  accuracies = list()
  model.eval()  # evaluation mode: dropout must be disabled while validating
  for batch in val_loader:
    x, y = batch

    # x: b x 1 x 28 x 28 (B*C*W*H)
    b = x.size(0)
    x = x.view(b, -1).cuda()

    # No gradients are needed for validation, so skip building the graph.
    with torch.no_grad():
      # Step 1: forward
      l = model(x)

      # Step 2: compute the objective function
      J = loss(l, y.cuda())

    losses.append(J.item())
    accuracies.append(y.eq(l.detach().argmax(dim=1).cpu()).float().mean())

  print(f"Epoch: {epoch + 1}", end=", ")
  print(f"validation loss: \t{torch.tensor(losses).mean():.2f}", end=", ")
  print(f"validation accuracy: \t{torch.tensor(accuracies).mean():.2f}")

Epoch: 1, training loss: 	0.15, training accuracy: 	0.96
Epoch: 1, validation loss: 	0.14, training accuracy: 	0.96
Epoch: 2, training loss: 	0.13, training accuracy: 	0.96
Epoch: 2, validation loss: 	0.14, training accuracy: 	0.96
Epoch: 3, training loss: 	0.11, training accuracy: 	0.97
Epoch: 3, validation loss: 	0.12, training accuracy: 	0.96
Epoch: 4, training loss: 	0.10, training accuracy: 	0.97
Epoch: 4, validation loss: 	0.11, training accuracy: 	0.97
Epoch: 5, training loss: 	0.09, training accuracy: 	0.97
Epoch: 5, validation loss: 	0.11, training accuracy: 	0.97
Epoch: 6, training loss: 	0.08, training accuracy: 	0.98
Epoch: 6, validation loss: 	0.11, training accuracy: 	0.97
Epoch: 7, training loss: 	0.07, training accuracy: 	0.98
Epoch: 7, validation loss: 	0.11, training accuracy: 	0.97
Epoch: 8, training loss: 	0.07, training accuracy: 	0.98
Epoch: 8, validation loss: 	0.11, training accuracy: 	0.97
Epoch: 9, training loss: 	0.06, training accuracy: 	0.98
Epoch: 9, valid

## Debugging the accuracy

(Pdb) p l.size() >>
torch.Size([32, 10])

(Pdb) p l[0] >>
tensor([-1.2800, -3.7171, -3.1333,  1.7426,  1.8951,  9.9960,  0.2454, -8.6478,
         4.0316, -0.0792], device='cuda:0', grad_fn=<SelectBackward0>)

(Pdb) p l[0].detach().argmax() >>
tensor(5, device='cuda:0')

(Pdb) p l[0].detach().softmax(dim=0) >>
tensor([1.2632e-05, 1.1043e-06, 1.9798e-06, 2.5952e-04, 3.0230e-04, 9.9676e-01,
        5.8069e-05, 7.9746e-09, 2.5602e-03, 4.1974e-05], device='cuda:0')

(Pdb) p [f"{p:.2f}" for p in l[0].detach().softmax(dim=0)] >>
['0.00', '0.00', '0.00', '0.00', '0.00', '1.00', '0.00', '0.00', '0.00', '0.00']

(Pdb) p y.size() >>
torch.Size([32])

(Pdb) p y[:4] >>
tensor([5, 5, 4, 9])

(Pdb) p l.detach().argmax(dim=0)[:4] >>
tensor([ 8, 17,  4,  9], device='cuda:0')

(Pdb) p l.detach().argmax(dim=1)[:4] >>
tensor([5, 8, 4, 9], device='cuda:0')

(Pdb) p y[:4].eq(l.detach().argmax(dim=1).cpu()[:4]) >>
tensor([ True, False,  True,  True])

(Pdb) p y[:4].eq(l.detach().argmax(dim=1).cpu()[:4]).float().mean() >>
tensor(0.7500)

(Pdb) p y.eq(l.detach().argmax(dim=1).cpu()).float().mean() >>
tensor(0.9375)

(Pdb) q