<a href="https://colab.research.google.com/github/rahiakela/deep-learning-research-and-practice/blob/main/pytorch-lightning-in-practice/pytorch-lightning/episode_1_training_classification_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Episode 1: Training a classification model on MNIST with PyTorch

**Reference:**

https://www.youtube.com/watch?v=OMDn66kM9Qc&list=PLaMu-SDt_RB5hhJKZC5a6HPdlDTawUT3r

In [13]:
import torch

from torch import nn
from torch import optim
from torchvision import datasets, transforms 
from torch.utils.data import random_split, DataLoader

In [14]:
# Sanity check: sample a small random tensor and move it onto the GPU.
torch.randn(5).to("cuda")

tensor([-2.3014, -1.3125, -1.4651,  0.9658, -0.7355], device='cuda:0')

In [15]:
# Baseline MLP: flattened 28x28 image -> two hidden ReLU layers -> 10 class logits.
model2 = nn.Sequential(*(
  nn.Linear(28 * 28, 64), nn.ReLU(),  # input projection
  nn.Linear(64, 64), nn.ReLU(),       # hidden layer
  nn.Linear(64, 10),                  # output logits, one per class
))

In [24]:
class ResNet(nn.Module):
  """Small fully connected classifier with a single skip connection.

  Maps a batch of 784-feature inputs (flattened 28x28 images) to 10 logits.
  """

  def __init__(self):
    super().__init__()
    self.l1 = nn.Linear(784, 64)
    self.l2 = nn.Linear(64, 64)
    self.l3 = nn.Linear(64, 10)
    self.do = nn.Dropout(p=0.1)

  def forward(self, x):
    """Return the unnormalised class scores for input batch `x`."""
    first = torch.relu(self.l1(x))
    second = torch.relu(self.l2(first))
    # Skip connection: add the first hidden activation back in, then
    # regularise with dropout before the output layer.
    return self.l3(self.do(second + first))

# Instantiate the network and place its parameters on the GPU.
model = ResNet().to("cuda")

In [26]:
# define optimizer
# Plain stochastic gradient descent over every parameter of the model.
# Note: `model.parameters()` is a generator; SGD consumes it when constructed.
params = model.parameters()
optimizer = optim.SGD(params=params, lr=0.01)

In [27]:
# define loss
# Cross-entropy over raw logits, averaged across the batch.
loss = nn.CrossEntropyLoss(reduction="mean")

In [28]:
# define data loader
# download=True fetches MNIST into ./data when it is missing and reuses the
# files when they are already there, so this cell also runs on a fresh runtime
# (download=False raises "Dataset not found" in that case).
train_data = datasets.MNIST("data", train=True, download=True, transform=transforms.ToTensor())
# Hold out 5,000 samples for validation; the two sizes must sum to len(train_data).
train, val = random_split(train_data, [55000, 5000])

train_loader = DataLoader(train, batch_size=32)
val_loader = DataLoader(val, batch_size=32)

In [29]:
# define training and validation step
# Each epoch runs one full pass over train_loader (with parameter updates)
# followed by one full pass over val_loader (metrics only), and prints the
# mean per-batch loss and accuracy for both.
epochs = 5

for epoch in range(epochs):
  # training loop
  losses = list()
  accuracies = list()
  model.train()  # training mode: dropout is active
  for batch in train_loader:
    x, y = batch

    # x: b x 1 x 28 x 28 (B x C x H x W); flatten each image to a 784-vector
    b = x.size(0)
    x = x.view(b, -1).cuda()

    # Step 1: forward
    l = model(x)  # l: logits

    # Step 2: compute the objective function
    J = loss(l, y.cuda())

    # Step 3: clearing the gradients
    model.zero_grad()
    # optimizer.zero_grad()
    # params.grad._zero()

    # Step 4: accumulate the partial derivative of loss wrt params
    J.backward()
    # params.grad.sum_(dL/dparams)

    # Step 5: step in the opposite direction of the gradient
    optimizer.step()
    # with torch.no_grad(): params = params - eta * params.grad

    losses.append(J.item())
    # Fraction of samples in this batch whose arg-max logit matches the label.
    accuracies.append(y.eq(l.detach().argmax(dim=1).cpu()).float().mean())

  print(f"Epoch: {epoch + 1}", end=", ")
  print(f"training loss: {torch.tensor(losses).mean():.2f}", end=", ")
  print(f"training accuracy: {torch.tensor(accuracies).mean():.2f}")

  # validation loop
  losses = list()
  accuracies = list()
  # Evaluation mode: dropout is disabled, so validation metrics reflect the
  # full network instead of a randomly thinned one.
  model.eval()
  for batch in val_loader:
    x, y = batch

    # x: b x 1 x 28 x 28 (B x C x H x W); flatten each image to a 784-vector
    b = x.size(0)
    x = x.view(b, -1).cuda()

    # Step 1: forward (no gradients are needed for validation)
    with torch.no_grad():
      l = model(x)

    # Step 2: compute the objective function
    J = loss(l, y.cuda())

    losses.append(J.item())
    accuracies.append(y.eq(l.detach().argmax(dim=1).cpu()).float().mean())

  print(f"Epoch: {epoch + 1}", end=", ")
  print(f"validation loss: {torch.tensor(losses).mean():.2f}", end=", ")
  print(f"validation accuracy: {torch.tensor(accuracies).mean():.2f}")

Epoch: 1,, training loss: 0.86, training accuracy: 0.77
Epoch: 1,, validation loss: 0.42, training accuracy: 0.89
Epoch: 2,, training loss: 0.38, training accuracy: 0.89
Epoch: 2,, validation loss: 0.33, training accuracy: 0.91
Epoch: 3,, training loss: 0.31, training accuracy: 0.91
Epoch: 3,, validation loss: 0.28, training accuracy: 0.92
Epoch: 4,, training loss: 0.27, training accuracy: 0.92
Epoch: 4,, validation loss: 0.25, training accuracy: 0.93
Epoch: 5,, training loss: 0.24, training accuracy: 0.93
Epoch: 5,, validation loss: 0.22, training accuracy: 0.93
