## Imports

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import sys
import numpy as np
import os

## Utilising the GPU with PyTorch

In [2]:
# cpu-gpu
# Tensors are created on the CPU unless a device is given explicitly.
a = torch.randn((3, 4))
print(a.device)

# Choose the device *before* moving anything: use the GPU when CUDA is
# available and fall back to the CPU otherwise, so this cell also runs on
# machines without a GPU instead of raising on `.to("cuda")`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
a = a.to(device)
print(a.device)

cpu
cuda:0


In [3]:
# Display whether PyTorch can see a CUDA device (shown as the cell's output).
torch.cuda.is_available() 

True

In [4]:
!nvidia-smi

Sun Sep 18 11:55:15 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   45C    P0    26W /  70W |    612MiB / 15109MiB |      1%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Dataset and Transforms

In [5]:
# Training pipeline: random 32x32 crop (after 4 px padding) and random
# horizontal flip as augmentation, then tensor conversion and per-channel
# normalisation.
# NOTE(review): the mean/std constants are presumably CIFAR-10 channel
# statistics - confirm against the dataset.
train_transform = transforms.Compose(
  [
    transforms.RandomCrop(size=32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
  ]
)

# Evaluation pipeline: no augmentation, same tensor conversion and normalisation.
test_transform = transforms.Compose(
  [
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
  ]
)

# Both splits are stored under data/ and downloaded if missing.
train_dset = torchvision.datasets.CIFAR10(
  root="data/",
  train=True,
  transform=train_transform,
  download=True,
)
test_dset = torchvision.datasets.CIFAR10(
  root="data/",
  train=False,
  transform=test_transform,
  download=True,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting data/cifar-10-python.tar.gz to data/
Files already downloaded and verified


In [6]:
# Report how many samples each split holds.
for split_name, split_dset in (("train", train_dset), ("test", test_dset)):
  print(f"# of {split_name} samples: {len(split_dset)}")

# of train samples: 50000
# of test samples: 10000


In [7]:
# Mini-batch loaders: training batches are reshuffled on every pass,
# test batches keep the dataset order.
train_loader = DataLoader(
  dataset=train_dset,
  batch_size=100,
  shuffle=True,
  num_workers=2,
)
test_loader = DataLoader(
  dataset=test_dset,
  batch_size=100,
  shuffle=False,
  num_workers=2,
)

In [8]:
# Report how many mini-batches each loader yields per pass.
for split_name, split_loader in (("train", train_loader), ("test", test_loader)):
  print(f"# of {split_name} batches: {len(split_loader)}")

# of train batches: 500
# of test batches: 100


In [9]:
# Fetch a single training batch and show the tensor shapes the model will see.
print("sample i/o sizes")
data = next(iter(train_loader))
img, target = data
for role, tensor in (("input", img), ("output", target)):
  print(f"{role} size: {tensor.shape}")

sample i/o sizes
input size: torch.Size([100, 3, 32, 32])
output size: torch.Size([100])


## LeNet

In [43]:
class LeNet(nn.Module):
  """LeNet-style CNN: three 5x5 convolutions followed by three linear layers.

  Shape trace for a (N, 3, 32, 32) batch:
    conv1 (5x5)            -> (N, 6, 28, 28)  -> 2x2 max-pool -> (N, 6, 14, 14)
    conv2 (5x5)            -> (N, 32, 10, 10) -> 2x2 max-pool -> (N, 32, 5, 5)
    conv3 (5x5, padding=2) -> (N, 64, 5, 5)
    flatten                -> (N, 64*5*5)
    fc1 -> fc2 -> fc3      -> (N, num_classes)

  conv3 is padded and is not followed by a pool: an unpadded 5x5 convolution
  would shrink the 5x5 map to 1x1, a further 2x2 pool would then fail, and the
  flattened size would no longer match fc1's 64*5*5 input features.

  Args:
    num_classes: Number of output logits (default 10).
  """

  def __init__(self, num_classes=10):
    super().__init__()
    self.conv1 = nn.Conv2d(3, 6, kernel_size=5)
    self.conv2 = nn.Conv2d(6, 32, kernel_size=5)
    # padding=2 keeps the 5x5 spatial size so fc1 receives 64*5*5 features.
    self.conv3 = nn.Conv2d(32, 64, kernel_size=5, padding=2)
    self.fc1   = nn.Linear(64*5*5, 120)
    self.fc2   = nn.Linear(120, 84)
    self.fc3   = nn.Linear(84, num_classes)
    self.activ = nn.ReLU()

  # https://pytorch.org/docs/stable/nn.functional.html
  def pool(self, x, kernel_size=2):
    """Apply 2D max-pooling with a square window of `kernel_size`."""
    return F.max_pool2d(x, kernel_size)

  def forward(self, x):
    """Return unnormalised class scores of shape (N, num_classes)."""
    out = self.pool(self.activ(self.conv1(x)))
    out = self.pool(self.activ(self.conv2(out)))
    # No pooling after conv3: the feature map is already 5x5 here.
    out = self.activ(self.conv3(out))

    # Flatten everything except the batch dimension.
    out = out.flatten(1)
    out = self.activ(self.fc1(out))
    out = self.activ(self.fc2(out))
    out = self.fc3(out)
    return out

## Utility functions (can ignore)

In [20]:
def pbar(p=0, msg="", bar_len=20):
    """Draw a single-line text progress bar on stdout, redrawn in place.

    Args:
        p: Fraction completed, expected in [0, 1]. The percentage shown is
            computed from `p` as given; only the drawn bar is clamped.
        msg: Text appended after the percentage.
        bar_len: Width of the bar in characters.
    """
    # Erase the current terminal line and return to column 0 before redrawing.
    sys.stdout.write("\033[K")
    sys.stdout.write("\x1b[2K" + "\r")
    # Clamp the filled width to [0, bar_len] so the bar never changes size.
    block = min(max(int(round(bar_len * p)), 0), bar_len)
    # With nothing filled, omit the arrow head; otherwise the bar would be one
    # character wider than bar_len at p == 0.
    filled = "=" * (block - 1) + ">" if block > 0 else ""
    text = "Progress: [{}] {}% {}".format(
        "\x1b[32m" + filled + "\033[0m" + "-" * (bar_len - block),
        round(p * 100, 2),
        msg,
    )
    print(text, end="\r")
    # Finish the line once complete so later output starts on a fresh line.
    if p == 1:
        print()


class AvgMeter:
    """Collects per-batch scalar metrics and reports the mean of each one."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Forget every value recorded so far."""
        # Maps metric name -> list of values added since the last reset.
        self.metrics = {}

    def add(self, batch_metrics):
        """Record one value for each metric in `batch_metrics`.

        Metric names may appear for the first time on any call; a new name
        simply starts its own list instead of raising KeyError.

        Args:
            batch_metrics: Mapping of metric name to a scalar value.
        """
        for key, value in batch_metrics.items():
            self.metrics.setdefault(key, []).append(value)

    def get(self):
        """Return a dict mapping each metric name to the mean of its values."""
        return {key: np.mean(values) for key, values in self.metrics.items()}

    def msg(self):
        """Return the means formatted as "[name] value " chunks, 5 decimals each."""
        return "".join("[{}] {:.5f} ".format(key, mean) for key, mean in self.get().items())

## Training

In [21]:
def train(
  model,
  optim,
  lr_sched=None,
  epochs=20,
  device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
  criterion=None,
  metric_meter=None,
  out_dir="out/",
):
  """Train `model` and evaluate it after every epoch, keeping the best checkpoint.

  Batches are read from the notebook-level `train_loader` and `test_loader`,
  and progress is drawn with the notebook-level `pbar`.

  Args:
    model: Module to optimise; moved to `device`.
    optim: Optimizer already bound to `model`'s parameters.
    lr_sched: Optional scheduler stepped once per epoch; skipped when None.
    epochs: Number of passes over `train_loader`.
    device: Device used for the model and every batch.
    criterion: Loss callable taking (model output, target). Required.
    metric_meter: Object with reset/add/get/msg methods. Required.
    out_dir: Directory that receives "best.ckpt"; created if missing.

  Returns:
    The best epoch-level test accuracy, in percent (0 if never improved).

  Raises:
    ValueError: If `criterion` or `metric_meter` is None.
  """
  if criterion is None or metric_meter is None:
    raise ValueError("train() requires both `criterion` and `metric_meter`")

  # Make sure the checkpoint directory exists before the first save.
  os.makedirs(out_dir, exist_ok=True)

  model.to(device)
  best_acc = 0
  for _ in range(epochs):
    # ---- training pass ----
    model.train()
    metric_meter.reset()
    for indx, (img, target) in enumerate(train_loader):
      img = img.to(device)
      target = target.to(device)

      out = model(img)
      loss = criterion(out, target)

      optim.zero_grad()
      loss.backward()
      optim.step()
      metric_meter.add({"train loss": loss.item()})
      pbar(indx / len(train_loader), msg=metric_meter.msg())
    pbar(1, msg=metric_meter.msg())

    # ---- evaluation pass ----
    model.eval()
    metric_meter.reset()
    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
      for indx, (img, target) in enumerate(test_loader):
        img = img.to(device)
        target = target.to(device)

        out = model(img)
        loss = criterion(out, target)
        # Accuracy as a percentage of this batch, so the value does not
        # depend on the batch size. The meter averages per batch, so a
        # smaller final batch is weighted the same as a full one.
        correct = (out.argmax(1) == target).sum().item()
        acc = 100.0 * correct / target.size(0)

        metric_meter.add({"test loss": loss.item(), "test acc": acc})
        pbar(indx / len(test_loader), msg=metric_meter.msg())
    pbar(1, msg=metric_meter.msg())

    test_metrics = metric_meter.get()
    if test_metrics["test acc"] > best_acc:
      print(
          "\x1b[33m"
          + f"test acc improved from {round(best_acc, 5)} to {round(test_metrics['test acc'], 5)}"
          + "\033[0m"
      )
      best_acc = test_metrics['test acc']
      torch.save(model.state_dict(), os.path.join(out_dir, "best.ckpt"))
    if lr_sched is not None:
      lr_sched.step()
  return best_acc

## Run Experiments

In [22]:
def run_experiment(model_name="lenet", model_cfg=None, epochs=20):
  """Build a model by name and train it with SGD and cosine LR annealing.

  Args:
    model_name: Architecture to build; only "lenet" is supported.
    model_cfg: Tag used only in the output directory name,
      "{model_name}_{model_cfg}".
    epochs: Number of training epochs, also used as the scheduler's T_max.

  Raises:
    ValueError: If `model_name` is not a supported architecture.
  """
  if model_name == "lenet":
    model = LeNet()
  else:
    # Fail with a clear message instead of an UnboundLocalError further down.
    raise ValueError(f"unknown model_name {model_name!r}; expected 'lenet'")
  optim = torch.optim.SGD(model.parameters(), lr=1e-1, momentum=0.9, weight_decay=5e-4)
  lr_sched = torch.optim.lr_scheduler.CosineAnnealingLR(optim, T_max=epochs)
  criterion = nn.CrossEntropyLoss()
  metric_meter = AvgMeter()
  out_dir = f"{model_name}_{model_cfg}"
  os.makedirs(out_dir, exist_ok=True)
  train(model, optim, lr_sched, epochs=epochs, criterion=criterion, metric_meter=metric_meter, out_dir=out_dir)

In [23]:
# Train LeNet with the defaults: 20 epochs, and (because model_cfg is None)
# the checkpoint directory is named "lenet_None".
run_experiment(model_name="lenet")

[33mtest acc improved from 0 to 25.95[0m
[33mtest acc improved from 25.95 to 29.02[0m
[33mtest acc improved from 29.02 to 33.49[0m
[33mtest acc improved from 33.49 to 36.34[0m
[33mtest acc improved from 36.34 to 36.95[0m
[33mtest acc improved from 36.95 to 37.38[0m
[33mtest acc improved from 37.38 to 43.42[0m
[33mtest acc improved from 43.42 to 44.17[0m
[33mtest acc improved from 44.17 to 44.66[0m
[33mtest acc improved from 44.66 to 46.27[0m
[33mtest acc improved from 46.27 to 48.96[0m
[33mtest acc improved from 48.96 to 53.62[0m
[33mtest acc improved from 53.62 to 56.95[0m
[33mtest acc improved from 56.95 to 58.1[0m
[33mtest acc improved from 58.1 to 59.24[0m
[33mtest acc improved from 59.24 to 61.27[0m
[33mtest acc improved from 61.27 to 62.81[0m
[33mtest acc improved from 62.81 to 63.75[0m
[33mtest acc improved from 63.75 to 64.48[0m


16 -> 32: accuracy moved from 62.57% to 64.48%.


In [37]:
# NOTE(review): this cell re-declares run_experiment, which is also defined in
# an earlier cell; the later definition is the one in effect. Consider keeping
# a single definition.
def run_experiment(model_name="lenet", model_cfg=None, epochs=20):
  """Build a model by name and train it with SGD and cosine LR annealing.

  Args:
    model_name: Architecture to build; only "lenet" is supported.
    model_cfg: Tag used only in the output directory name,
      "{model_name}_{model_cfg}".
    epochs: Number of training epochs, also used as the scheduler's T_max.

  Raises:
    ValueError: If `model_name` is not a supported architecture.
  """
  if model_name == "lenet":
    model = LeNet()
  else:
    # Fail with a clear message instead of an UnboundLocalError further down.
    raise ValueError(f"unknown model_name {model_name!r}; expected 'lenet'")
  optim = torch.optim.SGD(model.parameters(), lr=1e-1, momentum=0.9, weight_decay=5e-4)
  lr_sched = torch.optim.lr_scheduler.CosineAnnealingLR(optim, T_max=epochs)
  criterion = nn.CrossEntropyLoss()
  metric_meter = AvgMeter()
  out_dir = f"{model_name}_{model_cfg}"
  os.makedirs(out_dir, exist_ok=True)
  train(model, optim, lr_sched, epochs=epochs, criterion=criterion, metric_meter=metric_meter, out_dir=out_dir)

In [42]:
# Train LeNet again with the defaults: 20 epochs, and (because model_cfg is
# None) the checkpoint directory is named "lenet_None".
run_experiment(model_name="lenet")

[33mtest acc improved from 0 to 27.5[0m
[33mtest acc improved from 27.5 to 28.54[0m
[33mtest acc improved from 28.54 to 32.68[0m
[33mtest acc improved from 32.68 to 40.93[0m
[33mtest acc improved from 40.93 to 41.67[0m
[33mtest acc improved from 41.67 to 43.41[0m
[33mtest acc improved from 43.41 to 44.27[0m
[33mtest acc improved from 44.27 to 46.65[0m
[33mtest acc improved from 46.65 to 49.88[0m
[33mtest acc improved from 49.88 to 52.4[0m
[33mtest acc improved from 52.4 to 56.5[0m
[33mtest acc improved from 56.5 to 59.38[0m
[33mtest acc improved from 59.38 to 63.02[0m
[33mtest acc improved from 63.02 to 65.2[0m
[33mtest acc improved from 65.2 to 67.8[0m
[33mtest acc improved from 67.8 to 69.45[0m
[33mtest acc improved from 69.45 to 70.31[0m
[33mtest acc improved from 70.31 to 70.96[0m
[33mtest acc improved from 70.96 to 71.33[0m


32 -> 64: accuracy moved from 64.48% to 71.33%.
