<a href="https://colab.research.google.com/github/laurefindele-o-catto/ML-Projects/blob/main/CIFAR-10/Model%20using%20resnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torchvision as tv
import torchvision.transforms as T
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import torch.nn.functional as F
import numpy as np
import PIL
from PIL import Image
from pathlib import Path
import seaborn as sns
import sys
import math
import time

In [None]:
# Pick the compute device once; later cells read the module-level `device`.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"
print("Using device:", device)
# Only query the GPU name when CUDA was selected above.
print("GPU name:", torch.cuda.get_device_name(0) if device == "cuda" else "No GPU")

Using device: cuda
GPU name: Tesla T4


**Helper Functions**

In [None]:
def mixup_data(x, y, alpha = 0.1):
  """Blend each sample in a batch with a randomly chosen partner (MixUp).

  Args:
    x: batch tensor whose first dimension is the batch dimension.
    y: label tensor indexed along the same batch dimension.
    alpha: Beta(alpha, alpha) concentration; values <= 0 disable mixing.

  Returns:
    (mixed_x, y_a, y_b, lam), where mixed_x = lam * x + (1 - lam) * x[perm],
    y_a are the original labels, y_b the labels of the partner samples and
    lam the mixing weight as a Python float. With mixing disabled the inputs
    are returned untouched as (x, y, y, 1.0).
  """
  if alpha <= 0:
    return x, y, y, 1.0

  # Cast to a plain float so downstream arithmetic never sees a numpy scalar.
  lam = float(np.random.beta(alpha, alpha))
  batch_size = x.size(0)
  index = torch.randperm(batch_size).to(x.device)
  mixed_x = lam*x + (1-lam) * x[index, :]

  # Both label sets are needed so the loss can be weighted by lam / (1 - lam).
  y_a, y_b = y, y[index]
  return mixed_x, y_a, y_b, lam

def cutmix_data(x, y, alpha = 1.0):
  """Paste a random rectangle from shuffled partner images into each image (CutMix).

  The batch tensor `x` is modified in place and also returned. `x` is unpacked
  as (batch, channels, H, W).

  Args:
    x: 4-D image batch.
    y: labels indexed along the batch dimension.
    alpha: Beta(alpha, alpha) concentration; values <= 0 disable the augmentation.

  Returns:
    (x, y_a, y_b, lam): the patched batch, the original labels, the partner
    labels, and lam = 1 - patch_area / (W * H) computed from the clipped box.
  """
  # Non-positive alpha turns the augmentation off.
  if alpha <= 0:
    return x, y, y, 1.0

  lam = np.random.beta(alpha, alpha)
  n_samples, _, height, width = x.size()
  perm = torch.randperm(n_samples).to(x.device)

  # Box side lengths scale with sqrt(1 - lam) so its area is about (1 - lam) of the image.
  side_ratio = np.sqrt(1. - lam)
  box_w = int(width * side_ratio)
  box_h = int(height * side_ratio)

  # Draw the box centre (x first, then y) and clip the box to the image bounds.
  center_x = np.random.randint(width)
  center_y = np.random.randint(height)
  left = np.clip(center_x - box_w // 2, 0, width)
  right = np.clip(center_x + box_w // 2, 0, width)
  top = np.clip(center_y - box_h // 2, 0, height)
  bottom = np.clip(center_y + box_h // 2, 0, height)

  # In-place patch swap; the indexed right-hand side is materialised before the write.
  x[:, :, top:bottom, left:right] = x[perm, :, top:bottom, left:right]

  # Recompute lam from the clipped box so it matches the pixels actually replaced.
  patch_area = (right - left) * (bottom - top)
  lam = 1 - (patch_area / (width*height) )

  return x, y, y[perm], lam


def mixup_cutmix_criterion(criterion, pred, y_a, y_b, lam):
  """Combine the loss against both label sets, weighted by lam and (1 - lam).

  `criterion` is called as criterion(pred, targets), first with y_a, then with y_b.
  """
  loss_a = criterion(pred, y_a)
  loss_b = criterion(pred, y_b)
  return lam * loss_a + (1 - lam) * loss_b

**Model Using ResNet**

In [None]:
# Per-channel (R, G, B) statistics commonly used to normalise CIFAR-10.
# The blue-channel mean is 0.4465; it was previously mistyped as 0.4456.
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: pad by 4 and take a random 32x32 crop, random horizontal
# flip, then convert to a tensor and normalise.
transform_train = T.Compose([
    T.RandomCrop(32, padding = 4),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# Evaluation pipeline: no augmentation, only tensor conversion and normalisation.
transform_test = T.Compose([
    T.ToTensor(),
    T.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# Downloads the dataset into ./data on first use.
trainset = tv.datasets.CIFAR10(root='./data', train = True, download = True, transform = transform_train)
testset = tv.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform_test)

# Shuffle only the training split; the test loader keeps a fixed order.
train_loader = torch.utils.data.DataLoader(trainset, batch_size = 128, shuffle = True, num_workers = 2, pin_memory = True)
test_loader = torch.utils.data.DataLoader(testset, batch_size = 256, shuffle = False, num_workers = 2, pin_memory = True)

100%|██████████| 170M/170M [00:04<00:00, 36.6MB/s]


**Training Setup**

In [None]:
def accuracy(logits, targets):
  """Return top-1 accuracy in percent: argmax over dim 1 compared with `targets`."""
  predicted = logits.argmax(dim=1)
  hit_rate = predicted.eq(targets).float().mean()
  return hit_rate.item() * 100.0

def train_resnet(model, train_loader, test_loader, epochs = 200, base_lr = 0.1, weight_decay = 5e-4, label_smoothing = 0.1, device = None):
  """Train `model` with Nesterov SGD and a cosine-annealed LR, evaluating every epoch.

  Args:
    model: network to train; it is moved to `device`.
    train_loader: iterable yielding (inputs, labels) batches for training.
    test_loader: iterable yielding (inputs, labels) batches for evaluation.
    epochs: number of epochs; also used as the cosine schedule's T_max.
    base_lr: initial learning rate; the schedule anneals down to base_lr * 1e-2.
    weight_decay: weight decay passed to SGD.
    label_smoothing: label smoothing passed to the cross-entropy loss.
    device: target device. When omitted, CUDA is used if available, else CPU.

  Returns:
    (train_hist, test_hist): per-epoch train and test accuracy in percent.
  """
  # Resolve the device at call time instead of binding a notebook global at def time.
  device = device or ("cuda" if torch.cuda.is_available() else "cpu")
  model = model.to(device)

  criterion = nn.CrossEntropyLoss(label_smoothing=label_smoothing)
  optimizer = torch.optim.SGD(model.parameters(), lr = base_lr, momentum=0.9, weight_decay = weight_decay, nesterov = True)
  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = epochs, eta_min = base_lr*1e-2)

  train_hist, test_hist = [], []

  for epoch in range(1, epochs+1):
    model.train()
    total, correct, running = 0, 0, 0.0
    for x, y in train_loader:
      x, y = x.to(device), y.to(device)
      optimizer.zero_grad()
      logits = model(x)
      loss = criterion(logits, y)
      loss.backward()
      optimizer.step()

      # Weight the batch-mean loss by batch size so the epoch mean is exact.
      running += loss.item() * x.size(0)
      total += x.size(0)
      correct += (logits.argmax(1) == y).sum().item()

    # Epoch losses are computed for inspection but are not logged or returned.
    train_loss = running/total
    train_acc = 100.0 * correct/total

    model.eval()
    # Reset all accumulators, including the loss sum, before the evaluation pass.
    total, correct, running = 0, 0, 0.0

    with torch.no_grad():
      for x, y in test_loader:
        x, y = x.to(device), y.to(device)
        logits = model(x)
        loss = criterion(logits, y)
        running += loss.item() * x.size(0)
        total += x.size(0)
        correct += (logits.argmax(1) == y).sum().item()

    test_loss = running/total
    test_acc = 100.0*correct/total

    # The cosine schedule advances once per epoch.
    scheduler.step()
    train_hist.append(train_acc)
    test_hist.append(test_acc)
    print(f"Epoch [{epoch:3d}/{epochs}] LR {scheduler.get_last_lr()[0]:.5f} | Train Acc: {train_acc:.2f}% | Test Acc: {test_acc:.2f}%")

  return train_hist, test_hist

In [None]:
def train_resnet_adv(model, train_loader, test_loader, epochs=200, base_lr=0.1,
                     weight_decay=5e-4, label_smoothing=0.1,
                     use_mixup=False, use_cutmix=False, alpha=1.0, device=None):
    """Train with Nesterov SGD and a one-cycle LR schedule, optionally using MixUp or CutMix.

    Args:
        model: network to train; it is moved to `device`.
        train_loader: sized iterable of (inputs, labels) batches; its length
            sets the scheduler's steps per epoch.
        test_loader: iterable of (inputs, labels) batches for evaluation.
        epochs: number of training epochs.
        base_lr: peak learning rate of the one-cycle schedule.
        weight_decay: weight decay passed to SGD.
        label_smoothing: label smoothing passed to the cross-entropy loss.
        use_mixup: apply `mixup_data` to every training batch.
        use_cutmix: apply `cutmix_data` to every training batch; ignored when
            `use_mixup` is also set.
        alpha: Beta concentration forwarded to the chosen augmentation.
        device: target device; falls back to CUDA when available, else CPU.

    Returns:
        (train_hist, test_hist): per-epoch accuracy in percent. Training
        accuracy is measured against the original labels even when the
        inputs were mixed.
    """
    if not device:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    criterion = nn.CrossEntropyLoss(label_smoothing=label_smoothing)
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=base_lr,
        momentum=0.9,
        weight_decay=weight_decay,
        nesterov=True,
    )
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=base_lr,
        epochs=epochs,
        steps_per_epoch=len(train_loader),
        pct_start=0.1,
        div_factor=25.0,
        final_div_factor=1e3,
    )

    train_hist = []
    test_hist = []
    for epoch in range(1, epochs + 1):
        # ---- training pass ----
        model.train()
        seen = 0
        hits = 0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # MixUp takes precedence over CutMix when both flags are set.
            if use_mixup:
                inputs, y_a, y_b, lam = mixup_data(inputs, labels, alpha)
            elif use_cutmix:
                inputs, y_a, y_b, lam = cutmix_data(inputs, labels, alpha)
            else:
                y_a = y_b = labels
                lam = 1.0

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = mixup_cutmix_criterion(criterion, outputs, y_a, y_b, lam)
            loss.backward()
            optimizer.step()
            # The one-cycle schedule advances once per batch.
            scheduler.step()

            seen += labels.size(0)
            hits += outputs.argmax(1).eq(labels).sum().item()

        train_acc = 100.0 * hits / seen

        # ---- evaluation pass ----
        model.eval()
        seen = 0
        hits = 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                seen += labels.size(0)
                hits += outputs.argmax(1).eq(labels).sum().item()
        test_acc = 100.0 * hits / seen

        train_hist.append(train_acc)
        test_hist.append(test_acc)
        print(f"Epoch [{epoch:3d}/{epochs}] LR {scheduler.get_last_lr()[0]:.5f} | "
              f"Train Acc: {train_acc:.2f}% | Test Acc: {test_acc:.2f}%")

    return train_hist, test_hist

In [None]:
# Baseline run without MixUp/CutMix.
# NOTE(review): resnet18_cifar is not defined in the visible cells — confirm it
# is defined before this cell runs on a fresh kernel.
model = resnet18_cifar(num_classes = 10)
# base_lr was written as 0/1, which evaluates to 0.0 and disables learning.
train_hist, test_hist = train_resnet(model, train_loader, test_loader, epochs = 100, base_lr=0.1)

In [None]:
# MixUp: train a freshly initialised model and keep its history under its own
# names so the CutMix run below does not overwrite it.
model = resnet18_cifar()
mixup_train_hist, mixup_test_hist = train_resnet_adv(model, train_loader, test_loader,
                                                     epochs=100, base_lr=0.1,
                                                     use_mixup=True, alpha=1.0)

# CutMix: again starting from a freshly initialised model.
model = resnet18_cifar()
cutmix_train_hist, cutmix_test_hist = train_resnet_adv(model, train_loader, test_loader,
                                                       epochs=100, base_lr=0.1,
                                                       use_cutmix=True, alpha=1.0)

# The generic names still refer to the most recent (CutMix) run, as they did before.
train_hist, test_hist = cutmix_train_hist, cutmix_test_hist