In [41]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch, torch.nn as nn, torch.optim as optim
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
import matplotlib.pyplot as plt
from torch.cuda.amp import autocast, GradScaler
from sklearn.metrics import accuracy_score
import torch.optim.lr_scheduler as lr_sched


In [42]:
# Per-channel mean / std handed to transforms.Normalize for every image below.
mean = (0.4914, 0.4822, 0.4465)
std  = (0.2470, 0.2435, 0.2616)

def get_dataloaders(aug_type, train_batch_size=128, test_batch_size=256,
                    data_dir="./data"):
    """Build the CIFAR-10 train and test loaders for one augmentation recipe.

    Args:
        aug_type: one of ``"none"``, ``"basic"`` (random crop + horizontal
            flip) or ``"strong"`` (basic + colour jitter). Only the training
            pipeline is augmented; the test pipeline is always
            ``ToTensor`` + ``Normalize``.
        train_batch_size: batch size of the (shuffled) training loader.
        test_batch_size: batch size of the (unshuffled) test loader.
        data_dir: directory the dataset is read from / downloaded into.

    Returns:
        ``(train_loader, test_loader)``.

    Raises:
        ValueError: if ``aug_type`` is not a known recipe. Previously this
            surfaced as an ``UnboundLocalError`` on ``train_tf``.
    """
    # Augmentations operate on PIL images, so they go before ToTensor.
    if aug_type == "none":
        augmentations = []
    elif aug_type == "basic":
        augmentations = [
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
        ]
    elif aug_type == "strong":
        augmentations = [
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        ]
    else:
        raise ValueError(
            f"unknown aug_type {aug_type!r}; expected 'none', 'basic' or 'strong'"
        )

    # Every pipeline ends with the same tensor conversion + normalisation.
    to_normalized_tensor = [
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]
    train_tf = transforms.Compose(augmentations + to_normalized_tensor)
    test_tf = transforms.Compose(to_normalized_tensor)

    train_ds = torchvision.datasets.CIFAR10(
        data_dir, train=True, download=True, transform=train_tf
    )
    test_ds = torchvision.datasets.CIFAR10(
        data_dir, train=False, download=True, transform=test_tf
    )

    train_loader = DataLoader(train_ds, batch_size=train_batch_size, shuffle=True)
    test_loader  = DataLoader(test_ds, batch_size=test_batch_size, shuffle=False)
    return train_loader, test_loader


In [43]:
import torch.nn as nn
import torch.nn.functional as F
class CNN(nn.Module):
    """Two-stage convolutional classifier that emits 10 logits per image.

    Each stage is a 3x3 convolution (padding 1), a ReLU and a 2x2 max-pool;
    the channel count goes 3 -> 64 -> 128. The linear head expects
    ``128 * 8 * 8`` features, i.e. a 32x32 input after the two poolings.
    """

    def __init__(self):
        super().__init__()
        stages = []
        # Same layer order as a hand-written Sequential, so state_dict keys
        # stay net.0 / net.3 for the two convolutions.
        for in_channels, out_channels in ((3, 64), (64, 128)):
            stages.extend([
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2),
            ])
        self.net = nn.Sequential(*stages)
        self.fc = nn.Linear(128 * 8 * 8, 10)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        features = self.net(x)
        # Collapse everything but the batch dimension before the linear head.
        flat = features.view(features.size(0), -1)
        return self.fc(flat)



In [44]:
def get_optimizer(opt_name, model, lr, wd):
    """Create the optimizer named ``opt_name`` over ``model``'s parameters.

    Args:
        opt_name: ``"sgd"`` (momentum 0.9), ``"adam"`` or ``"adamw"``.
        model: module whose ``parameters()`` are optimised.
        lr: learning rate.
        wd: weight-decay coefficient passed to the optimizer.

    Returns:
        The constructed ``torch.optim`` optimizer.

    Raises:
        ValueError: if ``opt_name`` is not recognised. Previously the function
            fell through and returned ``None``, which only failed later at
            the first ``opt.zero_grad()``.
    """
    params = model.parameters()
    if opt_name == "sgd":
        return optim.SGD(params, lr=lr, momentum=0.9, weight_decay=wd)
    if opt_name == "adam":
        return optim.Adam(params, lr=lr, weight_decay=wd)
    if opt_name == "adamw":
        return optim.AdamW(params, lr=lr, weight_decay=wd)
    raise ValueError(
        f"unknown optimizer {opt_name!r}; expected 'sgd', 'adam' or 'adamw'"
    )


In [45]:
def get_scheduler(name, opt, step_size=20, gamma=0.1, t_max=50):
    """Create the learning-rate scheduler named ``name`` for optimizer ``opt``.

    Args:
        name: ``"none"`` (no scheduling), ``"step"`` or ``"cosine"``.
        opt: optimizer whose learning rate is scheduled.
        step_size: epochs between decays for ``"step"``.
        gamma: multiplicative decay factor for ``"step"``.
        t_max: ``T_max`` for ``"cosine"``, in scheduler steps. The default of
            50 is kept for backward compatibility; pass the real number of
            training epochs if the rate should reach its minimum by the end
            of the run.

    Returns:
        A ``torch.optim.lr_scheduler`` instance, or ``None`` for ``"none"``.

    Raises:
        ValueError: if ``name`` is not recognised. Previously an unknown name
            returned ``None`` and training silently ran without a schedule.
    """
    if name == "none":
        return None
    if name == "step":
        return lr_sched.StepLR(opt, step_size=step_size, gamma=gamma)
    if name == "cosine":
        return lr_sched.CosineAnnealingLR(opt, T_max=t_max)
    raise ValueError(
        f"unknown scheduler {name!r}; expected 'none', 'step' or 'cosine'"
    )


In [46]:
from torch.cuda.amp import autocast, GradScaler
scaler = GradScaler()
def train_one_epoch(model, loader, opt, use_amp, use_clip):
    """Run one training pass over ``loader`` and return the training accuracy.

    Args:
        model: network to train; switched to training mode here.
        loader: iterable of ``(inputs, labels)`` batches.
        opt: optimizer that updates ``model``'s parameters.
        use_amp: if true, the forward pass runs under ``autocast`` and the
            backward pass / optimizer step go through the module-level
            ``scaler``. That scaler is shared by every call.
        use_clip: if true, the global gradient norm is clipped to 1.0 before
            the optimizer step.

    Returns:
        Fraction of samples classified correctly, measured on the outputs
        produced during the pass (each batch is scored before its update).

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    model.train()
    criterion = nn.CrossEntropyLoss()  # built once instead of once per batch

    # Feed batches to wherever the model lives; fall back to CUDA for a
    # parameter-less model, which matches the former hard-coded .cuda().
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    correct = total = 0

    for x, y in loader:
        x, y = x.to(device), y.to(device)
        opt.zero_grad()

        if use_amp:
            with autocast():
                out = model(x)
                loss = criterion(out, y)
            scaler.scale(loss).backward()
            if use_clip:
                # Gradients are still multiplied by the loss scale here, so
                # unscale them first; otherwise the 1.0 threshold is applied
                # to scaled gradients.
                scaler.unscale_(opt)
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            scaler.step(opt)
            scaler.update()
        else:
            out = model(x)
            loss = criterion(out, y)
            loss.backward()
            if use_clip:
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            opt.step()

        correct += (out.argmax(1) == y).sum().item()
        total += y.size(0)

    if total == 0:
        raise ValueError("train_one_epoch: loader yielded no samples")
    return correct / total


  scaler = GradScaler()


In [47]:
def evaluate(model, loader):
    """Compute the mean cross-entropy loss and accuracy of ``model`` on ``loader``.

    Args:
        model: network to evaluate; switched to eval mode here and left in
            that mode.
        loader: iterable of ``(inputs, labels)`` batches.

    Returns:
        ``(avg_loss, acc)``: the per-sample mean loss and the fraction of
        correct predictions over every sample seen.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    model.eval()

    # Sum the loss per batch and divide by the sample count at the end, so a
    # smaller final batch is not over-weighted as it would be when averaging
    # per-batch means.
    criterion = nn.CrossEntropyLoss(reduction="sum")

    # Feed batches to wherever the model lives; fall back to CUDA for a
    # parameter-less model, which matches the former hard-coded .cuda().
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    correct = 0
    total = 0
    total_loss = 0.0

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device)

            out = model(x)
            total_loss += criterion(out, y).item()

            pred = out.argmax(dim=1)
            correct += (pred == y).sum().item()
            total += y.size(0)

    if total == 0:
        raise ValueError("evaluate: loader yielded no samples")

    avg_loss = total_loss / total
    acc = correct / total

    return avg_loss, acc


In [None]:
def run_experiment(cfg):
    """Train a fresh ``CNN`` under ``cfg`` and return its best accuracy.

    Args:
        cfg: experiment configuration. Required keys: ``"aug"``, ``"opt"``,
            ``"lr"``, ``"wd"``, ``"sched"``, ``"amp"``, ``"clip"``.
            Optional keys: ``"epochs"`` (default 10), ``"es"`` (enable early
            stopping, default ``False``) and ``"patience"`` (default 3).

    Returns:
        The highest accuracy ``evaluate`` reported on the test loader over
        the epochs that were run.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_loader, test_loader = get_dataloaders(cfg["aug"])
    model = CNN().to(device)
    opt = get_optimizer(cfg["opt"], model, cfg["lr"], cfg["wd"])
    # NOTE(review): the cosine schedule's default T_max is 50 while this loop
    # runs `epochs` (10 by default), so the learning rate barely decays.
    sched = get_scheduler(cfg["sched"], opt)

    epochs = cfg.get("epochs", 10)
    early_stop = cfg.get("es", False)
    patience = cfg.get("patience", 3)

    best_acc = 0.0
    best_loss = float("inf")
    epochs_without_improvement = 0

    for epoch in range(epochs):
        train_one_epoch(model, train_loader, opt,
                        cfg["amp"], cfg["clip"])
        # NOTE(review): model selection uses the test split, so best_acc is
        # an optimistic estimate; a held-out validation split would be cleaner.
        val_loss, val_acc = evaluate(model, test_loader)

        if sched is not None:
            sched.step()
        best_acc = max(best_acc, val_acc)

        # Early stopping: the "es" flag used to be ignored, which made the
        # early-stopping configuration identical to the one before it.
        if val_loss < best_loss:
            best_loss = val_loss
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            if early_stop and epochs_without_improvement >= patience:
                break

    return best_acc


In [None]:
import pandas as pd

# One result row per configuration: the config keys plus "best_acc".
results = []

# Ablation ladder: each entry changes one group of settings relative to the
# entry before it.
configs = [
    # baseline
    dict(aug="none", opt="adam", wd=0, sched="none", lr=1e-3, amp=False, clip=False, es=False),

    # data augmentation
    dict(aug="basic", opt="adam", wd=0, sched="none", lr=1e-3, amp=False, clip=False, es=False),
    dict(aug="strong", opt="adam", wd=0, sched="none", lr=1e-3, amp=False, clip=False, es=False),

    # optimizer + weight decay
    dict(aug="strong", opt="adamw", wd=5e-4, sched="none", lr=1e-3, amp=False, clip=False, es=False),

    # scheduler
    dict(aug="strong", opt="adamw", wd=5e-4, sched="cosine", lr=1e-3, amp=False, clip=False, es=False),

    # AMP + gradient clipping
    dict(aug="strong", opt="adamw", wd=5e-4, sched="cosine", lr=1e-3, amp=True, clip=True, es=False),

    # Early stopping
    dict(aug="strong", opt="adamw", wd=5e-4, sched="cosine", lr=1e-3, amp=True, clip=True, es=True),
]

for cfg in configs:
    print(cfg)
    best_acc = run_experiment(cfg)
    # Copy rather than mutate, so the entries in `configs` stay pristine.
    row = {**cfg, "best_acc": best_acc}
    print(row)
    results.append(row)

df = pd.DataFrame(results)
print(df)


{'aug': 'none', 'opt': 'adam', 'wd': 0, 'sched': 'none', 'lr': 0.001, 'amp': False, 'clip': False, 'es': False}
i'm in run_exp
