In [1]:
%pip install -r requirements.txt

Collecting appnope==0.1.4 (from -r requirements.txt (line 1))
  Downloading appnope-0.1.4-py2.py3-none-any.whl.metadata (908 bytes)
Collecting certifi==2024.8.30 (from -r requirements.txt (line 3))
  Downloading certifi-2024.8.30-py3-none-any.whl.metadata (2.2 kB)
Collecting charset-normalizer==3.4.0 (from -r requirements.txt (line 4))
  Downloading charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (34 kB)
Collecting comm==0.2.2 (from -r requirements.txt (line 5))
  Downloading comm-0.2.2-py3-none-any.whl.metadata (3.7 kB)
Collecting contourpy==1.3.0 (from -r requirements.txt (line 6))
  Downloading contourpy-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.4 kB)
Collecting cycler==0.12.1 (from -r requirements.txt (line 7))
  Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting debugpy==1.8.7 (from -r requirements.txt (line 8))
  Downloading debugpy-1.8.7-cp310-cp310-manylinux_2_5_x86_64.manylinux

In [2]:
import torchvision.transforms as tt
from torch.utils.data import DataLoader, ConcatDataset
import torchvision

# Per-channel (mean, std) tuples unpacked into tt.Normalize below.
# NOTE(review): presumably the CIFAR-10 training-set statistics -- confirm the source.
stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

# Transform pipelines (per the original note, carried over from the augmentation ablation).
# basic_tfms: tensor conversion + normalisation only, no augmentation.
basic_tfms = tt.Compose([tt.ToTensor(), tt.Normalize(*stats)])
# train_fms: random 32x32 crop after 4-pixel reflect padding, random horizontal flip,
# then the same tensor conversion + normalisation (done in place on the tensor).
train_fms = tt.Compose([tt.RandomCrop(32, padding=4, padding_mode='reflect'), 
                        tt.RandomHorizontalFlip(), 
                        tt.ToTensor(), 
                        tt.Normalize(*stats,inplace=True)])

# Training batch size; the validation loader below uses twice this value.
batch_size = 1024

# Two views of the CIFAR-10 training split: one un-augmented, one augmented.
# Only the first call passes download=True; the second reads from the same ./data root.
train_normal = torchvision.datasets.CIFAR10(root='./data', train=True, transform=basic_tfms, download=True)
train_ds = torchvision.datasets.CIFAR10(root='./data', train=True, transform=train_fms)

# Concatenating both views means one pass over train_dl visits every training image
# twice: once augmented and once un-augmented.
train_dataset = ConcatDataset([train_ds, train_normal])
train_dl = DataLoader(train_dataset, batch_size, shuffle=True, num_workers=3, pin_memory=True)

# train=False split, loaded without augmentation and without shuffling; used for validation.
val_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=basic_tfms)
valid_dl = DataLoader(val_dataset, batch_size*2, num_workers=3, pin_memory=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:02<00:00, 64.3MB/s] 


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [3]:
import torch

def get_default_device():
    """Return the CUDA device when CUDA is usable, otherwise the CPU device.

    MPS is not considered here (the original MPS branch was left commented out).
    """
    backend = "cuda" if torch.cuda.is_available() else "cpu"
    return torch.device(backend)
    
def clear_cache():
    """Empty the accelerator's cached memory, if an accelerator is present.

    CUDA is checked first and takes precedence; MPS is only consulted when
    CUDA is unavailable. With neither backend there is nothing to release.
    """
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        return
    if torch.backends.mps.is_available():
        torch.mps.empty_cache()
        
    
def to_device(data, device):
    """Move ``data`` to ``device``, recursing into lists and tuples.

    A list or tuple is returned as a new *list* whose items have been moved;
    anything else is moved with ``data.to(device, non_blocking=True)``.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)
    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

class DeviceDataLoader():
    """Thin wrapper that moves every batch of an underlying loader to a device."""

    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __len__(self):
        """Return the number of batches in the wrapped loader."""
        return len(self.dl)

    def __iter__(self):
        """Lazily yield each batch of the wrapped loader after moving it to the device."""
        yield from (to_device(batch, self.device) for batch in self.dl)
    
# Pick the compute device once for the whole notebook and report which one is in use.
device = get_default_device()
print(f"running on {device}")

running on cuda


In [4]:
# Wrap the loaders so that each batch is moved to `device` as it is yielded.
# The isinstance guard keeps this cell idempotent: because it rebinds train_dl /
# valid_dl to a wrapper around themselves, re-running it unguarded would stack a
# second DeviceDataLoader (and a redundant transfer per batch) on top of the first.
if not isinstance(train_dl, DeviceDataLoader):
    train_dl = DeviceDataLoader(train_dl, device)
if not isinstance(valid_dl, DeviceDataLoader):
    valid_dl = DeviceDataLoader(valid_dl, device)

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

def accuracy(outputs, labels):
    """Return the fraction of rows whose highest-scoring class equals the label.

    The result is a 0-dim tensor built from a Python float, so it does not
    carry any autograd history from ``outputs``.
    """
    predicted = outputs.max(dim=1).indices
    n_correct = (predicted == labels).sum().item()
    return torch.tensor(n_correct / len(predicted))

class ResNet9(nn.Module):
    """Residual CNN classifier (eight 3x3 conv blocks plus one linear layer).

    Besides ``forward`` the class bundles the per-batch and per-epoch helpers
    used by the training loop. Three conv blocks halve the spatial size and the
    head applies a 4x4 max-pool before ``Linear(512, num_classes)``, so the
    network is sized for 32x32 inputs.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        block = self.conv_block

        # Stage 1: stem, first down-sampling block, first residual branch.
        self.conv1 = block(in_channels, 64)
        self.conv2 = block(64, 128, pool=True)
        self.res1 = nn.Sequential(block(128, 128), block(128, 128))

        # Stage 2: two more down-sampling blocks, second residual branch.
        self.conv3 = block(128, 256, pool=True)
        self.conv4 = block(256, 512, pool=True)
        self.res2 = nn.Sequential(block(512, 512), block(512, 512))

        # Head: pool the remaining spatial extent, flatten, classify.
        head = [nn.MaxPool2d(4), nn.Flatten(), nn.Linear(512, num_classes)]
        self.classifier = nn.Sequential(*head)

    def conv_block(self, in_channels, out_channels, pool=False):
        """Build Conv3x3 -> BatchNorm -> ReLU, followed by a 2x2 max-pool if ``pool``."""
        stages = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        ]
        if pool:
            stages += [nn.MaxPool2d(2)]
        return nn.Sequential(*stages)

    def forward(self, xb):
        """Map a batch of images to class logits."""
        feats = self.conv2(self.conv1(xb))
        feats = self.res1(feats) + feats      # first skip connection
        feats = self.conv4(self.conv3(feats))
        feats = self.res2(feats) + feats      # second skip connection
        return self.classifier(feats)

    def training_step(self, batch):
        """Return the cross-entropy loss for one ``(images, labels)`` batch."""
        images, labels = batch
        return F.cross_entropy(self(images), labels)

    def validation_step(self, batch):
        """Return the detached loss and the accuracy for one ``(images, labels)`` batch."""
        images, labels = batch
        logits = self(images)
        return {
            'val_loss': F.cross_entropy(logits, labels).detach(),
            'val_acc': accuracy(logits, labels),
        }

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation metrics into plain Python floats.

        Each batch contributes equally to the mean, regardless of its size.
        """
        def epoch_mean(key):
            return torch.stack([step[key] for step in outputs]).mean().item()

        return {'val_loss': epoch_mean('val_loss'), 'val_acc': epoch_mean('val_acc')}

    def epoch_end(self, epoch, result):
        """Print a one-line summary of the epoch from its ``result`` dict."""
        template = "Epoch [{}], last_lr: {:.5f}, train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}"
        print(template.format(
            epoch, result['lrs'][-1], result['train_loss'], result['val_loss'], result['val_acc']))

In [6]:
# Build a ResNet9 with 3 input channels and 10 output classes and move it to `device`.
model = to_device(ResNet9(3, 10), device)

In [7]:
@torch.no_grad()
def evaluate(model, val_loader):
    """Run ``model`` over ``val_loader`` in eval mode and return the aggregated metrics.

    Gradient tracking is disabled for the whole call. Note that the model is
    left in eval mode afterwards.
    """
    model.eval()
    step_results = []
    for batch in val_loader:
        step_results.append(model.validation_step(batch))
    return model.validation_epoch_end(step_results)

def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Returns ``None`` when the optimizer has no parameter groups.
    """
    first_group_lr = (group['lr'] for group in optimizer.param_groups)
    return next(first_group_lr, None)

def fit_one_cycle(epochs, max_lr, model, train_loader, val_loader, 
                  weight_decay=0, grad_clip=None, opt_func=torch.optim.SGD):
    """Train ``model`` under a one-cycle learning-rate schedule, validating after each epoch.

    Args:
        epochs: Number of passes over ``train_loader``.
        max_lr: Peak learning rate of the one-cycle schedule (also handed to
            ``opt_func`` as the optimizer's learning-rate argument).
        model: Module providing ``training_step``, ``validation_step``,
            ``validation_epoch_end`` and ``epoch_end``.
        train_loader: Sized iterable of training batches; its length sets the
            number of scheduler steps per epoch.
        val_loader: Iterable of validation batches passed to ``evaluate``.
        weight_decay: Forwarded to ``opt_func`` as ``weight_decay``.
        grad_clip: If truthy, every gradient element is clamped to
            ``[-grad_clip, grad_clip]`` before the optimizer step.
        opt_func: Optimizer class, called as
            ``opt_func(params, max_lr, weight_decay=...)``.

    Returns:
        A list with one dict per epoch: the metrics returned by ``evaluate``
        plus ``'train_loss'`` (mean batch loss, float) and ``'lrs'`` (the
        learning rate used for each batch of that epoch).
    """
    torch.cuda.empty_cache()
    history = []

    # Set up custom optimizer with weight decay
    optimizer = opt_func(model.parameters(), max_lr, weight_decay=weight_decay)
    # Set up one-cycle learning rate scheduler (stepped once per batch)
    sched = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, epochs=epochs, 
                                                steps_per_epoch=len(train_loader),
                                                pct_start=0.3)

    for epoch in range(epochs):
        # Training phase
        model.train()
        train_losses = []
        lrs = []
        for batch in train_loader:
            loss = model.training_step(batch)
            # Record only the value. Keeping the graph-attached tensor would hold
            # a reference to every batch's autograd graph for the whole epoch and
            # make the epoch-mean computation below track gradients needlessly.
            train_losses.append(loss.detach())
            loss.backward()

            # Gradient clipping
            if grad_clip:
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)

            optimizer.step()
            optimizer.zero_grad()

            # Record the rate that was just applied, then advance the schedule
            lrs.append(get_lr(optimizer))
            sched.step()

        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['lrs'] = lrs
        model.epoch_end(epoch, result)
        history.append(result)
    return history

In [8]:
# Baseline hyper-parameters shared by the sweep cells below.
# Default epoch count; the sweep cells visible in this notebook pass their own value instead.
epochs = 8
# Peak learning rate handed to fit_one_cycle (the one-cycle schedule's max_lr).
max_lr = 0.01
# Gradient elements are clamped to [-grad_clip, grad_clip] inside fit_one_cycle.
grad_clip = 0.1
# Weight decay forwarded to the optimizer constructor.
weight_decay = 1e-4
# Optimizer class (not an instance); fit_one_cycle instantiates it.
opt_func = torch.optim.Adam

In [9]:
# Epoch-budget sweep: train a freshly initialised ResNet9 for 8, 9, ..., 20 epochs.
# history_map is keyed 0..n-1 in sweep order because compare_final_val_losses
# looks the runs up by position.
history_map = {}
for index, num_epochs in enumerate(range(8, 21)):
    # New model per run so no weights carry over between epoch budgets.
    model = to_device(ResNet9(3, 10), device)
    history = fit_one_cycle(num_epochs, max_lr, model, train_dl, valid_dl,
                            grad_clip=grad_clip,
                            weight_decay=weight_decay,
                            opt_func=opt_func)
    history_map[index] = history

Epoch [0], last_lr: 0.00392, train_loss: 1.1557, val_loss: 0.9121, val_acc: 0.6802
Epoch [1], last_lr: 0.00935, train_loss: 0.7715, val_loss: 1.0019, val_acc: 0.6827
Epoch [2], last_lr: 0.00972, train_loss: 0.6176, val_loss: 0.6538, val_acc: 0.7893
Epoch [3], last_lr: 0.00812, train_loss: 0.3642, val_loss: 0.6018, val_acc: 0.8011
Epoch [4], last_lr: 0.00556, train_loss: 0.2459, val_loss: 0.4182, val_acc: 0.8651
Epoch [5], last_lr: 0.00283, train_loss: 0.1520, val_loss: 0.2951, val_acc: 0.9032
Epoch [6], last_lr: 0.00077, train_loss: 0.0885, val_loss: 0.2665, val_acc: 0.9125
Epoch [7], last_lr: 0.00000, train_loss: 0.0561, val_loss: 0.2474, val_acc: 0.9196
Epoch [0], last_lr: 0.00327, train_loss: 1.2054, val_loss: 1.0240, val_acc: 0.6577
Epoch [1], last_lr: 0.00848, train_loss: 0.7267, val_loss: 1.2581, val_acc: 0.6532
Epoch [2], last_lr: 0.00994, train_loss: 0.6725, val_loss: 1.1960, val_acc: 0.6586
Epoch [3], last_lr: 0.00899, train_loss: 0.3791, val_loss: 0.6367, val_acc: 0.7977
Epoc

In [10]:
import pandas as pd

def compare_final_val_losses(names, histories=None, label='Transformation'):
    """Print the last-epoch validation loss and accuracy of every run in a sweep.

    Two tables are printed, separated by a line of asterisks: one with the final
    validation loss per run and one with the final validation accuracy per run.

    Args:
        names: One row label per run, in the same order as the runs.
        histories: Mapping (or sequence) indexed by ``0..n-1`` whose values are
            per-epoch result lists as returned by ``fit_one_cycle``. When
            omitted, the notebook-level ``history_map`` is used.
        label: Header of the row-label column. Defaults to ``'Transformation'``
            so existing calls print exactly as before; pass e.g. ``'Epochs'``
            or ``'Max LR'`` to describe the swept quantity.

    Raises:
        ValueError: If the number of labels differs from the number of runs.
    """
    if histories is None:
        # Fall back to the results of the most recent sweep cell.
        histories = history_map

    # Only the last epoch of each run is compared.
    final_results = [histories[i][-1] for i in range(len(histories))]

    names = list(names)
    if len(names) != len(final_results):
        raise ValueError(
            "got {} labels for {} runs".format(len(names), len(final_results)))

    loss_table = pd.DataFrame({
        label: names,
        'Final Validation Loss': [run['val_loss'] for run in final_results]
    })
    print(loss_table)
    print("*"*10)
    acc_table = pd.DataFrame({
        label: names,
        'Final Validation Accuracy': [run['val_acc'] for run in final_results]
    })
    print(acc_table)
    
# Row labels are the epoch budgets 8..20, which must match the order of the runs in history_map.
compare_final_val_losses(range(8,21))

    Transformation  Final Validation Loss
0                8               0.247389
1                9               0.237656
2               10               0.250610
3               11               0.233113
4               12               0.230280
5               13               0.236255
6               14               0.244910
7               15               0.239803
8               16               0.238151
9               17               0.246307
10              18               0.239790
11              19               0.245538
12              20               0.234441
**********
    Transformation  Final Validation Accuracy
0                8                   0.919586
1                9                   0.923237
2               10                   0.923882
3               11                   0.929284
4               12                   0.927352
5               13                   0.928744
6               14                   0.929245
7               15               

In [13]:
# best lr

import numpy as np
# Epoch budget used for the remaining sweeps.
# NOTE(review): presumably picked from the epoch sweep results -- confirm.
best_epoch = 20
history_map = {}

# Peak learning rates 0.001, 0.002, ..., 0.010. np.linspace fixes the number of
# points explicitly; a float-step np.arange can gain or lose the end point through
# rounding. The grid is built once and reused as the row labels so the two cannot
# drift apart.
learning_rates = np.linspace(0.001, 0.01, 10)

for index, lr in enumerate(learning_rates):
    # New model per run so every learning rate starts from fresh weights.
    model = to_device(ResNet9(3, 10), device)
    history = fit_one_cycle(best_epoch, lr, model, train_dl, valid_dl,
                            grad_clip=grad_clip,
                            weight_decay=weight_decay,
                            opt_func=opt_func)
    history_map[index] = history

compare_final_val_losses(learning_rates)

Epoch [0], last_lr: 0.00010, train_loss: 1.4311, val_loss: 1.0412, val_acc: 0.6289
Epoch [1], last_lr: 0.00028, train_loss: 0.8788, val_loss: 0.8245, val_acc: 0.7204
Epoch [2], last_lr: 0.00052, train_loss: 0.6469, val_loss: 0.9044, val_acc: 0.6986
Epoch [3], last_lr: 0.00076, train_loss: 0.4994, val_loss: 0.7206, val_acc: 0.7534
Epoch [4], last_lr: 0.00094, train_loss: 0.3980, val_loss: 0.7287, val_acc: 0.7598
Epoch [5], last_lr: 0.00100, train_loss: 0.3091, val_loss: 0.6891, val_acc: 0.7742
Epoch [6], last_lr: 0.00099, train_loss: 0.2449, val_loss: 0.5677, val_acc: 0.8226
Epoch [7], last_lr: 0.00095, train_loss: 0.1914, val_loss: 0.4617, val_acc: 0.8468
Epoch [8], last_lr: 0.00089, train_loss: 0.1525, val_loss: 0.4744, val_acc: 0.8458
Epoch [9], last_lr: 0.00081, train_loss: 0.1249, val_loss: 0.3626, val_acc: 0.8787
Epoch [10], last_lr: 0.00072, train_loss: 0.1034, val_loss: 0.4931, val_acc: 0.8459
Epoch [11], last_lr: 0.00061, train_loss: 0.0851, val_loss: 0.3752, val_acc: 0.8806
Ep

In [14]:
# best weight decay

import numpy as np
# Peak learning rate used for the remaining sweeps.
# NOTE(review): presumably picked from the learning-rate sweep results -- confirm.
best_lr = 0.01
history_map = {}

# Weight decays 1e-4, 2e-4, ..., 5e-4. np.linspace fixes the number of points
# explicitly; a float-step np.arange can gain or lose the end point through rounding.
weight_decays = np.linspace(1e-4, 5e-4, 5)

for index, wd in enumerate(weight_decays):
    # New model per run so every weight decay starts from fresh weights.
    model = to_device(ResNet9(3, 10), device)
    history = fit_one_cycle(best_epoch, best_lr, model, train_dl, valid_dl,
                            grad_clip=grad_clip,
                            weight_decay=wd,
                            opt_func=opt_func)
    history_map[index] = history

Epoch [0], last_lr: 0.00103, train_loss: 1.2177, val_loss: 0.9754, val_acc: 0.6544
Epoch [1], last_lr: 0.00279, train_loss: 0.6893, val_loss: 0.6192, val_acc: 0.7821
Epoch [2], last_lr: 0.00519, train_loss: 0.5214, val_loss: 0.7510, val_acc: 0.7533
Epoch [3], last_lr: 0.00759, train_loss: 0.4429, val_loss: 1.0602, val_acc: 0.7164
Epoch [4], last_lr: 0.00935, train_loss: 0.4036, val_loss: 0.8116, val_acc: 0.7471
Epoch [5], last_lr: 0.01000, train_loss: 0.3566, val_loss: 0.5698, val_acc: 0.8214
Epoch [6], last_lr: 0.00987, train_loss: 0.2759, val_loss: 0.8872, val_acc: 0.7623
Epoch [7], last_lr: 0.00950, train_loss: 0.2423, val_loss: 0.5699, val_acc: 0.8205
Epoch [8], last_lr: 0.00891, train_loss: 0.2010, val_loss: 0.5921, val_acc: 0.8188
Epoch [9], last_lr: 0.00812, train_loss: 0.1703, val_loss: 0.6214, val_acc: 0.8233
Epoch [10], last_lr: 0.00717, train_loss: 0.1625, val_loss: 0.4328, val_acc: 0.8683
Epoch [11], last_lr: 0.00611, train_loss: 0.1308, val_loss: 0.4220, val_acc: 0.8740
Ep

In [15]:
# Row labels are the weight-decay grid 1e-4 .. 5e-4; they must line up, in order,
# with the runs stored in history_map by the weight-decay sweep.
compare_final_val_losses(np.arange(1e-4, 6e-4, 1e-4))

   Transformation  Final Validation Loss
0          0.0001               0.244382
1          0.0002               0.242561
2          0.0003               0.233962
3          0.0004               0.232100
4          0.0005               0.220281
**********
   Transformation  Final Validation Accuracy
0          0.0001                   0.931589
1          0.0002                   0.932585
2          0.0003                   0.927938
3          0.0004                   0.932571
4          0.0005                   0.932130


In [17]:
# best optimizer

import numpy as np
# Weight decay used for the optimizer comparison.
# NOTE(review): presumably picked from the weight-decay sweep results -- confirm.
best_weight_decay = 5e-4
history_map = {}

# Display name -> optimizer class. Keeping both in one mapping guarantees the row
# labels printed below correspond to the optimizer that was actually trained.
optimizer_candidates = {
    "Adam": torch.optim.Adam,
    "AdamW": torch.optim.AdamW,
    "SGD": torch.optim.SGD,
}
opt_func_list = list(optimizer_candidates)

for index, candidate in enumerate(optimizer_candidates.values()):
    # New model per run so every optimizer starts from fresh weights.
    model = to_device(ResNet9(3, 10), device)
    history = fit_one_cycle(best_epoch, best_lr, model, train_dl, valid_dl,
                            grad_clip=grad_clip,
                            weight_decay=best_weight_decay,
                            opt_func=candidate)
    history_map[index] = history

compare_final_val_losses(opt_func_list)

Epoch [0], last_lr: 0.00103, train_loss: 1.2340, val_loss: 1.1875, val_acc: 0.5924
Epoch [1], last_lr: 0.00279, train_loss: 0.6994, val_loss: 0.8359, val_acc: 0.7318
Epoch [2], last_lr: 0.00519, train_loss: 0.5430, val_loss: 0.8894, val_acc: 0.7189
Epoch [3], last_lr: 0.00759, train_loss: 0.5108, val_loss: 1.0892, val_acc: 0.6441
Epoch [4], last_lr: 0.00935, train_loss: 0.4916, val_loss: 0.8881, val_acc: 0.6972
Epoch [5], last_lr: 0.01000, train_loss: 0.4443, val_loss: 1.0657, val_acc: 0.6696
Epoch [6], last_lr: 0.00987, train_loss: 0.3957, val_loss: 0.6484, val_acc: 0.7813
Epoch [7], last_lr: 0.00950, train_loss: 0.3591, val_loss: 0.7255, val_acc: 0.7660
Epoch [8], last_lr: 0.00891, train_loss: 0.3516, val_loss: 0.8823, val_acc: 0.7378
Epoch [9], last_lr: 0.00812, train_loss: 0.3243, val_loss: 0.5837, val_acc: 0.8051
Epoch [10], last_lr: 0.00717, train_loss: 0.3022, val_loss: 0.7355, val_acc: 0.7524
Epoch [11], last_lr: 0.00611, train_loss: 0.2748, val_loss: 0.5755, val_acc: 0.8122
Ep