In [1]:
import os
import sys

import torch

sys.path.append("..")
from src.models import MyrtleNet
from src.image_data import get_cifar10_loaders

In [2]:
# Device selection. The second GPU (index 1) stays the preferred target, exactly
# as before; when it does not exist we fall back to the first GPU and finally to
# the CPU, so the notebook still runs on machines with fewer than two GPUs.
if torch.cuda.device_count() > 1:
    device = "cuda:1"
elif torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

In [3]:
# model
# Settings unpacked as keyword arguments into the MyrtleNet constructor.
model_cfg = dict(
    architecture="myrtle_net",
    n_layers=3,
    residual_blocks=[0, 2],
)
# Instantiate first, then place the network on the selected device.
model = MyrtleNet(**model_cfg)
model = model.to(device)

In [4]:
# data
batch_size = 512
# Dataset root directory. It can be overridden with the CIFAR10_ROOT environment
# variable so the notebook is not tied to a single machine's filesystem; the
# default is the path that was previously hard-coded here.
root = os.environ.get("CIFAR10_ROOT", "/mnt/ssd/ronak/datasets/")

# Returns a pair of loaders. NOTE(review): the second one is named `val_loader`
# here while the recorded output reports "test samples" — presumably the
# CIFAR-10 test split; confirm against get_cifar10_loaders.
train_loader, val_loader = get_cifar10_loaders(batch_size, root)

Files already downloaded and verified
Files already downloaded and verified
50,000 training samples.
10,000 test samples.


In [20]:
# optim
# Length of the one-cycle learning-rate schedule, in scheduler steps.
max_iters = 500
optim_cfg = {
    # Label kept in sync with the optimizer actually constructed below (SGD).
    "optimizer": "sgd",
    # Peak learning rate reached by the one-cycle schedule.
    "lr": 0.4,
}
# NOTE(review): the recorded output of the training cell shows the loss rising
# from about 2.7 to about 8.0 within 160 iterations — this peak learning rate may
# be too high for this setup; confirm before relying on these settings.

# SGD with momentum and weight decay. The `lr` passed here is only a placeholder:
# OneCycleLR takes over each parameter group's learning rate once it is attached.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.0,
    momentum=0.9,
    weight_decay=0.0005,
)
# One-cycle schedule: linear warm-up over the first 20% of the steps, then linear
# annealing. `total_steps` alone determines the schedule length; OneCycleLR only
# consults `epochs`/`steps_per_epoch` when `total_steps` is not given, so the
# previously passed `steps_per_epoch` had no effect and has been dropped.
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=optim_cfg["lr"],
    total_steps=max_iters,
    anneal_strategy="linear",
    pct_start=0.2,
)

In [21]:
# Run experiment.
# Number of micro-batches whose gradients are accumulated per optimizer step.
grad_accumulation_steps = 1
# The loss is printed every `print_interval` iterations.
print_interval = 20

model.train()
# Discard gradients left over from an earlier (possibly interrupted) run of this
# cell so they cannot leak into the first optimizer step.
optimizer.zero_grad(set_to_none=True)
iter_num = 0
print("Training Loss")
print("-------------")
try:
    # The outer loop restarts the loader whenever an epoch ends before max_iters
    # iterations have been performed.
    while iter_num < max_iters:
        for X, Y in train_loader:
            # The model is called with inputs and targets and returns (loss, logits).
            loss, logits = model(X.to(device), Y.to(device))
            if iter_num % print_interval == 0:
                # Report the unscaled loss so the log does not depend on the
                # accumulation factor.
                print(f"{iter_num:03d}: {loss.item():0.4f}")
            # Divide by the accumulation factor so the gradients summed across
            # micro-batches are averaged rather than added up.
            (loss / grad_accumulation_steps).backward()
            # Step only once a full group of micro-batches has been accumulated
            # (the old `iter_num % k == 0` test fired after the very first one).
            if (iter_num + 1) % grad_accumulation_steps == 0:
                optimizer.step()
                scheduler.step()
                optimizer.zero_grad(set_to_none=True)
            iter_num += 1
            # `>=` stops after exactly max_iters iterations. With `>` one extra
            # iteration ran, stepping the scheduler beyond its total_steps
            # (= max_iters), which OneCycleLR rejects with a ValueError.
            if iter_num >= max_iters:
                break
except KeyboardInterrupt:
    # Allow stopping the run manually from the notebook without a traceback.
    print("Graceful Exit")

Training Loss
-------------
000: 2.6876
020: 5.2998
040: 6.2296
060: 5.1665
080: 7.1584
100: 4.5171
120: 4.2654
140: 5.1073
160: 7.9966
Graceful Exit
