In [1]:

# =======================================
# Q1: CIFAR-10 with Pretrained ViT (Tiny)
# =======================================

!pip install timm optuna -q

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
import timm
import optuna
import gc

# Pick the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using device:", device)

# Start from a clean slate: collect unreachable Python objects, then ask
# PyTorch to drop its cached GPU memory.
gc.collect()
torch.cuda.empty_cache()


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/400.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.9/400.9 kB[0m [31m26.5 MB/s[0m eta [36m0:00:00[0m
[?25hUsing device: cuda


In [2]:

# ------------------------------------------------------
# CIFAR-10 input pipelines (images upsampled to 224x224)
# ------------------------------------------------------
# Steps shared by the train and test pipelines.
_upscale = transforms.Resize((224, 224))
_to_normalized_tensor = [
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]

# Training adds a random flip and a random crop between resizing and tensor conversion.
# NOTE(review): the crop pads the already-upsampled 224px image by 4px, so the
# resulting jitter is small relative to the image — confirm this is intended.
transform_train = transforms.Compose([
    _upscale,
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(224, padding=4),
    *_to_normalized_tensor,
])

# Evaluation is deterministic: resize, convert, normalize.
transform_test = transforms.Compose([_upscale, *_to_normalized_tensor])

# Both splits are stored under (and downloaded to) the same local directory.
_cifar_kwargs = dict(root="./data", download=True)
trainset = torchvision.datasets.CIFAR10(train=True, transform=transform_train, **_cifar_kwargs)
testset = torchvision.datasets.CIFAR10(train=False, transform=transform_test, **_cifar_kwargs)

# Same batching for both loaders; only the training loader shuffles.
_loader_kwargs = dict(batch_size=64, num_workers=2)
trainloader = DataLoader(trainset, shuffle=True, **_loader_kwargs)
testloader = DataLoader(testset, shuffle=False, **_loader_kwargs)


100%|██████████| 170M/170M [00:13<00:00, 12.5MB/s]


In [3]:

# =============================
# Helper Functions: Train & Eval
# =============================
def train_one_epoch(model, loader, optimizer, criterion, device=None):
    """Run one optimisation pass over `loader` and report accuracy and loss.

    Args:
        model: Network to train; it is switched to train mode.
        loader: Iterable yielding `(imgs, labels)` batches. It is wrapped in a
            tqdm progress bar that shows the running training accuracy.
        optimizer: Optimizer whose gradients are zeroed before, and stepped
            after, every batch.
        criterion: Callable `criterion(outputs, labels)` returning a scalar
            loss tensor. It is assumed to return the *mean* loss over the
            batch (the default reduction of `nn.CrossEntropyLoss`).
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if the model has no
            parameters), so the function does not rely on a notebook-level
            `device` variable.

    Returns:
        `(accuracy, avg_loss)`: top-1 accuracy in percent and the per-sample
        average loss over the epoch. An empty loader yields `(0.0, 0.0)`.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    loss_sum, correct, total = 0.0, 0, 0
    loop = tqdm(loader, leave=False)
    for imgs, labels in loop:
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # Weight the batch-mean loss by the batch size so that a short final
        # batch does not count as much as a full one in the epoch average.
        loss_sum += loss.item() * batch_size
        _, predicted = outputs.max(1)
        correct += predicted.eq(labels).sum().item()
        total += batch_size
        if total:
            loop.set_description(f"Train acc {100*correct/total:.2f}")

    # Nothing was seen (empty loader): report zeros instead of dividing by zero.
    if total == 0:
        return 0.0, 0.0
    return 100 * correct / total, loss_sum / total


def evaluate(model, loader, criterion, device=None):
    """Score `model` on every batch of `loader` without updating any weights.

    Args:
        model: Network to evaluate; it is switched to eval mode and left there.
        loader: Iterable yielding `(imgs, labels)` batches.
        criterion: Callable `criterion(outputs, labels)` returning a scalar
            loss tensor. It is assumed to return the *mean* loss over the
            batch (the default reduction of `nn.CrossEntropyLoss`).
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if the model has no
            parameters), so the function does not rely on a notebook-level
            `device` variable.

    Returns:
        `(accuracy, avg_loss)`: top-1 accuracy in percent and the per-sample
        average loss. An empty loader yields `(0.0, 0.0)`.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    loss_sum, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for imgs, labels in loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            # Weight the batch-mean loss by the batch size so that a short
            # final batch does not count as much as a full one.
            loss_sum += loss.item() * batch_size
            _, predicted = outputs.max(1)
            correct += predicted.eq(labels).sum().item()
            total += batch_size

    # Nothing was seen (empty loader): report zeros instead of dividing by zero.
    if total == 0:
        return 0.0, 0.0
    return 100 * correct / total, loss_sum / total


In [4]:

# =============================================
# Fine-tune the pretrained ViT-Tiny on CIFAR-10
# =============================================

EPOCHS = 5  # deliberately short so the run fits in a limited Colab session

# Pretrained ViT-Tiny from timm, requested with 10 output classes, moved to the chosen device.
model = timm.create_model("vit_tiny_patch16_224", pretrained=True, num_classes=10).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=3e-4, weight_decay=0.05)
# Cosine learning-rate schedule spanning the whole run; stepped once per epoch below.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

# NOTE(review): the checkpoint is selected on the test split, so `best_acc`
# is an optimistic estimate of held-out accuracy.
best_acc = 0
for epoch in range(1, EPOCHS + 1):
    train_acc, train_loss = train_one_epoch(model, trainloader, optimizer, criterion)
    test_acc, test_loss = evaluate(model, testloader, criterion)
    scheduler.step()

    # Save the weights whenever this epoch beats the best test accuracy so far.
    improved = test_acc > best_acc
    if improved:
        best_acc = test_acc
        torch.save(model.state_dict(), "best_vit_tiny_plain.pth")

    summary = (
        f"Epoch [{epoch}/{EPOCHS}] "
        f"Train Acc: {train_acc:.2f}% | Test Acc: {test_acc:.2f}% | Best: {best_acc:.2f}%"
    )
    print(summary)

print("✅ Training complete. Best Test Accuracy:", best_acc)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/22.9M [00:00<?, ?B/s]



Epoch [1/5] Train Acc: 89.33% | Test Acc: 91.53% | Best: 91.53%




Epoch [2/5] Train Acc: 93.64% | Test Acc: 93.52% | Best: 93.52%




Epoch [3/5] Train Acc: 96.17% | Test Acc: 94.75% | Best: 94.75%




Epoch [4/5] Train Acc: 98.48% | Test Acc: 95.59% | Best: 95.59%




Epoch [5/5] Train Acc: 99.50% | Test Acc: 96.83% | Best: 96.83%
✅ Training complete. Best Test Accuracy: 96.83
