Level 1


In [None]:
# Important Pre-requisites
import random
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from datasets import load_dataset
from sklearn.model_selection import train_test_split


In [None]:
# Because of Hugging Face API rate limits, the dataset is downloaded locally
# instead (the next cell extracts the archive and lists a few of the files).
!wget -q http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz
!wget -q http://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat


In [None]:
!wget http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz
!wget http://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat
!tar -xzf 102flowers.tgz
!ls jpg | head


In [None]:
import os
import numpy as np
import scipy.io

# Read the Oxford .mat label file and shift the labels down by one
# (1–102 becomes 0–101).
label_mat = scipy.io.loadmat("imagelabels.mat")
labels = label_mat["labels"].squeeze() - 1

print("Total labels:", len(labels))
print("Unique classes:", len(set(labels)))

# Gather the image filenames in sorted order; later cells index `img_files`
# and `labels` with the same position.
img_dir = "jpg"
img_files = os.listdir(img_dir)
img_files.sort()

print("Total images:", len(img_files))

# Checkpoint: there must be exactly as many images as labels.
assert len(img_files) == len(labels), "Images and labels count mismatch"
print("Image-label alignment OK")


In [None]:
from sklearn.model_selection import train_test_split

# Positions 0..N-1 into img_files / labels; the two splits below partition them.
idx = np.arange(len(img_files))

# Stage 1: 80% train, 20% held out (stratified by class label).
train_idx, temp_idx = train_test_split(
    idx, test_size=0.2, stratify=labels, random_state=42
)

# Stage 2: halve the held-out part into validation and test (10% each),
# stratifying on the labels of the held-out samples only.
temp_labels = labels[temp_idx]
val_idx, test_idx = train_test_split(
    temp_idx, test_size=0.5, stratify=temp_labels, random_state=42
)

for split_name, split in (("Train:", train_idx), ("Val:", val_idx), ("Test:", test_idx)):
    print(split_name, len(split))

# Verify the proportions: each split as a fraction of the full dataset.
n = len(idx)
print("Ratios:", *(len(split) / n for split in (train_idx, val_idx, test_idx)))



In [None]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Training pipeline: random resized crop, horizontal flip and colour jitter,
# followed by normalisation and conversion to a tensor.
train_steps = [
    A.RandomResizedCrop((224, 224), scale=(0.7, 1.0)),
    A.HorizontalFlip(p=0.5),
    A.ColorJitter(0.2, 0.2, 0.2, 0.1),
    A.Normalize(),
    ToTensorV2(),
]
train_tfms = A.Compose(train_steps)

# Evaluation pipeline: no random steps -- resize, centre crop, normalise.
eval_steps = [
    A.Resize(256, 256),
    A.CenterCrop(224, 224),
    A.Normalize(),
    ToTensorV2(),
]
val_tfms = A.Compose(eval_steps)



In [None]:
from torch.utils.data import Dataset
import cv2

class FlowersDataset(Dataset):
    """Map-style dataset over a subset of the flower images.

    Args:
        img_files: Sequence of image filenames, relative to ``img_dir``.
        labels: Sequence of class labels, indexed in parallel with ``img_files``.
        indices: Positions into ``img_files`` / ``labels`` that form this split.
        tfms: Callable invoked as ``tfms(image=img)``; its result must provide
            the transformed image under the ``"image"`` key.
        img_dir: Directory the filenames are resolved against. Defaults to
            ``"jpg"``, the directory used throughout this notebook.
    """

    def __init__(self, img_files, labels, indices, tfms, img_dir="jpg"):
        self.img_files = img_files
        self.labels = labels
        self.indices = indices
        self.tfms = tfms
        self.img_dir = img_dir

    def __len__(self):
        """Number of samples in this split."""
        return len(self.indices)

    def __getitem__(self, i):
        """Return ``(transformed_image, label)`` for the ``i``-th sample of the split.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        idx = self.indices[i]
        img_path = os.path.join(self.img_dir, self.img_files[idx])

        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of inside cvtColor.
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        # OpenCV loads images as BGR; convert to RGB before transforming.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        y = int(self.labels[idx])
        x = self.tfms(image=img)["image"]

        return x, y



In [None]:
from torch.utils.data import DataLoader

# One dataset per split; only the training split gets the augmenting transforms.
train_ds = FlowersDataset(img_files, labels, train_idx, train_tfms)
val_ds, test_ds = (
    FlowersDataset(img_files, labels, split_idx, val_tfms)
    for split_idx in (val_idx, test_idx)
)

# Worker and memory-pinning settings shared by all three loaders.
loader_opts = dict(num_workers=2, pin_memory=True)

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True, **loader_opts)
val_loader = DataLoader(val_ds, batch_size=64, shuffle=False, **loader_opts)
test_loader = DataLoader(test_ds, batch_size=64, shuffle=False, **loader_opts)


In [None]:
import torch

# Pull one training batch and report what the loader produces.
first_batch = next(iter(train_loader))
x, y = first_batch
print("Batch shape:", x.shape)
print("Labels shape:", y.shape)
print("Classes in batch:", torch.unique(y))



In [None]:
import timm
import torch
import torch.nn as nn

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Device:", device)

# Pretrained ResNet-50 with a 102-class output head, placed on the chosen device.
model = timm.create_model("resnet50", pretrained=True, num_classes=102).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(params=model.parameters(), lr=3e-4)


In [None]:
from tqdm import tqdm

def run_epoch(loader, training):
    """Run one full pass over ``loader`` with the module-level ``model``.

    Relies on the module-level ``model``, ``criterion``, ``optimizer`` and
    ``device``.

    Args:
        loader: Iterable yielding ``(inputs, targets)`` batches.
        training: If true, the model is put in train mode and the optimizer
            steps after every batch; otherwise the model is put in eval mode
            and gradients are disabled.

    Returns:
        Tuple ``(mean loss per sample, accuracy)`` over everything in ``loader``.
    """
    set_mode = model.train if training else model.eval
    set_mode()

    loss_sum, n_correct, n_seen = 0, 0, 0

    for x, y in tqdm(loader, leave=False):
        x, y = x.to(device), y.to(device)

        if training:
            optimizer.zero_grad()

        # Gradients are recorded only while training.
        with torch.set_grad_enabled(training):
            out = model(x)
            loss = criterion(out, y)

            if training:
                loss.backward()
                optimizer.step()

        # Weight the batch loss by the batch size so the final division
        # yields a per-sample average.
        loss_sum += loss.item() * x.size(0)
        n_correct += (out.argmax(1) == y).sum().item()
        n_seen += y.size(0)

    return loss_sum / n_seen, n_correct / n_seen


In [None]:
# Best validation accuracy so far; a checkpoint is written whenever it improves.
best_val = 0

for epoch in range(1, 11):
    train_loss, train_acc = run_epoch(train_loader, True)
    val_loss, val_acc = run_epoch(val_loader, False)

    print(f"Epoch {epoch:02d} | train acc {train_acc:.4f} | val acc {val_acc:.4f}")

    if not (val_acc > best_val):
        continue

    best_val = val_acc
    torch.save(model.state_dict(), "best_resnet50.pth")


In [None]:
import torch
import numpy as np
from sklearn.metrics import confusion_matrix

# Restore the checkpoint saved at the best validation accuracy and switch
# the model to evaluation mode.
best_state = torch.load("best_resnet50.pth")
model.load_state_dict(best_state)
model.eval()

pred_chunks, label_chunks = [], []

with torch.no_grad():
    for x, y in test_loader:
        out = model(x.to(device))
        pred_chunks.append(out.argmax(1).cpu().numpy())
        label_chunks.append(y.cpu().numpy())

# Stitch the per-batch arrays into one flat array each.
all_preds = np.concatenate(pred_chunks)
all_labels = np.concatenate(label_chunks)

test_acc = np.mean(all_preds == all_labels)
print(f"TEST ACCURACY: {test_acc:.4f}")


In [None]:
# True labels go in as y_true, model predictions as y_pred.
cm = confusion_matrix(y_true=all_labels, y_pred=all_preds)
print("Confusion matrix shape:", cm.shape)


In [None]:
import matplotlib.pyplot as plt

# `cm` comes from confusion_matrix(all_labels, all_preds): rows are the true
# classes and columns the predicted ones, so label the axes accordingly --
# without labels the figure does not say which is which.
fig, ax = plt.subplots(figsize=(8, 8))
im = ax.imshow(cm, cmap="viridis")
ax.set_title("Flowers-102 Confusion Matrix (ResNet-50)- Dhruv Pandita")
ax.set_xlabel("Predicted class")
ax.set_ylabel("True class")
fig.colorbar(im, ax=ax)
fig.tight_layout()
plt.show()


Level 2


In [None]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Level 2 training pipeline: the Level 1 crop / flip / jitter recipe with wider
# ranges, plus 90-degree rotations, extra colour perturbations and a dropout patch.
# NOTE(review): `max_holes` / `max_height` / `max_width` look like the older
# CoarseDropout argument names -- confirm the installed albumentations version
# still honours them.
l2_steps = [
    A.RandomResizedCrop((224, 224), scale=(0.65, 1.0)),
    A.HorizontalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.ColorJitter(0.25, 0.25, 0.25, 0.1),
    A.HueSaturationValue(10, 15, 10, p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.CoarseDropout(max_holes=1, max_height=32, max_width=32, p=0.4),
    A.Normalize(),
    ToTensorV2(),
]
train_tfms_l2 = A.Compose(l2_steps)


In [None]:
# Level 2 reuses the same splits; only the training transforms differ.
train_ds_l2 = FlowersDataset(img_files, labels, train_idx, tfms=train_tfms_l2)
val_ds_l2 = FlowersDataset(img_files, labels, val_idx, tfms=val_tfms)

l2_loader_opts = dict(num_workers=2, pin_memory=True)
train_loader_l2 = DataLoader(train_ds_l2, batch_size=32, shuffle=True, **l2_loader_opts)
val_loader_l2 = DataLoader(val_ds_l2, batch_size=64, shuffle=False, **l2_loader_opts)


In [None]:
import torch.nn as nn
from torch.optim.lr_scheduler import CosineAnnealingLR

# Level 2 objective: cross-entropy with label smoothing.
criterion_l2 = nn.CrossEntropyLoss(label_smoothing=0.1)

# Level 2 optimizer: AdamW with weight decay, on the same `model` as Level 1.
optimizer_l2 = torch.optim.AdamW(
    model.parameters(),
    lr=3e-4,
    weight_decay=1e-4
)

# Cosine learning-rate schedule over the 10 Level 2 epochs.
scheduler = CosineAnnealingLR(optimizer_l2, T_max=10)

# run_epoch() reads the module-level names `criterion` and `optimizer`.
# Without rebinding them, the Level 2 loop would silently keep training with
# the Level 1 loss and optimizer, and `scheduler` would adjust the learning
# rate of an optimizer that never takes a step.
criterion = criterion_l2
optimizer = optimizer_l2


In [None]:
# Best Level 2 validation accuracy so far.
best_val_l2 = 0

for epoch in range(1, 11):
    train_loss, train_acc = run_epoch(train_loader_l2, True)
    val_loss, val_acc = run_epoch(val_loader_l2, False)

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

    print(f"L2 Epoch {epoch:02d} | train {train_acc:.4f} | val {val_acc:.4f}")

    # Checkpoint only on a strict improvement.
    improved = val_acc > best_val_l2
    if improved:
        best_val_l2 = val_acc
        torch.save(model.state_dict(), "best_resnet50_l2.pth")


In [None]:
# Evaluate the best Level 2 checkpoint on the held-out test split.
l2_state = torch.load("best_resnet50_l2.pth")
model.load_state_dict(l2_state)
model.eval()

all_preds, all_labels = [], []

with torch.no_grad():
    for x, y in test_loader:
        x, y = x.to(device), y.to(device)
        preds = model(x).argmax(1)

        all_preds.append(preds.cpu().numpy())
        all_labels.append(y.cpu().numpy())

# Collapse the per-batch lists into flat arrays.
all_preds, all_labels = np.concatenate(all_preds), np.concatenate(all_labels)

test_acc_l2 = np.mean(all_preds == all_labels)
print(f"L2 TEST ACCURACY: {test_acc_l2:.4f}")


Level 3


In [None]:
import torch
import torch.nn as nn

class SpatialAttention(nn.Module):
    """Sigmoid gate with one value per spatial location, from a 1x1 convolution.

    Args:
        in_channels: Number of channels of the incoming feature map.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, 1, kernel_size=1)

    def forward(self, x):
        """Return ``(x scaled by the gate, gate)``; the gate is B×1×H×W."""
        gate = self.conv(x).sigmoid()
        gated = x * gate
        return gated, gate


In [None]:
import timm

class ResNet50WithAttention(nn.Module):
    """ResNet-50 features -> spatial attention -> global average pool -> linear head.

    Args:
        num_classes: Size of the output layer (defaults to 102).
    """

    def __init__(self, num_classes=102):
        super().__init__()
        self.backbone = timm.create_model("resnet50", pretrained=True, features_only=True)
        self.attn = SpatialAttention(2048)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(2048, num_classes)

    def forward(self, x):
        """Return ``(class logits, attention map)`` for the batch ``x``."""
        # The backbone yields a list of feature maps; only the last one
        # (B×2048×H×W) is used.
        last_stage = self.backbone(x)[-1]
        weighted, attn = self.attn(last_stage)
        # B×2048×1×1 -> B×2048
        pooled = self.pool(weighted).flatten(1)
        return self.fc(pooled), attn



In [None]:
# Attention model for Level 3, placed on the active device.
model_l3 = ResNet50WithAttention()
model_l3 = model_l3.to(device)

# Rebinds the module-level `criterion` / `optimizer` for the Level 3 model.
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = torch.optim.AdamW(params=model_l3.parameters(), lr=3e-4)



In [None]:
def run_epoch_l3(loader, training):
    """Run one pass over ``loader`` with the module-level ``model_l3``.

    ``model_l3`` returns ``(logits, attention)``; only the logits are used here.
    Relies on the module-level ``criterion``, ``optimizer`` and ``device``.

    Args:
        loader: Iterable yielding ``(inputs, targets)`` batches.
        training: Train mode with optimizer steps if true, eval mode without
            gradients otherwise.

    Returns:
        Tuple ``(mean loss per sample, accuracy)``.
    """
    if training:
        model_l3.train()
    else:
        model_l3.eval()

    loss_sum = 0
    n_correct = 0
    n_seen = 0

    for x, y in loader:
        x = x.to(device)
        y = y.to(device)

        if training:
            optimizer.zero_grad()

        with torch.set_grad_enabled(training):
            out = model_l3(x)[0]
            loss = criterion(out, y)
            if training:
                loss.backward()
                optimizer.step()

        # Batch loss weighted by batch size -> per-sample mean at the end.
        loss_sum += loss.item() * x.size(0)
        n_correct += (out.argmax(1) == y).sum().item()
        n_seen += y.size(0)

    return loss_sum / n_seen, n_correct / n_seen


In [None]:
# Best Level 3 validation accuracy so far (reuses the `best_val` name).
best_val = 0

for epoch in range(1, 9):
    tr_loss, tr_acc = run_epoch_l3(train_loader_l2, True)
    val_loss, val_acc = run_epoch_l3(val_loader_l2, False)

    print(f"L3 {epoch} | train {tr_acc:.4f} | val {val_acc:.4f}")

    # Nothing to save unless validation accuracy strictly improved.
    if not (val_acc > best_val):
        continue

    best_val = val_acc
    torch.save(model_l3.state_dict(), "resnet50_attention_l3.pth")


In [None]:
import numpy as np
import torch

# Restore the best Level 3 checkpoint and score it on the test split.
l3_state = torch.load("resnet50_attention_l3.pth")
model_l3.load_state_dict(l3_state)
model_l3.eval()

pred_batches = []
label_batches = []

with torch.no_grad():
    for x, y in test_loader:
        # The model also returns its attention map; only the logits matter here.
        out, _ = model_l3(x.to(device))
        pred_batches.append(out.argmax(1).cpu().numpy())
        label_batches.append(y.cpu().numpy())

all_preds = np.concatenate(pred_batches)
all_labels = np.concatenate(label_batches)

test_acc_l3 = np.mean(all_preds == all_labels)
print(f"L3 TEST ACCURACY: {test_acc_l3:.4f}")


In [None]:
import numpy as np

num_classes = 102

# Per-class sample counts and hit counts, computed with bincount instead of a
# Python loop over every prediction.
class_total = np.bincount(all_labels, minlength=num_classes).astype(float)
class_correct = np.bincount(
    all_labels[all_preds == all_labels], minlength=num_classes
).astype(float)

# A class with no samples would give 0/0; leave it as NaN without triggering
# a runtime warning, and keep it out of the rankings below.
class_acc = np.full(num_classes, np.nan)
np.divide(class_correct, class_total, out=class_acc, where=class_total > 0)

# Classes that actually occur, ordered from lowest to highest accuracy.
ranked = np.flatnonzero(class_total > 0)
ranked = ranked[np.argsort(class_acc[ranked])]

# top 10 easiest classes (highest accuracy first)
easy = ranked[::-1][:10]

# top 10 hardest classes (lowest accuracy first)
hard = ranked[:10]

print("Easiest classes:", easy)
print("Their accuracies:", class_acc[easy])

print("Hardest classes:", hard)
print("Their accuracies:", class_acc[hard])


In [None]:
import matplotlib.pyplot as plt

x_vis, y_vis = next(iter(test_loader))
x_vis = x_vis.to(device)

model_l3.eval()
with torch.no_grad():
    _, attn = model_l3(x_vis)

attn = attn.cpu()

# visualize the first 5 samples (fewer if the batch is smaller)
for i in range(min(5, x_vis.size(0))):
    img = x_vis[i].cpu().permute(1, 2, 0).numpy()
    # The loader yields normalised tensors, whose values fall outside the range
    # imshow expects for float RGB data; min-max rescale to [0, 1] (the same
    # rescaling the Grad-CAM plot uses) so the input is displayed undistorted.
    lo, hi = img.min(), img.max()
    img = (img - lo) / (hi - lo) if hi > lo else np.zeros_like(img)
    heat = attn[i, 0].numpy()

    fig, (ax_img, ax_attn) = plt.subplots(1, 2, figsize=(6, 3))
    ax_img.imshow(img)
    ax_img.set_title("Input")

    ax_attn.imshow(heat, cmap="hot")
    ax_attn.set_title("Attention map")

    plt.show()


In [None]:
# storage filled in by the two hooks below
activations = None
gradients = None

def save_acts(module, inp, out):
    """Forward hook: keep the hooked layer's output."""
    global activations
    activations = out

def save_grads(module, grad_in, grad_out):
    """Backward hook: keep the gradient with respect to the hooked layer's output."""
    global gradients
    gradients = grad_out[0]

# hook the last feature block
target_layer = model_l3.backbone.feature_info[-1]["module"]
layer = dict(model_l3.backbone.named_modules())[target_layer]

# register_backward_hook is deprecated in favour of register_full_backward_hook.
# The returned handles are kept so the hooks can be detached later with
# `.remove()`.
fwd_hook_handle = layer.register_forward_hook(save_acts)
bwd_hook_handle = layer.register_full_backward_hook(save_grads)


In [None]:
# Take one test batch and move both tensors to the active device.
x_gc, y_gc = next(iter(test_loader))
x_gc, y_gc = x_gc.to(device), y_gc.to(device)

# Start from clean gradients, then run a forward pass with gradients enabled.
model_l3.zero_grad()
out, _ = model_l3(x_gc)

# Back-propagate the logit of the predicted class of the first sample only.
pred = out.argmax(1)
top_class = pred[0]
score = out[0, top_class]
score.backward()


In [None]:
import numpy as np
import cv2

# Gradients and activations captured by the hooks, first sample only.
g = gradients[0].cpu().numpy()   # C×H×W
a = activations[0].detach().cpu().numpy()

# One weight per channel: the spatial mean of that channel's gradient.
weights = g.mean(axis=(1, 2))

# Weighted sum of the activation maps over the channel axis, done in a single
# contraction instead of a Python loop over channels.
cam = np.tensordot(weights, a, axes=1).astype(np.float32)

# Keep only positive evidence, then scale to [0, 1]. If nothing is positive
# the map is all zeros; skip the division rather than produce NaNs.
cam = np.maximum(cam, 0)
peak = cam.max()
if peak > 0:
    cam = cam / peak
cam = cv2.resize(cam, (224, 224))


In [None]:
import matplotlib.pyplot as plt

# First image of the Grad-CAM batch as H×W×C, min-max rescaled for display.
img = x_gc[0].cpu().permute(1, 2, 0).numpy()
lo, hi = img.min(), img.max()
img = (img - lo) / (hi - lo)

fig, (ax_orig, ax_cam) = plt.subplots(1, 2, figsize=(6, 3))

ax_orig.imshow(img)
ax_orig.set_title("Original")

# Draw the heat map half-transparent on top of the image.
ax_cam.imshow(img)
ax_cam.imshow(cam, cmap="jet", alpha=0.5)
ax_cam.set_title("Grad-CAM")

plt.show()


Level 4


In [None]:
# map_location=device lets each checkpoint load onto whatever device this
# session runs on, even if it was saved from a different one.

# load L1
model_l1 = timm.create_model("resnet50", pretrained=False, num_classes=102)
model_l1.load_state_dict(torch.load("best_resnet50.pth", map_location=device))
model_l1.to(device).eval()

# load L2
model_l2 = timm.create_model("resnet50", pretrained=False, num_classes=102)
model_l2.load_state_dict(torch.load("best_resnet50_l2.pth", map_location=device))
model_l2.to(device).eval()

# load L3 (attention)
model_l3 = ResNet50WithAttention()
model_l3.load_state_dict(torch.load("resnet50_attention_l3.pth", map_location=device))
# Trailing semicolon: keep the notebook from printing the whole model repr.
model_l3.to(device).eval();


In [None]:
import torch.nn.functional as F
import numpy as np

# Ensemble = unweighted mean of the three models' softmax outputs.
pred_chunks, label_chunks = [], []

with torch.no_grad():
    for x, y in test_loader:
        x = x.to(device)

        p1 = F.softmax(model_l1(x), dim=1)
        p2 = F.softmax(model_l2(x), dim=1)
        # The attention model returns (logits, attention map).
        logits_l3, _ = model_l3(x)
        p3 = F.softmax(logits_l3, dim=1)

        p = (p1 + p2 + p3) / 3

        pred_chunks.append(p.argmax(1).cpu().numpy())
        label_chunks.append(y.numpy())

all_preds = np.concatenate(pred_chunks)
all_labels = np.concatenate(label_chunks)

ensemble_acc = np.mean(all_preds == all_labels)
print(f"ENSEMBLE TEST ACCURACY: {ensemble_acc:.4f}")


Level 5


In [None]:
# Distillation student: pretrained MobileNetV3-Large with a 102-class head,
# created and moved to the active device in one step.
student = timm.create_model(
    "mobilenetv3_large_100", pretrained=True, num_classes=102
).to(device)


In [None]:
import torch.nn.functional as F

def distill_loss(student_logits, teacher_probs, y, T=4, alpha=0.7):
    """Knowledge-distillation loss: soft teacher term plus hard-label term.

    Args:
        student_logits: Raw student outputs, one row per sample.
        teacher_probs: Teacher class probabilities at temperature 1 (each row
            sums to 1), same shape as ``student_logits``.
        y: Ground-truth class indices.
        T: Distillation temperature applied to both student and teacher.
        alpha: Weight of the soft term; the hard term gets ``1 - alpha``.

    Returns:
        Scalar tensor ``alpha * soft + (1 - alpha) * hard``.
    """
    # The student distribution below is softened with temperature T, so the
    # teacher must be softened the same way; otherwise the KL term compares
    # distributions at different temperatures and the T*T rescaling no longer
    # applies. Raising probabilities to 1/T and renormalising is the
    # probability-space form of dividing logits by T.
    teacher_soft = teacher_probs.pow(1.0 / T)
    teacher_soft = teacher_soft / teacher_soft.sum(dim=1, keepdim=True)

    soft = F.kl_div(
        F.log_softmax(student_logits / T, dim=1),
        teacher_soft,
        reduction="batchmean"
    ) * (T * T)

    hard = F.cross_entropy(student_logits, y)
    return alpha * soft + (1 - alpha) * hard


In [None]:
import torch.nn.functional as F

def teacher_predict(x):
    """Average the softmax outputs of the three teacher models for batch ``x``.

    Uses the module-level ``model_l1``, ``model_l2`` and ``model_l3``;
    gradients are disabled for the whole computation.
    """
    with torch.no_grad():
        logits_l1 = model_l1(x)
        logits_l2 = model_l2(x)
        # The attention model returns (logits, attention map).
        logits_l3, _ = model_l3(x)

        p1 = F.softmax(logits_l1, dim=1)
        p2 = F.softmax(logits_l2, dim=1)
        p3 = F.softmax(logits_l3, dim=1)
        return (p1 + p2 + p3) / 3


In [None]:
# Fresh optimizer over the student's parameters (rebinds the module-level name).
optimizer = torch.optim.AdamW(student.parameters(), lr=3e-4)

for epoch in range(6):
    student.train()
    total = correct = 0

    for x, y in train_loader:
        x = x.to(device)
        y = y.to(device)

        # Teacher targets carry no gradient.
        with torch.no_grad():
            teacher_p = teacher_predict(x)

        out = student(x)
        loss = distill_loss(out, teacher_p, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Running training accuracy against the hard labels.
        correct += (out.argmax(1) == y).sum().item()
        total += y.size(0)

    kd_train_acc = correct / total
    print(f"KD Epoch {epoch} | Train Acc {kd_train_acc:.4f}")


In [None]:
# Move the student to the CPU and put it in eval mode before quantizing.
student_cpu = student.cpu()
student_cpu.eval()

# Dynamically quantize the Linear layers to int8.
student_q = torch.quantization.quantize_dynamic(
    model=student_cpu,
    qconfig_spec={torch.nn.Linear},
    dtype=torch.qint8,
)


In [None]:
import time

# Single-image latency of the quantized student.
x = torch.randn(1, 3, 224, 224)
n_warmup, n_runs = 10, 100

# Time pure inference: no autograd bookkeeping, a few untimed warm-up calls so
# one-off setup cost does not skew the average, and a monotonic
# high-resolution clock instead of wall-clock time.
with torch.no_grad():
    for _ in range(n_warmup):
        student_q(x)

    t0 = time.perf_counter()
    for _ in range(n_runs):
        student_q(x)
    elapsed = time.perf_counter() - t0

print("Latency ms:", elapsed / n_runs * 1000)


In [None]:
# Test accuracy of the quantized student, evaluated on the CPU.
student_q.eval()
correct = total = 0

with torch.no_grad():
    for x, y in test_loader:
        preds = student_q(x.cpu()).argmax(1)
        correct += int((preds == y).sum())
        total += len(y)

print("STUDENT TEST ACC:", correct / total)


In [None]:
def predict_with_uncertainty(x):
    """Predict with the quantized student and report per-sample entropy.

    Args:
        x: Input batch accepted by the module-level ``student_q``.

    Returns:
        Tuple ``(predicted class indices, entropy of the softmax distribution)``,
        one entry per sample.
    """
    with torch.no_grad():
        # Work in log space: when a probability underflows to exactly 0,
        # `p * p.log()` evaluates 0 * -inf = NaN, whereas log_softmax stays
        # finite and the product is a clean 0.
        log_p = F.log_softmax(student_q(x), dim=1)
        p = log_p.exp()
        entropy = -(p * log_p).sum(1)
    return p.argmax(1), entropy


In [None]:
# Trace the quantized student with a dummy 1×3×224×224 input and save the
# resulting TorchScript module for deployment.
export_path = "flower_classifier_int8.pt"
example = torch.randn(1, 3, 224, 224)

traced = torch.jit.trace(student_q, example)
traced.save(export_path)

print(f"Saved deployable model: {export_path}")
