In [None]:
pip install wandb

In [None]:
# Log in through the wandb command-line tool before any `wandb.init` call below.
!wandb login

In [None]:
# Download the nature_12K archive, extract it quietly into the working
# directory, then delete the zip file.
!wget https://storage.googleapis.com/wandb_datasets/nature_12K.zip -O nature_12K.zip
!unzip -q nature_12K.zip
!rm nature_12K.zip

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split
import numpy as np
import wandb
from tqdm import tqdm

In [None]:
# Select the compute device: a CUDA GPU when one is available, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Preprocessing applied to every image (train, validation and test).
INPUT_SIZE = 224
# Per-channel ImageNet statistics. Three of the four strategies below start
# from ImageNet-pretrained ResNet50 weights, so inputs are normalized with the
# statistics those weights expect rather than a generic 0.5 / 0.5.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
transform = transforms.Compose([
    transforms.Resize((INPUT_SIZE, INPUT_SIZE)),  # force a square INPUT_SIZE x INPUT_SIZE image
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

In [None]:
# DataLoaders with a stratified validation split carved out of the training folder
def get_dataloaders(train_dir, test_dir, batch_size=64, val_fraction=0.2, seed=42, num_workers=2):
    """Build the train, validation and test DataLoaders.

    The images under `train_dir` are split into train and validation subsets
    with a stratified split, so each class keeps the same proportion in both.
    All three datasets use the module-level `transform`.

    Args:
        train_dir: root folder, in ImageFolder layout, that is split into
            train and validation subsets.
        test_dir: root folder, in ImageFolder layout, used as the test set.
        batch_size: batch size used by all three loaders.
        val_fraction: fraction of `train_dir` held out for validation.
        seed: random state of the split, so the split is reproducible.
        num_workers: worker count passed to each DataLoader.

    Returns:
        Tuple `(train_loader, val_loader, test_loader)`; only the training
        loader shuffles.
    """
    full = datasets.ImageFolder(train_dir, transform=transform)
    # Class index of every sample, used to stratify the split.
    targets = [label for _, label in full.samples]
    train_idx, val_idx = train_test_split(
        list(range(len(full))),
        test_size=val_fraction,
        stratify=targets,
        random_state=seed,
    )
    train_loader = DataLoader(
        Subset(full, train_idx), batch_size=batch_size, shuffle=True, num_workers=num_workers
    )
    val_loader = DataLoader(
        Subset(full, val_idx), batch_size=batch_size, shuffle=False, num_workers=num_workers
    )

    test_ds = datasets.ImageFolder(test_dir, transform=transform)
    test_loader = DataLoader(
        test_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers
    )

    return train_loader, val_loader, test_loader

In [None]:
# Build a ResNet50 whose final layer outputs `num_classes` logits
def build_model(num_classes=10, pretrained=True):
    """Create a torchvision ResNet50 with a fresh classification head.

    Args:
        num_classes: output size of the replacement `fc` layer.
        pretrained: when true, load `ResNet50_Weights.DEFAULT`; otherwise the
            network is created with `weights=None`.

    Returns:
        The ResNet50 module with `fc` replaced by a new `nn.Linear`.
    """
    if pretrained:
        chosen_weights = models.ResNet50_Weights.DEFAULT
    else:
        chosen_weights = None
    net = models.resnet50(weights=chosen_weights)
    # Swap the original head for one sized to this task.
    head_inputs = net.fc.in_features
    net.fc = nn.Linear(head_inputs, num_classes)
    return net

In [None]:
# Apply one of four fine-tuning strategies
def apply_strategy(model, strategy, k=None):
    """Set `requires_grad` on the parameters of `model` according to `strategy`.

    Only the `requires_grad` flags are changed (module train/eval mode is not
    touched). The model is modified in place and also returned.

    Args:
        model: module exposing `children()`, `parameters()` and an `fc`
            attribute holding the classification head.
        strategy: one of
            "freeze_all_except_last": freeze everything except `model.fc`;
            "freeze_first_k": freeze the first `k` direct children and make
                all later children trainable;
            "freeze_last_k": drop the last direct child (taken to be `fc`),
                freeze the last `k` of the remaining children, and make the
                others and `model.fc` trainable;
            "train_from_scratch": make every parameter trainable.
        k: number of child modules to freeze; required and non-negative for
            the two `*_k` strategies, ignored otherwise.

    Returns:
        The same `model` instance.

    Raises:
        ValueError: if `strategy` is not one of the names above, or if a
            `*_k` strategy is requested with `k` missing or negative.
    """
    def set_trainable(module, flag):
        # Toggle gradient tracking for every parameter under `module`.
        for p in module.parameters():
            p.requires_grad = flag

    if strategy in ("freeze_first_k", "freeze_last_k") and (k is None or k < 0):
        # Fail early with a clear message instead of a TypeError from `idx < None`.
        raise ValueError(
            f"strategy '{strategy}' requires a non-negative integer k, got {k!r}"
        )

    # Strategy 1: freeze all but final fc
    if strategy == "freeze_all_except_last":
        set_trainable(model, False)
        set_trainable(model.fc, True)

    # Strategy 2: freeze first k children modules
    elif strategy == "freeze_first_k":
        for idx, child in enumerate(model.children()):
            set_trainable(child, idx >= k)

    # Strategy 3: freeze last k children modules (excluding fc)
    elif strategy == "freeze_last_k":
        body = list(model.children())[:-1]  # omitting final fc
        first_frozen = len(body) - k
        for idx, child in enumerate(body):
            set_trainable(child, idx < first_frozen)
        # ensure fc is trainable
        set_trainable(model.fc, True)

    # Strategy 4: train from scratch (nothing frozen)
    elif strategy == "train_from_scratch":
        set_trainable(model, True)

    else:
        # A misspelled strategy previously left the model silently untouched.
        raise ValueError(f"unknown strategy: {strategy!r}")

    return model


In [None]:
# One pass of training over a loader
def train_one_epoch(model, opt, criterion, loader):
    """Train `model` for one epoch and return the mean of the per-batch losses.

    Each batch is moved to the module-level `device`, then the usual
    zero-grad / forward / backward / optimizer-step sequence is run.

    Args:
        model: network to train; switched to training mode.
        opt: optimizer stepped once per batch.
        criterion: callable mapping `(outputs, targets)` to a scalar loss.
        loader: iterable of `(inputs, targets)` batches that supports `len()`.

    Returns:
        Sum of the batch loss values divided by the number of batches.
    """
    model.train()
    batch_losses = []
    for inputs, labels in loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        opt.zero_grad()
        batch_loss = criterion(model(inputs), labels)
        batch_loss.backward()
        opt.step()
        batch_losses.append(batch_loss.item())
    return sum(batch_losses, 0.0) / len(loader)

In [None]:
# Evaluate on a loader
def evaluate(model, criterion, loader, tag):
    """Compute mean batch loss and accuracy on `loader`, then log and print them.

    Runs with the model in eval mode and gradients disabled. Batches are moved
    to the module-level `device`. Metrics are sent to wandb under the keys
    `"{tag}_loss"` and `"{tag}_acc"`.

    Args:
        model: network to evaluate.
        criterion: callable mapping `(outputs, targets)` to a scalar loss.
        loader: iterable of `(inputs, targets)` batches that supports `len()`.
        tag: prefix for the logged metric names, e.g. "val" or "test".

    Returns:
        Tuple `(avg_loss, acc)`: the mean of the per-batch losses and the
        accuracy in percent.

    Raises:
        ValueError: if `loader` yields no samples.
    """
    model.eval()
    total, correct, loss_sum = 0, 0, 0.0
    with torch.no_grad():
        for X, y in loader:
            X, y = X.to(device), y.to(device)
            out = model(X)
            loss = criterion(out, y)
            loss_sum += loss.item()
            pred = out.argmax(dim=1)  # predicted class = index of the largest logit
            correct += (pred == y).sum().item()
            total += y.size(0)
    if total == 0:
        # Give a clear error instead of a bare ZeroDivisionError below.
        raise ValueError(f"cannot evaluate '{tag}': the loader produced no samples")
    avg_loss = loss_sum / len(loader)
    acc = 100 * correct / total
    wandb.log({f"{tag}_loss": avg_loss, f"{tag}_acc": acc})
    print(f"{tag} — loss: {avg_loss:.4f}, acc: {acc:.2f}%")
    return avg_loss, acc

In [None]:
# Main loop over strategies: one wandb run per fine-tuning strategy
if __name__ == "__main__":
    # Hyper-parameters shared by every run.
    BATCH_SIZE = 64
    NUM_EPOCHS = 10
    LEARNING_RATE = 1e-4
    WEIGHT_DECAY = 0.005

    train_dl, val_dl, test_dl = get_dataloaders(
        "inaturalist_12K/train", "inaturalist_12K/val", batch_size=BATCH_SIZE
    )

    strategies = [
        ("freeze_all_except_last", None),
        ("freeze_first_k", 4),   # freeze up to just before layer1
        ("freeze_last_k", 2),    # freeze layer4 and avgpool
        ("train_from_scratch", None)
    ]

    for name, k in strategies:
        wandb.init(
            project="Assignment_02B",
            name=name,
            reinit=True,
            # Record the settings of this run alongside its metrics.
            config={
                "strategy": name,
                "k": k,
                "batch_size": BATCH_SIZE,
                "epochs": NUM_EPOCHS,
                "lr": LEARNING_RATE,
                "weight_decay": WEIGHT_DECAY,
            },
        )
        # try/finally closes the wandb run even if training raises, so a
        # failed strategy does not leave a run open.
        try:
            print(f"\n=== Strategy: {name}, k={k} ===")

            # Build and apply: only the from-scratch run skips pretrained weights.
            pretrained = name != "train_from_scratch"
            model = build_model(pretrained=pretrained).to(device)
            model = apply_strategy(model, name, k)
            model = nn.DataParallel(model)

            # Optimizer on trainable params only
            optimizer = optim.NAdam(
                filter(lambda p: p.requires_grad, model.parameters()),
                lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY
            )
            criterion = nn.CrossEntropyLoss()

            # Train & validate
            for epoch in range(1, NUM_EPOCHS + 1):
                train_loss = train_one_epoch(model, optimizer, criterion, train_dl)
                wandb.log({"epoch": epoch, "train_loss": train_loss})
                evaluate(model, criterion, val_dl, "val")

            # Final test evaluation
            evaluate(model, criterion, test_dl, "test")
        finally:
            wandb.finish()