<a href="https://colab.research.google.com/github/ragingthunder511/da6401_assignment2/blob/main/cs24m020_dl_a1_partA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import wandb
import urllib.request
import zipfile
from pathlib import Path
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import cv2
import random
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import Subset, DataLoader
from typing import List, Tuple
from torch.cuda.amp import autocast, GradScaler


In [None]:
#WandB authentication
wandb.login(key="01bb56b62b8d93215a878ebdbc41b79e456d010c")
#Downloading the iNaturalist dataset
!wget https://storage.googleapis.com/wandb_datasets/nature_12K.zip -O nature_12K.zip
!unzip -q nature_12K.zip

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


--2025-04-19 17:04:42--  https://storage.googleapis.com/wandb_datasets/nature_12K.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 173.194.216.207, 172.217.203.207, 173.194.217.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|173.194.216.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3816687935 (3.6G) [application/zip]
Saving to: ‘nature_12K.zip’


2025-04-19 17:04:56 (262 MB/s) - ‘nature_12K.zip’ saved [3816687935/3816687935]



In [None]:
# Part 1: Extraction of iNaturalist data
class INatSplitLoader:
    """
    Loads the iNaturalist data, performs a manual per-class 80/20 split
    (without external libs), and exposes PyTorch DataLoaders.

    Typical use::

        loader = INatSplitLoader(train_root, test_root, seed=42)
        loader.build()
        train_loader, val_loader, test_loader = loader.get_loaders()

    Args:
        train_root: Directory in ImageFolder layout (one sub-directory per
            class) that is split into training and validation subsets.
        test_root: Directory in ImageFolder layout used unchanged as test set.
        img_size: Target (height, width) passed to ``transforms.Resize``.
        batch_size: Batch size used by all three loaders.
        num_workers: Worker processes used by all three loaders.
        seed: Optional seed for the train/validation shuffle. ``None``
            (default) keeps the previous behaviour of using the global
            ``random`` state, so the split differs from run to run; pass an
            integer to get the same split every time.
    """
    def __init__(
        self,
        train_root: str,
        test_root: str,
        img_size: tuple = (256, 256),
        batch_size: int = 32,
        num_workers: int = 2,
        seed: int = None,
    ):

        # Preprocessing on data: resize, convert to a tensor, then normalise
        # every channel with mean 0.5 and std 0.5.
        self.transform = transforms.Compose([
            transforms.Resize(img_size),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5),
                                 (0.5, 0.5, 0.5))
        ])

        self.batch_size = batch_size
        self.train_root = train_root
        self.num_workers = num_workers
        self.test_root = test_root
        self.seed = seed

        # Filled in by build(); None means "not built yet".
        self.train_ds = None
        self.val_ds = None
        self.test_ds = None

    @staticmethod
    def _split_indices(labels, train_frac=0.8, rng=None):
        """
        Split sample positions into train/validation lists, class by class.

        Args:
            labels: Sequence with the class label of every sample; position i
                is the label of sample i.
            train_frac: Fraction of each class assigned to the training list
                (the count is truncated with ``int``).
            rng: Object with a ``shuffle(list)`` method, e.g. a
                ``random.Random`` instance. ``None`` uses the global
                ``random`` module.

        Returns:
            ``(train_idxs, val_idxs)``: two lists that together contain every
            position exactly once.
        """
        shuffler = random if rng is None else rng

        # Group sample positions by class, in first-seen class order.
        class_to_idxs = {}
        for idx, lbl in enumerate(labels):
            class_to_idxs.setdefault(lbl, []).append(idx)

        train_idxs, val_idxs = [], []
        for idxs in class_to_idxs.values():
            shuffler.shuffle(idxs)
            split = int(len(idxs) * train_frac)
            train_idxs += idxs[:split]
            val_idxs += idxs[split:]
        return train_idxs, val_idxs

    def build(self):
        """
        Read both image folders and create ``train_ds``, ``val_ds`` and
        ``test_ds``.

        The training folder is split 80/20 per class; the test folder is
        loaded as-is. All three datasets share the same transform.
        """
        # 1. Load the complete training folder
        full_ds = datasets.ImageFolder(self.train_root, transform=self.transform)

        # 2./3. Group indices by class label, shuffle and split 80/20.
        # A private Random instance is used when a seed was given so the split
        # is reproducible and does not disturb the global random state.
        labels = [lbl for _, lbl in full_ds.samples]
        rng = random.Random(self.seed) if self.seed is not None else None
        train_idxs, val_idxs = self._split_indices(labels, 0.8, rng)

        # 4. Load test set untouched
        self.test_ds = datasets.ImageFolder(self.test_root, transform=self.transform)

        # 5. Build Subsets
        self.val_ds = Subset(full_ds, val_idxs)
        self.train_ds = Subset(full_ds, train_idxs)

    def _make_loader(self, dataset, shuffle):
        """Create a DataLoader with this object's batch size and worker count."""
        return DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=shuffle,
            num_workers=self.num_workers,
            pin_memory=True
        )

    def get_loaders(self):
        """
        Returns (train_loader, val_loader, test_loader).

        Only the training loader shuffles. If build() has not been called yet
        it is called here, so this method no longer fails with an
        AttributeError on a freshly constructed object.
        """
        if self.train_ds is None or self.val_ds is None or self.test_ds is None:
            self.build()

        test_loader = self._make_loader(self.test_ds, shuffle=False)
        val_loader = self._make_loader(self.val_ds, shuffle=False)
        train_loader = self._make_loader(self.train_ds, shuffle=True)

        return train_loader, val_loader, test_loader

In [None]:
# Part2: CNN class module for INaturalist dataset - includes function for training

class INatCustomCNN(nn.Module):
    """
    A configurable CNN tailored for classifying iNaturalist images.

    The network is a stack of Conv2d -> (BatchNorm2d) -> activation ->
    MaxPool2d(2) blocks followed by Flatten -> Linear -> activation ->
    Dropout -> Linear. The model owns its loss function (cross entropy) and
    its optimizer, and includes its own training routine with
    mixed-precision support.

    Args:
        input_shape: (channels, height, width) of one input image.
        conv_channels: Output channels of each convolution block.
        kernel_sizes: Kernel size of each convolution block; paired with
            ``conv_channels`` position by position.
        fc_units: Width of the hidden fully connected layer.
        act_fn: Activation module. The same instance is reused in every
            convolution block and in the classifier.
        use_batchnorm: Insert BatchNorm2d after every convolution when True.
        drop_p: Dropout probability in the classifier.
        opt_type: One of 'adam', 'sgd', 'rmsprop', 'nadam'.
        learning_rate: Optimizer learning rate.
        l2_reg: Value passed to the optimizer as ``weight_decay``.
        num_classes: Size of the output layer (default 10).
    """
    def __init__(
        self,
        input_shape: Tuple[int, int, int],
        conv_channels: List[int],
        kernel_sizes: List[int],
        fc_units: int,
        act_fn: nn.Module,
        use_batchnorm: bool,
        drop_p: float,
        opt_type: str,
        learning_rate: float,
        l2_reg: float,
        num_classes: int = 10
    ):
        super().__init__()
        self.loss_fn = nn.CrossEntropyLoss()
        self.feature_extractor = self._build_conv_layers(input_shape[0], conv_channels, kernel_sizes, use_batchnorm, act_fn)
        self.flatten_dim = self._infer_flat_dim(input_shape)

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(self.flatten_dim, fc_units),
            act_fn,
            nn.Dropout(drop_p),
            nn.Linear(fc_units, num_classes)  # Output layer, one logit per class
        )

        # The optimizer must be created only after ALL sub-modules exist:
        # self.parameters() is evaluated here, and anything registered later
        # (previously the whole classifier) would never be updated.
        self.optimizer = self._configure_optimizer(opt_type, learning_rate, l2_reg)

    def _infer_flat_dim(self, input_shape):
        """
        Return the number of features the convolution stack produces for one
        image of ``input_shape``, found by pushing a zero tensor through it.
        """
        # Probe in eval mode so BatchNorm running statistics are not updated
        # by the dummy input; the previous mode is restored afterwards.
        was_training = self.feature_extractor.training
        self.feature_extractor.eval()
        try:
            with torch.no_grad():
                dummy_input = torch.zeros(1, *input_shape)
                out = self.feature_extractor(dummy_input)
        finally:
            self.feature_extractor.train(was_training)
        return out.view(out.size(0), -1).size(1)

    def _configure_optimizer(self, opt_type, lr, weight_decay):
        """
        Build the optimizer named by ``opt_type`` over ``self.parameters()``.

        Raises:
            KeyError: if ``opt_type`` is not a supported optimizer name.
        """
        optimizers = {
            'adam': lambda p: torch.optim.Adam(p, lr=lr, weight_decay=weight_decay),
            'sgd': lambda p: torch.optim.SGD(p, lr=lr, weight_decay=weight_decay, momentum=0.9),
            'rmsprop': lambda p: torch.optim.RMSprop(p, lr=lr, weight_decay=weight_decay),
            'nadam': lambda p: torch.optim.NAdam(p, lr=lr, weight_decay=weight_decay),
        }
        if opt_type not in optimizers:
            raise KeyError(
                f"Unknown optimizer {opt_type!r}; expected one of {sorted(optimizers)}"
            )
        return optimizers[opt_type](self.parameters())

    def _build_conv_layers(self, in_channels, channel_list, kernel_list, use_bn, act_fn):
        """
        Build the convolution stack: for every (channels, kernel) pair add
        Conv2d, optional BatchNorm2d, the activation and a 2x2 max-pool.
        """
        layers = []
        for out_channels, k_size in zip(channel_list, kernel_list):
            layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=k_size))
            if use_bn:
                layers.append(nn.BatchNorm2d(out_channels))
            layers.append(act_fn)
            layers.append(nn.MaxPool2d(kernel_size=2))
            in_channels = out_channels
        return nn.Sequential(*layers)

    @staticmethod
    def _make_grad_scaler(enabled):
        """Create a gradient scaler, preferring the non-deprecated torch.amp API."""
        amp_scaler = getattr(getattr(torch, 'amp', None), 'GradScaler', None)
        if amp_scaler is not None:
            return amp_scaler('cuda', enabled=enabled)
        # Older PyTorch releases only ship the CUDA-specific class.
        return torch.cuda.amp.GradScaler(enabled=enabled)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the convolution stack and the classifier; returns class logits."""
        features = self.feature_extractor(x)
        return self.classifier(features)

    def training_model(self, train_loader, val_loader, epochs, device, log_fn=lambda m: None):
        """
        Runs the training loop with validation per epoch. Supports AMP (automatic mixed precision).

        Args:
            train_loader: Iterable of (images, targets) batches with ``len()``.
            val_loader: Iterable of (images, targets) batches with ``len()``.
            epochs: Number of passes over ``train_loader``.
            device: ``torch.device`` or device string the model and the
                batches are moved to. Mixed precision is used only on CUDA.
            log_fn: Called once per epoch with a dict holding
                'validation_loss', 'validation_accuracy', 'epoch',
                'train_accuracy' and 'train_loss'.
        """
        device = torch.device(device)
        self.to(device)

        # Mixed precision (and therefore gradient scaling) only on CUDA.
        use_amp = device.type == 'cuda'
        scaler = self._make_grad_scaler(use_amp)

        for ep in range(1, epochs + 1):
            self.train()
            epoch_loss, correct_preds, total_seen = 0, 0, 0

            for imgs, targets in train_loader:
                imgs, targets = imgs.to(device), targets.to(device)
                self.optimizer.zero_grad()

                with torch.autocast(device_type='cuda', enabled=use_amp):
                    outputs = self(imgs)
                    loss = self.loss_fn(outputs, targets)

                # With scaling disabled these calls reduce to a plain
                # backward() / optimizer.step().
                scaler.scale(loss).backward()
                scaler.step(self.optimizer)
                scaler.update()

                epoch_loss += loss.item()
                correct_preds += (outputs.argmax(1) == targets).sum().item()
                total_seen += targets.size(0)

            # max(..., 1) avoids a ZeroDivisionError on an empty loader.
            train_acc = 100 * correct_preds / max(total_seen, 1)
            train_loss = epoch_loss / max(len(train_loader), 1)

            # Validation phase
            self.eval()
            val_loss, val_correct, val_total = 0, 0, 0
            with torch.no_grad():
                for imgs, targets in val_loader:
                    imgs, targets = imgs.to(device), targets.to(device)
                    preds = self(imgs)
                    loss = self.loss_fn(preds, targets)

                    val_loss += loss.item()
                    val_correct += (preds.argmax(1) == targets).sum().item()
                    val_total += targets.size(0)

            val_acc = 100 * val_correct / max(val_total, 1)
            avg_val_loss = val_loss / max(len(val_loader), 1)

            print(f"Epoch {ep}/{epochs} | "
                  f"training Loss: {train_loss:.4f}, training accuracy: {train_acc:.2f}% | "
                  f"validation Loss: {avg_val_loss:.4f}, validation accuracy Acc: {val_acc:.2f}%")

            log_fn({
                'validation_loss': avg_val_loss,
                'validation_accuracy': val_acc,
                'epoch': ep,
                'train_accuracy': train_acc,
                'train_loss': train_loss,
            })

        if device.type == 'cuda':
            torch.cuda.empty_cache()


In [None]:
# Part 3: W&B Hyperparameter Sweep Setup

def configure_sweep():
    """
    Build the W&B sweep definition for the CNN hyperparameter search.

    Returns:
        dict: A Bayesian sweep named 'dl_a2_part1' that maximises
        'validation_accuracy'. Every hyperparameter is a discrete choice,
        stored as ``{'values': [...]}`` under 'parameters'.
    """
    # Candidate values per hyperparameter; wrapped in {'values': ...} below.
    search_space = {
        # Booleans are encoded as the strings 'true' / 'false'.
        'batch_norm': ['true', 'false'],
        'batch_size': [64, 32],
        # One kernel size per convolution block (five blocks).
        'filter_sizes': [[3]*5, [3,5,3,5,3], [5]*5, [5,3,5,3,5]],
        # One channel count per convolution block (five blocks).
        'num_filters': [
            [32, 64, 128, 256, 512],
            [512, 256, 128, 64, 32],
            [32]*5,
            [32, 64, 64, 128, 128],
            [128, 128, 64, 64, 32],
        ],
        'learning_rate': [1e-3, 1e-4],
        # NOTE(review): no code visible in this notebook reads 'data_aug' -
        # confirm whether augmentation is meant to be wired in.
        'data_aug': ['true', 'false'],
        # NOTE(review): 0.5 is an unusually large weight decay - confirm it is
        # intended and not a typo for a smaller value.
        'weight_decay': [0.0, 0.0005, 0.5],
        'dropout': [0.0, 0.2, 0.4],
        'activation': ['relu', 'elu', 'silu'],
        'optimiser': ['nadam', 'adam', 'rmsprop'],
        'dense_layer': [128, 256, 512],
    }

    objective = {'goal': 'maximize', 'name': 'validation_accuracy'}

    return {
        'method': 'bayes',
        'name': 'dl_a2_part1',
        'metric': objective,
        'parameters': {
            param: {'values': choices} for param, choices in search_space.items()
        },
    }


def run_sweep_experiment(config=None):
    """
    Train one model for a single W&B sweep trial.

    Starts a W&B run, names it after the main hyperparameters, builds the
    data loaders and the CNN from ``wandb.config``, and trains for 10 epochs
    while logging the per-epoch metrics to W&B.

    Args:
        config: Optional configuration forwarded to ``wandb.init``; when the
            function is launched by a sweep agent the values come from the
            sweep.

    Raises:
        KeyError: if the configured activation name is not supported.
    """
    with wandb.init(config=config):
        cfg = wandb.config
        run_name = f"{cfg.batch_size}_{cfg.activation}_{cfg.optimiser}_{cfg.num_filters}"

        # Assigning the name is enough; the argument-less wandb.run.save()
        # call that followed is a deprecated no-op in current wandb releases.
        wandb.run.name = run_name

        # Map config names to activation classes and instantiate only the one
        # that is selected. 'silu' now maps to nn.SiLU; it used to create
        # nn.SELU, so runs labelled "silu" trained with a different function.
        activation_fn_map = {
            'relu': nn.ReLU,
            'elu': nn.ELU,
            'silu': nn.SiLU
        }

        act_fn = activation_fn_map[cfg.activation]()

        # NOTE(review): cfg.data_aug is part of the sweep but is not used
        # here, so it has no effect on the run - confirm whether augmentation
        # should be wired in.
        loader = INatSplitLoader(
            train_root='inaturalist_12K/train',
            test_root='inaturalist_12K/val',
            img_size=(256, 256),
            batch_size=cfg.batch_size
        )
        loader.build()
        # The test loader is not needed during the sweep.
        train_loader, val_loader, _ = loader.get_loaders()

        model = INatCustomCNN(
            input_shape=(3, 256, 256),
            conv_channels=cfg.num_filters,
            kernel_sizes=cfg.filter_sizes,
            fc_units=cfg.dense_layer,
            act_fn=act_fn,
            # batch_norm arrives as the string 'true' / 'false'.
            use_batchnorm=(cfg.batch_norm.lower() == 'true'),
            drop_p=cfg.dropout,
            opt_type=cfg.optimiser,
            learning_rate=cfg.learning_rate,
            l2_reg=cfg.weight_decay
        )

        model.training_model(
            train_loader=train_loader,
            val_loader=val_loader,
            epochs=10,
            device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
            log_fn=wandb.log
        )


# Trigger sweep: register the sweep definition with W&B under the project,
# then let a local agent execute exactly one trial of it.
sweep_identifier = wandb.sweep(
    configure_sweep(),
    project='cs24m020_dl_a2_sweep1',
)
wandb.agent(
    sweep_identifier,
    function=run_sweep_experiment,
    count=1,
)


Create sweep with ID: 2wdeix3t
Sweep URL: https://wandb.ai/karekargrishma1234-iit-madras-/cs24m020_dl_a2_sweep1/sweeps/2wdeix3t


[34m[1mwandb[0m: Agent Starting Run: ii9k74kb with config:
[34m[1mwandb[0m: 	activation: silu
[34m[1mwandb[0m: 	batch_norm: false
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_aug: true
[34m[1mwandb[0m: 	dense_layer: 512
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	filter_sizes: [5, 3, 5, 3, 5]
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_filters: [32, 64, 64, 128, 128]
[34m[1mwandb[0m: 	optimiser: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5


  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():


Epoch 1/10 | training Loss: 2.6488, training accuracy: 10.84% | validation Loss: 2.3027, validation accuracy Acc: 10.00%
Epoch 2/10 | training Loss: 2.3030, training accuracy: 10.03% | validation Loss: 2.3028, validation accuracy Acc: 10.00%
Epoch 3/10 | training Loss: 2.3030, training accuracy: 9.58% | validation Loss: 2.3027, validation accuracy Acc: 10.00%
Epoch 4/10 | training Loss: 2.3031, training accuracy: 9.80% | validation Loss: 2.3028, validation accuracy Acc: 10.00%
Epoch 5/10 | training Loss: 2.3029, training accuracy: 9.76% | validation Loss: 2.3027, validation accuracy Acc: 10.00%
Epoch 6/10 | training Loss: 2.3030, training accuracy: 10.30% | validation Loss: 2.3027, validation accuracy Acc: 10.00%
Epoch 7/10 | training Loss: 2.3030, training accuracy: 9.90% | validation Loss: 2.3027, validation accuracy Acc: 10.00%
Epoch 8/10 | training Loss: 2.3030, training accuracy: 10.46% | validation Loss: 2.3026, validation accuracy Acc: 10.00%
Epoch 9/10 | training Loss: 2.3030, 

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,█▄▂▃▃▅▃▆▁▂
train_loss,█▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▁▁▁▁▁▁▁▁▁
validation_loss,▅▆▅█▅▄▄▁▅▆

0,1
epoch,10.0
train_accuracy,9.6012
train_loss,2.30289
validation_accuracy,10.0
validation_loss,2.30274
