In [2]:
# !pip install -q -r requirements.txt

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
from torchvision import datasets, transforms
import os
import wandb
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import StratifiedShuffleSplit
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import random


# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print("Device:", device)

Device: cpu


In [5]:
!wget https://storage.googleapis.com/wandb_datasets/nature_12K.zip
!unzip -q nature_12K.zip

--2025-05-05 15:36:04--  https://storage.googleapis.com/wandb_datasets/nature_12K.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 173.194.217.207, 108.177.12.207, 108.177.11.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|173.194.217.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3816687935 (3.6G) [application/zip]
Saving to: ‘nature_12K.zip’


2025-05-05 15:36:54 (73.1 MB/s) - ‘nature_12K.zip’ saved [3816687935/3816687935]



In [8]:
# Dataset locations: the root folder and the training split beneath it.
data_dir = 'inaturalist_12K'
train_dir = os.path.join(data_dir, 'train')
print("Data directory:", data_dir)

# Folder that receives saved model checkpoints; created up front if missing.
best_model_path = "best_models"
os.makedirs(best_model_path, exist_ok=True)

Data directory: inaturalist_12K


In [9]:


def _build_transforms(image_size, augment):
    """Return ``(train_transform, eval_transform)`` for square ``image_size`` inputs."""

    def eval_steps():
        # Deterministic pipeline: resize, convert to a tensor, then normalise.
        # The single-element mean/std is applied to every channel.
        return [
            transforms.Resize((image_size, image_size)),
            transforms.ToTensor(),
            transforms.Normalize([0.5], [0.5]),
        ]

    if augment:
        # Random augmentations operate on the resized image, before tensor conversion.
        train_steps = [
            transforms.Resize((image_size, image_size)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.RandomRotation(20),
            transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.02),
            transforms.RandomGrayscale(p=0.1),
            transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
            transforms.ToTensor(),
            transforms.Normalize([0.5], [0.5]),
        ]
    else:
        # Without augmentation the training pipeline is the evaluation pipeline.
        train_steps = eval_steps()

    return transforms.Compose(train_steps), transforms.Compose(eval_steps())


def get_data_loaders(config):
    """Build stratified train/validation loaders over the images in ``train_dir``.

    Args:
        config: Mapping-like object exposing ``.get``. Recognised keys
            (all optional, defaults reproduce the previous hard-coded values):
            ``batch_size`` (64), ``augment`` (False), ``image_size`` (224),
            ``val_split`` (0.2), ``num_workers`` (2), ``seed`` (42).

    Returns:
        ``(train_loader, val_loader)``. Each loader wraps a ``Subset`` of an
        ``ImageFolder``; the validation subset never uses random augmentation.

    Note:
        A model consuming these loaders must be built for the same
        ``image_size``.
    """
    batch_size = config.get("batch_size", 64)
    image_size = config.get("image_size", 224)
    val_split = config.get("val_split", 0.2)
    num_workers = config.get("num_workers", 2)
    seed = config.get("seed", 42)
    augment = config.get("augment", False)

    if augment:
        print("Applying full data augmentation")
    else:
        print("Minimal preprocessing, no augmentation")
    transform_train, transform_val = _build_transforms(image_size, augment)

    # Two views of the same folder so each split gets its own transform.
    full_dataset = datasets.ImageFolder(root=train_dir, transform=transform_train)
    eval_dataset = datasets.ImageFolder(root=train_dir, transform=transform_val)
    targets = np.array(full_dataset.targets)

    # Stratified split keeps class proportions equal in both subsets; the fixed
    # seed makes the split identical across calls (and therefore across sweep runs).
    splitter = StratifiedShuffleSplit(n_splits=1, test_size=val_split, random_state=seed)
    train_idx, val_idx = next(splitter.split(np.zeros(len(targets)), targets))

    train_dataset = Subset(full_dataset, train_idx)
    val_dataset = Subset(eval_dataset, val_idx)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    print(f"Total images: {len(full_dataset)}")
    print(f"Training set: {len(train_dataset)} images")
    print(f"Validation set: {len(val_dataset)} images")
    print(f"Number of classes: {len(full_dataset.classes)}")

    return train_loader, val_loader

In [10]:
class SmallCNN(nn.Module):
    """Stack of conv blocks followed by one hidden dense layer and a classifier.

    Each block is ``Conv2d -> [BatchNorm2d] -> activation -> MaxPool2d(2)``.
    The head is ``Flatten -> Dropout -> Linear -> activation -> Linear``.

    Args:
        num_filters: Output channels of each conv block, in order. May be empty,
            in which case the raw 3-channel image is flattened.
        activation_fn: One of ``'relu'``, ``'gelu'``, ``'silu'``, ``'mish'``.
        kernel_size: Convolution kernel size; padding is ``kernel_size // 2``.
        dense_neurons: Width of the hidden dense layer.
        num_classes: Number of output logits. The default (1010) is kept for
            backward compatibility; pass the real class count of the dataset.
        dropout_rate: Dropout probability applied to the flattened features.
        batch_norm: Insert ``BatchNorm2d`` after every convolution when True.
        input_size: Height/width of the (square) input images. Used to size the
            dense layer; the default 224 with five blocks gives the former 7x7.

    Raises:
        ValueError: If ``activation_fn`` is unknown or the feature map would
            shrink below 1x1 for the given ``input_size`` and depth.

    Note:
        The hidden dense layer is followed by the activation. Without it the two
        Linear layers reduce to a single affine map. Models pickled whole before
        this change lack the ``dense_activation`` attribute.
    """

    def __init__(self, num_filters, activation_fn, kernel_size, dense_neurons,
                 num_classes=1010, dropout_rate=0.2, batch_norm=False, input_size=224):
        super().__init__()

        activations = {
            'relu': nn.ReLU,
            'gelu': nn.GELU,
            'silu': nn.SiLU,
            'mish': nn.Mish,
        }
        if activation_fn not in activations:
            raise ValueError(
                f"Unknown activation: {activation_fn!r}; expected one of {sorted(activations)}"
            )
        make_activation = activations[activation_fn]

        layers = []
        in_channels = 3
        spatial = input_size
        padding = kernel_size // 2

        for nf in num_filters:
            layers.append(nn.Conv2d(in_channels, nf, kernel_size=kernel_size, padding=padding))
            if batch_norm:
                layers.append(nn.BatchNorm2d(nf))
            layers.append(make_activation())
            layers.append(nn.MaxPool2d(kernel_size=2))
            in_channels = nf
            # Stride-1 conv: out = in + 2*padding - kernel + 1; the 2x2 pool floor-halves it.
            spatial = (spatial + 2 * padding - kernel_size + 1) // 2
            if spatial < 1:
                raise ValueError(
                    f"input_size={input_size} is too small for {len(num_filters)} conv blocks"
                )

        self.feature_extractor = nn.Sequential(*layers)
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(dropout_rate)
        # Flattened size is derived from the actual depth and input size.
        self.dense = nn.Linear(in_channels * spatial * spatial, dense_neurons)
        self.dense_activation = make_activation()
        self.output = nn.Linear(dense_neurons, num_classes)

    def forward(self, x):
        """Map a batch of images to a batch of ``num_classes`` raw logits."""
        x = self.feature_extractor(x)
        x = self.flatten(x)
        x = self.dropout(x)
        x = self.dense_activation(self.dense(x))
        return self.output(x)


In [11]:
def train_one_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader``.

    Puts the model in training mode, performs one optimiser step per batch and
    returns ``(mean_loss, accuracy)``, both averaged per sample over the epoch.
    """
    model.train()
    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    for batch_images, batch_labels in tqdm(loader):
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by batch size so the epoch figure is per-sample.
        loss_sum += batch_loss.item() * batch_images.size(0)
        predictions = logits.max(1)[1]
        num_correct += predictions.eq(batch_labels).sum().item()
        num_seen += batch_labels.size(0)

    return loss_sum / num_seen, num_correct / num_seen

def evaluate(model, loader, criterion, device):
    """Score ``model`` on ``loader`` without updating it.

    Switches the model to eval mode, disables gradient tracking and returns
    ``(mean_loss, accuracy)``, both averaged per sample.
    """
    model.eval()
    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    with torch.no_grad():
        for batch_images, batch_labels in loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            batch_loss = criterion(logits, batch_labels)

            # Weight the batch loss by batch size so the result is per-sample.
            loss_sum += batch_loss.item() * batch_images.size(0)
            predictions = logits.max(1)[1]
            num_correct += predictions.eq(batch_labels).sum().item()
            num_seen += batch_labels.size(0)

    return loss_sum / num_seen, num_correct / num_seen


In [12]:
# Weights & Biases destination for every run and sweep in this notebook.
# Set WANDB_PROJECT / WANDB_ENTITY in the environment to log elsewhere without
# editing the notebook; when they are unset the original values are used.
PROJECT_NAME = os.environ.get("WANDB_PROJECT", "Assignment2DL")
ENTITY_NAME = os.environ.get("WANDB_ENTITY", "soumitrapy-iit-madras")

In [13]:
def _filters_for(filters_base, organization, num_layers=5):
    """Return the per-layer filter counts for a named filter organisation."""
    doubling = [filters_base * (2 ** i) for i in range(num_layers)]
    if organization == "same":
        return [filters_base] * num_layers
    if organization == "double":
        return doubling
    if organization == "half":
        # Same counts as "double" in reverse, so the widest layer comes first
        # and each later layer has half as many filters.
        return doubling[::-1]
    raise ValueError(f"Unknown filter organization: {organization}")


def sweep_train():
    """Train and validate one sweep trial, logging every epoch to W&B.

    Hyperparameters are read from ``wandb.config``. The model is checkpointed to
    ``<best_model_path>/partA.pth`` whenever validation accuracy improves within
    this trial, and that file is uploaded to the run.

    Raises:
        ValueError: If ``config.filter_organization`` is not recognised.
    """
    # The context manager closes the run even if training raises.
    with wandb.init(project=PROJECT_NAME, entity=ENTITY_NAME) as run:
        config = wandb.config
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        filters = _filters_for(config.filters_base, config.filter_organization)

        # Assigning the name is sufficient; the old `run.save()` call was a
        # deprecated no-op.
        run.name = (
            f"fbase_{config.filters_base}_"
            f"forg_{config.filter_organization}_"
            f"act_{config.activation}_"
            f"k_{config.kernel_size}_"
            f"drop_{config.dropout}_"
            f"bn_{config.batch_norm}_"
            f"lr_{config.lr:.5f}_"
            f"aug_{config.augment}_"
            f"ep_{config.epochs}_"
            f"bs_{config.batch_size}_"
            f"n_{config.dense_neurons}"
        )

        train_loader, val_loader = get_data_loaders(
            {
                "batch_size": config.batch_size,
                "augment": config.augment,
            }
        )

        # Size the classifier from the data instead of relying on the model's
        # default of 1010 outputs. The loader wraps Subset(ImageFolder).
        num_classes = len(train_loader.dataset.dataset.classes)

        model = SmallCNN(
            num_filters=filters,
            activation_fn=config.activation,
            kernel_size=config.kernel_size,
            dense_neurons=config.dense_neurons,
            num_classes=num_classes,
            dropout_rate=config.dropout,
            batch_norm=config.batch_norm,
        ).to(device)

        optimizer = optim.Adam(model.parameters(), lr=config.lr)
        criterion = nn.CrossEntropyLoss()
        best_val_acc = 0.0

        # NOTE(review): every trial writes this same file, so after a sweep it
        # holds the best epoch of the most recent trial, not of the whole sweep.
        checkpoint_path = os.path.join(best_model_path, "partA.pth")

        for epoch in range(config.epochs):
            train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion, device)
            val_loss, val_acc = evaluate(model, val_loader, criterion, device)

            wandb.log({
                "epoch": epoch + 1,
                "train_loss": train_loss,
                "train_accuracy": train_acc,
                "val_loss": val_loss,
                "val_accuracy": val_acc
            })
            print(f"Epoch {epoch + 1}/{config.epochs} - "
                  f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} - "
                  f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

            if val_acc > best_val_acc:
                best_val_acc = val_acc
                # The whole module is pickled (not just the state_dict), as before.
                torch.save(model, checkpoint_path)
                # Upload the file that was actually written. Absolute paths
                # keep the glob under `base_path`.
                wandb.save(
                    os.path.abspath(checkpoint_path),
                    base_path=os.path.abspath(best_model_path),
                )


In [15]:
# Search space explored by the sweep: discrete choices for everything except
# the learning rate, which is drawn uniformly from [0.0001, 0.01].
sweep_parameters = dict(
    filters_base=dict(values=[3, 8, 16, 32, 64]),
    filter_organization=dict(values=["same", "double", "half"]),
    activation=dict(values=["relu", "gelu", "silu", "mish"]),
    kernel_size=dict(values=[3, 5]),
    dense_neurons=dict(values=[256, 512]),
    batch_norm=dict(values=[True, False]),
    dropout=dict(values=[0.2, 0.3]),
    lr=dict(distribution="uniform", min=0.0001, max=0.01),
    batch_size=dict(values=[32, 64]),
    augment=dict(values=[True, False]),
    epochs=dict(values=[5, 10, 15]),
)

# Bayesian search that maximises the logged validation accuracy.
sweep_config = dict(
    method="bayes",
    metric=dict(name="val_accuracy", goal="maximize"),
    parameters=sweep_parameters,
)

# Register the sweep, run trials in this kernel, then close any open run.
sweep_id = wandb.sweep(sweep_config, project=PROJECT_NAME, entity=ENTITY_NAME)
wandb.agent(sweep_id, function=sweep_train, count=60)  # Adjust count as needed
wandb.finish()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: 8cuzzwge
Sweep URL: https://wandb.ai/soumitrapy-iit-madras/Assignment2DL/sweeps/8cuzzwge


[34m[1mwandb[0m: Agent Starting Run: h0vcseu6 with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	augment: True
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filters_base: 3
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	lr: 0.004151943554040556
[34m[1mwandb[0m: Currently logged in as: [33msoumitrapy[0m ([33msoumitrapy-iit-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin




Applying full data augmentation
Total images: 9999
Training set: 7999 images
Validation set: 2000 images
Number of classes: 10


 10%|█         | 13/125 [00:27<03:05,  1.66s/it][34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
