In [None]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
import numpy as np
from sklearn.utils import shuffle # for shuffling
import os
import cv2
import random
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

from sklearn.model_selection import train_test_split

# Reproducibility: seed every random number generator this notebook can draw from.
SEED = 42
random.seed(SEED)
# NumPy's global generator is the default source of randomness for the sklearn
# helpers imported above when no random_state is passed, so seed it as well.
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Single device object shared by the model and the train/eval loops.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Class labels in alphabetical order; presumably matching the index order that
# ImageFolder assigns to the dataset's class folders — TODO confirm.
CLASS_NAMES = ["Amphibia", "Animalia", "Arachnida", "Aves", 
               "Fungi", "Insecta", "Mammalia", "Mollusca", 
               "Plantae", "Reptilia"]

In [None]:
# !pip install wandb
import wandb
# Authenticate without embedding the API key in the notebook: with no explicit
# key, wandb.login() uses the WANDB_API_KEY environment variable or a stored
# login, and otherwise prompts interactively.
# SECURITY: an API key used to be hard-coded in this cell; treat that key as
# leaked and revoke it in the W&B account settings.
wandb.login()

In [3]:
import gc

In [None]:
# Download the nature_12K archive into the working directory and unpack it quietly (-q).
# NOTE(review): later cells use both "nature_12K" and 'inaturalist_12K' as the data root —
# confirm which directory this archive actually extracts to.
!wget https://storage.googleapis.com/wandb_datasets/nature_12K.zip -O nature_12K.zip
!unzip -q nature_12K.zip

In [5]:
# Delete the downloaded archive; the previous cell has already unpacked it.
!rm nature_12K.zip

In [6]:
# Default floating-point dtype (torch.float32 is the same object as torch.float)
# and the device name as a plain string.
dtype = torch.float32
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
# Show which device string ("cuda" or "cpu") was selected above.
print(device)

In [None]:
# Square 224-pixel resize target.
# NOTE(review): neither name is referenced by any other cell visible in this notebook.
resize_width = resize_height = 224

In [None]:
# Cell 2: data transforms and dataloader factory
def make_transforms(img_size: int, augment: bool):
    """Build the preprocessing pipeline for one dataset split.

    The pipeline always resizes to ``img_size`` x ``img_size``, converts to a
    tensor and finishes with a per-channel normalisation using mean 0.5 and
    std 0.5. When ``augment`` is true, a random horizontal flip, colour jitter,
    a rotation of up to 10 degrees and a random resized crop are inserted
    between the tensor conversion and the normalisation.

    Args:
        img_size: Side length, in pixels, of the square output image.
        augment: Whether to include the random augmentation steps.

    Returns:
        A ``transforms.Compose`` holding the steps in application order.
    """
    steps = [
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
    ]
    if augment:
        # Augmentations operate on the tensor, before it is normalised.
        steps.extend([
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(0.2, 0.2, 0.2, 0.1),
            transforms.RandomRotation(10),
            transforms.RandomResizedCrop(img_size),
        ])
    steps.append(transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)))
    return transforms.Compose(steps)

def get_dataloaders(data_dir: str, batch_size: int, img_size: int, augment: bool,
                    num_workers: int = 4):
    """Create the training and validation loaders for an ImageFolder dataset.

    Images are read from ``<data_dir>/train`` and ``<data_dir>/val``. Only the
    training split can receive augmentation; the validation split always uses
    the plain resize/normalise pipeline.

    Args:
        data_dir: Directory containing the ``train`` and ``val`` sub-folders.
        batch_size: Number of samples per batch for both loaders.
        img_size: Side length passed to ``make_transforms``.
        augment: Whether the training pipeline includes random augmentation.
        num_workers: Worker processes per loader. Defaults to 4, the value
            that used to be hard-coded; pass 0 to load in the main process.

    Returns:
        ``(train_loader, val_loader)``; the training loader shuffles, the
        validation loader does not.
    """
    train_dir = os.path.join(data_dir, "train")
    val_dir   = os.path.join(data_dir, "val")

    train_ds = datasets.ImageFolder(train_dir, make_transforms(img_size, augment))
    val_ds   = datasets.ImageFolder(val_dir,   make_transforms(img_size, False))

    # Pinned host memory only helps host-to-GPU copies; requesting it on a
    # CPU-only machine just produces a warning.
    loader_kwargs = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=torch.cuda.is_available(),
    )
    train_loader = DataLoader(train_ds, shuffle=True,  **loader_kwargs)
    val_loader   = DataLoader(val_ds,   shuffle=False, **loader_kwargs)

    return train_loader, val_loader

In [None]:
# Cell 3: SimpleCNN definition
class SimpleCNN(nn.Module):
    """Configurable stack of conv blocks followed by a two-layer classifier.

    Each entry ``(out_channels, kernel_size)`` of ``conv_specs`` adds
    ``Conv2d -> ReLU -> MaxPool2d(2)`` and, when ``use_bn`` is true, a
    ``BatchNorm2d`` after the pooling. The classifier flattens the feature map
    and applies ``Linear -> ReLU -> Dropout -> Linear``.

    Args:
        in_channels: Number of channels of the input images.
        conv_specs: Iterable of ``(out_channels, kernel_size)`` pairs.
        dense_units: Width of the hidden fully connected layer.
        num_classes: Number of output logits.
        dropout: Dropout probability used in the classifier.
        use_bn: Whether to append batch normalisation to every conv block.
        img_size: Side length of the square inputs, used to size the first
            linear layer. If omitted, the module-level variable ``img_size``
            is used, which is how this class obtained it before the parameter
            existed.

    Raises:
        ValueError: If ``img_size`` is neither passed nor defined at module level.
    """

    def __init__(self, in_channels, conv_specs, dense_units, num_classes, dropout, use_bn,
                 img_size=None):
        super().__init__()
        if img_size is None:
            # Backward-compatible fallback to the notebook-level variable.
            img_size = globals().get("img_size")
            if img_size is None:
                raise ValueError(
                    "img_size must be passed to SimpleCNN or defined as a module-level variable"
                )

        layers = []
        C = in_channels
        for out_c, k in conv_specs:
            layers += [
                nn.Conv2d(C, out_c, kernel_size=k),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ]
            if use_bn:
                layers.append(nn.BatchNorm2d(out_c))
            C = out_c
        self.conv = nn.Sequential(*layers)

        # Infer the flattened feature size with a dummy forward pass. The probe
        # runs in eval mode so BatchNorm neither updates its running statistics
        # with the all-zero input nor rejects the single-sample batch.
        self.conv.eval()
        with torch.no_grad():
            dummy = torch.zeros(1, in_channels, img_size, img_size)
            feat_dim = self.conv(dummy).view(1, -1).shape[1]
        self.conv.train()  # restore the default mode of a freshly built module

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(feat_dim, dense_units),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(dense_units, num_classes)
        )

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        return self.classifier(self.conv(x))


In [None]:
# Cell 4: train/validate functions
def evaluate(model, loader, criterion):
    """Compute the mean batch loss and the accuracy of ``model`` over ``loader``.

    The model is switched to eval mode and gradients are disabled. Batches are
    moved to the device that holds the model's parameters, falling back to the
    global ``DEVICE`` for a model without parameters. Batches are counted while
    iterating, so ``loader`` can be any iterable of ``(inputs, targets)`` pairs
    and does not need to support ``len()``.

    Args:
        model: Module mapping a batch of inputs to class logits.
        loader: Iterable yielding ``(X, y)`` tensor pairs.
        criterion: Callable ``criterion(logits, y)`` returning a scalar loss tensor.

    Returns:
        ``(loss, accuracy)`` where ``loss`` is the average of the per-batch
        criterion values and ``accuracy`` is a percentage in ``[0, 100]``.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else DEVICE

    total_loss, correct, count, batches = 0.0, 0, 0, 0
    with torch.no_grad():
        for X, y in loader:
            X, y = X.to(device), y.to(device)
            logits = model(X)
            total_loss += criterion(logits, y).item()
            preds = logits.argmax(1)
            correct += (preds == y).sum().item()
            count += y.size(0)
            batches += 1

    if batches == 0 or count == 0:
        # The averages below would otherwise fail with a bare ZeroDivisionError.
        raise ValueError("evaluate() received a loader that yielded no samples")
    return total_loss / batches, 100 * correct / count

def train_loop(model, train_loader, val_loader, epochs, lr, wd):
    """Train ``model`` with Adam and cross-entropy, reporting metrics every epoch.

    After each epoch the model is evaluated on both loaders with ``evaluate``
    (a full extra pass over ``train_loader`` in eval mode). The metrics are
    printed, appended to the returned history and, if a Weights & Biases run
    is currently active, logged to that run.

    Args:
        model: Module to optimise; expected to already be on ``DEVICE``.
        train_loader: Loader yielding ``(X, y)`` training batches.
        val_loader: Loader yielding ``(X, y)`` validation batches.
        epochs: Number of passes over ``train_loader``.
        lr: Adam learning rate.
        wd: Adam weight decay.

    Returns:
        A list with one dict per epoch containing ``epoch``, ``train_loss``,
        ``train_acc``, ``val_loss`` and ``val_acc``.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
    criterion = nn.CrossEntropyLoss()
    history = []

    for epoch in range(1, epochs+1):
        model.train()
        loop = tqdm(train_loader, desc=f"Epoch {epoch}/{epochs}")
        for X, y in loop:
            X, y = X.to(DEVICE), y.to(DEVICE)
            optimizer.zero_grad()
            loss = criterion(model(X), y)
            loss.backward()
            optimizer.step()

        train_loss, train_acc = evaluate(model, train_loader, criterion)
        val_loss,   val_acc   = evaluate(model, val_loader,   criterion)
        print(f"Train: {train_loss:.4f}, {train_acc:.1f}% | Val: {val_loss:.4f}, {val_acc:.1f}%")

        metrics = {
            "epoch": epoch,
            "train_loss": train_loss, "train_acc": train_acc,
            "val_loss": val_loss,     "val_acc": val_acc,
        }
        history.append(metrics)
        # Log only when a run is open so the function still works without W&B.
        if wandb.run is not None:
            wandb.log(metrics)

    return history

In [None]:
# Cell 5: hyperparams, wandb, and launch
# Hyperparameters stay at module level; SimpleCNN reads `img_size` from here.
img_size, batch_size, epochs = 128, 32, 10
lr = 1e-3
wd = 1e-4
augment = True

# Open the W&B run and record the hyperparameters in its config.
wandb.init(
    project="Deep_Learning_Assignment_2",
    entity="cs24m023-indian-institute-of-technology-madras",
    config=dict(img_size=img_size, batch=batch_size, epochs=epochs,
                lr=lr, wd=wd, aug=augment),
)

# Three conv blocks (32, 32, 64 filters, 3x3 kernels) with batch norm and a 128-unit dense layer.
model = SimpleCNN(in_channels=3, conv_specs=[(32, 3), (32, 3), (64, 3)],
                  dense_units=128, num_classes=len(CLASS_NAMES),
                  dropout=0.2, use_bn=True)
model = model.to(DEVICE)

# NOTE(review): the data root here is "nature_12K" while a later cell reads
# 'inaturalist_12K' — confirm which directory the archive extracts to.
train_loader, val_loader = get_dataloaders("nature_12K", batch_size, img_size, augment)
train_loop(model, train_loader, val_loader, epochs, lr, wd)


In [None]:
# Calls train_model with an explicit hyperparameter set and rebinds `model` to its
# return value, replacing the SimpleCNN instance created in the previous cell.
# NOTE(review): `train_model` is not defined in any cell visible in this notebook; on a
# fresh kernel this raises NameError unless it is defined or imported elsewhere — confirm.
model = train_model(learning_rate = 0.0001, num_filters = [128,128,64,64,32], filter_sizes=[3,5,3,5,3], 
                    activation_fn = "elu", optimiser_fn ="rmsprop", num_neurons_dense = 512, 
                    weight_decay = 0.0004, dropout = 0.4, useBatchNorm = True, batchSize = 32, 
                    num_epochs = 10)

In [None]:
# Builds train/val/test loaders and passes `model` to find_accuracy for the "val" and
# "test" loaders; presumably this reports loss/accuracy per split — TODO confirm.
# NOTE(review): `load_data` and `find_accuracy` are not defined in any cell visible in this
# notebook, and the data root here is 'inaturalist_12K' whereas the training cell above
# passes "nature_12K" — confirm both before relying on this cell.
trainDataLoader, valDataLoader, testDataLoader = load_data(train_dir = 'inaturalist_12K/train', test_dir = 'inaturalist_12K/val', batchSize = 16)
    
criterion = nn.CrossEntropyLoss()
find_accuracy(model, criterion, valDataLoader, "val")
find_accuracy(model, criterion, testDataLoader, "test")

In [None]:
import torch
import matplotlib.pyplot as plt
import wandb

# Initialize Weights & Biases tracking
# NOTE(review): the config labels the architecture "ResNet50", but `model` is whatever an
# earlier cell left bound to that name — confirm the label matches.
wandb_session = wandb.init(
    project="Deep_Learning_Assignment_2",
    config={"architecture": "ResNet50", "dataset": "iNaturalist10"}
)

# Set model to evaluation mode and detect device
model_device = next(model.parameters()).device
model.eval()

# Configure sample collection parameters
num_classes = 10
max_examples = 3
class_examples = {cls_id: [] for cls_id in range(num_classes)}

# Collect up to `max_examples` (image, predicted class) pairs per true class
with torch.no_grad():
    for batch_images, batch_labels in valDataLoader:
        # Stop before paying for a transfer or forward pass once every class is full.
        if all(len(examples) >= max_examples for examples in class_examples.values()):
            break

        # One forward pass per batch instead of one per image.
        batch_predictions = model(batch_images.to(model_device)).argmax(dim=1).cpu()

        for single_image, true_label, predicted in zip(batch_images, batch_labels, batch_predictions):
            class_id = true_label.item()
            if len(class_examples[class_id]) < max_examples:
                # Keep a CPU copy of the image for plotting, with the predicted class.
                class_examples[class_id].append((
                    single_image.cpu().clone(),
                    predicted.item()
                ))

# Image normalization reversal function
def restore_original_image(normalized_img, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Undo a per-channel ``(x - mean) / std`` normalisation for display.

    Args:
        normalized_img: Tensor laid out as ``(channels, height, width)``.
        mean: Per-channel mean used when the image was normalised.
        std: Per-channel standard deviation used when the image was normalised.
            The defaults are the values that were previously hard-coded. Images
            produced by ``make_transforms`` in this notebook are normalised with
            0.5 / 0.5, so pass ``mean=(0.5,)*3, std=(0.5,)*3`` for those.

    Returns:
        The de-normalised image, clamped to ``[0, 1]`` so that rounding or
        mismatched statistics cannot push it outside the range a float image
        is expected to have when plotted.
    """
    normalization_mean = torch.tensor(mean, device=normalized_img.device).reshape(-1, 1, 1)
    normalization_std = torch.tensor(std, device=normalized_img.device).reshape(-1, 1, 1)
    restored = normalized_img * normalization_std + normalization_mean
    return restored.clamp(0.0, 1.0)

# Create visualization grid: one row per class, one column per collected example.
# squeeze=False keeps axis_grid two-dimensional even if either count is 1.
figure, axis_grid = plt.subplots(num_classes, max_examples, figsize=(15, 35), squeeze=False)
for class_idx in range(num_classes):
    examples = class_examples[class_idx]
    for example_idx in range(max_examples):
        current_axis = axis_grid[class_idx, example_idx]
        current_axis.axis('off')
        if example_idx >= len(examples):
            # Fewer than max_examples were found for this class; leave the cell
            # blank instead of failing with IndexError.
            continue

        img_tensor, pred_class = examples[example_idx]
        # Clamp so the float image stays in the [0, 1] range expected by imshow.
        original_image = restore_original_image(img_tensor).clamp(0, 1)

        current_axis.imshow(original_image.permute(1, 2, 0).numpy())
        # Class names come from CLASS_NAMES, the label list defined in the first cell.
        current_axis.set_title(
            f"Actual: {CLASS_NAMES[class_idx]}\nPredicted: {CLASS_NAMES[pred_class]}",
            fontsize=9
        )

figure.subplots_adjust(wspace=0.4, hspace=0.6)
visualization_path = "/kaggle/working/class_predictions_grid.png"
figure.savefig(visualization_path, bbox_inches='tight', pad_inches=0.2, dpi=250)
plt.close(figure)

# Log results to W&B
wandb.log({"validation_predictions": wandb.Image(visualization_path)})
wandb.finish()

In [None]:
def main():
    """Run one sweep trial: open a W&B run, name it, and train with its config.

    The hyperparameters are read from ``wandb.config``. The run name is built
    from the optimiser, activation, filter counts and batch size.

    NOTE(review): ``train_model`` is not defined in any cell visible in this
    notebook — confirm it is available before launching the sweep.
    """
    wandb.init(project="Deep_Learning_Assignment_2")
    config = wandb.config
    run_name = f"{config.optimiser}_{config.activation}_{config.num_filters}_{config.batch_size}"

    # Set the run name
    wandb.run.name = run_name
    wandb.run.save()

    # The sweep supplies batch_norm as the strings 'true' / 'false'; convert it
    # to a bool instead of ignoring it. A missing key keeps the old value (False).
    use_batch_norm = str(config.get("batch_norm", False)).lower() == "true"

    # Define and train the model as before
    train_model(learning_rate = config.learning_rate, num_filters = config.num_filters,
                filter_sizes = config.filter_sizes, activation_fn = config.activation, 
                optimiser_fn = config.optimiser, num_neurons_dense = config.dense_layer,
                weight_decay = config.weight_decay, dropout = config.dropout,
                useBatchNorm = use_batch_norm, 
                batchSize = config.batch_size, num_epochs = 10)
    
# Bayesian hyperparameter sweep definition.
# Each key appears exactly once. The dict used to repeat 'weight_decay' and
# 'learning_rate', and Python silently kept only the later entry of each; the
# values below are those effective ones.
# NOTE(review): the metric name must match the key the training code passes to
# wandb.log — confirm that 'validation_accuracy' is actually logged.
sweep_config = {
    'method': 'bayes',
    'name' : 'sweep cross entropy',
    'metric': {
      'name': 'validation_accuracy',
      'goal': 'maximize'
    },
    'parameters': {
        'num_filters': {
          'values': [[32,32,32,32,32],[32,64,64,128,128],[128,128,64,64,32],[32,64,128,256,512]]
        },
        'filter_sizes': {
          'values': [[3,3,3,3,3], [5,5,5,5,5], [3,5,3,5,3]]
        },
        'weight_decay': {
            'values': [0, 0.0005, 0.005]
        },
        'learning_rate': {
            'values': [1e-3, 1e-4]
        },
        'dropout': {
            'values': [0, 0.2, 0.4]
        },
        'activation': {
            'values': ['relu', 'elu', 'selu']
        },
        'optimiser': {
            'values': ['nadam', 'adam', 'rmsprop']
        },
        # String flags, not booleans; the consumer has to convert them.
        'batch_norm':{
            'values': ['true','false']
        },
        'batch_size': {
            'values': [32, 64]
        },
        'dense_layer':{
            'values': [128, 256, 512]
        }
    }
}


# Register the sweep, then run up to 50 trials of `main` against that same sweep.
# The agent used to be pointed at a hard-coded sweep id, which left the sweep
# created on the previous line unused.
sweep_id = wandb.sweep(sweep=sweep_config,project='Deep_Learning_Assignment_2')
wandb.agent(sweep_id, function = main, project = 'Deep_Learning_Assignment_2', count = 50)

wandb.finish()