In [1]:
!pip install -q wandb

In [30]:
import wandb

In [31]:
wandb.login(key="8b9afebd4705d9e357dd7b2fa957f97e19705bf1")



True

In [24]:
import os
import wandb
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import StratifiedShuffleSplit

In [25]:
class CNN(nn.Module):
    def __init__(self, num_classes, in_channels=3, base_filter=32, filter_mode='same',
                 activation='relu', dropout=0.0, use_batchnorm=False):
        super(CNN, self).__init__()

        ACTIVATIONS = {
            'relu': nn.ReLU(),
            'gelu': nn.GELU(),
            'silu': nn.SiLU(),
            'mish': nn.Mish()
        }
        act_fn = ACTIVATIONS[activation]

        # filter organization strategy
        if filter_mode == 'same':
            filters = [base_filter] * 3
        elif filter_mode == 'double':
            filters = [base_filter * (2 ** i) for i in range(3)]
        elif filter_mode == 'half':
            filters = [max(8, base_filter // (2 ** i)) for i in range(3)]  # avoid going below 8
        else:
            filters = [base_filter] * 3

        layers = []
        input_channels = in_channels
        for out_channels in filters:
            layers.append(nn.Conv2d(input_channels, out_channels, kernel_size=3, padding=1))
            if use_batchnorm:
                layers.append(nn.BatchNorm2d(out_channels))
            layers.append(act_fn)
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            if dropout > 0:
                layers.append(nn.Dropout2d(dropout))
            input_channels = out_channels  # <-- FIXED HERE

        self.features = nn.Sequential(*layers)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(input_channels, 128),  # <-- FIXED HERE
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.pool(x)
        x = self.classifier(x)
        return x

In [26]:
def get_dataloaders(batch_size=64, augment=False):
    train_dir = '/kaggle/input/nature/inaturalist_12K/train'

    base_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.5]*3, [0.5]*3)
    ])

    if augment:
        transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(10),
            transforms.RandomResizedCrop(224),
            transforms.ColorJitter(brightness=0.2, contrast=0.2),
            *base_transform.transforms
        ])
    else:
        transform = base_transform

    full_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
    targets = [sample[1] for sample in full_dataset]


    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
    train_idx, val_idx = next(sss.split(list(range(len(targets))), targets))

    train_dataset = Subset(full_dataset, train_idx)
    val_dataset = Subset(full_dataset, val_idx)

    val_dataset.dataset.transform = base_transform

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    num_classes = len(full_dataset.classes)

    return train_loader, val_loader, num_classes

In [27]:
def train_model(config=None):
    with wandb.init(config=config):
        config = wandb.config
        run_name = f"act:{config.activation}_fm:{config.filter_mode}_do:{config.dropout}_bn:{config.batchnorm}_aug:{config.augment}"
        wandb.run.name = run_name
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        
        train_loader, val_loader, num_classes = get_dataloaders(config.batch_size, config.augment)
        model = CNN(
            in_channels=3,
            num_classes=num_classes,
            base_filter=config.base_filter,
            filter_mode=config.filter_mode,
            activation=config.activation,
            dropout=config.dropout,
            use_batchnorm=config.batchnorm
        ).to(device)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

        for epoch in range(config.epochs):
            model.train()
            running_loss, correct, total = 0.0, 0, 0
            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item() * images.size(0)
                correct += (outputs.argmax(1) == labels).sum().item()
                total += labels.size(0)

            train_acc = correct / total
            train_loss = running_loss / total

            # Validation
            model.eval()
            val_correct, val_total = 0, 0
            with torch.no_grad():
                for images, labels in val_loader:
                    images, labels = images.to(device), labels.to(device)
                    outputs = model(images)
                    val_correct += (outputs.argmax(1) == labels).sum().item()
                    val_total += labels.size(0)

            val_acc = val_correct / val_total
            wandb.log({
                "epoch": epoch + 1,
                "train_loss": train_loss,
                "train_accuracy": train_acc,
                "val_accuracy": val_acc
            })


In [None]:
sweep_config = {
    'method': 'bayes',
    'metric': {'name': 'val_accuracy', 'goal': 'maximize'},
    'parameters': {
        'epochs': {'values': [20]},
        'batch_size': {'values': [32]},
        'learning_rate': {'values': [0.001]},
        'dropout': {'values': [0.2, 0.3]},
        'activation': {'values': ['relu', 'gelu', 'silu', 'mish']},
        'base_filter': {'values': [32]},
        'filter_mode': {'values': ['same', 'double', 'half']},
        'batchnorm': {'values': [True, False]},
        'augment': {'values': [True, False]},
    }
}

# 🎯 Sweep initialization
sweep_id = wandb.sweep(sweep_config, project="iNaturalist-Sweep-Final")
wandb.agent(sweep_id, function=train_model, count=20)

Create sweep with ID: gbv3ri8z
Sweep URL: https://wandb.ai/roohiparveen/iNaturalist-Sweep-Final/sweeps/gbv3ri8z


[34m[1mwandb[0m: Agent Starting Run: cola3wlg with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	augment: False
[34m[1mwandb[0m: 	base_filter: 32
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	batchnorm: True
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_mode: double
[34m[1mwandb[0m: 	learning_rate: 0.001


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▄▄▄▅▅▆▆▇▆▇▇▇▇▇█████
train_loss,█▆▅▅▄▄▃▃▃▃▂▃▂▂▂▁▁▁▁▁
val_accuracy,▂▂▁▄▄▄▅▅▅▆▅▆▅▆▇▆▇▆█▇

0,1
epoch,20.0
train_accuracy,0.27253
train_loss,2.01504
val_accuracy,0.3145


[34m[1mwandb[0m: Agent Starting Run: lyake3fs with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	augment: False
[34m[1mwandb[0m: 	base_filter: 32
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	batchnorm: True
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_mode: double
[34m[1mwandb[0m: 	learning_rate: 0.001


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▃▃▄▅▅▅▆▆▆▆▇▆▇▇▇▇█▇█
train_loss,█▆▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁
val_accuracy,▁▃▄▅▅▄▆▆▅▆▇▇▅▇███▇▇▇

0,1
epoch,20.0
train_accuracy,0.28854
train_loss,1.98269
val_accuracy,0.304


[34m[1mwandb[0m: Agent Starting Run: 6fude6zm with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	augment: True
[34m[1mwandb[0m: 	base_filter: 32
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	batchnorm: False
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_mode: same
[34m[1mwandb[0m: 	learning_rate: 0.001


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▅▅▆▆▇▆▇▇▇▇▇▇█▇▇▇██▇
train_loss,█▅▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁
val_accuracy,▁▁▂▄▃▃▅▆▇▅▆▆▇██▇▆█▇▇

0,1
epoch,20.0
train_accuracy,0.25153
train_loss,2.06522
val_accuracy,0.2845


[34m[1mwandb[0m: Agent Starting Run: e4wqzvgx with config:
[34m[1mwandb[0m: 	activation: silu
[34m[1mwandb[0m: 	augment: False
[34m[1mwandb[0m: 	base_filter: 32
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	batchnorm: False
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_mode: same
[34m[1mwandb[0m: 	learning_rate: 0.001


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▄▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇█
train_loss,█▅▅▄▃▃▃▂▃▂▂▂▂▂▂▂▁▁▁▁
val_accuracy,▁▃▃▃▄▆▆▆▆▆▆▅▇▆▇▇▇▇▇█

0,1
epoch,20.0
train_accuracy,0.26216
train_loss,2.05243
val_accuracy,0.293


[34m[1mwandb[0m: Agent Starting Run: v0ggwvyh with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	augment: False
[34m[1mwandb[0m: 	base_filter: 32
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	batchnorm: False
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_mode: half
[34m[1mwandb[0m: 	learning_rate: 0.001


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▃▄▅▆▆▆▆▇▇▇▇▇▇▇█████
train_loss,█▆▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁▁▁▁
val_accuracy,▁▄▅▅▅▅▅▇▇▆█▇▇█▇▇▇██▇

0,1
epoch,20.0
train_accuracy,0.19965
train_loss,2.17031
val_accuracy,0.199


[34m[1mwandb[0m: Agent Starting Run: htiia9xa with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	augment: False
[34m[1mwandb[0m: 	base_filter: 32
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	batchnorm: False
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_mode: half
[34m[1mwandb[0m: 	learning_rate: 0.001


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▄▅▆▆▇▆▇▇▇▇▇▇███████
train_loss,█▅▄▄▃▃▃▂▂▂▂▂▂▁▂▁▁▁▁▁
val_accuracy,▁▂▃▃▄▂▄▅▅▅▇▆▇▆▆▇▆█▆▅

0,1
epoch,20.0
train_accuracy,0.25003
train_loss,2.06659
val_accuracy,0.251


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qopeuqlo with config:
[34m[1mwandb[0m: 	activation: silu
[34m[1mwandb[0m: 	augment: False
[34m[1mwandb[0m: 	base_filter: 32
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	batchnorm: True
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_mode: double
[34m[1mwandb[0m: 	learning_rate: 0.001


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▃▄▅▅▅▆▆▆▇▇▇▇▆▇▇▇███
train_loss,█▆▅▄▄▄▄▃▃▃▂▂▂▂▂▂▁▁▁▂
val_accuracy,▁▃▄▅▆▅▄▅▄▆▆▇▇▆▇▇▆▇█▇

0,1
epoch,20.0
train_accuracy,0.26828
train_loss,2.04383
val_accuracy,0.294


[34m[1mwandb[0m: Agent Starting Run: ps9bju22 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	augment: False
[34m[1mwandb[0m: 	base_filter: 32
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	batchnorm: False
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_mode: half
[34m[1mwandb[0m: 	learning_rate: 0.001


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▃▃▅▆▆▇▇▇▇▇▇▇▇█▇▇███
train_loss,█▇▆▅▃▃▃▂▂▃▂▂▂▂▁▁▂▁▁▁
val_accuracy,▁▁▅▆▆▆▆▆█▇▇▇▇▇▇█████

0,1
epoch,20.0
train_accuracy,0.19577
train_loss,2.16703
val_accuracy,0.2265


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ku6j10fa with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	augment: False
[34m[1mwandb[0m: 	base_filter: 32
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	batchnorm: False
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_mode: double
[34m[1mwandb[0m: 	learning_rate: 0.001


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▃▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,█▆▅▅▅▄▄▄▃▃▃▂▂▂▂▂▂▁▁▁
val_accuracy,▁▂▄▃▄▄▆▅▆▆▆▇▆▇▇▆▇█▇█

0,1
epoch,20.0
train_accuracy,0.29041
train_loss,1.98712
val_accuracy,0.3125


[34m[1mwandb[0m: Agent Starting Run: 0x0g7wk0 with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	augment: True
[34m[1mwandb[0m: 	base_filter: 32
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	batchnorm: True
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_mode: same
[34m[1mwandb[0m: 	learning_rate: 0.001


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▃▄▅▆▆▆▆▆▇▇▇▇▇███▇██
train_loss,█▆▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▂▁▁
val_accuracy,▁▁▃▃▆▅▆▅▇▆▅▆▆▇▇▆▇█▇▇

0,1
epoch,20.0
train_accuracy,0.25828
train_loss,2.05728
val_accuracy,0.286


[34m[1mwandb[0m: Agent Starting Run: jzho9p3y with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	augment: True
[34m[1mwandb[0m: 	base_filter: 32
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	batchnorm: False
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_mode: double
[34m[1mwandb[0m: 	learning_rate: 0.001


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▃▄▅▅▅▆▆▆▆▇▇▇▇▇██▇▇█
train_loss,█▆▅▄▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁▁
val_accuracy,▁▁▂▂▃▃▄▆▄▆▆▆▆█▇▇▇██▇

0,1
epoch,20.0
train_accuracy,0.26141
train_loss,2.05564
val_accuracy,0.2725


[34m[1mwandb[0m: Agent Starting Run: 06y3wv2b with config:
[34m[1mwandb[0m: 	activation: silu
[34m[1mwandb[0m: 	augment: False
[34m[1mwandb[0m: 	base_filter: 32
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	batchnorm: False
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_mode: double
[34m[1mwandb[0m: 	learning_rate: 0.001


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▅▅▆▆▇▆▇▇▇▇▇█▇██████
train_loss,█▅▄▄▃▃▃▃▂▂▂▂▁▂▁▁▁▁▁▁
val_accuracy,▁▃▃▄▅▅▅▆▅▆▆▆▆▆▆▆▆▇▇█

0,1
epoch,20.0
train_accuracy,0.26741
train_loss,2.03747
val_accuracy,0.3085


[34m[1mwandb[0m: Agent Starting Run: g0jrfec8 with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	augment: True
[34m[1mwandb[0m: 	base_filter: 32
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	batchnorm: False
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_mode: double
[34m[1mwandb[0m: 	learning_rate: 0.001


In [29]:
test_dir = "/kaggle/input/inaturalist/inaturalist_12K/val"

test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

test_dataset = datasets.ImageFolder(root=test_dir, transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Quick check
print("Test samples:", len(test_dataset))
print("Classes:", test_dataset.classes)

FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/input/inaturalist/inaturalist_12K/val'

In [None]:
best_config = {
    'activation': 'gelu',
    'filter_mode': 'double',
    'dropout': 0.2,
    'batchnorm': False,
    'augment': True,
    'batch_size': 32,
    'learning_rate': 0.001,
    'epochs': 20,
    'base_filter': 32
}


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Recreate the best model
best_model = CNN(
    in_channels=3,
    num_classes=10,
    base_filter=32,  # fixed base filter
    filter_mode=best_config['filter_mode'],
    activation=best_config['activation'],
    dropout=best_config['dropout'],
    use_batchnorm=best_config['batchnorm']
).to(device)

In [None]:
def evaluate(model, dataloader):
    model.eval()
    correct, total = 0, 0
    all_preds = []
    all_images = []
    
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            preds = outputs.argmax(1)
            
            correct += (preds == labels).sum().item()
            total += labels.size(0)
            all_preds.extend(preds.cpu())
            all_images.extend(images.cpu())

    accuracy = correct / total
    return accuracy, all_images, all_preds

In [None]:
test_loader, class_names = get_test_loader(batch_size=64)
test_acc, test_images, test_preds = evaluate(best_model, test_loader)
print(f"Test Accuracy: {test_acc:.4f}")

In [None]:
import matplotlib.pyplot as plt

def show_prediction_grid(images, predictions, class_names, rows=10, cols=3):
    fig, axes = plt.subplots(rows, cols, figsize=(15, 25))
    for i in range(rows * cols):
        ax = axes[i // cols, i % cols]
        img = images[i]
        img = img * 0.5 + 0.5  # unnormalize
        img = img.permute(1, 2, 0).numpy()
        ax.imshow(img)
        ax.set_title(f"Pred: {class_names[predictions[i]]}", fontsize=10)
        ax.axis('off')
    
    plt.tight_layout()
    plt.suptitle("Predictions from Best Model", fontsize=16, y=1.02)
    plt.show()

show_prediction_grid(test_images, test_preds, class_names)