### **Installing PyTorch Lightning**

In [None]:
# Install PyTorch Lightning into the kernel's own environment; the version is pinned
# so a re-run resolves the same release that the recorded output downloaded.
%pip install -q pytorch_lightning==2.5.1

Collecting pytorch_lightning
  Downloading pytorch_lightning-2.5.1-py3-none-any.whl.metadata (20 kB)
Collecting torchmetrics>=0.7.0 (from pytorch_lightning)
  Downloading torchmetrics-1.7.1-py3-none-any.whl.metadata (21 kB)
Collecting lightning-utilities>=0.10.0 (from pytorch_lightning)
  Downloading lightning_utilities-0.14.3-py3-none-any.whl.metadata (5.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.1.0->pytorch_lightning)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.1.0->pytorch_lightning)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.1.0->pytorch_lightning)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.1.0->pytorch_lightning)
  Downloadi

In [None]:
# Mount Google Drive and unzip the dataset
from google.colab import drive
import zipfile
import os

# Make Google Drive available inside the Colab filesystem
drive.mount('/content/drive')

# Where the zipped dataset lives on Drive, and the folder expected after extraction
zip_path = '/content/drive/MyDrive/inaturalist_12K.zip'
extract_path = '/content/inaturalist_12K'

# Extraction is skipped when the target folder is already present
already_extracted = os.path.exists(extract_path)
if not already_extracted:
    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall('/content/')

Mounted at /content/drive


In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split, Subset
import wandb
from sklearn.model_selection import StratifiedShuffleSplit
from tqdm import tqdm

# Supported activation names mapped to the PyTorch module classes that implement them
activation_map = {
    'ReLU': nn.ReLU,
    'GELU': nn.GELU,
    'SiLU': nn.SiLU,
    'Mish': nn.Mish
}

# Flexible Convolutional Neural Network class definition
class FlexibleCNN(nn.Module):
    """Configurable CNN: a stack of Conv2d -> activation -> MaxPool2d blocks and a two-layer head.

    Args:
        conv_filters: Output channel count of each convolutional block.
        kernel_sizes: Kernel size of each convolutional block (same length as ``conv_filters``).
        activation_fn_str: Key into ``activation_map`` selecting the activation class.
        dense_units: Width of the hidden fully connected layer ``fc1``.
        dropout_rate: Dropout probability applied between ``fc1`` and ``fc2``.
        num_classes: Number of output logits.
        input_size: Spatial size of the input images, as ``(height, width)`` or a single
            int for square images. Used only to size ``fc1``; inputs passed to
            ``forward`` must have this spatial size.

    Raises:
        ValueError: If the activation name is unknown or the two per-block lists
            differ in length.
    """

    def __init__(self, conv_filters, kernel_sizes, activation_fn_str, dense_units,
                 dropout_rate=0.5, num_classes=10, input_size=(224, 224)):
        super(FlexibleCNN, self).__init__()

        # Validate activation function string
        if activation_fn_str not in activation_map:
            raise ValueError(f"Unsupported activation function: {activation_fn_str}")
        activation_fn = activation_map[activation_fn_str]

        # zip() would silently drop the surplus entries, so reject mismatched lists
        conv_filters = list(conv_filters)
        kernel_sizes = list(kernel_sizes)
        if len(conv_filters) != len(kernel_sizes):
            raise ValueError(
                f"conv_filters and kernel_sizes must have the same length, "
                f"got {len(conv_filters)} and {len(kernel_sizes)}"
            )

        if isinstance(input_size, int):
            input_size = (input_size, input_size)

        self.convs = nn.ModuleList()
        in_channels = 3  # RGB channels for images

        # Create convolutional blocks with Conv2D -> Activation -> MaxPooling
        for filters, k in zip(conv_filters, kernel_sizes):
            self.convs.append(nn.Sequential(
                nn.Conv2d(in_channels, filters, kernel_size=k, padding=k // 2),
                activation_fn(),  # Activation function instance
                nn.MaxPool2d(kernel_size=2)
            ))
            in_channels = filters  # Update for next layer

        self.flatten = nn.Flatten()

        # Push a dummy image through the conv stack to learn how many features
        # reach the dense head; this is what sizes fc1.
        with torch.no_grad():
            probe = torch.zeros(1, 3, *input_size)
            for conv in self.convs:
                probe = conv(probe)
            flat_features = self.flatten(probe).shape[1]

        # forward() uses fc1, so it has to be created here
        self.fc1 = nn.Linear(flat_features, dense_units)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(dense_units, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for a batch of images."""
        for conv in self.convs:
            x = conv(x)
        x = self.flatten(x)
        # NOTE(review): no activation is applied between fc1 and fc2 -- confirm this is intended
        x = self.fc1(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Utility function to calculate accuracy
def accuracy(preds, labels):
    """Return the fraction of rows in ``preds`` whose highest-scoring column equals ``labels``."""
    predicted = preds.argmax(dim=1)
    hits = predicted.eq(labels).sum().item()
    return hits / len(labels)

# Preprocessing without augmentation: resize to 224x224 and convert to a tensor
def _default_transform():
    """Build the plain resize-and-tensor transform used when no augmentation is requested."""
    return transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor()
    ])


# Load training and validation data using stratified split
def load_data(data_dir, batch_size, val_split=0.2, train_transform=None, val_transform=None):
    """Create train/validation loaders from ``<data_dir>/train`` with a stratified split.

    Args:
        data_dir: Dataset root containing a ``train`` folder in ImageFolder layout.
        batch_size: Batch size for both loaders.
        val_split: Fraction of the images held out for validation.
        train_transform: Transform for training images; defaults to resize + ToTensor.
        val_transform: Transform for validation images; defaults to resize + ToTensor.

    Returns:
        ``(train_loader, val_loader)``; only the training loader shuffles.
    """
    if train_transform is None:
        train_transform = _default_transform()
    if val_transform is None:
        val_transform = _default_transform()

    train_root = os.path.join(data_dir, 'train')
    # Two views over the same folder so each split can carry its own transform;
    # both list the same samples, so one set of indices addresses either view.
    train_view = datasets.ImageFolder(train_root, transform=train_transform)
    val_view = datasets.ImageFolder(train_root, transform=val_transform)
    targets = [label for _, label in train_view.imgs]

    # Fixed random_state keeps the split identical across runs
    splitter = StratifiedShuffleSplit(n_splits=1, test_size=val_split, random_state=42)
    train_idx, val_idx = next(splitter.split(train_view.imgs, targets))

    train_subset = Subset(train_view, train_idx)
    val_subset = Subset(val_view, val_idx)

    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)
    return train_loader, val_loader

# Load test data
def load_test_data(data_dir, batch_size):
    """Create an unshuffled loader over ``<data_dir>/val``.

    Returns:
        ``(test_loader, test_dataset)``.
    """
    test_root = os.path.join(data_dir, 'val')
    test_dataset = datasets.ImageFolder(test_root, transform=_default_transform())
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    return test_loader, test_dataset

# One pass over a loader, shared by the training and validation phases
def _run_epoch(model, loader, criterion, device, optimizer=None, desc=None):
    """Run one pass over ``loader`` and return ``(mean_loss, mean_accuracy)``.

    The model is updated only when ``optimizer`` is given; otherwise it is put in
    eval mode and gradients are disabled. Both means are weighted by batch size,
    so a smaller final batch does not skew them.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum, correct_sum, seen = 0.0, 0.0, 0
    with torch.set_grad_enabled(training):
        for batch in tqdm(loader, desc=desc):
            inputs, labels = batch[0].to(device), batch[1].to(device)
            if training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            n_samples = labels.size(0)
            # criterion returns the batch mean, so scale back to a per-sample sum
            loss_sum += loss.item() * n_samples
            correct_sum += accuracy(outputs, labels) * n_samples
            seen += n_samples

    if seen == 0:
        return 0.0, 0.0
    return loss_sum / seen, correct_sum / seen

# Main training function integrated with Weights & Biases
def train_model(config=None):
    """Train a ``FlexibleCNN`` for one W&B run and log per-epoch metrics.

    Args:
        config: Optional configuration passed to ``wandb.init``. Hyperparameters are
            then read back from ``wandb.config``.
    """
    # The context manager closes the run even if training raises
    with wandb.init(config=config):
        config = wandb.config

        # Apply data augmentation only if specified
        if config.data_augmentation == 'yes':
            train_transform = transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor()
            ])
        else:
            train_transform = _default_transform()

        # Validation images are never augmented
        val_transform = _default_transform()

        # Select device
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Initialize model
        model = FlexibleCNN(
            conv_filters=config.conv_filters,
            kernel_sizes=config.kernel_sizes,
            activation_fn_str=config.activation_fn,
            dense_units=config.dense_units,
            dropout_rate=config.dropout,
            num_classes=10
        ).to(device)

        # Define loss function and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)

        # Load data loaders; the transforms are passed through so the
        # data_augmentation setting actually reaches the training images
        train_loader, val_loader = load_data(
            config.data_dir,
            config.batch_size,
            train_transform=train_transform,
            val_transform=val_transform
        )

        # Training loop
        for epoch in range(config.epochs):
            train_loss, train_acc = _run_epoch(
                model, train_loader, criterion, device,
                optimizer=optimizer, desc=f"Epoch {epoch+1} [Train]"
            )
            val_loss, val_acc = _run_epoch(
                model, val_loader, criterion, device,
                desc=f"Epoch {epoch+1} [Val]"
            )

            # Log metrics to Weights & Biases
            wandb.log({
                'epoch': epoch + 1,
                'train_loss': train_loss,
                'train_accuracy': train_acc,
                'val_loss': val_loss,
                'val_accuracy': val_acc
            })


**Sweep Configuration**

In [None]:
sweep_config = {
    # Bayesian search that maximizes the logged validation accuracy
    'method': 'bayes',
    'metric': {'name': 'val_accuracy', 'goal': 'maximize'},
    'parameters': {
        # Hyperparameters chosen from a list of candidate values
        'activation_fn': {'values': ['GELU', 'SiLU']},
        'batch_size': {'values': [32, 64]},
        # Architecture: a single candidate, five blocks of 64 filters with 3x3 kernels
        'conv_filters': {'values': [[64, 64, 64, 64, 64]]},
        'kernel_sizes': {'values': [[3, 3, 3, 3, 3]]},
        'dense_units': {'values': [128, 256, 512]},
        'dropout': {'values': [0.3, 0.5]},
        # Learning rate sampled uniformly between min and max
        'lr': {'distribution': 'uniform', 'min': 0.0003, 'max': 0.0007},
        # Settings held constant for every run
        'epochs': {'value': 10},
        'data_dir': {'value': '/content/inaturalist_12K'},
        'data_augmentation': {'values': ['yes', 'no']},
    },
}

In [None]:
# Register the sweep described by sweep_config under the W&B project "A2"
sweep_id = wandb.sweep(sweep_config, project="A2")
# Run the sweep agent in this notebook, calling train_model for each run (count=20)
wandb.agent(sweep_id, function=train_model, count=20)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: 65nw10d9
Sweep URL: https://wandb.ai/me21b164-indian-institute-of-technology-madras/A2/sweeps/65nw10d9


[34m[1mwandb[0m: Agent Starting Run: 1walwjo9 with config:
[34m[1mwandb[0m: 	activation_fn: SiLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_filters: [64, 64, 64, 64, 64]
[34m[1mwandb[0m: 	data_augmentation: yes
[34m[1mwandb[0m: 	data_dir: /content/inaturalist_12K
[34m[1mwandb[0m: 	dense_units: 256
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	kernel_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	lr: 0.0006120471159227737
[34m[1mwandb[0m: Currently logged in as: [33mme21b164[0m ([33mme21b164-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1 [Train]: 100%|██████████| 125/125 [00:44<00:00,  2.82it/s]
Epoch 1 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.39it/s]
Epoch 2 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.93it/s]
Epoch 2 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.37it/s]
Epoch 3 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.94it/s]
Epoch 3 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.27it/s]
Epoch 4 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.91it/s]
Epoch 4 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.77it/s]
Epoch 5 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.95it/s]
Epoch 5 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.36it/s]
Epoch 6 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.95it/s]
Epoch 6 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.41it/s]
Epoch 7 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.85it/s]
Epoch 7 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.97it/s]
Epoch 8 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.94it/s]
Epoch 8 [Val]: 100%|███

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▂▃▄▅▆▆▇▇█
train_loss,█▇▆▅▅▄▄▃▂▁
val_accuracy,▁▃▅▅▆▇█▇█▇
val_loss,█▆▅▃▃▂▁▂▂▅

0,1
epoch,10.0
train_accuracy,0.45511
train_loss,1.55861
val_accuracy,0.33496
val_loss,2.04728


[34m[1mwandb[0m: Agent Starting Run: utibc5y5 with config:
[34m[1mwandb[0m: 	activation_fn: SiLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_filters: [64, 64, 64, 64, 64]
[34m[1mwandb[0m: 	data_augmentation: no
[34m[1mwandb[0m: 	data_dir: /content/inaturalist_12K
[34m[1mwandb[0m: 	dense_units: 128
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	kernel_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	lr: 0.0004653150146577237


Epoch 1 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.90it/s]
Epoch 1 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.38it/s]
Epoch 2 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.97it/s]
Epoch 2 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.49it/s]
Epoch 3 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.97it/s]
Epoch 3 [Val]: 100%|██████████| 32/32 [00:07<00:00,  4.01it/s]
Epoch 4 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.89it/s]
Epoch 4 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.44it/s]
Epoch 5 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.98it/s]
Epoch 5 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.45it/s]
Epoch 6 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.97it/s]
Epoch 6 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.98it/s]
Epoch 7 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.96it/s]
Epoch 7 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.58it/s]
Epoch 8 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.89it/s]
Epoch 8 [Val]: 100%|███

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▆▆▇▇██
train_loss,█▇▆▅▄▄▃▂▂▁
val_accuracy,▁▄▅▆██▇██▇
val_loss,█▆▅▄▂▂▂▁▁▁

0,1
epoch,10.0
train_accuracy,0.39934
train_loss,1.7313
val_accuracy,0.33984
val_loss,1.86936


[34m[1mwandb[0m: Agent Starting Run: wt1abkcb with config:
[34m[1mwandb[0m: 	activation_fn: GELU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_filters: [64, 64, 64, 64, 64]
[34m[1mwandb[0m: 	data_augmentation: yes
[34m[1mwandb[0m: 	data_dir: /content/inaturalist_12K
[34m[1mwandb[0m: 	dense_units: 256
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	kernel_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	lr: 0.0006821000437245545


Epoch 1 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.85it/s]
Epoch 1 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.79it/s]
Epoch 2 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.95it/s]
Epoch 2 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.24it/s]
Epoch 3 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.91it/s]
Epoch 3 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.42it/s]
Epoch 4 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.95it/s]
Epoch 4 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.71it/s]
Epoch 5 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.91it/s]
Epoch 5 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.89it/s]
Epoch 6 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.86it/s]
Epoch 6 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.46it/s]
Epoch 7 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.94it/s]
Epoch 7 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.44it/s]
Epoch 8 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.95it/s]
Epoch 8 [Val]: 100%|███

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▂▄▅▅▅▆▇▇█
train_loss,█▇▆▅▅▄▃▃▂▁
val_accuracy,▁▄▆▆▆▆██▇█
val_loss,█▆▃▄▂▁▁▁▁▂

0,1
epoch,10.0
train_accuracy,0.46208
train_loss,1.53227
val_accuracy,0.36816
val_loss,1.90594


[34m[1mwandb[0m: Agent Starting Run: x3i0d27u with config:
[34m[1mwandb[0m: 	activation_fn: SiLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_filters: [64, 64, 64, 64, 64]
[34m[1mwandb[0m: 	data_augmentation: no
[34m[1mwandb[0m: 	data_dir: /content/inaturalist_12K
[34m[1mwandb[0m: 	dense_units: 128
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	kernel_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	lr: 0.0006296106098193848


Epoch 1 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.92it/s]
Epoch 1 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.85it/s]
Epoch 2 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.95it/s]
Epoch 2 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.41it/s]
Epoch 3 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.88it/s]
Epoch 3 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.42it/s]
Epoch 4 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.96it/s]
Epoch 4 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.78it/s]
Epoch 5 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.90it/s]
Epoch 5 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.81it/s]
Epoch 6 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.86it/s]
Epoch 6 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.42it/s]
Epoch 7 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.94it/s]
Epoch 7 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.39it/s]
Epoch 8 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.94it/s]
Epoch 8 [Val]: 100%|███

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▆▆▆▇██
train_loss,█▇▆▅▄▄▃▃▂▁
val_accuracy,▁▅▅▆▇▇▇▇██
val_loss,█▄▃▂▂▁▁▁▂▂

0,1
epoch,10.0
train_accuracy,0.43486
train_loss,1.61115
val_accuracy,0.36719
val_loss,1.91757


[34m[1mwandb[0m: Agent Starting Run: llmitigv with config:
[34m[1mwandb[0m: 	activation_fn: GELU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_filters: [64, 64, 64, 64, 64]
[34m[1mwandb[0m: 	data_augmentation: no
[34m[1mwandb[0m: 	data_dir: /content/inaturalist_12K
[34m[1mwandb[0m: 	dense_units: 256
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	kernel_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	lr: 0.00033669101974018764


Epoch 1 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.96it/s]
Epoch 1 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.89it/s]
Epoch 2 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.95it/s]
Epoch 2 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.45it/s]
Epoch 3 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.87it/s]
Epoch 3 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.45it/s]
Epoch 4 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.95it/s]
Epoch 4 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.91it/s]
Epoch 5 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.92it/s]
Epoch 5 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.68it/s]
Epoch 6 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.89it/s]
Epoch 6 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.46it/s]
Epoch 7 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.97it/s]
Epoch 7 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.39it/s]
Epoch 8 [Train]: 100%|██████████| 125/125 [00:41<00:00,  2.98it/s]
Epoch 8 [Val]: 100%|███

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▆▆▆▇██
train_loss,█▇▆▅▄▄▃▃▂▁
val_accuracy,▁▃▅▆▆▇▇▇▇█
val_loss,█▆▄▃▂▁▂▁▁▁

0,1
epoch,10.0
train_accuracy,0.44082
train_loss,1.59097
val_accuracy,0.39062
val_loss,1.82653


[34m[1mwandb[0m: Agent Starting Run: wkcj08ql with config:
[34m[1mwandb[0m: 	activation_fn: SiLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_filters: [64, 64, 64, 64, 64]
[34m[1mwandb[0m: 	data_augmentation: yes
[34m[1mwandb[0m: 	data_dir: /content/inaturalist_12K
[34m[1mwandb[0m: 	dense_units: 128
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	kernel_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	lr: 0.00043381004726031233


Epoch 1 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.89it/s]
Epoch 1 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.39it/s]
Epoch 2 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.95it/s]
Epoch 2 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.42it/s]
Epoch 3 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.88it/s]
Epoch 3 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.47it/s]
Epoch 4 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.96it/s]
Epoch 4 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.98it/s]
Epoch 5 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.96it/s]
Epoch 5 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.41it/s]
Epoch 6 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.87it/s]
Epoch 6 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.39it/s]
Epoch 7 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.86it/s]
Epoch 7 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.54it/s]
Epoch 8 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.95it/s]
Epoch 8 [Val]: 100%|███

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▂▃▅▅▆▆▇▇█
train_loss,█▇▆▅▅▄▃▃▂▁
val_accuracy,▁▂▄▅▆▆▇▇██
val_loss,█▇▅▄▃▃▂▃▁▂

0,1
epoch,10.0
train_accuracy,0.41238
train_loss,1.70425
val_accuracy,0.37012
val_loss,1.89108


[34m[1mwandb[0m: Agent Starting Run: a65kzgy2 with config:
[34m[1mwandb[0m: 	activation_fn: GELU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_filters: [64, 64, 64, 64, 64]
[34m[1mwandb[0m: 	data_augmentation: no
[34m[1mwandb[0m: 	data_dir: /content/inaturalist_12K
[34m[1mwandb[0m: 	dense_units: 512
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	kernel_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	lr: 0.0006142400411324606


Epoch 1 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.52it/s]
Epoch 1 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.68it/s]
Epoch 2 [Train]: 100%|██████████| 63/63 [00:40<00:00,  1.54it/s]
Epoch 2 [Val]: 100%|██████████| 16/16 [00:08<00:00,  1.99it/s]
Epoch 3 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.54it/s]
Epoch 3 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.69it/s]
Epoch 4 [Train]: 100%|██████████| 63/63 [00:42<00:00,  1.49it/s]
Epoch 4 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.70it/s]
Epoch 5 [Train]: 100%|██████████| 63/63 [00:40<00:00,  1.54it/s]
Epoch 5 [Val]: 100%|██████████| 16/16 [00:08<00:00,  1.95it/s]
Epoch 6 [Train]: 100%|██████████| 63/63 [00:40<00:00,  1.54it/s]
Epoch 6 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.70it/s]
Epoch 7 [Train]: 100%|██████████| 63/63 [00:42<00:00,  1.50it/s]
Epoch 7 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.70it/s]
Epoch 8 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.54it/s]
Epoch 8 [Val]: 100%|██████████| 16/16 [

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▂▃▅▆▆▆▇▇█
train_loss,█▇▆▅▄▃▃▂▂▁
val_accuracy,▁▃▆▇▇▇▇███
val_loss,█▆▅▃▃▂▂▂▁▂

0,1
epoch,10.0
train_accuracy,0.4194
train_loss,1.65637
val_accuracy,0.36777
val_loss,1.89079


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: b3dkkpg0 with config:
[34m[1mwandb[0m: 	activation_fn: SiLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_filters: [64, 64, 64, 64, 64]
[34m[1mwandb[0m: 	data_augmentation: no
[34m[1mwandb[0m: 	data_dir: /content/inaturalist_12K
[34m[1mwandb[0m: 	dense_units: 128
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	kernel_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	lr: 0.0006030631204261661


Epoch 1 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.94it/s]
Epoch 1 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.74it/s]
Epoch 2 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.96it/s]
Epoch 2 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.76it/s]
Epoch 3 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.97it/s]
Epoch 3 [Val]: 100%|██████████| 32/32 [00:10<00:00,  3.00it/s]
Epoch 4 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.96it/s]
Epoch 4 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.54it/s]
Epoch 5 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.95it/s]
Epoch 5 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.99it/s]
Epoch 6 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.94it/s]
Epoch 6 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.46it/s]
Epoch 7 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.85it/s]
Epoch 7 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.38it/s]
Epoch 8 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.95it/s]
Epoch 8 [Val]: 100%|███

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▅▆▆▇▇█
train_loss,█▇▆▅▄▄▃▃▂▁
val_accuracy,▁▂▅▆▆▇███▇
val_loss,█▆▄▃▄▂▁▁▃▃

0,1
epoch,10.0
train_accuracy,0.45112
train_loss,1.58924
val_accuracy,0.34766
val_loss,1.92909


[34m[1mwandb[0m: Agent Starting Run: kbvwcgmo with config:
[34m[1mwandb[0m: 	activation_fn: SiLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_filters: [64, 64, 64, 64, 64]
[34m[1mwandb[0m: 	data_augmentation: yes
[34m[1mwandb[0m: 	data_dir: /content/inaturalist_12K
[34m[1mwandb[0m: 	dense_units: 256
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	kernel_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	lr: 0.0005100329541681699


Epoch 1 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.53it/s]
Epoch 1 [Val]: 100%|██████████| 16/16 [00:08<00:00,  1.82it/s]
Epoch 2 [Train]: 100%|██████████| 63/63 [00:40<00:00,  1.54it/s]
Epoch 2 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.68it/s]
Epoch 3 [Train]: 100%|██████████| 63/63 [00:42<00:00,  1.49it/s]
Epoch 3 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.73it/s]
Epoch 4 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.53it/s]
Epoch 4 [Val]: 100%|██████████| 16/16 [00:08<00:00,  1.88it/s]
Epoch 5 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.53it/s]
Epoch 5 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.69it/s]
Epoch 6 [Train]: 100%|██████████| 63/63 [00:42<00:00,  1.47it/s]
Epoch 6 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.68it/s]
Epoch 7 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.53it/s]
Epoch 7 [Val]: 100%|██████████| 16/16 [00:08<00:00,  1.91it/s]
Epoch 8 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.51it/s]
Epoch 8 [Val]: 100%|██████████| 16/16 [

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▂▃▅▆▆▇▇██
train_loss,█▇▆▅▄▄▃▂▂▁
val_accuracy,▁▄▄▅▆▇▇▆▇█
val_loss,█▇▆▄▃▃▂▂▂▁

0,1
epoch,10.0
train_accuracy,0.38354
train_loss,1.76887
val_accuracy,0.38418
val_loss,1.84194


[34m[1mwandb[0m: Agent Starting Run: ohp39vww with config:
[34m[1mwandb[0m: 	activation_fn: SiLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_filters: [64, 64, 64, 64, 64]
[34m[1mwandb[0m: 	data_augmentation: yes
[34m[1mwandb[0m: 	data_dir: /content/inaturalist_12K
[34m[1mwandb[0m: 	dense_units: 512
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	kernel_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	lr: 0.0005517140840889947


Epoch 1 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.51it/s]
Epoch 1 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.76it/s]
Epoch 2 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.50it/s]
Epoch 2 [Val]: 100%|██████████| 16/16 [00:11<00:00,  1.42it/s]
Epoch 3 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.50it/s]
Epoch 3 [Val]: 100%|██████████| 16/16 [00:08<00:00,  1.83it/s]
Epoch 4 [Train]: 100%|██████████| 63/63 [00:42<00:00,  1.50it/s]
Epoch 4 [Val]: 100%|██████████| 16/16 [00:08<00:00,  1.78it/s]
Epoch 5 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.51it/s]
Epoch 5 [Val]: 100%|██████████| 16/16 [00:10<00:00,  1.49it/s]
Epoch 6 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.50it/s]
Epoch 6 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.66it/s]
Epoch 7 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.51it/s]
Epoch 7 [Val]: 100%|██████████| 16/16 [00:08<00:00,  1.90it/s]
Epoch 8 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.52it/s]
Epoch 8 [Val]: 100%|██████████| 16/16 [

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▆▆▇▇██
train_loss,█▇▆▅▄▃▃▂▁▁
val_accuracy,▁▃▆▆▆▇▇███
val_loss,█▆▄▃▃▂▁▁▂▁

0,1
epoch,10.0
train_accuracy,0.41147
train_loss,1.71447
val_accuracy,0.37852
val_loss,1.86969


[34m[1mwandb[0m: Agent Starting Run: 1d13tc06 with config:
[34m[1mwandb[0m: 	activation_fn: SiLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_filters: [64, 64, 64, 64, 64]
[34m[1mwandb[0m: 	data_augmentation: no
[34m[1mwandb[0m: 	data_dir: /content/inaturalist_12K
[34m[1mwandb[0m: 	dense_units: 256
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	kernel_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	lr: 0.0006907383955474962


Epoch 1 [Train]: 100%|██████████| 63/63 [00:42<00:00,  1.49it/s]
Epoch 1 [Val]: 100%|██████████| 16/16 [00:08<00:00,  1.93it/s]
Epoch 2 [Train]: 100%|██████████| 63/63 [00:44<00:00,  1.43it/s]
Epoch 2 [Val]: 100%|██████████| 16/16 [00:08<00:00,  1.83it/s]
Epoch 3 [Train]: 100%|██████████| 63/63 [00:42<00:00,  1.50it/s]
Epoch 3 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.66it/s]
Epoch 4 [Train]: 100%|██████████| 63/63 [00:42<00:00,  1.49it/s]
Epoch 4 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.63it/s]
Epoch 5 [Train]: 100%|██████████| 63/63 [00:43<00:00,  1.44it/s]
Epoch 5 [Val]: 100%|██████████| 16/16 [00:08<00:00,  1.91it/s]
Epoch 6 [Train]: 100%|██████████| 63/63 [00:42<00:00,  1.49it/s]
Epoch 6 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.66it/s]
Epoch 7 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.50it/s]
Epoch 7 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.66it/s]
Epoch 8 [Train]: 100%|██████████| 63/63 [00:44<00:00,  1.43it/s]
Epoch 8 [Val]: 100%|██████████| 16/16 [

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▆▆▇▇▇█
train_loss,█▇▆▅▄▄▃▂▂▁
val_accuracy,▁▂▅▆▇▇▇███
val_loss,▇█▄▃▂▂▂▁▂▁

0,1
epoch,10.0
train_accuracy,0.41838
train_loss,1.67766
val_accuracy,0.36309
val_loss,1.85783


[34m[1mwandb[0m: Agent Starting Run: ful8vnfb with config:
[34m[1mwandb[0m: 	activation_fn: GELU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_filters: [64, 64, 64, 64, 64]
[34m[1mwandb[0m: 	data_augmentation: no
[34m[1mwandb[0m: 	data_dir: /content/inaturalist_12K
[34m[1mwandb[0m: 	dense_units: 128
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	kernel_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	lr: 0.0005138838360107809


Epoch 1 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.86it/s]
Epoch 1 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.39it/s]
Epoch 2 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.96it/s]
Epoch 2 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.40it/s]
Epoch 3 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.94it/s]
Epoch 3 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.96it/s]
Epoch 4 [Train]: 100%|██████████| 125/125 [00:44<00:00,  2.82it/s]
Epoch 4 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.71it/s]
Epoch 5 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.96it/s]
Epoch 5 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.39it/s]
Epoch 6 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.95it/s]
Epoch 6 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.45it/s]
Epoch 7 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.88it/s]
Epoch 7 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.73it/s]
Epoch 8 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.93it/s]
Epoch 8 [Val]: 100%|███

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▂▄▅▅▆▆▇▇█
train_loss,█▇▆▅▄▄▃▃▂▁
val_accuracy,▁▃▆▇▇▇█▇██
val_loss,█▆▄▂▁▁▁▁▁▂

0,1
epoch,10.0
train_accuracy,0.46755
train_loss,1.52867
val_accuracy,0.34473
val_loss,1.90975


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: uljet5fc with config:
[34m[1mwandb[0m: 	activation_fn: GELU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_filters: [64, 64, 64, 64, 64]
[34m[1mwandb[0m: 	data_augmentation: no
[34m[1mwandb[0m: 	data_dir: /content/inaturalist_12K
[34m[1mwandb[0m: 	dense_units: 256
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	kernel_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	lr: 0.0005471292652182568


Epoch 1 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.95it/s]
Epoch 1 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.45it/s]
Epoch 2 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.94it/s]
Epoch 2 [Val]: 100%|██████████| 32/32 [00:10<00:00,  3.17it/s]
Epoch 3 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.89it/s]
Epoch 3 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.96it/s]
Epoch 4 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.95it/s]
Epoch 4 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.39it/s]
Epoch 5 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.93it/s]
Epoch 5 [Val]: 100%|██████████| 32/32 [00:10<00:00,  2.92it/s]
Epoch 6 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.93it/s]
Epoch 6 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.71it/s]
Epoch 7 [Train]: 100%|██████████| 125/125 [00:44<00:00,  2.83it/s]
Epoch 7 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.85it/s]
Epoch 8 [Train]: 100%|██████████| 125/125 [00:45<00:00,  2.77it/s]
Epoch 8 [Val]: 100%|███

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▅▆▆▇▇█
train_loss,█▇▆▅▄▄▃▃▂▁
val_accuracy,▁▃▅▆▇██▇█▇
val_loss,█▆▄▄▂▁▁▁▁▂

0,1
epoch,10.0
train_accuracy,0.44537
train_loss,1.59466
val_accuracy,0.3418
val_loss,1.88964


[34m[1mwandb[0m: Agent Starting Run: 3lrm588d with config:
[34m[1mwandb[0m: 	activation_fn: SiLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_filters: [64, 64, 64, 64, 64]
[34m[1mwandb[0m: 	data_augmentation: no
[34m[1mwandb[0m: 	data_dir: /content/inaturalist_12K
[34m[1mwandb[0m: 	dense_units: 512
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	kernel_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	lr: 0.0006396728658935783


Epoch 1 [Train]: 100%|██████████| 63/63 [00:42<00:00,  1.48it/s]
Epoch 1 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.68it/s]
Epoch 2 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.53it/s]
Epoch 2 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.70it/s]
Epoch 3 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.53it/s]
Epoch 3 [Val]: 100%|██████████| 16/16 [00:08<00:00,  1.95it/s]
Epoch 4 [Train]: 100%|██████████| 63/63 [00:42<00:00,  1.49it/s]
Epoch 4 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.69it/s]
Epoch 5 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.53it/s]
Epoch 5 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.69it/s]
Epoch 6 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.53it/s]
Epoch 6 [Val]: 100%|██████████| 16/16 [00:08<00:00,  1.97it/s]
Epoch 7 [Train]: 100%|██████████| 63/63 [00:42<00:00,  1.48it/s]
Epoch 7 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.69it/s]
Epoch 8 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.53it/s]
Epoch 8 [Val]: 100%|██████████| 16/16 [

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▅▆▆▇▇█
train_loss,█▇▆▅▄▄▃▃▂▁
val_accuracy,▁▃▅▅▇▇▇██▇
val_loss,█▇▅▄▃▂▂▁▂▂

0,1
epoch,10.0
train_accuracy,0.41281
train_loss,1.69506
val_accuracy,0.31992
val_loss,1.95242


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ih3rj70s with config:
[34m[1mwandb[0m: 	activation_fn: SiLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_filters: [64, 64, 64, 64, 64]
[34m[1mwandb[0m: 	data_augmentation: no
[34m[1mwandb[0m: 	data_dir: /content/inaturalist_12K
[34m[1mwandb[0m: 	dense_units: 128
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	kernel_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	lr: 0.000491387996577819


Epoch 1 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.93it/s]
Epoch 1 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.38it/s]
Epoch 2 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.93it/s]
Epoch 2 [Val]: 100%|██████████| 32/32 [00:10<00:00,  3.09it/s]
Epoch 3 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.89it/s]
Epoch 3 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.91it/s]
Epoch 4 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.94it/s]
Epoch 4 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.38it/s]
Epoch 5 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.84it/s]
Epoch 5 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.38it/s]
Epoch 6 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.94it/s]
Epoch 6 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.60it/s]
Epoch 7 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.89it/s]
Epoch 7 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.93it/s]
Epoch 8 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.84it/s]
Epoch 8 [Val]: 100%|███

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▃▄▅▆▇▇▇█
train_loss,█▇▆▆▄▄▃▂▂▁
val_accuracy,▁▂▄▅▆▆▇█▆█
val_loss,█▇▅▄▂▂▁▁▃▂

0,1
epoch,10.0
train_accuracy,0.40635
train_loss,1.71485
val_accuracy,0.37109
val_loss,1.90126


[34m[1mwandb[0m: Agent Starting Run: 9bziukmn with config:
[34m[1mwandb[0m: 	activation_fn: GELU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_filters: [64, 64, 64, 64, 64]
[34m[1mwandb[0m: 	data_augmentation: no
[34m[1mwandb[0m: 	data_dir: /content/inaturalist_12K
[34m[1mwandb[0m: 	dense_units: 512
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	kernel_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	lr: 0.0006890923230562056


Epoch 1 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.87it/s]
Epoch 1 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.45it/s]
Epoch 2 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.95it/s]
Epoch 2 [Val]: 100%|██████████| 32/32 [00:07<00:00,  4.01it/s]
Epoch 3 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.92it/s]
Epoch 3 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.63it/s]
Epoch 4 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.86it/s]
Epoch 4 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.37it/s]
Epoch 5 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.93it/s]
Epoch 5 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.69it/s]
Epoch 6 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.91it/s]
Epoch 6 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.30it/s]
Epoch 7 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.92it/s]
Epoch 7 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.42it/s]
Epoch 8 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.93it/s]
Epoch 8 [Val]: 100%|███

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▄▅▅▆▆▇█
train_loss,█▇▆▅▅▄▄▃▂▁
val_accuracy,▁▃▆▆▆▇█▇█▆
val_loss,█▅▂▃▂▁▁▁▃▅

0,1
epoch,10.0
train_accuracy,0.52713
train_loss,1.37578
val_accuracy,0.32715
val_loss,2.05882


[34m[1mwandb[0m: Agent Starting Run: oq0pc2er with config:
[34m[1mwandb[0m: 	activation_fn: GELU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_filters: [64, 64, 64, 64, 64]
[34m[1mwandb[0m: 	data_augmentation: no
[34m[1mwandb[0m: 	data_dir: /content/inaturalist_12K
[34m[1mwandb[0m: 	dense_units: 256
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	kernel_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	lr: 0.00045743269240618113


Epoch 1 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.53it/s]
Epoch 1 [Val]: 100%|██████████| 16/16 [00:08<00:00,  1.95it/s]
Epoch 2 [Train]: 100%|██████████| 63/63 [00:42<00:00,  1.48it/s]
Epoch 2 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.77it/s]
Epoch 3 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.53it/s]
Epoch 3 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.70it/s]
Epoch 4 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.53it/s]
Epoch 4 [Val]: 100%|██████████| 16/16 [00:08<00:00,  1.96it/s]
Epoch 5 [Train]: 100%|██████████| 63/63 [00:42<00:00,  1.49it/s]
Epoch 5 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.72it/s]
Epoch 6 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.53it/s]
Epoch 6 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.69it/s]
Epoch 7 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.53it/s]
Epoch 7 [Val]: 100%|██████████| 16/16 [00:08<00:00,  1.86it/s]
Epoch 8 [Train]: 100%|██████████| 63/63 [00:42<00:00,  1.50it/s]
Epoch 8 [Val]: 100%|██████████| 16/16 [

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▃▅▅▆▆▇▇█
train_loss,█▇▆▅▄▄▃▂▂▁
val_accuracy,▁▃▅▅▆▆▇██▇
val_loss,█▆▅▄▃▂▂▂▁▁

0,1
epoch,10.0
train_accuracy,0.42179
train_loss,1.67746
val_accuracy,0.34629
val_loss,1.84317


[34m[1mwandb[0m: Agent Starting Run: m2zb5do0 with config:
[34m[1mwandb[0m: 	activation_fn: GELU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_filters: [64, 64, 64, 64, 64]
[34m[1mwandb[0m: 	data_augmentation: no
[34m[1mwandb[0m: 	data_dir: /content/inaturalist_12K
[34m[1mwandb[0m: 	dense_units: 512
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	kernel_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	lr: 0.0005218821522417687


Epoch 1 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.53it/s]
Epoch 1 [Val]: 100%|██████████| 16/16 [00:08<00:00,  1.95it/s]
Epoch 2 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.51it/s]
Epoch 2 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.66it/s]
Epoch 3 [Train]: 100%|██████████| 63/63 [00:43<00:00,  1.46it/s]
Epoch 3 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.70it/s]
Epoch 4 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.51it/s]
Epoch 4 [Val]: 100%|██████████| 16/16 [00:08<00:00,  1.81it/s]
Epoch 5 [Train]: 100%|██████████| 63/63 [00:42<00:00,  1.50it/s]
Epoch 5 [Val]: 100%|██████████| 16/16 [00:08<00:00,  1.80it/s]
Epoch 6 [Train]: 100%|██████████| 63/63 [00:43<00:00,  1.46it/s]
Epoch 6 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.67it/s]
Epoch 7 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.51it/s]
Epoch 7 [Val]: 100%|██████████| 16/16 [00:09<00:00,  1.70it/s]
Epoch 8 [Train]: 100%|██████████| 63/63 [00:41<00:00,  1.53it/s]
Epoch 8 [Val]: 100%|██████████| 16/16 [

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▂▃▄▅▆▆▇▇█
train_loss,█▇▆▅▄▄▃▃▂▁
val_accuracy,▁▃▄▅▅▇▆▇█▇
val_loss,█▆▅▄▃▂▁▁▂▂

0,1
epoch,10.0
train_accuracy,0.43172
train_loss,1.61158
val_accuracy,0.36543
val_loss,1.87764


[34m[1mwandb[0m: Agent Starting Run: 14wao7y8 with config:
[34m[1mwandb[0m: 	activation_fn: SiLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_filters: [64, 64, 64, 64, 64]
[34m[1mwandb[0m: 	data_augmentation: no
[34m[1mwandb[0m: 	data_dir: /content/inaturalist_12K
[34m[1mwandb[0m: 	dense_units: 256
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	kernel_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	lr: 0.0006717255301094951


Epoch 1 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.87it/s]
Epoch 1 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.38it/s]
Epoch 2 [Train]: 100%|██████████| 125/125 [00:41<00:00,  2.98it/s]
Epoch 2 [Val]: 100%|██████████| 32/32 [00:07<00:00,  4.01it/s]
Epoch 3 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.96it/s]
Epoch 3 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.44it/s]
Epoch 4 [Train]: 100%|██████████| 125/125 [00:43<00:00,  2.89it/s]
Epoch 4 [Val]: 100%|██████████| 32/32 [00:09<00:00,  3.42it/s]
Epoch 5 [Train]: 100%|██████████| 125/125 [00:42<00:00,  2.96it/s]
Epoch 5 [Val]: 100%|██████████| 32/32 [00:08<00:00,  3.92it/s]
Epoch 6 [Train]:  34%|███▍      | 43/125 [00:14<00:33,  2.44it/s]

## **Best Model**

Select the configuration of the best-performing sweep run and report that model's accuracy on the test dataset.

In [24]:
import wandb

# Look up the sweep through the W&B public API and keep the configuration of
# the run whose summary reports the highest validation accuracy. Runs whose
# summary has no 'val_accuracy' entry are scored as 0.
def _summary_val_accuracy(run):
    """Return the run's summary 'val_accuracy', or 0 when it is absent."""
    return run.summary.get('val_accuracy', 0)


api = wandb.Api()
sweep = api.sweep('me21b164-indian-institute-of-technology-madras/A2/sweeps/65nw10d9')
best_run = max(sweep.runs, key=_summary_val_accuracy)
best_config = best_run.config


def load_data_with_test(data_dir, batch_size, val_split=0.2):
    """Build train, validation and test loaders for an ImageFolder dataset.

    Images under ``<data_dir>/train`` are split into train and validation
    subsets with a stratified shuffle split (fixed ``random_state=42``);
    images under ``<data_dir>/val`` are used as the test set.

    Args:
        data_dir: Root directory containing the ``train`` and ``val`` folders.
        batch_size: Batch size used for all three loaders.
        val_split: Fraction of the ``train`` folder held out for validation.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader, class_names)`` where
        ``class_names`` is the ``classes`` list of the test ImageFolder.
    """
    # Same preprocessing for every split: resize to 224x224, convert to tensor.
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor()
    ])

    train_root = os.path.join(data_dir, 'train')
    test_root = os.path.join(data_dir, 'val')

    labelled_images = datasets.ImageFolder(train_root, transform=preprocess)
    class_ids = [class_id for _path, class_id in labelled_images.imgs]

    # One stratified split so each class keeps its share in both subsets.
    stratified = StratifiedShuffleSplit(n_splits=1, test_size=val_split, random_state=42)
    train_indices, val_indices = next(stratified.split(labelled_images.imgs, class_ids))

    train_loader = DataLoader(
        Subset(labelled_images, train_indices),
        batch_size=batch_size,
        shuffle=True,
    )
    val_loader = DataLoader(
        Subset(labelled_images, val_indices),
        batch_size=batch_size,
        shuffle=False,
    )

    held_out_images = datasets.ImageFolder(test_root, transform=preprocess)
    test_loader = DataLoader(held_out_images, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader, held_out_images.classes


# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def evaluate_on_test(config, model=None, checkpoint_path=None):
    """Evaluate a FlexibleCNN on the test split and collect its predictions.

    Args:
        config: Mapping with the keys 'conv_filters', 'kernel_sizes',
            'activation_fn', 'dense_units', 'dropout', 'data_dir' and
            'batch_size' (for example the config of the best sweep run).
        model: Optional, already trained model to evaluate. When omitted a
            FlexibleCNN is built from ``config``.
        checkpoint_path: Optional path to a saved ``state_dict`` that is
            loaded into the model before evaluation.

    Returns:
        Tuple ``(test_acc, images, predictions, labels, class_names)``:
        ``test_acc`` is the fraction of correctly classified test images,
        ``images`` is a list of CPU image tensors, ``predictions`` and
        ``labels`` are lists of class indices, and ``class_names`` is the
        class list of the test ImageFolder.
    """
    import warnings

    if model is None:
        model = FlexibleCNN(
            conv_filters=config['conv_filters'],
            kernel_sizes=config['kernel_sizes'],
            activation_fn_str=config['activation_fn'],
            dense_units=config['dense_units'],
            dropout_rate=config['dropout'],
            num_classes=10
        )
        if checkpoint_path is None:
            # A freshly constructed network has random weights, so its test
            # accuracy is only chance level (~0.10 for 10 classes).
            warnings.warn(
                "evaluate_on_test: no trained model or checkpoint_path given; "
                "evaluating randomly initialised weights.",
                stacklevel=2,
            )
    model = model.to(device)

    if checkpoint_path is not None:
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    _, _, test_loader, class_names = load_data_with_test(config['data_dir'], config['batch_size'])

    model.eval()  # disable dropout for deterministic predictions
    num_correct = 0
    num_seen = 0
    predictions = []
    images = []
    labels = []

    with torch.no_grad():
        for batch in test_loader:
            inputs, targets = batch[0].to(device), batch[1].to(device)
            outputs = model(inputs)

            _, preds = torch.max(outputs, 1)
            # Count over samples rather than averaging per-batch accuracies,
            # so a smaller final batch is not over-weighted.
            num_correct += (preds == targets).sum().item()
            num_seen += targets.size(0)

            predictions.extend(preds.cpu().numpy())
            images.extend(inputs.cpu())
            labels.extend(targets.cpu().numpy())

    # Guard against an empty test loader instead of dividing by zero.
    test_acc = num_correct / num_seen if num_seen else 0.0
    print(f"Test Accuracy: {test_acc:.4f}")

    return test_acc, images, predictions, labels, class_names


# Evaluate the best sweep configuration on the test split and keep the images,
# predictions and labels it returns.
# NOTE(review): called with only the config, evaluate_on_test builds a new
# FlexibleCNN and no trained weights are loaded in the visible code, so the
# printed accuracy reflects an untrained network — confirm this is intended.
test_acc, test_images, test_preds, test_labels, class_names = evaluate_on_test(best_config)

Test Accuracy: 0.0977


## **Display (10x3) Grid**

In [None]:
import os
import torch
import random
import matplotlib.pyplot as plt
from PIL import Image
import wandb

# Preprocessing for a single image. Kept identical to the transform used by
# load_data_with_test (224x224, no normalisation) so the grid feeds the model
# the same kind of input as the test evaluation does.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Define class labels. These double as the class folder names below, and their
# order must match the class indices of the model (ImageFolder assigns indices
# in sorted folder-name order).
labels = ["Amphibia", "Animalia", "Arachnida", "Aves", "Fungi",
          "Insecta", "Mammalia", "Mollusca", "Plantae", "Reptilia"]

# Device selection
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Best Model Configuration
# NOTE(review): this constructs a new network and no trained weights are
# loaded in this cell, so the predictions come from random initialisation
# unless weights are loaded before the loop — confirm and load a checkpoint.
model = FlexibleCNN(
    conv_filters=[64, 64, 64, 64, 64],
    kernel_sizes=[3, 3, 3, 3, 3],
    activation_fn_str='GELU',
    dense_units=256,
    dropout_rate=0.3,
    num_classes=10
).to(device)
model.eval()  # inference mode: dropout must be off when predicting

# Create a 10x3 grid for plotting: one row per class, three samples per row
fig, axes = plt.subplots(nrows=10, ncols=3, figsize=(8, 16))

# Loop through the grid and plot images and predictions
for i, ax in enumerate(axes.flat):
    j = i // 3  # row index == class index (three consecutive cells per class)
    actual_label = labels[j]

    # Load a random image from the respective class folder
    folder_path = f"/content/inaturalist_12K/train/{actual_label}"
    file_list = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))]
    random_file = random.choice(file_list)
    image_path = os.path.join(folder_path, random_file)

    # Load and transform the image
    image = Image.open(image_path).convert("RGB")
    image_t = transform(image).unsqueeze(0).to(device)  # Add batch dimension

    # Get model prediction. The task is single-label (one class per image), so
    # take the arg-max class exactly as evaluate_on_test does, instead of
    # thresholding per-class sigmoids.
    with torch.no_grad():
        output = model(image_t)
    predicted_text = labels[output.argmax(dim=1).item()]

    # Plot the image. The axes are switched off, which also hides axis labels,
    # so the true class is shown in the title next to the prediction.
    ax.imshow(image)
    ax.axis("off")
    ax.set_title(f'Actual: {actual_label}\nPredicted: {predicted_text}', fontsize=8)

# Adjust layout and display the plot
plt.tight_layout()
if wandb.run is not None:
    # wandb.log needs an active run; only log when one exists.
    wandb.log({"Prediction Grid": wandb.Image(fig)})  # Log to wandb
plt.show()