In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, SubsetRandomSampler, random_split, ConcatDataset
from torchvision.datasets import ImageFolder
import wandb
from wandb.sdk.wandb_run import Run
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import numpy as np



# Check if CUDA (GPU) is available, and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Set up data transformations
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# Load the dataset
train_data = ImageFolder('/kaggle/input/dataset2/inaturalist_12K/train', transform=train_transforms)
test_data = ImageFolder('/kaggle/input/dataset2/inaturalist_12K/val', transform=test_transforms)


In [2]:
# Count the number of samples in each class
class_counts = {}
pbar = tqdm(total=len(train_data))
for _, label in train_data:
    if label not in class_counts:
        class_counts[label] = 0
    class_counts[label] += 1
    pbar.set_postfix()
    pbar.update(1)

pbar.close()

# Calculate the number of samples per class for validation set
val_size_per_class = {label: int(count * 0.2) for label, count in class_counts.items()}

# Initialize lists to hold indices for train and validation sets
train_indices = []
val_indices = []

# Iterate through the dataset and assign samples to train or validation set
pbar = tqdm(total=len(train_data))
for idx, (_, label) in enumerate(train_data):
    if val_size_per_class[label] > 0:
        val_indices.append(idx)
        val_size_per_class[label] -= 1
    else:
        train_indices.append(idx)
    pbar.set_postfix()
    pbar.update(1)

pbar.close()

# Create SubsetRandomSampler for train and validation sets
train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)
    

  0%|          | 0/9999 [00:00<?, ?it/s]

  0%|          | 0/9999 [00:00<?, ?it/s]

In [7]:
# Function to calculate accuracy
def calculate_accuracy(outputs, labels):
    _, predicted = torch.max(outputs, 1)
    correct = (predicted == labels).sum().item()
    accuracy = correct / labels.size(0)
    return accuracy

In [8]:
# Training loop
def training_model(epochs, optimizer, criterion, model, train_loader, val_loader):
    for epoch in range(epochs):
        model.train()
        training_loss = 0.0
        train_accuracy = 0.0
        pbar = tqdm(total=len(train_loader), desc=f'Epoch {epoch+1}/{epochs}')
        for images, labels in train_loader:
            optimizer.zero_grad()
            images, labels = images.to(device), labels.to(device) 
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            training_loss += loss.item()
            train_accuracy += calculate_accuracy(outputs, labels)
            pbar.set_postfix({'Train Loss': training_loss / (pbar.n + 1), 'Train Acc': train_accuracy / (pbar.n + 1)})
            pbar.update(1)

        pbar.close()


        model.eval()
        val_loss = 0.0
        val_accuracy = 0.0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device) 
                outputs = model(images)
                val_loss += criterion(outputs, labels).item()
                val_accuracy += calculate_accuracy(outputs, labels)

        train_accuracy /= len(train_loader)
        training_loss /= len(train_loader)
        val_loss /= len(val_loader)
        val_accuracy /= len(val_loader)
        print(f'Epoch {epoch+1}/{epochs}, Train_Loss: {training_loss:.4f},  Train_Acc: {train_accuracy:.4f},  Val_Loss: {val_loss:.4f},  Val_Accuracy: {val_accuracy:.4f}')
        wandb.log({"epoch": epoch+1, "train_loss": training_loss, "val_loss": val_loss, "val_accuracy": val_accuracy, "train_accuracy": train_accuracy})
    return model



In [9]:
sweep_config = {
    'method': 'bayes',  # Random search method
    'metric': {'goal': 'maximize', 'name': 'val_accuracy'},  # Metric to optimize
    'parameters': {
        'epochs': {'values':[5, 10]},
        'batch_size': {'values':[32, 64]},
        'num_filters': {'values': [32, 64, 128]},
        'activation': {'values': ['ReLU', 'GELU', 'SiLU', 'Mish']},
        'filter_organization': {'values': ['same', 'double', 'halve']},
        'data_augmentation': {'values': [True, False]},
        'batch_norm': {'values': [True, False]},
        'dropout': {'values': [0.2, 0.3]},
        'strategy':{'values':[1, 2, 3]}
    }
}

In [10]:
def apply_additional_transforms(loader, additional_transforms, batch_size):
    transformed_dataset = []
    original_dataset = []
    pbar = tqdm(total=len(loader))
    for images, labels in loader:
        images1 = additional_transforms(images)
        for i in range(batch_size):
            original_dataset.append((images[i], labels[i]))
            transformed_dataset.append((images1[i], labels[i]))
        pbar.set_postfix()
        pbar.update(1)

    pbar.close()
    return original_dataset, transformed_dataset



In [11]:
def augment_data(data_augmentation, train_loader, batch_size):
    if data_augmentation:
        additional_transforms = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(10),
            transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
            transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
        ])

        # Apply additional transformations to the new DataLoader
        original_dataset, transformed_dataset = apply_additional_transforms(train_loader, additional_transforms, batch_size)
        combined_dataset = ConcatDataset([original_dataset, transformed_dataset])

        # Create a new DataLoader using the combined dataset
        combined_loader = DataLoader(combined_dataset, batch_size=batch_size, shuffle=True)
    else:
        combined_loader = train_loader
    return combined_loader

In [12]:
def train_CNN(num_filters, activation, filter_organization, data_augmentation, batch_norm, dropout, batch_size, epochs, strategy):
    
    # Load pre-trained model (ResNet50)
    model = torchvision.models.resnet50(pretrained=True)
    model.to(device)
    if strategy == 1:
    # Strategy 1: Freeze all layers except the last layer
        for param in model.parameters():
            param.requires_grad = False
        
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, 101)  # 101 classes in iNaturalist
    elif strategy == 2:
    # Strategy 2: Freeze layers up to a certain depth
    # Freeze layers up to layer 4
        for name, param in model.named_parameters():
            if 'layer4' not in name:  # Freeze layers up to layer 4
                param.requires_grad = False
    else:
    # Strategy 3: Layer-wise fine-tuning
        for param in model.layer4.parameters():
            param.requires_grad = True
    
    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Create DataLoader instances for train and validation sets using the samplers
    train_loader = DataLoader(train_data, batch_size=batch_size, sampler=train_sampler)
    val_loader = DataLoader(train_data, batch_size=batch_size, sampler=val_sampler)

    combined_loader = augment_data(data_augmentation, train_loader, batch_size)
    
    model = training_model(epochs, optimizer, criterion, model, combined_loader, val_loader)
    return model

In [None]:
wandb.login(key = "1d2c93cf7ddd48a63114848b66796301171827b6")
sweep_id = wandb.sweep(sweep_config, project='DL-Assignment-2')

# Define your training function
def train():
    
    # Initialize Wandb run with custom run name
    with wandb.init() as run:
        
        # Use wandb.config to access hyperparameters in your training script
        config = wandb.config
        num_filters = config['num_filters']
        activation = config['activation']
        filter_organization = config['filter_organization']
        data_augmentation = config['data_augmentation']
        batch_norm = config['batch_norm']
        batch_size = config['batch_size']
        epochs = config['epochs']
        dropout = config['dropout']
        strategy = config['strategy']
        # Generate a custom run name based on hyperparameters
        run_name = "Part-B_" + "epochs_" + str(epochs) + "_nFilters_" + str(num_filters) + "_activation_" + str(activation)+ "_filterOrg_" + str(filter_organization) + "_batchSize_" + str(batch_size)
        wandb.run.name = run_name
        
        model = train_CNN(num_filters, activation, filter_organization, data_augmentation, batch_norm, dropout, batch_size, epochs, strategy)

        # Test the model
        test_loader = DataLoader(test_data, batch_size=batch_size)
        model.eval()

        test_accuracy = 0.0
        pbar = tqdm(total=len(test_loader))
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device) 
                outputs = model(images)
                images.to("cpu")
                labels.to("cpu")
                for i in range(len(images)):
                    image = images[i]
                    label = labels[i]
                    output = outputs[i].argmax(dim = 0)
                    if (label == output):
                        test_accuracy += 1
                pbar.set_postfix()
                pbar.update(1)

        pbar.close()
        wandb.login(key = "1d2c93cf7ddd48a63114848b66796301171827b6")
        with wandb.init( project='DL-Assignment-2') as run:      
            run_name = "test_accuracy - Part B"
            wandb.run.name = run_name
            test_accuracy /= len(test_data)
            print(test_accuracy)
            wandb.log({"test_accuracy": test_accuracy})

        wandb.finish()
        
# Run the sweep
wandb.agent(sweep_id, function=train, count=20)
wandb.finish()

[34m[1mwandb[0m: Currently logged in as: [33mcs23m047[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: p5xs97w0
Sweep URL: https://wandb.ai/cs23m047/DL-Assignment-2/sweeps/p5xs97w0


[34m[1mwandb[0m: Agent Starting Run: cd271bxo with config:
[34m[1mwandb[0m: 	activation: SiLU
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	filter_organization: halve
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	strategy: 3




  0%|          | 0/125 [00:00<?, ?it/s]



Epoch 1/5:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 1/5, Train_Loss: 2.0441,  Train_Acc: 0.3026,  Val_Loss: 1.9413,  Val_Accuracy: 0.3511


Epoch 2/5:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 2/5, Train_Loss: 1.6119,  Train_Acc: 0.4365,  Val_Loss: 1.4645,  Val_Accuracy: 0.4911


Epoch 3/5:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 3/5, Train_Loss: 1.3861,  Train_Acc: 0.5230,  Val_Loss: 1.5204,  Val_Accuracy: 0.4693


Epoch 4/5:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 4/5, Train_Loss: 1.1994,  Train_Acc: 0.5891,  Val_Loss: 1.3130,  Val_Accuracy: 0.5527


Epoch 5/5:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 5/5, Train_Loss: 1.0258,  Train_Acc: 0.6460,  Val_Loss: 1.6516,  Val_Accuracy: 0.4918


  0%|          | 0/32 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
train_accuracy,▁▄▅▇█
train_loss,█▅▃▂▁
val_accuracy,▁▆▅█▆
val_loss,█▃▃▁▅

0,1
epoch,5.0
train_accuracy,0.646
train_loss,1.02576
val_accuracy,0.49176
val_loss,1.65159


0.5045


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
test_accuracy,▁

0,1
test_accuracy,0.5045


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: o2t124o8 with config:
[34m[1mwandb[0m: 	activation: SiLU
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	strategy: 2


  0%|          | 0/125 [00:00<?, ?it/s]

Epoch 1/10:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 1/10, Train_Loss: 1.6615,  Train_Acc: 0.5537,  Val_Loss: 1.0253,  Val_Accuracy: 0.6868


Epoch 2/10:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 2/10, Train_Loss: 0.9140,  Train_Acc: 0.6974,  Val_Loss: 0.9145,  Val_Accuracy: 0.7340


Epoch 3/10:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 3/10, Train_Loss: 0.6308,  Train_Acc: 0.7919,  Val_Loss: 0.9802,  Val_Accuracy: 0.7193


Epoch 4/10:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 4/10, Train_Loss: 0.4097,  Train_Acc: 0.8644,  Val_Loss: 1.1029,  Val_Accuracy: 0.7094


Epoch 5/10:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 5/10, Train_Loss: 0.2563,  Train_Acc: 0.9157,  Val_Loss: 1.1250,  Val_Accuracy: 0.7374


Epoch 6/10:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 6/10, Train_Loss: 0.1820,  Train_Acc: 0.9389,  Val_Loss: 1.2091,  Val_Accuracy: 0.7137


Epoch 7/10:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 7/10, Train_Loss: 0.1182,  Train_Acc: 0.9622,  Val_Loss: 1.3766,  Val_Accuracy: 0.7071


Epoch 8/10:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 8/10, Train_Loss: 0.1007,  Train_Acc: 0.9664,  Val_Loss: 1.4721,  Val_Accuracy: 0.7144


Epoch 9/10:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 9/10, Train_Loss: 0.0889,  Train_Acc: 0.9710,  Val_Loss: 1.3653,  Val_Accuracy: 0.7113


Epoch 10/10:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 10/10, Train_Loss: 0.0691,  Train_Acc: 0.9770,  Val_Loss: 1.4000,  Val_Accuracy: 0.7230


  0%|          | 0/32 [00:00<?, ?it/s]



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▅▆▇▇████
train_loss,█▅▃▂▂▁▁▁▁▁
val_accuracy,▁█▅▄█▅▄▅▄▆
val_loss,▂▁▂▃▄▅▇█▇▇

0,1
epoch,10.0
train_accuracy,0.977
train_loss,0.06908
val_accuracy,0.72301
val_loss,1.40004


0.7205


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
test_accuracy,▁

0,1
test_accuracy,0.7205


[34m[1mwandb[0m: Agent Starting Run: 9kr8o6nv with config:
[34m[1mwandb[0m: 	activation: Mish
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: halve
[34m[1mwandb[0m: 	num_filters: 128
[34m[1mwandb[0m: 	strategy: 1


Epoch 1/10:   0%|          | 0/250 [00:00<?, ?it/s]

Traceback (most recent call last):
  File "/tmp/ipykernel_176/2050151741.py", line 25, in train
    model = train_CNN(num_filters, activation, filter_organization, data_augmentation, batch_norm, dropout, batch_size, epochs, strategy)
  File "/tmp/ipykernel_176/1325316421.py", line 34, in train_CNN
    model = training_model(epochs, optimizer, criterion, model, combined_loader, val_loader)
  File "/tmp/ipykernel_176/1012289808.py", line 11, in training_model
    outputs = model(images)
  File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/torchvision/models/resnet.py", line 285, in forward
    return self._forward_impl(x)
  File "/opt/conda/lib/python3.10/site-packages/torchvision/models/resnet.py", l

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run 9kr8o6nv errored:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/tmp/ipykernel_176/2050151741.py", line 25, in train
    model = train_CNN(num_filters, activation, filter_organization, data_augmentation, batch_norm, dropout, batch_size, epochs, strategy)
  File "/tmp/ipykernel_176/1325316421.py", line 34, in train_CNN
    model = training_model(epochs, optimizer, criterion, model, combined_loader, val_loader)
  File "/tmp/ipykernel_176/1012289808.py", line 11, in training_model
    outputs = model(images)
  File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/torchvision/models/resnet.

  0%|          | 0/250 [00:00<?, ?it/s]

Epoch 1/5:   0%|          | 0/500 [00:00<?, ?it/s]

[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))