In [1]:
!pip install wandb



In [2]:
import wandb
key = input('Enter your API:')
wandb.login(key=key)

Enter your API: 580e769ee2f34eafdded556ce52aaf31c265ad3b


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mma23m011[0m ([33mma23m011-iit-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import wandb

#  # Sweep configuration for hyperparameter tuning
sweep_config = {
    'name': 'resnet50_hyperparam_sweep-3',# Sweep name
    'method': 'bayes',  # Use Bayesian optimization
    'metric': {
        'name': 'val_acc', # Optimize for validation accuracy
        'goal': 'maximize' # Maximize the metric
    },
    'parameters': {
        'batch_size': {
            'values': [16, 32, 64, 128]  # different batch sizes
        },
        'freeze_up_to': {'values': [0, 3, 5, 7]}, # Freeze first few layers
        'epochs': {
            'values': [5,7, 10,12]  # different epoch values
        },
        'learning_rate': {
            'values': [0.1, 0.01, 0.001]  # different learning rates
        }
    }
}

#  Create Sweep
sweep_id = wandb.sweep(sweep_config, project="DL_A2")

# # Set device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#  Training Function
def train():
    # Initialize W&B run
    with wandb.init() as run:
        config = run.config

        #  Transforms
        transform = transforms.Compose([
            transforms.Resize((224, 224)), # Resize image
            transforms.RandomResizedCrop(224),# Random crop
            transforms.RandomHorizontalFlip(),# Random horizontal flip
            transforms.ToTensor(),  # Convert to tensor
            transforms.Normalize(mean=[0.485, 0.456, 0.406], # Normalize
                                 std=[0.229, 0.224, 0.225])
        ])

        #  Load Data
        train_data = ImageFolder('/kaggle/input/inaturalist-12/inaturalist_12K/train', transform=transform)
        val_data = ImageFolder('/kaggle/input/inaturalist-12/inaturalist_12K/val', transform=transform)

        # Create data loaders
        train_loader = DataLoader(train_data, batch_size=config.batch_size, shuffle=True)
        val_loader = DataLoader(val_data, batch_size=config.batch_size)

        dataloaders = {'train': train_loader, 'val': val_loader} # Store loaders 
        dataset_sizes = {'train': len(train_data), 'val': len(val_data)} # Store dataset sizes

        # Load pretrained ResNet50 model
        model = models.resnet50(pretrained=True)
        num_ftrs = model.fc.in_features # Get number of input features to final layer
        model.fc = nn.Linear(num_ftrs, 10) # Replace final layer for 10 classes

        # Freeze layers up to a certain index
        child_counter = 0
        for child in model.children():
            if child_counter < config.freeze_up_to:
                for param in child.parameters():
                    param.requires_grad = False  #freeze
            else:
                for param in child.parameters():
                    param.requires_grad = True  # Unfreeze
            child_counter += 1
        model=model.to(device)  # Move model to device

        #for param in model.parameters():
            #param.requires_grad = False

        #for param in model.fc.parameters():
            #param.requires_grad = True
        #model = model.to(device)

        # Loss and Optimizer
        criterion = nn.CrossEntropyLoss() # Define loss function
        optimizer = optim.Adam(model.fc.parameters(), lr=config.learning_rate) # Define Optimizer
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1) # Learning rate scheduler

        # Training Loop
        for epoch in range(config.epochs):
            model.train()  # Set model to training mode
            running_loss = 0.0 # initialize running loss
            running_corrects = 0 #initialize running correct prediction

            for inputs, labels in dataloaders['train']:
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad() # Clear gradients
                outputs = model(inputs) # Forward pass
                _, preds = torch.max(outputs, 1) # Get predictions
                loss = criterion(outputs, labels) # Compute loss
                loss.backward() # Backpropagation
                optimizer.step() # Update weights

                running_loss += loss.item() * inputs.size(0) # Total loss
                running_corrects += torch.sum(preds == labels.data) # Count correct

            epoch_loss = running_loss / dataset_sizes['train'] # Calculate average loss  for epoch
            epoch_acc = (running_corrects.double() / dataset_sizes['train']) * 100 # Calculate  accuracy for epoch
            
            # wandb log results
            wandb.log({
                "epoch": epoch + 1,
                "train_loss": epoch_loss,
                "train_acc": epoch_acc
            })
            scheduler.step() # Step the scheduler

        # Validation
        model.eval() # Set model to eval mode
        running_corrects = 0
        with torch.no_grad():
            for inputs, labels in dataloaders['val']:
                # selecting device for speed
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs) # Forward pass
                _, preds = torch.max(outputs, 1) # Get predictions
                running_corrects += torch.sum(preds == labels.data) # Count correct

        val_acc = (running_corrects.double() / dataset_sizes['val']) * 100 # Compute validation accuracy
        wandb.log({"val_acc": val_acc})  # Log validation accuracy to wandb
        print(f"[{run.name}] Validation Accuracy: {val_acc:.2f}%") # Print result

# Sweep Agent (This runs experiments)
wandb.agent(sweep_id, function=train, count=50)


Create sweep with ID: tfufum48
Sweep URL: https://wandb.ai/ma23m011-iit-madras/DL_A2/sweeps/tfufum48


[34m[1mwandb[0m: Agent Starting Run: 6nu45mpx with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	freeze_up_to: 7
[34m[1mwandb[0m: 	learning_rate: 0.001


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 219MB/s]


[earthy-sweep-1] Validation Accuracy: 76.25%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▆▇▇▇▇███
train_loss,█▃▃▂▂▂▂▁▁▁
val_acc,▁

0,1
epoch,10.0
train_acc,75.9576
train_loss,0.71335
val_acc,76.25


[34m[1mwandb[0m: Agent Starting Run: 7kpf2sb9 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	freeze_up_to: 0
[34m[1mwandb[0m: 	learning_rate: 0.001




[apricot-sweep-2] Validation Accuracy: 74.80%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▅▆▆▇▆▇███
train_loss,█▃▃▃▂▂▂▁▁▁
val_acc,▁

0,1
epoch,10.0
train_acc,76.18762
train_loss,0.71535
val_acc,74.8


[34m[1mwandb[0m: Agent Starting Run: fep9a5d6 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	freeze_up_to: 5
[34m[1mwandb[0m: 	learning_rate: 0.01


[jolly-sweep-3] Validation Accuracy: 63.70%


0,1
epoch,▁▃▅▆█
train_acc,▁▆▇▆█
train_loss,█▂▁▄▂
val_acc,▁

0,1
epoch,5.0
train_acc,69.10691
train_loss,1.25452
val_acc,63.7


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: y01nibca with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	freeze_up_to: 3
[34m[1mwandb[0m: 	learning_rate: 0.001


[divine-sweep-4] Validation Accuracy: 75.35%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▆▇▇▇▇███
train_loss,█▃▂▂▂▂▂▁▁▁
val_acc,▁

0,1
epoch,10.0
train_acc,75.79758
train_loss,0.71632
val_acc,75.35


[34m[1mwandb[0m: Agent Starting Run: d5ja6seg with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 12
[34m[1mwandb[0m: 	freeze_up_to: 7
[34m[1mwandb[0m: 	learning_rate: 0.001


[easy-sweep-5] Validation Accuracy: 74.90%


0,1
epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_acc,▁▆▆▇▇▇▇█████
train_loss,█▃▂▂▂▂▂▁▁▁▁▁
val_acc,▁

0,1
epoch,12.0
train_acc,75.69757
train_loss,0.72163
val_acc,74.9


[34m[1mwandb[0m: Agent Starting Run: 1eba5cj4 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 12
[34m[1mwandb[0m: 	freeze_up_to: 0
[34m[1mwandb[0m: 	learning_rate: 0.001


[eternal-sweep-6] Validation Accuracy: 76.30%


0,1
epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_acc,▁▆▇▇▇▇▇█████
train_loss,█▃▂▂▂▂▂▁▁▁▁▁
val_acc,▁

0,1
epoch,12.0
train_acc,76.0376
train_loss,0.72732
val_acc,76.3


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: l0uxeak0 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 12
[34m[1mwandb[0m: 	freeze_up_to: 0
[34m[1mwandb[0m: 	learning_rate: 0.001


[silvery-sweep-7] Validation Accuracy: 76.65%


0,1
epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_acc,▁▅▆▇▇▇▇█████
train_loss,█▃▂▂▂▂▁▁▁▁▁▁
val_acc,▁

0,1
epoch,12.0
train_acc,76.16762
train_loss,0.71609
val_acc,76.65


[34m[1mwandb[0m: Agent Starting Run: y5g5rz1b with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 12
[34m[1mwandb[0m: 	freeze_up_to: 0
[34m[1mwandb[0m: 	learning_rate: 0.001


[light-sweep-8] Validation Accuracy: 75.65%


0,1
epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_acc,▁▆▆▇▇▇▇█████
train_loss,█▃▃▂▂▂▂▁▁▁▁▁
val_acc,▁

0,1
epoch,12.0
train_acc,76.18762
train_loss,0.72043
val_acc,75.65


[34m[1mwandb[0m: Agent Starting Run: 9ha6rs43 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 12
[34m[1mwandb[0m: 	freeze_up_to: 0
[34m[1mwandb[0m: 	learning_rate: 0.1


[glowing-sweep-9] Validation Accuracy: 71.80%


0,1
epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_acc,▁▃▄▄▄▄▅▇████
train_loss,▆▆▇▇██▇▃▂▂▁▁
val_acc,▁

0,1
epoch,12.0
train_acc,70.12701
train_loss,10.29507
val_acc,71.8


[34m[1mwandb[0m: Agent Starting Run: akxvgdaa with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 12
[34m[1mwandb[0m: 	freeze_up_to: 0
[34m[1mwandb[0m: 	learning_rate: 0.001


[olive-sweep-10] Validation Accuracy: 75.30%


0,1
epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_acc,▁▆▆▇▆▇▇█████
train_loss,█▃▂▂▂▂▂▁▁▁▁▁
val_acc,▁

0,1
epoch,12.0
train_acc,76.0176
train_loss,0.71428
val_acc,75.3


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rabjwauy with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	freeze_up_to: 0
[34m[1mwandb[0m: 	learning_rate: 0.001


[zany-sweep-11] Validation Accuracy: 75.00%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇▇▇▇███
train_loss,█▃▂▂▂▂▂▁▁▁
val_acc,▁

0,1
epoch,10.0
train_acc,76.55766
train_loss,0.71625
val_acc,75.0


[34m[1mwandb[0m: Agent Starting Run: pq1jhzsi with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 12
[34m[1mwandb[0m: 	freeze_up_to: 0
[34m[1mwandb[0m: 	learning_rate: 0.001


[hopeful-sweep-12] Validation Accuracy: 77.50%


0,1
epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_acc,▁▄▅▅▆▆▆▇████
train_loss,█▄▄▃▃▃▃▁▁▁▁▁
val_acc,▁

0,1
epoch,12.0
train_acc,75.31753
train_loss,0.72994
val_acc,77.5


[34m[1mwandb[0m: Agent Starting Run: 9blf40k8 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 12
[34m[1mwandb[0m: 	freeze_up_to: 3
[34m[1mwandb[0m: 	learning_rate: 0.001


[rose-sweep-13] Validation Accuracy: 74.65%


0,1
epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_acc,▁▅▅▆▆▆▆█████
train_loss,█▄▄▄▃▄▃▁▁▁▁▁
val_acc,▁

0,1
epoch,12.0
train_acc,72.49725
train_loss,0.81965
val_acc,74.65


[34m[1mwandb[0m: Agent Starting Run: lofveacd with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 12
[34m[1mwandb[0m: 	freeze_up_to: 0
[34m[1mwandb[0m: 	learning_rate: 0.001


[sweepy-sweep-14] Validation Accuracy: 74.00%


0,1
epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_acc,▁▅▆▆▆▇▇█████
train_loss,█▃▃▃▂▂▂▁▁▁▁▁
val_acc,▁

0,1
epoch,12.0
train_acc,76.0376
train_loss,0.71476
val_acc,74.0


[34m[1mwandb[0m: Agent Starting Run: je2idijt with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 12
[34m[1mwandb[0m: 	freeze_up_to: 0
[34m[1mwandb[0m: 	learning_rate: 0.001


[lucky-sweep-15] Validation Accuracy: 76.10%


0,1
epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_acc,▁▅▅▅▆▆▆█████
train_loss,█▄▄▄▄▄▃▁▁▁▁▁
val_acc,▁

0,1
epoch,12.0
train_acc,73.06731
train_loss,0.80932
val_acc,76.1


[34m[1mwandb[0m: Agent Starting Run: mjyzn3ys with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 12
[34m[1mwandb[0m: 	freeze_up_to: 3
[34m[1mwandb[0m: 	learning_rate: 0.001


[logical-sweep-16] Validation Accuracy: 77.60%


0,1
epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_acc,▁▅▆▆▆▆▇█████
train_loss,█▄▃▃▃▃▃▁▁▁▁▁
val_acc,▁

0,1
epoch,12.0
train_acc,75.33753
train_loss,0.74982
val_acc,77.6


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: i0a3jb23 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 12
[34m[1mwandb[0m: 	freeze_up_to: 5
[34m[1mwandb[0m: 	learning_rate: 0.001


[breezy-sweep-17] Validation Accuracy: 75.45%


0,1
epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_acc,▁▅▆▆▇▇▇█████
train_loss,█▃▃▃▂▂▂▁▁▁▁▁
val_acc,▁

0,1
epoch,12.0
train_acc,75.70757
train_loss,0.7143
val_acc,75.45


[34m[1mwandb[0m: Agent Starting Run: cm2j3wqq with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 12
[34m[1mwandb[0m: 	freeze_up_to: 3
[34m[1mwandb[0m: 	learning_rate: 0.001


[fine-sweep-18] Validation Accuracy: 75.05%


0,1
epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_acc,▁▅▆▆▆▆▆█▇█▇█
train_loss,█▄▄▃▃▃▃▁▂▁▁▁
val_acc,▁

0,1
epoch,12.0
train_acc,75.86759
train_loss,0.7306
val_acc,75.05


[34m[1mwandb[0m: Agent Starting Run: bvsh5u0g with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 12
[34m[1mwandb[0m: 	freeze_up_to: 0
[34m[1mwandb[0m: 	learning_rate: 0.001


[chocolate-sweep-19] Validation Accuracy: 75.90%


0,1
epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_acc,▁▅▅▅▆▆▆▇▇▇█▇
train_loss,█▄▃▃▃▃▂▁▁▁▁▁
val_acc,▁

0,1
epoch,12.0
train_acc,75.08751
train_loss,0.74159
val_acc,75.9


[34m[1mwandb[0m: Agent Starting Run: rzpzqgf3 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	freeze_up_to: 0
[34m[1mwandb[0m: 	learning_rate: 0.001
