In [1]:
#Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

#Load and unzip iNaturalist zip file onto server, then remove zip to optimize performance
zip_path = "drive/MyDrive/nature_12K.zip"
!cp "{zip_path}" .
!unzip -q nature_12K.zip
!rm nature_12K.zip
print("Extracted the dataset");

Mounted at /content/drive
Extracted the dataset


In [2]:
!pip install wandb -qU
import wandb
wandb.login(key='2b25045507d6a89b66edf89be892f3687346ed10')

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m267.1/267.1 kB[0m [31m22.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25h

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [4]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
from PIL import Image
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import os
from glob import glob
import numpy as np
import time
from torch.optim.lr_scheduler import CosineAnnealingLR

class PreProcessDataset(Dataset):
    def __init__(self, root_dir, mode):
        self.root_dir = root_dir;
        self.mode = mode;
        self.prepareDataset();
        self.imageTransform();

    def prepareDataset(self):
        self.files = [];
        self.labels = [];
        self.classes = [];

        directory_path = os.path.join(self.root_dir, 'train');
        folders = sorted(os.listdir(directory_path));

        for i in folders:
          if i[0]!= ".":
            self.classes.append(i);
        print("Classes:",self.classes);
        if self.mode == 'train':
          for i in range(len(self.classes)):
              files = glob(os.path.join(directory_path, self.classes[i], '*.jpg'));
              self.labels += [i]*len(files);
              self.files += files;
        if self.mode == 'test':
          directory_path = os.path.join(self.root_dir, 'val');
          for i in range(len(self.classes)):
              files = glob(os.path.join(directory_path, self.classes[i], '*.jpg'));
              self.labels += [i]*len(files);
              self.files += files;

    def imageTransform(self):
        self.transform = transforms.Compose([
            transforms.RandomRotation(30),
            transforms.CenterCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.Resize((256,256)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]);

    def __getitem__(self, index):
        img = Image.open(self.files[index]).convert('RGB');
        img = self.transform(img);
        label = self.labels[index];
        label = torch.tensor(label, dtype = torch.long);
        return img, label;

    def __len__(self):
        return len(self.files);

def train(model, dataset, optimizer, criterion, device):
    epoch_loss = 0
    epoch_correct_predictions = 0
    epoch_total_samples = 0
    model.train();
    for i, (images,labels) in enumerate(dataset):
        #setting gradients to zero again to prevent any incorrect calculation
        optimizer.zero_grad();
        images, labels = images.to(device), labels.to(device);
        predicted = model(images);
        loss = criterion(predicted,labels);
        loss.backward();
        optimizer.step();

        _, predicted_labels = torch.max(predicted, 1);
        correct_predictions = (predicted_labels == labels).sum().item();
        batch_accuracy = correct_predictions / len(labels);

        epoch_loss += loss.item() * len(labels);
        epoch_correct_predictions += correct_predictions;
        epoch_total_samples += len(labels);


    epoch_loss /= epoch_total_samples;
    epoch_accuracy = epoch_correct_predictions / epoch_total_samples;

    return epoch_loss, epoch_accuracy;


def validate(model, dataset, criterion, device):

    epoch_loss = 0
    epoch_correct_predictions = 0
    epoch_total_samples = 0
    model.train();
    for i, (images,labels) in enumerate(dataset):
        images, labels = images.to(device), labels.to(device);
        predicted = model(images);
        loss = criterion(predicted,labels);

        _, predicted_labels = torch.max(predicted, 1);
        correct_predictions = (predicted_labels == labels).sum().item();
        batch_accuracy = correct_predictions / len(labels);


        epoch_loss += loss.item() * len(labels);
        epoch_correct_predictions += correct_predictions;
        epoch_total_samples += len(labels);


    epoch_loss /= epoch_total_samples;
    epoch_accuracy = epoch_correct_predictions / epoch_total_samples;

    return epoch_loss, epoch_accuracy;


def main():
    wandb.init(project='EE21S125_DL_A2_PartB');
    NN = wandb.config.nn;
    num_dense_neurons = wandb.config.num_dense_neurons;

    num_epochs = 15;
    dataset_directory = 'inaturalist_12K';
    batch_size = 128;

    mode = 'train';
    num_classes = 10;

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu');
    print("Device:", device);

    training_set = PreProcessDataset(dataset_directory,mode);
    train_size = int(0.8*len(training_set));
    validate_size = len(training_set) - train_size;

    train_dataset, validate_dataset = torch.utils.data.random_split(training_set, [train_size, validate_size]);

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=10);
    validate_loader = DataLoader(validate_dataset, batch_size=batch_size, shuffle=True, num_workers=10);

    mode = 'test';
    test_dataset = PreProcessDataset(dataset_directory,mode);
    test_loader = DataLoader(test_dataset, batch_size, shuffle=True, num_workers = 10)


    # Load pre-trained ResNet50 model
    if NN == 'RESNET50':
      model = models.resnet50(pretrained=True)
    elif NN == 'GOOGLENET':
      model = models.googlenet(pretrained = True);

    # Freeze all layers except the last layer
    for param in model.parameters():
        param.requires_grad = False

    # Modify the last layer for the new dataset
    num_ftrs = model.fc.in_features;
    model.fc = nn.Sequential(
        torch.nn.Linear(num_ftrs, num_dense_neurons),
        torch.nn.Mish(),
        torch.nn.Dropout(0.3),
        torch.nn.Linear(num_dense_neurons, num_classes),
    );

    model = model.to(device);

    # Define loss function and optimizer
    criterion = torch.nn.CrossEntropyLoss();
    optimizer = optim.NAdam(model.fc.parameters(), lr=0.001);
    anneal = CosineAnnealingLR(optimizer, T_max=num_epochs/2, eta_min = 0.0001);

    wandb.run.name = ("NN_"+str(NN)+"_dn_"+str(num_dense_neurons));

    for epoch in range(num_epochs):
      start_time = time.time();
      train_loss,train_accuracy = train(model,train_loader,optimizer,criterion,device);
      validate_loss,validate_accuracy = validate(model,validate_loader,criterion,device);
      anneal.step();
      end_time = time.time();
      elapsed_time = end_time - start_time;

      print("Epoch Time Taken:", elapsed_time);
      print("Training Accuracy:", train_accuracy," Training Loss:", train_loss, " Validation Accuracy:", validate_accuracy, " Validate Loss:", validate_loss," Epoch:", epoch +1);
      wandb.log({'train_loss': train_loss, 'train_accuracy':train_accuracy, 'validate_loss': validate_loss, 'validate_accuracy': validate_accuracy, 'epoch':epoch + 1});
    test_loss,test_accuracy = validate(model,test_loader,criterion,device);
    print("Test Accuracy:", test_accuracy, " test Loss:", test_loss);
    wandb.log({'test_loss':test_loss,'test_accuracy':test_accuracy});
    wandb.finish();
    print("COMPLETE");



if __name__ == "__main__":
   sweep_config = {
                    'method': 'grid',
                    'metric': {
                                'name': 'validate_accuracy',
                                'goal': 'maximize'
                              },
                    'parameters': {
                        'nn':{'values':['RESNET50','GOOGLENET']},
                        'num_dense_neurons':{'values': [128,256]},

                    }
                  }
   sweep_id = wandb.sweep(sweep= sweep_config, project='EE21S125_DL_A2_PartB');
   wandb.agent(sweep_id, function = main,count=4);
  #main();




Create sweep with ID: bu1o8u7v
Sweep URL: https://wandb.ai/ee21s125/EE21S125_DL_A2_PartB/sweeps/bu1o8u7v


[34m[1mwandb[0m: Agent Starting Run: oe4eydxg with config:
[34m[1mwandb[0m: 	nn: RESNET50
[34m[1mwandb[0m: 	num_dense_neurons: 256


Device: cuda
Classes: ['Amphibia', 'Animalia', 'Arachnida', 'Aves', 'Fungi', 'Insecta', 'Mammalia', 'Mollusca', 'Plantae', 'Reptilia']
Classes: ['Amphibia', 'Animalia', 'Arachnida', 'Aves', 'Fungi', 'Insecta', 'Mammalia', 'Mollusca', 'Plantae', 'Reptilia']
Epoch Time Taken: 221.68456768989563
Training Accuracy: 0.2965370671333917  Training Loss: 17.213359360218586  Validation Accuracy: 0.397  Validate Loss: 2.9658723335266113  Epoch: 1
Epoch Time Taken: 239.7825231552124
Training Accuracy: 0.5539442430303788  Training Loss: 1.3265563412627097  Validation Accuracy: 0.6175  Validate Loss: 1.1189222469329834  Epoch: 1
Epoch Time Taken: 240.39950442314148
Training Accuracy: 0.5799474934366796  Training Loss: 2.0226475320498425  Validation Accuracy: 0.4635  Validate Loss: 3.00400616645813  Epoch: 2
Epoch Time Taken: 231.05773258209229
Training Accuracy: 0.7048381047630954  Training Loss: 0.879474908258784  Validation Accuracy: 0.69  Validate Loss: 0.9395322566032409  Epoch: 2
Epoch Time Tak

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▃▃▃▃▃▃▄▄▅▅▅▅▅▅▆▆▇▇▇▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▄▅▆▆▇▆▇▇▇▇▇████████▇█▆█▂▇▇▇▇
train_loss,█▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁
validate_accuracy,▁▅▂▇▅▇▅▇▅▇▇███████████▅▇▇▇▇▇▆
validate_loss,█▂█▂▃▁▃▁▃▁▁▁▁▁▁▁▁▁▁▁▁▁▄▁▂▁▁▁▂

0,1
epoch,15.0
test_accuracy,0.7095
test_loss,0.87745
train_accuracy,0.73934
train_loss,0.77681
validate_accuracy,0.6805
validate_loss,0.96655


COMPLETE


[34m[1mwandb[0m: [32m[41mERROR[0m Run oe4eydxg errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.10/dist-packages/wandb/agents/pyagent.py", line 308, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-4-15e60fd3617e>", line 190, in main
[34m[1mwandb[0m: [32m[41mERROR[0m     wandb.log({'train_loss': train_loss, 'train_accuracy':train_accuracy, 'validate_loss': validate_loss, 'validate_accuracy': validate_accuracy, 'epoch':epoch + 1});
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.10/dist-packages/wandb/sdk/lib/preinit.py", line 36, in preinit_wrapper
[34m[1mwandb[0m: [32m[41mERROR[0m     raise wandb.Error(f"You must call wandb.init() before {name}()")
[34m[1mwandb[0m: [32m[41mERROR[0m wandb.errors.Error: You must call wandb.init() before wandb.log()
[

Epoch Time Taken: 207.722727060318
Training Accuracy: 0.7818477309663708  Training Loss: 0.6633851076606095  Validation Accuracy: 0.7055  Validate Loss: 0.9170907068252564  Epoch: 15


[34m[1mwandb[0m: Agent Starting Run: dyk5meso with config:
[34m[1mwandb[0m: 	nn: GOOGLENET
[34m[1mwandb[0m: 	num_dense_neurons: 256


Device: cuda
Classes: ['Amphibia', 'Animalia', 'Arachnida', 'Aves', 'Fungi', 'Insecta', 'Mammalia', 'Mollusca', 'Plantae', 'Reptilia']
Classes: ['Amphibia', 'Animalia', 'Arachnida', 'Aves', 'Fungi', 'Insecta', 'Mammalia', 'Mollusca', 'Plantae', 'Reptilia']


Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to /root/.cache/torch/hub/checkpoints/googlenet-1378be20.pth
100%|██████████| 49.7M/49.7M [00:00<00:00, 163MB/s]


Epoch Time Taken: 113.86201858520508
Training Accuracy: 0.5454431803975497  Training Loss: 1.4485390017011461  Validation Accuracy: 0.589  Validate Loss: 1.2342624940872193  Epoch: 1
Epoch Time Taken: 114.15801429748535
Training Accuracy: 0.6683335416927116  Training Loss: 1.0162424983896603  Validation Accuracy: 0.6805  Validate Loss: 1.0125036778450012  Epoch: 2
Epoch Time Taken: 113.78158593177795
Training Accuracy: 0.6895861982747844  Training Loss: 0.9418232068000308  Validation Accuracy: 0.6995  Validate Loss: 0.9585328998565674  Epoch: 3
Epoch Time Taken: 113.30530452728271
Training Accuracy: 0.716214526815852  Training Loss: 0.8869357986932457  Validation Accuracy: 0.693  Validate Loss: 0.9399560384750366  Epoch: 4
Epoch Time Taken: 113.93232488632202
Training Accuracy: 0.7188398549818728  Training Loss: 0.8484422627620838  Validation Accuracy: 0.685  Validate Loss: 0.9767585287094116  Epoch: 5
Epoch Time Taken: 113.56447672843933
Training Accuracy: 0.7213401675209401  Training

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▆▆▇▇▇███████▇█
train_loss,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁
validate_accuracy,▁▆▇▇▆▇█▇█▇█▇▇▆▆
validate_loss,█▄▂▂▃▂▁▂▁▂▂▁▂▃▃

0,1
epoch,15.0
test_accuracy,0.6945
test_loss,0.91914
train_accuracy,0.72347
train_loss,0.82166
validate_accuracy,0.6815
validate_loss,0.97176


COMPLETE


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7b522b9bbd60>> (for post_run_cell):


BrokenPipeError: [Errno 32] Broken pipe