## **GNR 638:** Machine Learning for Remote Sensing-II
### **Mini Project-1:** Fine-grained classification on the CUB-200-2011 dataset
> The task is to train a CNN model with at most 10M parameters for fine-grained classification on the CUB-200-2011 dataset.

### Collaborators: 
[![Munish](https://img.shields.io/badge/22M2153-Munish_Monga-blue)](https://github.com/munish30monga)
[![Sachin](https://img.shields.io/badge/22M2162-Sachin_Giroh-darkgreen)](https://github.com/22M2159)

### Table of Contents:
1. [Introduction](#introduction)
2. [Importing Libraries](#imporing-libraries)
3. [Hyperparameters](#hyperparameters)
4. [Downloading and Processing CUB Dataset](#downloading-and-processing-cub-dataset)
5. [Preparing the Model](#preparing-the-model)
6. [Training Loop](#training-loop)
7. [Plotting Loss and Accuracy](#plotting-loss-and-accuracy)
8. [References](#references)

### Introduction

### Importing Libraries <a id="imporing-libraries"></a>

In [53]:
import os
import pandas as pd
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl
from torchvision.datasets.utils import download_url
from pathlib import Path
import numpy as np
from prettytable import PrettyTable
import torch
import torch.nn as nn
import timm
import random
from thop import clever_format
from torchsummary import summary
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import WandbLogger

### Hyperparameters

In [54]:
# ---------------------------------------------------------------------------
# Data
# ---------------------------------------------------------------------------
dataset_dir = Path('./datasets/cub')    # root folder; the loaders read <root>/CUB_200_2011/...
batch_size = 32                         # samples per mini-batch
num_workers = 8                         # DataLoader worker processes

# ---------------------------------------------------------------------------
# Reproducibility: seed every RNG this notebook draws from, not only `random`
# ---------------------------------------------------------------------------
random_seed = 42
random.seed(random_seed)                # Python's built-in RNG
np.random.seed(random_seed)             # NumPy's global RNG
torch.manual_seed(random_seed)          # PyTorch RNG (parameter init, DataLoader shuffling)

# ---------------------------------------------------------------------------
# Model and optimisation
# ---------------------------------------------------------------------------
base_model = "resnet18"                 # timm architecture name used as the backbone
optimizer_type = 'Adam'                 # key understood by the model's configure_optimizers
optimizer = optimizer_type              # older name, kept as an alias in case other cells read it
epochs = 20                             # number of epochs to train
learning_rate = 0.001                   # single source of truth (an earlier, overwritten 0.01 was removed)
weight_decay = 0.0001                   # weight decay handed to the optimizer

# ---------------------------------------------------------------------------
# Hardware
# ---------------------------------------------------------------------------
accelerator = 'gpu'
devices = 1

### Downloading and Processing CUB-200-2011 Dataset <a id="downloading-and-processing-cub-dataset"></a>

In [55]:
# Uncomment & Run only once for downloading data
# !bash down_process_CUB.sh

### CUB-200-2011 Dataloader

In [56]:
class CUB_Dataset(Dataset):
    """CUB-200-2011 images and labels, exposed as a train, val or test split.

    The official ``train_test_split.txt`` decides which images are training
    images. The training images are further divided into 'train' and 'val'
    using a permutation drawn from ``seed``. Because the permutation depends
    only on ``seed`` and the number of training images, a 'train' instance and
    a 'val' instance built with the same ``seed`` and ``split_ratio`` are
    disjoint and together cover every training image. (Drawing a fresh
    unseeded shuffle per instance would let the two splits overlap.)

    Args:
        dataset_dir: Folder that contains the ``CUB_200_2011`` directory.
        split: 'train' or 'val' select a part of the official training
            images; any other value selects the official test images.
        transform: Optional callable applied to each PIL image.
        split_ratio: Fraction of the training images assigned to 'val'.
        seed: Seed for the train/val permutation. Passing ``None`` draws
            fresh entropy per instance, which gives up the disjointness
            guarantee above.

    Attributes set during construction:
        data: DataFrame with columns img_id, filepath, target,
            is_training_img for the selected split.
        target2class_dict: Mapping from class id to class name, read from
            ``classes.txt``.
    """

    def __init__(self, dataset_dir, split='train', transform=None, split_ratio=0.2, seed=42):
        self.dataset_dir = Path(dataset_dir)
        self.transform = transform
        self.split = split
        self.split_ratio = split_ratio
        self.seed = seed
        self.target2class_dict = {}
        self._load_metadata()

    def _load_metadata(self):
        """Read the annotation files and populate ``data`` and ``target2class_dict``."""
        meta_dir = self.dataset_dir / 'CUB_200_2011'
        images = pd.read_csv(meta_dir / 'images.txt', sep=' ', names=['img_id', 'filepath'])
        image_class_labels = pd.read_csv(meta_dir / 'image_class_labels.txt', sep=' ', names=['img_id', 'target'])
        train_test_split = pd.read_csv(meta_dir / 'train_test_split.txt', sep=' ', names=['img_id', 'is_training_img'])
        classes = pd.read_csv(meta_dir / 'classes.txt', sep=' ', names=['class_id', 'class_name'], index_col=False)
        self.target2class_dict = pd.Series(classes.class_name.values, index=classes.class_id).to_dict()

        # One row per image: id, relative path, label, official train/test flag.
        data = images.merge(image_class_labels, on='img_id')
        data = data.merge(train_test_split, on='img_id')

        if self.split in ('train', 'val'):
            train_data = data[data.is_training_img == 1]
            num_train = len(train_data)
            # Seeded permutation: identical for every instance, so the 'train'
            # and 'val' slices below never share an image.
            order = np.random.default_rng(self.seed).permutation(num_train)
            num_val = int(np.floor(self.split_ratio * num_train))
            chosen = order[num_val:] if self.split == 'train' else order[:num_val]
            self.data = train_data.iloc[chosen]
        else:
            self.data = data[data.is_training_img == 0]

    def __len__(self):
        """Return the number of images in the selected split."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for position ``idx`` within the split.

        The image is loaded as RGB and passed through ``transform`` when one
        was given; ``target`` is the zero-based class index.
        """
        sample = self.data.iloc[idx]
        path = self.dataset_dir / 'CUB_200_2011' / 'images' / sample.filepath
        target = int(sample.target) - 1  # Targets start at 1 by default, so shift to 0
        img = Image.open(path).convert('RGB')

        if self.transform is not None:
            img = self.transform(img)

        return img, target

In [57]:
class CUB_DataModule(pl.LightningDataModule):
    """Lightning data module that serves the three CUB_Dataset splits.

    Every split shares one preprocessing pipeline: resize to 224x224, convert
    to a tensor, then normalise each channel.

    Args:
        dataset_dir: Folder handed to CUB_Dataset as its root.
        batch_size: Batch size for all three loaders.
        num_workers: Worker count for all three loaders.
    """

    def __init__(self, dataset_dir, batch_size=32, num_workers=8):
        super().__init__()
        self.dataset_dir = Path(dataset_dir)
        self.batch_size = batch_size
        self.num_workers = num_workers

        # NOTE(review): presumably the ImageNet channel statistics expected by
        # the pretrained backbone - confirm.
        channel_mean = [0.485, 0.456, 0.406]
        channel_std = [0.229, 0.224, 0.225]
        steps = [
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=channel_mean, std=channel_std),
        ]
        self.transform = transforms.Compose(steps)

    def setup(self, stage=None):
        """Create the datasets required by ``stage`` (all of them when it is None)."""
        wants_fit_splits = stage is None or stage in ('fit', 'validate')
        wants_test_split = stage is None or stage == 'test'

        if wants_fit_splits:
            self.train_dataset = CUB_Dataset(self.dataset_dir, split='train', transform=self.transform)
            self.val_dataset = CUB_Dataset(self.dataset_dir, split='val', transform=self.transform)
        if wants_test_split:
            self.test_dataset = CUB_Dataset(self.dataset_dir, split='test', transform=self.transform)

    def _build_loader(self, dataset, shuffle):
        """Wrap ``dataset`` in a DataLoader using this module's batch settings."""
        return DataLoader(dataset, batch_size=self.batch_size, shuffle=shuffle, num_workers=self.num_workers)

    def train_dataloader(self):
        """Shuffled loader over the training split."""
        return self._build_loader(self.train_dataset, True)

    def val_dataloader(self):
        """Ordered loader over the validation split."""
        return self._build_loader(self.val_dataset, False)

    def test_dataloader(self):
        """Ordered loader over the test split."""
        return self._build_loader(self.test_dataset, False)

### Dataset Summary

In [58]:
def dataset_summary(dataset_dir):
    """Print split sizes for the CUB dataset and return the loaded splits.

    Builds the train, validation and test datasets (metadata only, no
    transform), prints a table with each split's sample count and its share
    of the total, and prints the number of classes.

    The class count is taken from the class-id to class-name mapping that the
    dataset reads from ``classes.txt``. Counting the distinct labels present
    in the training subset would depend on which images the train/val split
    happened to select.

    Args:
        dataset_dir: Folder handed to CUB_Dataset as its root.

    Returns:
        dict with keys 'train_dataset', 'val_dataset', 'test_dataset'
        (the three CUB_Dataset instances) and 'num_classes' (int).
    """
    print('=> Dataset Summary:')
    # Initialize datasets to load their metadata
    train_dataset = CUB_Dataset(dataset_dir, split='train')
    val_dataset = CUB_Dataset(dataset_dir, split='val')
    test_dataset = CUB_Dataset(dataset_dir, split='test')

    labelled_splits = [
        ("Train", train_dataset),
        ("Validation", val_dataset),
        ("Test", test_dataset),
    ]
    total_samples = sum(len(dataset) for _, dataset in labelled_splits)

    # Create and fill the table, one row per split
    table = PrettyTable()
    table.field_names = ["Split", "Number of Samples", "Percentage"]
    for label, dataset in labelled_splits:
        num_samples = len(dataset)
        # Guard the share computation so an empty dataset reports 0% instead
        # of raising ZeroDivisionError.
        percentage = (num_samples / total_samples) * 100 if total_samples else 0.0
        table.add_row([label, num_samples, f"{percentage:.2f}%"])

    print(table)

    num_classes = len(train_dataset.target2class_dict)
    print(f"Number of classes: {num_classes}")

    dataset_summary_dict = {
        'train_dataset': train_dataset,
        'val_dataset': val_dataset,
        'test_dataset': test_dataset,
        'num_classes': num_classes,
    }
    return dataset_summary_dict

In [59]:
# Print the split statistics and keep the loaded datasets / class count around.
dataset_summary_dict = dataset_summary(dataset_dir)

# Build the Lightning data module and create all three splits up front.
data_module = CUB_DataModule(dataset_dir, batch_size=batch_size, num_workers=num_workers)
data_module.setup()

=> Dataset Summary:
+------------+-------------------+------------+
|   Split    | Number of Samples | Percentage |
+------------+-------------------+------------+
|   Train    |        4796       |   40.69%   |
| Validation |        1198       |   10.16%   |
|    Test    |        5794       |   49.15%   |
+------------+-------------------+------------+
Number of classes: 200


In [60]:
class FlexibleCNN(pl.LightningModule):
    """Image classifier: a pretrained timm backbone followed by a new linear head.

    The backbone is created with pretrained weights and its last child module
    is dropped (assumed to be the classification head, as in the ResNet
    layout). A ``Linear -> BatchNorm1d`` projection maps the backbone features
    to ``num_classes`` logits, which are trained with cross-entropy.

    Args:
        base_model_name: timm architecture name passed to ``timm.create_model``.
        num_classes: Number of output logits.
        optimizer_type: 'Adam' or 'SGD'.
        learning_rate: Learning rate for the optimizer.
        weight_decay: Weight decay for the optimizer.
    """

    def __init__(self, base_model_name, num_classes, optimizer_type, learning_rate=1e-3, weight_decay=0.0):
        super().__init__()
        self.save_hyperparameters()  # exposes the constructor arguments as self.hparams
        backbone = timm.create_model(base_model_name, pretrained=True)
        self.embedding_size = backbone.num_features  # feature width fed to the new head
        self.base_model = nn.Sequential(*list(backbone.children())[:-1])  # Remove the classification head
        self.learning_rate = learning_rate
        self.criterion = nn.CrossEntropyLoss()

        self.projection = nn.Sequential(
            nn.Linear(self.embedding_size, num_classes),
            nn.BatchNorm1d(num_classes),
        )

    def forward(self, x):
        """Return class logits for a batch of images."""
        features = self.base_model(x)
        features = features.flatten(1)  # collapse everything after the batch dimension
        return self.projection(features)

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss for one training batch."""
        x, y = batch
        loss = self.criterion(self(x), y)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def _shared_eval_step(self, batch, stage):
        """Loss and accuracy for one evaluation batch, logged as '<stage>_loss' / '<stage>_acc'."""
        x, y = batch
        logits = self(x)
        loss = self.criterion(logits, y)
        preds = torch.argmax(logits, dim=1)
        # Stay on-device: no .item() round trip, so no host/device sync per batch.
        acc = (preds == y).float().mean()
        self.log(f'{stage}_loss', loss, prog_bar=True, logger=True)
        self.log(f'{stage}_acc', acc, prog_bar=True, logger=True)
        return {f'{stage}_loss': loss, f'{stage}_acc': acc}

    def validation_step(self, batch, batch_idx):
        """Log and return 'val_loss' and 'val_acc' for one validation batch."""
        return self._shared_eval_step(batch, 'val')

    def test_step(self, batch, batch_idx):
        """Log and return 'test_loss' and 'test_acc' for one test batch."""
        return self._shared_eval_step(batch, 'test')

    def configure_optimizers(self):
        """Build the optimizer selected by ``hparams.optimizer_type``.

        Only the requested optimizer is instantiated.

        Raises:
            KeyError: If ``optimizer_type`` is neither 'Adam' nor 'SGD'.
        """
        optimizer_classes = {
            'Adam': torch.optim.Adam,
            'SGD': torch.optim.SGD,
        }
        requested = self.hparams.optimizer_type
        try:
            optimizer_cls = optimizer_classes[requested]
        except KeyError:
            raise KeyError(
                f"Unsupported optimizer_type {requested!r}; expected one of {sorted(optimizer_classes)}"
            ) from None
        return optimizer_cls(self.parameters(), lr=self.learning_rate, weight_decay=self.hparams.weight_decay)

In [61]:
# Size the classifier head from the dataset summary, then build the network.
# NOTE(review): the recorded run reports 11.28M trainable parameters for
# resnet18, which is above the 10M limit stated in the project brief.
num_classes = dataset_summary_dict['num_classes']
base_model = 'resnet18'
model = FlexibleCNN(
    base_model_name=base_model,
    num_classes=num_classes,
    optimizer_type=optimizer_type,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
)

In [62]:
# Show the timm model names that have pretrained weights; any of them can be
# tried as the `base_model` name passed to FlexibleCNN.
timm.list_models(pretrained=True)

['bat_resnext26ts.ch_in1k',
 'beit_base_patch16_224.in22k_ft_in22k',
 'beit_base_patch16_224.in22k_ft_in22k_in1k',
 'beit_base_patch16_384.in22k_ft_in22k_in1k',
 'beit_large_patch16_224.in22k_ft_in22k',
 'beit_large_patch16_224.in22k_ft_in22k_in1k',
 'beit_large_patch16_384.in22k_ft_in22k_in1k',
 'beit_large_patch16_512.in22k_ft_in22k_in1k',
 'beitv2_base_patch16_224.in1k_ft_in1k',
 'beitv2_base_patch16_224.in1k_ft_in22k',
 'beitv2_base_patch16_224.in1k_ft_in22k_in1k',
 'beitv2_large_patch16_224.in1k_ft_in1k',
 'beitv2_large_patch16_224.in1k_ft_in22k',
 'beitv2_large_patch16_224.in1k_ft_in22k_in1k',
 'botnet26t_256.c1_in1k',
 'caformer_b36.sail_in1k',
 'caformer_b36.sail_in1k_384',
 'caformer_b36.sail_in22k',
 'caformer_b36.sail_in22k_ft_in1k',
 'caformer_b36.sail_in22k_ft_in1k_384',
 'caformer_m36.sail_in1k',
 'caformer_m36.sail_in1k_384',
 'caformer_m36.sail_in22k',
 'caformer_m36.sail_in22k_ft_in1k',
 'caformer_m36.sail_in22k_ft_in1k_384',
 'caformer_s18.sail_in1k',
 'caformer_s18.s

### Model Summary

In [63]:
def model_summary(model, print_summary):
    """Report the number of trainable parameters of ``model``.

    Args:
        model: Module whose parameters are counted.
        print_summary: When true, also print a layer-by-layer summary for a
            3x224x224 input.

    Returns:
        int: Number of parameters with ``requires_grad`` set.
    """
    model_n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    params = clever_format(model_n_params, "%.2f")
    print("Number of trainable parameters: {:s}".format(params))
    if print_summary:
        # The summary helper builds its own dummy input. Tell it where the
        # model actually lives, otherwise it may create a CUDA input for a
        # model that is still on the CPU.
        first_param = next(model.parameters(), None)
        on_cuda = first_param is not None and first_param.is_cuda
        summary(model, (3, 224, 224), device='cuda' if on_cuda else 'cpu')
    return model_n_params

In [64]:
model_n_params = model_summary(model, print_summary=False)

Number of trainable parameters: 11.28M


In [69]:
def train_model(model, data_module, max_epochs, accelerator, devices, project_name, use_wandb=False):
    """Fit ``model`` on ``data_module`` and keep the best checkpoint by validation loss.

    Args:
        model: LightningModule to train.
        data_module: LightningDataModule providing the train/val loaders.
        max_epochs: Number of epochs to train for.
        accelerator: Value forwarded to ``Trainer(accelerator=...)``.
        devices: Value forwarded to ``Trainer(devices=...)``.
        project_name: Run name used for the Weights & Biases logger; only
            read when ``use_wandb`` is true.
        use_wandb: When true, log to the 'GNR_638' W&B project. The default
            (false) leaves the Trainer on its default logger, and no W&B
            logger object is constructed at all.

    Returns:
        tuple: ``(trainer, model)`` after fitting.
    """
    # Model checkpoint callback to save the best model based on validation loss
    checkpoint_callback = ModelCheckpoint(
        monitor='val_loss',
        filename='model-{epoch:02d}-{val_loss:.2f}',
        save_top_k=1,
        mode='min',
    )

    trainer_kwargs = dict(
        max_epochs=max_epochs,
        log_every_n_steps=10,
        callbacks=[checkpoint_callback],
        accelerator=accelerator,
        devices=devices,
    )
    if use_wandb:
        # Only build the W&B logger when it will actually be attached.
        trainer_kwargs['logger'] = WandbLogger(project='GNR_638', log_model='all', name=f'{project_name}')

    # Initialize a trainer
    trainer = Trainer(**trainer_kwargs)

    # Train the model
    trainer.fit(model, datamodule=data_module)

    return trainer, model

In [70]:
# Short trial run. NOTE(review): max_epochs is hard-coded to 5 here and does
# not use the `epochs` value defined with the other hyperparameters.
train_model(
    model=model,
    data_module=data_module,
    max_epochs=5,
    accelerator=accelerator,
    devices=devices,
    project_name='test_run',
)

/home/cminds/miniconda3/envs/GNR_638/lib/python3.12/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/cminds/miniconda3/envs/GNR_638/lib/python3.12/ ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type             | Params
------------------------------------------------
0 | base_model | Sequential       | 11.2 M
1 | criterion  | CrossEntropyLoss | 0     
2 | projection | Sequential       | 103 K 
------------------------------------------------
11.3 M    Trainable params
0         Non-trainable params
11.3 M    Total params
45.118    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.


(<pytorch_lightning.trainer.trainer.Trainer at 0x7f1f05e29f10>,
 FlexibleCNN(
   (base_model): Sequential(
     (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
     (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (2): ReLU(inplace=True)
     (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
     (4): Sequential(
       (0): BasicBlock(
         (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
         (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
         (drop_block): Identity()
         (act1): ReLU(inplace=True)
         (aa): Identity()
         (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
         (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
         (act2): ReLU(inplace=True)
       )
       (1): BasicBlock(
       

In [67]:
def test_model(model, data_module, accelerator, devices, project_name, checkpoint_path=None):
    """Evaluate ``model`` on the test split of ``data_module``.

    Args:
        model: LightningModule to evaluate. When ``checkpoint_path`` is given
            it only determines which class the checkpoint is loaded into.
        data_module: LightningDataModule providing the test loader.
        accelerator: Value forwarded to ``Trainer(accelerator=...)``.
        devices: Value forwarded to ``Trainer(devices=...)``.
        project_name: Currently unused; kept so existing calls keep working.
        checkpoint_path: Optional checkpoint file. When set, a new model is
            restored from it and evaluated instead of ``model``.

    Returns:
        The Trainer that ran the test loop.
    """
    # Initialize a trainer
    trainer = Trainer(
        accelerator=accelerator,
        devices=devices,
    )

    # If a checkpoint path is provided, load the model from the checkpoint
    if checkpoint_path:
        # load_from_checkpoint is a classmethod that returns a new model, so
        # call it on the class; recent Lightning releases refuse to run it on
        # an instance.
        model = type(model).load_from_checkpoint(checkpoint_path)

    # Test the model
    trainer.test(model, datamodule=data_module)

    return trainer

In [68]:
# Evaluate the in-memory model (no checkpoint is passed) on the test split.
trainer = test_model(
    model=model,
    data_module=data_module,
    accelerator=accelerator,
    devices=devices,
    project_name='test_run',
)

/home/cminds/miniconda3/envs/GNR_638/lib/python3.12/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/cminds/miniconda3/envs/GNR_638/lib/python3.12/ ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/cminds/miniconda3/envs/GNR_638/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` 

Testing: |          | 0/? [00:00<?, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.5735243558883667
        test_loss           1.9869091510772705
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
