# Loading data

In [1]:
from torch.utils.data import Dataset
import os
import cv2
import torch
import albumentations as A
import torchmetrics

root_dir = '/data2/eranario/data/PASCAL-VOC-2012'

  from .autonotebook import tqdm as notebook_tqdm


# Dstaset

In [2]:
class PASCALDataset(Dataset):
    
    def __init__(self, root_dir, split='train', year='2012', transform=None):
        super().__init__()
        self.split = split
        self.year = year
        self.transform = transform
        self.num_classes = 20
        
        # directory of images and labels
        self.root_dir = root_dir
        self.img_dir = os.path.join(root_dir, 'VOCdevkit', 'VOC' + year, 'JPEGImages')
        self.labels_dir = os.path.join(root_dir, 'VOCdevkit', 'VOC' + year, 'Annotations')
        
        # splits directory
        self.splits_dir = os.path.join(self.root_dir, 'VOCdevkit', 'VOC' + year, 'ImageSets', 'Main')
        
        # load splits
        self.img_ids = []
        self.images = []
        self.labels = []
        self.load_data()
        print('Loaded {} images and {} labels'.format(len(self.images), len(self.labels)))
        
    def __len__(self):
        return len(self.img_ids)
    
    def __getitem__(self, idx):
        
        # load image
        image = cv2.imread(self.images[idx], cv2.COLOR_BGR2RGB)
        
        # load xml label as dictionary
        label_path = self.labels[idx]
        boxes, labels = self.parse_voc_xml(label_path)
        
        # add transform
        if self.transform:
            augmented = self.transform(image=image, bboxes=boxes, labels=labels)
            image = augmented['image']
            boxes = torch.tensor(augmented['bboxes'], dtype=torch.float32)
            labels = torch.tensor(augmented['labels'], dtype=torch.long)
        
        return image, {'boxes': boxes, 'labels': labels}
    
    def load_data(self):
        
        with open(os.path.join(self.splits_dir, self.split + '.txt')) as f:
            for line in f:
                self.img_ids.append(line.strip())
                
        for img_id in self.img_ids:
            img_file = os.path.join(self.img_dir, img_id + '.jpg')
            ann_file = os.path.join(self.labels_dir, img_id + '.xml')
            self.images.append(img_file)
            self.labels.append(ann_file)
            
        assert len(self.images) == len(self.labels)
        
    def parse_voc_xml(self, annotation_path):
        import xml.etree.ElementTree as ET
        tree = ET.parse(annotation_path)
        root = tree.getroot()
        
        boxes = []
        labels = []
        for obj in root.findall('object'):
            label = obj.find('name').text
            bbox = obj.find('bndbox')
            box = [
                int(bbox.find('xmin').text),
                int(bbox.find('ymin').text),
                int(bbox.find('xmax').text),
                int(bbox.find('ymax').text)
            ]
            boxes.append(box)
            labels.append(label)  # Labels are still strings here

        # Convert labels to integers
        label_map = {'aeroplane': 0, 'bicycle': 1, 'bird': 2, 'boat': 3, 'bottle': 4,
                     'bus': 5, 'car': 6, 'cat': 7, 'chair': 8, 'cow': 9, 'diningtable': 10,
                     'dog': 11, 'horse': 12, 'motorbike': 13, 'person': 14, 'pottedplant': 15,
                     'sheep': 16, 'sofa': 17, 'train': 18, 'tvmonitor': 19}
        labels = [label_map[label] for label in labels]
        
        return boxes, labels

# Datamodule

In [3]:
from torch.utils.data import random_split
from torchvision.datasets import VOCDetection
import lightning as pl
from torch.utils.data import DataLoader
from albumentations.pytorch import ToTensorV2

class PASCALDataModule(pl.LightningDataModule):
    
    def __init__(self, root_dir: str, batch_size: int = 32, num_workers: int = 4, image_size = 512):
        """Data Module for handling PASCAL VOC 2012 dataset.

        Args:
            root_dir (str): Root directory of the PASCAL VOC dataset.
            batch_size (int, optional): Number of samples per batch. Defaults to 32.
        """
        super().__init__()
        self.root_dir = root_dir
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.image_size = image_size
        
        self.train_transform = A.Compose([
            A.Resize(self.image_size, self.image_size),
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            ToTensorV2()
        ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))
        
        self.test_transform = A.Compose([
            A.Resize(self.image_size, self.image_size),
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            ToTensorV2()
        ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))

    def prepare_data(self):
        # Download the Pascal VOC dataset
        VOCDetection(self.root_dir, year='2012', image_set='train', download=False)
        VOCDetection(self.root_dir, year='2012', image_set='val', download=False)

    def setup(self, stage=None):
        if stage == 'fit' or stage is None:
            print("Setting up training datasets")
            full_train_dataset = PASCALDataset(self.root_dir, split='train', transform=self.train_transform)
            
            train_size = int(0.8 * len(full_train_dataset))
            val_size = len(full_train_dataset) - train_size
            
            self.train_dataset, self.val_dataset = random_split(full_train_dataset, [train_size, val_size])
        
        if stage == 'test' or stage is None:
            self.test_dataset = PASCALDataset(self.root_dir, split='val', transform=self.test_transform)

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers, collate_fn=self.collate_fn)
    
    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers, collate_fn=self.collate_fn)

    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers, collate_fn=self.collate_fn)

    @staticmethod
    def collate_fn(batch):
        images, targets = zip(*batch)
        images = torch.stack(images)
        boxes = [target['boxes'] for target in targets]
        labels = [target['labels'] for target in targets]
        return images, {'boxes': boxes, 'labels': labels}


# Create Model

In [4]:
import torch
from torch import nn
import pytorch_lightning as pl
import torchmetrics
from torchvision.models.detection import fasterrcnn_resnet50_fpn

class LitDetectorModel(pl.LightningModule):
    
    def __init__(self, num_classes: int = 1, learning_rate: float = 2e-4):
        """Object Detection model built with PyTorch Lightning using Faster R-CNN.

        Args:
            num_classes (int, optional): Number of classes. Defaults to 1.
            learning_rate (float, optional): Rate at which to adjust model weights. Defaults to 2e-4.
        """
        super().__init__()
        
        # Define properties
        self.save_hyperparameters()
        self.hparams.lr = learning_rate
        self.hparams.num_classes = num_classes
        
        # Define the model
        self.model = fasterrcnn_resnet50_fpn(pretrained=False, num_classes=num_classes)
        
        # mAP calculation
        self.val_map_metric = torchmetrics.detection.MeanAveragePrecision(box_format='xyxy')
        self.test_map_metric = torchmetrics.detection.MeanAveragePrecision(box_format='xyxy')
        self.validation_outputs = []
        self.test_outputs = []

    def forward(self, images, targets=None):
        return self.model(images, targets)
        
    def training_step(self, batch, batch_idx):
        images, targets = batch
        targets = self.format_targets(targets)
        loss_dict = self.model(images, targets)
        
        losses = sum(loss for loss in loss_dict.values())
        
        self.log('train_loss', losses, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return losses
    
    def validation_step(self, batch, batch_idx):
        images, targets = batch
        targets = self.format_targets(targets)
        outputs = self.model(images)
        
        preds = [{k: v.detach() for k, v in t.items()} for t in outputs]
        
        # Wrap targets in a list of dictionaries
        formatted_targets = targets
        
        self.val_map_metric.update(preds, formatted_targets)
        self.validation_outputs.append({'preds': preds, 'targets': formatted_targets})
        
        return outputs
    
    def on_validation_epoch_end(self):
        if not self.validation_outputs:
            mAP_result = {'map': torch.tensor(0.0)}
        else:
            mAP_result = self.val_map_metric.compute()
            self.val_map_metric.reset()

        # Log only the keys that contain "map"
        map_keys = {key: value for key, value in mAP_result.items() if 'map' in key}
        for key, value in map_keys.items():
            self.log(f'val_{key}', value, on_epoch=True, prog_bar=True, logger=True)
        
        self.validation_outputs.clear()
    
    def test_step(self, batch, batch_idx):
        images, targets = batch
        targets = self.format_targets(targets)
        outputs = self.model(images)
        
        preds = [{k: v.detach() for k, v in t.items()} for t in outputs]
        
        # Wrap targets in a list of dictionaries
        formatted_targets = targets
        
        self.test_map_metric.update(preds, formatted_targets)
        self.test_outputs.append({'preds': preds, 'targets': formatted_targets})
        
        return outputs
    
    def on_test_epoch_end(self):
        if not self.test_outputs:
            mAP_result = {'map': torch.tensor(0.0)}
        else:
            mAP_result = self.test_map_metric.compute()
            self.test_map_metric.reset()

        # Log only the keys that contain "map"
        map_keys = {key: value for key, value in mAP_result.items() if 'map' in key}
        for key, value in map_keys.items():
            self.log(f'test_{key}', value, on_epoch=True, prog_bar=True, logger=True)
        
        self.test_outputs.clear()
    
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.hparams.lr)
    
    def format_targets(self, targets):
        """Convert the targets to the format expected by the model."""
        formatted_targets = []
        for boxes, labels in zip(targets['boxes'], targets['labels']):
            formatted_targets.append({
                'boxes': boxes,
                'labels': labels
            })
        return formatted_targets


# Train Model

In [5]:
from lightning.pytorch.loggers import WandbLogger

data_module = PASCALDataModule(root_dir, batch_size=12)
data_module.setup()
train_loader = data_module.train_dataloader()
val_loader = data_module.val_dataloader()
test_loader = data_module.test_dataloader()
model = LitDetectorModel(num_classes=20)  # Assuming 20 classes
wandb_logger = WandbLogger(
            entity='paibl',
            project='active-learning',
            name='test',
            save_dir='/data2/eranario/intermediate_data/Active-Learning/PASCAL_logs/tests'
        )

trainer = pl.Trainer(max_epochs=1, default_root_dir='/data2/eranario/intermediate_data/Active-Learning/PASCAL_logs/tests', log_every_n_steps=1, logger=wandb_logger)
trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)
trainer.test(model, test_loader)

Setting up training datasets
Loaded 5717 images and 5717 labels
Loaded 5823 images and 5823 labels


INFO:torch.distributed.nn.jit.instantiator:Created a temporary directory at /tmp/tmpu4noxy9_
INFO:torch.distributed.nn.jit.instantiator:Writing /tmp/tmpu4noxy9_/_remote_module_non_scriptable.py
/home/eranario/miniconda3/envs/lightning/lib/python3.10/site-packages/torchvision/models/_utils.py:208: The parameter 'pretrained' is deprecated since 0.13 and will be removed in 0.15, please use 'weights' instead.
/home/eranario/miniconda3/envs/lightning/lib/python3.10/site-packages/torchvision/models/_utils.py:223: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and will be removed in 0.15. The current behavior is equivalent to passing `weights=None`.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, u

INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name            | Type                 | Params
---------------------------------------------------------
0 | model           | FasterRCNN           | 41.4 M
1 | val_map_metric  | MeanAveragePrecision | 0     
2 | test_map_metric | MeanAveragePrecision | 0     
---------------------------------------------------------
41.2 M    Trainable params
222 K     Non-trainable params
41.4 M    Total params
165.566   Total estimated model params size (MB)


Epoch 0:   0%|          | 0/382 [00:00<?, ?it/s]                           

/home/eranario/miniconda3/envs/lightning/lib/python3.10/site-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 12. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Epoch 0: 100%|█████████▉| 381/382 [05:28<00:00,  1.16it/s, v_num=n14m, train_loss_step=0.538]

/home/eranario/miniconda3/envs/lightning/lib/python3.10/site-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 1. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Epoch 0: 100%|██████████| 382/382 [05:28<00:00,  1.16it/s, v_num=n14m, train_loss_step=1.490]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation:   0%|          | 0/96 [00:00<?, ?it/s]
Validation DataLoader 0:   0%|          | 0/96 [00:00<?, ?it/s]
Validation DataLoader 0:   1%|          | 1/96 [00:00<00:36,  2.60it/s]
Validation DataLoader 0:   2%|▏         | 2/96 [00:00<00:36,  2.56it/s]
Validation DataLoader 0:   3%|▎         | 3/96 [00:01<00:36,  2.55it/s]
Validation DataLoader 0:   4%|▍         | 4/96 [00:01<00:36,  2.53it/s]
Validation DataLoader 0:   5%|▌         | 5/96 [00:01<00:36,  2.51it/s]
Validation DataLoader 0:   6%|▋         | 6/96 [00:02<00:35,  2.51it/s]
Validation DataLoader 0:   7%|▋         | 7/96 [00:02<00:35,  2.51it/s]
Validation DataLoader 0:   8%|▊         | 8/96 [00:03<00:35,  2.51it/s]
Validation DataLoader 0:   9%|▉         | 9/96 [00:03<00:34,  2.50it/s]
Validation DataLoader 0:  10%|█         | 10/96 [00:03<00:34,  2.50it/s]
Validation DataLoader 0:

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 382/382 [06:20<00:00,  1.01it/s, v_num=n14m, train_loss_step=1.490, val_map=0.00714, val_map_50=0.0247, val_map_75=0.00169, val_map_small=0.000, val_map_medium=0.000221, val_map_large=0.00971, val_map_per_class=-1.00, train_loss_epoch=0.559]


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 486/486 [04:09<00:00,  1.95it/s]


[{'test_map': 0.006323115434497595,
  'test_map_50': 0.02311818115413189,
  'test_map_75': 0.0015275644836947322,
  'test_map_small': 1.2709871270999429e-06,
  'test_map_medium': 0.00021259236382320523,
  'test_map_large': 0.008088461123406887,
  'test_map_per_class': -1.0}]