In [1]:
import argparse
import os
import warnings
import yaml

import pytorch_lightning as pl
import torch
from pytorch_lightning.callbacks import TQDMProgressBar
from pytorch_lightning.accelerators import find_usable_cuda_devices
from torchvision import datasets
from torch.utils.data import Dataset, DataLoader
from nanodet.data.collate import naive_collate
from nanodet.data.dataset import build_dataset
from nanodet.evaluator import build_evaluator
from nanodet.trainer.task import TrainingTask
from torchvision.transforms import ToTensor, ToPILImage
from nanodet.util import (
    NanoDetLightningLogger,
    cfg,
    convert_old_model,
    env_utils,
    load_config,
    load_model_weight,
    mkdir,
)
import random
from torch.utils.data import Subset
from IPython.display import Image
from IPython.display import display
from nanodet.data.dataset.coco import CocoDataset

#Set logger and seed
logger = NanoDetLightningLogger('test')
pl.seed_everything(1234)

Global seed set to 1234


1234

In [2]:
#Classes to handle the datasets in the replay scenario

class ReplayDataLoader:
    def __init__(self, dataset1, dataset2, batch_size, shuffle):
        """
        This class is used to create a dataloader that creates batches
        using the task specific dataset and the replay buffer dataset.
        Batches are created using 50% of the task specific dataset and 50% of the replay buffer dataset.
        The iterators are reset when the task specific dataset is exhausted.
        If the replay buffer dataset is exhausted before the task specific dataset, its iterator is reset.
        
        Args:
            dataset1: task specific dataset
            dataset2: replay buffer dataset
            batch_size: Batch size
            shuffle: Whether to shuffle the data
        """
        
        self.dataset1 = dataset1
        self.dataset2 = dataset2
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.dataset1_loader = torch.utils.data.DataLoader(
            self.dataset1,
            batch_size=self.batch_size//2,
            shuffle=self.shuffle,
            num_workers=8,
            pin_memory=True,
            collate_fn=naive_collate,
            drop_last=True,
        )
        self.dataset2_loader = torch.utils.data.DataLoader(
            self.dataset2,
            batch_size=self.batch_size//2,
            shuffle=self.shuffle,
            num_workers=8,
            pin_memory=True,
            collate_fn=naive_collate,
            drop_last=True,
        )
    
    def __iter__(self):
        self.dataset1_iter = iter(self.dataset1_loader)
        self.dataset2_iter = iter(self.dataset2_loader)
        return self
    
    def __next__(self):
        try:
            batch1 = next(self.dataset1_iter)
        except StopIteration:
            raise StopIteration
        try:
            batch2 = next(self.dataset2_iter)
        except StopIteration:
            self.dataset2_iter = iter(self.dataset2_loader)
            batch2 = next(self.dataset2_iter)
            
        merged_batch = {}
        for key in batch1.keys():
            if key == 'img':
                merged_batch[key] = batch1[key] + batch2[key]
            elif key == 'img_info':
                merged_batch[key] = {k: batch1[key][k] + batch2[key][k] for k in batch1[key]}
            elif key in ['gt_bboxes', 'gt_labels', 'gt_bboxes_ignore', 'warp_matrix']:
                #merged_batch[key] = [torch.cat((torch.tensor(b1), torch.tensor(b2))) for b1, b2 in zip(batch1[key], batch2[key])]
                merged_batch[key] = batch1[key] + batch2[key]
            else:
                raise ValueError(f"Key not recognized")
        
        return merged_batch
    
    def __len__(self):
        return len(self.dataset1)
        
class SmartBufferDataset(Dataset):
    """
    This class is used to create a replay buffer dataset that stores random samples of the task specific dataset.
    At init it takes random samples of the first task dataset to fill the buffer.
    Then from task n to task n+1 it, where n>0, it updates 50% of the buffer with the new task dataset.
    
    Args:
        dataset_n: task specific dataset
        buffer_size: size of the replay buffer
    """
    def __init__(self, dataset_n, buffer_size=250):
        self.buffer_size = buffer_size
        #At initialization, take random samples of the task 0 dataset to fill the buffer
        self.buffer_indices = random.sample(range(len(dataset_n)), self.buffer_size)
        self.buffer_dataset = Subset(dataset_n, self.buffer_indices)

    def __getitem__(self, buff_index):
        #Just return a buffer item at the index
        return self.buffer_dataset[buff_index]

    def __len__(self):
        #Return the buffer size
        return self.buffer_size

    def update_buffer(self, dataset_np1, cfg):
        #Smart update, based on the number of classes already contained in the buffer and the ones that are being added to the dataset
        #E.g. Update only 1/16 of the buffer if only 1 new class is added to the dataset and the buffer contains 15 classes
        new_classes = len(cfg.data.train.class_names)
        val_classes = len(cfg.data.val.class_names)
        ideal_samples_per_class = int(self.buffer_size/val_classes)
        update_buffer_indices_n = random.sample(range(self.buffer_size), self.buffer_size - int(ideal_samples_per_class*new_classes))
        update_buffer_indices_np1 = random.sample(range(len(dataset_np1)), int(ideal_samples_per_class*new_classes))
        subset_n = Subset(self.buffer_dataset, update_buffer_indices_n)
        subset_np1 = Subset(dataset_np1, update_buffer_indices_np1)
        self.buffer_dataset = torch.utils.data.ConcatDataset([subset_n, subset_np1])


In [3]:
#Function to create the task configuration file required for training
def create_exp_cfg(yml_path, task):
    all_names = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
    #Load the YAML file
    with open(yml_path, 'r') as file:
        temp_cfg = yaml.safe_load(file)
    #Save dir of the model
    temp_cfg['save_dir'] = 'models/task' + str(task)
    #If base task, training and testing classes are the same
    if task == 0:
        temp_cfg['data']['train']['class_names'] = all_names[:15]
        temp_cfg['data']['val']['class_names'] = all_names[:15]
        temp_cfg['model']['arch']['head']['num_classes'] = 20 #15
        #temp_cfg['model']['arch']['aux_head']['num_classes'] = 20 #15
    #Else, training only on task specific class, and testing on all classes
    else:
        temp_cfg['data']['train']['class_names'] = [all_names[14+task]]
        temp_cfg['data']['val']['class_names'] = all_names[:15+task]
        temp_cfg['model']['arch']['head']['num_classes'] = 20#15+task
        #temp_cfg['model']['arch']['aux_head']['num_classes'] = 20#15+task
        temp_cfg['schedule']['load_model'] = 'models/task' + str(task-1) + '/model_last.ckpt'
        
    temp_cfg_name = 'cfg/task' + str(task) + '.yml'
    print(temp_cfg_name)
    #Save the new configuration file
    with open(temp_cfg_name, 'w') as file:
        yaml.safe_dump(temp_cfg, file)

In [4]:
####TEMP INITIALIZATION since the base model is already available
task = 0
create_exp_cfg('cfg/VOC.yml', task)
load_config(cfg, 'cfg/task' + str(task) + '.yml')
#Build datasets and dataloaders based on the task configuration file
train_dataset = build_dataset(cfg.data.train, "train")
#val_dataset = build_dataset(cfg.data.val, "test")
buffer_dataset = SmartBufferDataset(train_dataset)



cfg/task0.yml
Load 4502 xml files and 13564 boxes
creating index...
index created!


In [5]:
###LEARNING STREAM###
#task 0: train on first 15 classes, test on 15 classes
#task 1: train on class n°16, test on 16 classes
#task 2: train on class n°17, test on 17 classes
#task 3: train on class n°18, test on 18 classes
#task 4: train on class n°19, test on 19 classes
#task 5: train on class n°20, test on 20 classes
for task in range (1, 6):
    logger = NanoDetLightningLogger('run_logs/task'+str(task))
    logger.info("Starting task" + str(task))
    logger.info("Setting up data...")
    #Create the task configuration file based on the task number and load the configuration
    create_exp_cfg('cfg/VOC.yml', task)
    load_config(cfg, 'cfg/task' + str(task) + '.yml')
    #Build datasets and dataloaders based on the task configuration file
    train_dataset = build_dataset(cfg.data.train, "train")
    #If task is not 0, create the replay dataset using the buffer
    if task == 0:
        train_dataloader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=cfg.device.batchsize_per_gpu,
            shuffle=True if task == 0 else False, #Shuffling is done inside ReplayDataset class
            num_workers=cfg.device.workers_per_gpu,
            pin_memory=True,
            collate_fn=naive_collate,
            drop_last=True,
        )
    else:
        train_dataloader = ReplayDataLoader(
            train_dataset, 
            buffer_dataset, 
            cfg.device.batchsize_per_gpu,
            shuffle = True
        )

    val_dataset = build_dataset(cfg.data.val, "test")
    val_dataloader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=cfg.device.batchsize_per_gpu,
        shuffle=False,
        num_workers=cfg.device.workers_per_gpu,
        pin_memory=True,
        collate_fn=naive_collate,
        drop_last=False,
    )
    evaluator = build_evaluator(cfg.evaluator, val_dataset)
    
    #Create the model based on the task configuration file
    logger.info("Creating model...")
    TrainTask = TrainingTask(cfg, evaluator)
    #Load the model weights if task is not 0
    if "load_model" in cfg.schedule:
        ckpt = torch.load(cfg.schedule.load_model)
        if "pytorch-lightning_version" not in ckpt:
            warnings.warn(
                "Warning! Old .pth checkpoint is deprecated. "
                "Convert the checkpoint with tools/convert_old_checkpoint.py "
            )
            ckpt = convert_old_model(ckpt)
        load_model_weight(TrainTask.model, ckpt, logger)
        logger.info("Loaded model weight from {}".format(cfg.schedule.load_model))
    model_resume_path = (
        os.path.join(cfg.save_dir, "model_last.ckpt")
        if "resume" in cfg.schedule
        else None
    )
    #Set the device to GPU if available
    if cfg.device.gpu_ids == -1:
        logger.info("Using CPU training")
        accelerator, devices, strategy, precision = (
            "cpu",
            None,
            None,
            cfg.device.precision,
        )
    else:
        accelerator, devices, strategy, precision = (
            "gpu",
            cfg.device.gpu_ids,
            None,
            cfg.device.precision,
        )

    if devices and len(devices) > 1:
        strategy = "ddp"
        env_utils.set_multi_processing(distributed=True)

    trainer = pl.Trainer(
        default_root_dir=cfg.save_dir,
        max_epochs=cfg.schedule.total_epochs,
        check_val_every_n_epoch=cfg.schedule.val_intervals,
        accelerator=accelerator,
        devices=[2],
        log_every_n_steps=cfg.log.interval,
        num_sanity_val_steps=0,
        callbacks=[TQDMProgressBar(refresh_rate=0)],
        logger=logger,
        benchmark=cfg.get("cudnn_benchmark", True),
        gradient_clip_val=cfg.get("grad_clip", 0.0),
        strategy=strategy,
        precision=precision,
    )
    trainer.fit(TrainTask, train_dataloader, val_dataloader, ckpt_path=model_resume_path)
    
    #Replay code
    #If task is 0, initialize the replay buffer with the task 0 dataset 
    #if task > 0 update the buffer with the new task dataset
    if task == 0:
        print("Creating buffer dataset")
        buffer_dataset = SmartBufferDataset(train_dataset)
    else:
        print("Updating buffer dataset")
        buffer_dataset.update_buffer(train_dataset, cfg)

[1m[35m[NanoDet][0m[34m[02-09 10:22:49][0m[32mINFO:[0m[97mStarting task1[0m
[1m[35m[NanoDet][0m[34m[02-09 10:22:49][0m[32mINFO:[0m[97mStarting task1[0m
INFO:NanoDet:Starting task1
[1m[35m[NanoDet][0m[34m[02-09 10:22:49][0m[32mINFO:[0m[97mSetting up data...[0m
[1m[35m[NanoDet][0m[34m[02-09 10:22:49][0m[32mINFO:[0m[97mSetting up data...[0m
INFO:NanoDet:Setting up data...


cfg/task1.yml




Load 273 xml files and 625 boxes
creating index...
index created!


[1m[35m[NanoDet][0m[34m[02-09 10:22:49][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-09 10:22:49][0m[32mINFO:[0m[97mCreating model...[0m
INFO:NanoDet:Creating model...


Load 4530 xml files and 13606 boxes
creating index...
index created!
model size is  1.0x
init weights...
=> loading pretrained model https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth
Finish initialize NanoDet-Plus Head.


[1m[35m[NanoDet][0m[34m[02-09 10:22:51][0m[32mINFO:[0m[97mLoaded model weight from models/task0/model_last.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-09 10:22:51][0m[32mINFO:[0m[97mLoaded model weight from models/task0/model_last.ckpt[0m
INFO:NanoDet:Loaded model weight from models/task0/model_last.ckpt
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name  | Type        | Params
--------------------------------------
0 | model | NanoDetPlus | 1.2 M 
--------------------------------------
1.2 M     Trainable params
0         Non-trainable params
1.2 M     Total params
4.699     Total estimated model params size (MB)
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
[1m[35m[NanoDet][0m[34m[02-09 10:22:53][0m[32mINFO:[0m[97mTrain|Epoch1/100|Iter0(1/273)| mem:4.36G| lr:1.00e-07| loss_qfl:0.6390| los

Updating buffer dataset
cfg/task2.yml




Load 97 xml files and 353 boxes
creating index...
index created!


[1m[35m[NanoDet][0m[34m[02-09 10:29:28][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-09 10:29:28][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-09 10:29:28][0m[32mINFO:[0m[97mCreating model...[0m
INFO:NanoDet:Creating model...


Load 4605 xml files and 13917 boxes
creating index...
index created!
model size is  1.0x
init weights...
=> loading pretrained model https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth
Finish initialize NanoDet-Plus Head.


[1m[35m[NanoDet][0m[34m[02-09 10:29:28][0m[32mINFO:[0m[97mLoaded model weight from models/task1/model_last.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-09 10:29:28][0m[32mINFO:[0m[97mLoaded model weight from models/task1/model_last.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-09 10:29:28][0m[32mINFO:[0m[97mLoaded model weight from models/task1/model_last.ckpt[0m
INFO:NanoDet:Loaded model weight from models/task1/model_last.ckpt
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name  | Type        | Params
--------------------------------------
0 | model | NanoDetPlus | 1.2 M 
--------------------------------------
1.2 M     Trainable params
0         Non-trainable params
1.2 M     Total params
4.699     Total estimated model params size (MB)
[1m[35m[NanoDet][0m[34m[02-09 10:29:29][0m[32mINFO:[0m[97mTrain|Epoch1/100|Iter0

Updating buffer dataset
cfg/task3.yml




Load 372 xml files and 425 boxes
creating index...
index created!


[1m[35m[NanoDet][0m[34m[02-09 10:33:16][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-09 10:33:16][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-09 10:33:16][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-09 10:33:16][0m[32mINFO:[0m[97mCreating model...[0m
INFO:NanoDet:Creating model...


Load 4669 xml files and 14313 boxes
creating index...
index created!
model size is  1.0x
init weights...
=> loading pretrained model https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth
Finish initialize NanoDet-Plus Head.


[1m[35m[NanoDet][0m[34m[02-09 10:33:17][0m[32mINFO:[0m[97mLoaded model weight from models/task2/model_last.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-09 10:33:17][0m[32mINFO:[0m[97mLoaded model weight from models/task2/model_last.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-09 10:33:17][0m[32mINFO:[0m[97mLoaded model weight from models/task2/model_last.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-09 10:33:17][0m[32mINFO:[0m[97mLoaded model weight from models/task2/model_last.ckpt[0m
INFO:NanoDet:Loaded model weight from models/task2/model_last.ckpt
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name  | Type        | Params
--------------------------------------
0 | model | NanoDetPlus | 1.2 M 
--------------------------------------
1.2 M     Trainable params
0         Non-trainable params
1.2 M     Total params
4.699     Tot

Updating buffer dataset
cfg/task4.yml




Load 263 xml files and 328 boxes
creating index...
index created!


[1m[35m[NanoDet][0m[34m[02-09 10:41:44][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-09 10:41:44][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-09 10:41:44][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-09 10:41:44][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-09 10:41:44][0m[32mINFO:[0m[97mCreating model...[0m
INFO:NanoDet:Creating model...


Load 4872 xml files and 14615 boxes
creating index...
index created!
model size is  1.0x
init weights...
=> loading pretrained model https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth
Finish initialize NanoDet-Plus Head.


[1m[35m[NanoDet][0m[34m[02-09 10:41:45][0m[32mINFO:[0m[97mLoaded model weight from models/task3/model_last.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-09 10:41:45][0m[32mINFO:[0m[97mLoaded model weight from models/task3/model_last.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-09 10:41:45][0m[32mINFO:[0m[97mLoaded model weight from models/task3/model_last.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-09 10:41:45][0m[32mINFO:[0m[97mLoaded model weight from models/task3/model_last.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-09 10:41:45][0m[32mINFO:[0m[97mLoaded model weight from models/task3/model_last.ckpt[0m
INFO:NanoDet:Loaded model weight from models/task3/model_last.ckpt
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name  | Type        | Params
--------------------------------------
0 | model | NanoDetPlus | 1.2 M 
---------

Updating buffer dataset
cfg/task5.yml




Load 279 xml files and 367 boxes
creating index...
index created!


[1m[35m[NanoDet][0m[34m[02-09 10:48:53][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-09 10:48:53][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-09 10:48:53][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-09 10:48:53][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-09 10:48:53][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-09 10:48:53][0m[32mINFO:[0m[97mCreating model...[0m
INFO:NanoDet:Creating model...


Load 4952 xml files and 14976 boxes
creating index...
index created!
model size is  1.0x
init weights...
=> loading pretrained model https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth
Finish initialize NanoDet-Plus Head.


[1m[35m[NanoDet][0m[34m[02-09 10:48:53][0m[32mINFO:[0m[97mLoaded model weight from models/task4/model_last.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-09 10:48:53][0m[32mINFO:[0m[97mLoaded model weight from models/task4/model_last.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-09 10:48:53][0m[32mINFO:[0m[97mLoaded model weight from models/task4/model_last.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-09 10:48:53][0m[32mINFO:[0m[97mLoaded model weight from models/task4/model_last.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-09 10:48:53][0m[32mINFO:[0m[97mLoaded model weight from models/task4/model_last.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-09 10:48:53][0m[32mINFO:[0m[97mLoaded model weight from models/task4/model_last.ckpt[0m
INFO:NanoDet:Loaded model weight from models/task4/model_last.ckpt
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES:

Updating buffer dataset


# TESTING

To get testing results, use this code in terminal:
```bash
python test.py --task val --config /home/pasti/PycharmProjects/nanodet_cl/eclod/cfg/Replay/task2.yml --model /home/pasti/PycharmProjects/nanodet_cl/eclod/models/Replay/task2/model_last.ckpt
```

# NOTES