In [2]:
import argparse
import os
import warnings
import yaml

import pytorch_lightning as pl
import torch
from pytorch_lightning.callbacks import TQDMProgressBar
from pytorch_lightning.accelerators import find_usable_cuda_devices
from torchvision import datasets
from torch.utils.data import Dataset, DataLoader
from nanodet.data.collate import naive_collate
from nanodet.data.dataset import build_dataset
from nanodet.evaluator import build_evaluator
from nanodet.trainer.task import TrainingTask
from torchvision.transforms import ToTensor, ToPILImage
from nanodet.util import (
    NanoDetLightningLogger,
    cfg,
    convert_old_model,
    env_utils,
    load_config,
    load_model_weight,
    mkdir,
)

#Seed shared by every random number generator handled by pytorch_lightning
SEED = 1234
#Logger used until the learning stream creates its own per-task loggers
logger = NanoDetLightningLogger("test")
pl.seed_everything(SEED)

Global seed set to 1234


1234

In [5]:
#Function to create the task configuration file required for training
def create_exp_cfg(yml_path, task):
    """Write the configuration file of one task of the class-incremental stream.

    The base configuration is read from ``yml_path``, specialised for ``task``
    and written to ``cfg/task<task>.yml``. The path of the written file is printed.

    Task 0 trains and validates on the first 15 classes. Task ``n`` > 0 trains only
    on class number ``15 + n``, validates on the first ``15 + n`` classes and sets
    ``schedule.load_model`` to ``models/task<n-1>/model_best/model_best.ckpt``.
    The head is always configured with all 20 classes, whatever the task.

    Args:
        yml_path: Path of the base YAML configuration. It must contain the
            ``data.train``, ``data.val`` and ``model.arch.head`` mappings, and
            the ``schedule`` mapping when ``task`` > 0.
        task: Index of the task in the stream, from 0 to 5.

    Raises:
        ValueError: If ``task`` is outside the stream (not between 0 and 5).
    """
    all_names = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
    #Classes learned together in the base task; each later task adds exactly one class
    num_base_classes = 15
    last_task = len(all_names) - num_base_classes
    #Reject invalid tasks before touching any file: a negative task would silently
    #pick classes from the end of the list and a too large one would raise IndexError
    if task not in range(last_task + 1):
        raise ValueError(
            "task must be between 0 and " + str(last_task) + ", got " + repr(task)
        )
    #Load the YAML file
    with open(yml_path, 'r') as file:
        temp_cfg = yaml.safe_load(file)
    #Save dir of the model
    temp_cfg['save_dir'] = 'models/task' + str(task)
    #Number of classes seen once this task has been learned
    num_seen_classes = num_base_classes + task
    #If base task, training and testing classes are the same
    if task == 0:
        train_names = all_names[:num_base_classes]
    #Else, training only on task specific class, and testing on all classes
    else:
        train_names = [all_names[num_seen_classes - 1]]
        #Start from the best checkpoint of the previous task
        temp_cfg['schedule']['load_model'] = 'models/task' + str(task-1) + '/model_best/model_best.ckpt'
    temp_cfg['data']['train']['class_names'] = train_names
    temp_cfg['data']['val']['class_names'] = all_names[:num_seen_classes]
    #The head always has one output per class of the whole stream (20), not only the
    #classes seen so far; the aux_head entry of the base configuration is left untouched
    temp_cfg['model']['arch']['head']['num_classes'] = len(all_names)

    temp_cfg_name = 'cfg/task' + str(task) + '.yml'
    print(temp_cfg_name)
    #Make sure the output directory exists before writing
    os.makedirs(os.path.dirname(temp_cfg_name), exist_ok=True)
    #Save the new configuration file
    with open(temp_cfg_name, 'w') as file:
        yaml.safe_dump(temp_cfg, file)

In [7]:
#Learning stream
#task 0: train on first 15 classes, test on 15 classes
#task 1: train on class n°16, test on 16 classes
#task 2: train on class n°17, test on 17 classes
#task 3: train on class n°18, test on 18 classes
#task 4: train on class n°19, test on 19 classes
#task 5: train on class n°20, test on 20 classes
#Number of tasks in the stream described above (task 0 to task 5 included)
NUM_TASKS = 6
#The loop index is named task_id so that it is not overwritten by the TrainingTask
#stored in `task` below
for task_id in range(NUM_TASKS):
    #NOTE(review): the saved output shows every log message repeated several times; this may come
    #from a new NanoDetLightningLogger being created at each iteration - confirm against its code
    logger = NanoDetLightningLogger('run_logs/task'+str(task_id))
    logger.info("Starting task" + str(task_id))
    logger.info("Setting up data...")
    #Create the task configuration file based on the task number and load the configuration
    task_cfg_path = 'cfg/task' + str(task_id) + '.yml'
    create_exp_cfg('cfg/VOC.yml', task_id)
    load_config(cfg, task_cfg_path)
    #Build datasets and dataloaders based on the task configuration file
    train_dataset = build_dataset(cfg.data.train, "train")
    val_dataset = build_dataset(cfg.data.val, "test")
    evaluator = build_evaluator(cfg.evaluator, val_dataset)
    #Settings shared by the training and the validation dataloaders
    loader_kwargs = dict(
        batch_size=cfg.device.batchsize_per_gpu,
        num_workers=cfg.device.workers_per_gpu,
        pin_memory=True,
        collate_fn=naive_collate,
    )
    #Training batches are shuffled and the last incomplete batch is dropped
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset, shuffle=True, drop_last=True, **loader_kwargs
    )
    #Validation keeps the dataset order and every sample
    val_dataloader = torch.utils.data.DataLoader(
        val_dataset, shuffle=False, drop_last=False, **loader_kwargs
    )
    #Create the model based on the task configuration file
    logger.info("Creating model...")
    task = TrainingTask(cfg, evaluator)
    #Load the model weights if the configuration defines a checkpoint (tasks after task 0)
    #NOTE(review): cfg is a shared object; if load_config only merges keys, a load_model entry
    #left by a previous run of this cell would still be present for task 0 - confirm
    if "load_model" in cfg.schedule:
        #map_location="cpu" lets a checkpoint saved from a GPU be read on any machine
        ckpt = torch.load(cfg.schedule.load_model, map_location="cpu")
        if "pytorch-lightning_version" not in ckpt:
            warnings.warn(
                "Warning! Old .pth checkpoint is deprecated. "
                "Convert the checkpoint with tools/convert_old_checkpoint.py "
            )
            ckpt = convert_old_model(ckpt)
        load_model_weight(task.model, ckpt, logger)
        logger.info("Loaded model weight from {}".format(cfg.schedule.load_model))
    #Resume from the last checkpoint of this task only when the configuration asks for it
    if "resume" in cfg.schedule:
        model_resume_path = os.path.join(cfg.save_dir, "model_last.ckpt")
    else:
        model_resume_path = None
    #Set the device to GPU if available
    precision = cfg.device.precision
    strategy = None
    if cfg.device.gpu_ids == -1:
        logger.info("Using CPU training")
        accelerator, devices = "cpu", None
    else:
        accelerator, devices = "gpu", cfg.device.gpu_ids

    #Several configured GPUs require distributed training
    if devices and len(devices) > 1:
        strategy = "ddp"
        env_utils.set_multi_processing(distributed=True)

    trainer = pl.Trainer(
        default_root_dir=cfg.save_dir,
        max_epochs=cfg.schedule.total_epochs,
        check_val_every_n_epoch=cfg.schedule.val_intervals,
        accelerator=accelerator,
        #Use the devices selected from the configuration instead of a hard-coded [0],
        #which ignored cfg.device.gpu_ids and was passed even on the CPU path
        devices=devices,
        log_every_n_steps=cfg.log.interval,
        num_sanity_val_steps=0,
        callbacks=[TQDMProgressBar(refresh_rate=0)],
        logger=logger,
        benchmark=cfg.get("cudnn_benchmark", True),
        gradient_clip_val=cfg.get("grad_clip", 0.0),
        strategy=strategy,
        precision=precision,
    )
    trainer.fit(task, train_dataloader, val_dataloader, ckpt_path=model_resume_path)

[1m[35m[NanoDet][0m[34m[02-05 17:00:51][0m[32mINFO:[0m[97mStarting task0[0m
[1m[35m[NanoDet][0m[34m[02-05 17:00:51][0m[32mINFO:[0m[97mStarting task0[0m
[1m[35m[NanoDet][0m[34m[02-05 17:00:51][0m[32mINFO:[0m[97mStarting task0[0m
[1m[35m[NanoDet][0m[34m[02-05 17:00:51][0m[32mINFO:[0m[97mStarting task0[0m
[1m[35m[NanoDet][0m[34m[02-05 17:00:51][0m[32mINFO:[0m[97mStarting task0[0m
INFO:NanoDet:Starting task0
[1m[35m[NanoDet][0m[34m[02-05 17:00:51][0m[32mINFO:[0m[97mSetting up data...[0m
[1m[35m[NanoDet][0m[34m[02-05 17:00:51][0m[32mINFO:[0m[97mSetting up data...[0m
[1m[35m[NanoDet][0m[34m[02-05 17:00:51][0m[32mINFO:[0m[97mSetting up data...[0m
[1m[35m[NanoDet][0m[34m[02-05 17:00:51][0m[32mINFO:[0m[97mSetting up data...[0m
[1m[35m[NanoDet][0m[34m[02-05 17:00:51][0m[32mINFO:[0m[97mSetting up data...[0m
INFO:NanoDet:Setting up data...


cfg/task0.yml




creating index...
index created!


[1m[35m[NanoDet][0m[34m[02-05 17:00:52][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-05 17:00:52][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-05 17:00:52][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-05 17:00:52][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-05 17:00:52][0m[32mINFO:[0m[97mCreating model...[0m
INFO:NanoDet:Creating model...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


creating index...
index created!
model size is  1.0x
init weights...
=> loading pretrained model https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth
Finish initialize NanoDet-Plus Head.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name  | Type        | Params
--------------------------------------
0 | model | NanoDetPlus | 1.2 M 
--------------------------------------
1.2 M     Trainable params
0         Non-trainable params
1.2 M     Total params
4.699     Total estimated model params size (MB)
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
[1m[35m[NanoDet][0m[34m[02-05 17:00:55][0m[32mINFO:[0m[97mTrain|Epoch1/100|Iter0(1/78)| mem:4.34G| lr:1.00e-07| loss_qfl:0.7161| loss_bbox:1.0482| loss_dfl:0.5199| [0m
[1m[35m[NanoDet][0m[34m[02-05 17:00:55][0m[32mINFO:[0m[97mTrain|Epoch1/100|Iter0(1/78)| mem:4.34G| lr:1.00e-07| loss_qfl:0.7161| loss_bbox:1.0482| loss_dfl:0.5199| [0m
[1m[35m[NanoDet][0m[34m[02-05 17:00:55][0m[32mINFO:[0m[97mTrain|Epoch1/100|Iter0(1/78)| mem:4.34G| lr:1.00e-07| loss_qfl:0.7161| loss_bbox:1.0482| loss_dfl:0.5199| [0m
[1m[35m[NanoDet][0m[34m[02-05 17:00:55][0m[32mINFO:[0m[97mTrain|Epoch

Loading and preparing results...
DONE (t=2.46s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=15.13s).
Accumulating evaluation results...


[1m[35m[NanoDet][0m[34m[02-05 17:05:30][0m[32mINFO:[0m[97m
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.215
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.395
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.206
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.007
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.062
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.307
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.273
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.412
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.444
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.030
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.258
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= lar

DONE (t=5.92s).


[1m[35m[NanoDet][0m[34m[02-05 17:05:31][0m[32mINFO:[0m[97mSaving model to models/task0/model_best/nanodet_model_best.pth[0m
[1m[35m[NanoDet][0m[34m[02-05 17:05:31][0m[32mINFO:[0m[97mSaving model to models/task0/model_best/nanodet_model_best.pth[0m
[1m[35m[NanoDet][0m[34m[02-05 17:05:31][0m[32mINFO:[0m[97mSaving model to models/task0/model_best/nanodet_model_best.pth[0m
[1m[35m[NanoDet][0m[34m[02-05 17:05:31][0m[32mINFO:[0m[97mSaving model to models/task0/model_best/nanodet_model_best.pth[0m
[1m[35m[NanoDet][0m[34m[02-05 17:05:31][0m[32mINFO:[0m[97mSaving model to models/task0/model_best/nanodet_model_best.pth[0m
INFO:NanoDet:Saving model to models/task0/model_best/nanodet_model_best.pth
[1m[35m[NanoDet][0m[34m[02-05 17:05:31][0m[32mINFO:[0m[97mVal_metrics: {'mAP': 0.21534234848034042, 'AP_50': 0.3954651408092633, 'AP_75': 0.2056239678430764, 'AP_small': 0.007166601243742006, 'AP_m': 0.061911468169523726, 'AP_l': 0.3071423814042188}[

Loading and preparing results...
DONE (t=2.02s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=15.25s).
Accumulating evaluation results...


[1m[35m[NanoDet][0m[34m[02-05 17:10:07][0m[32mINFO:[0m[97m
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.257
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.454
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.252
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.009
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.081
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.359
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.293
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.429
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.462
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.040
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.299
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= lar

DONE (t=5.61s).


[1m[35m[NanoDet][0m[34m[02-05 17:10:08][0m[32mINFO:[0m[97mSaving model to models/task0/model_best/nanodet_model_best.pth[0m
[1m[35m[NanoDet][0m[34m[02-05 17:10:08][0m[32mINFO:[0m[97mSaving model to models/task0/model_best/nanodet_model_best.pth[0m
[1m[35m[NanoDet][0m[34m[02-05 17:10:08][0m[32mINFO:[0m[97mSaving model to models/task0/model_best/nanodet_model_best.pth[0m
[1m[35m[NanoDet][0m[34m[02-05 17:10:08][0m[32mINFO:[0m[97mSaving model to models/task0/model_best/nanodet_model_best.pth[0m
[1m[35m[NanoDet][0m[34m[02-05 17:10:08][0m[32mINFO:[0m[97mSaving model to models/task0/model_best/nanodet_model_best.pth[0m
INFO:NanoDet:Saving model to models/task0/model_best/nanodet_model_best.pth
[1m[35m[NanoDet][0m[34m[02-05 17:10:08][0m[32mINFO:[0m[97mVal_metrics: {'mAP': 0.2567867358243099, 'AP_50': 0.45447055164516315, 'AP_75': 0.2524480897448554, 'AP_small': 0.00929627938811334, 'AP_m': 0.08145247242311514, 'AP_l': 0.3588006707881706}[0m

Loading and preparing results...
DONE (t=2.28s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=12.80s).
Accumulating evaluation results...


[1m[35m[NanoDet][0m[34m[02-05 17:14:42][0m[32mINFO:[0m[97m
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.275
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.480
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.271
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.015
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.095
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.379
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.299
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.445
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.475
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.049
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.311
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= lar

DONE (t=6.44s).


[1m[35m[NanoDet][0m[34m[02-05 17:14:43][0m[32mINFO:[0m[97mSaving model to models/task0/model_best/nanodet_model_best.pth[0m
[1m[35m[NanoDet][0m[34m[02-05 17:14:43][0m[32mINFO:[0m[97mSaving model to models/task0/model_best/nanodet_model_best.pth[0m
[1m[35m[NanoDet][0m[34m[02-05 17:14:43][0m[32mINFO:[0m[97mSaving model to models/task0/model_best/nanodet_model_best.pth[0m
[1m[35m[NanoDet][0m[34m[02-05 17:14:43][0m[32mINFO:[0m[97mSaving model to models/task0/model_best/nanodet_model_best.pth[0m
[1m[35m[NanoDet][0m[34m[02-05 17:14:43][0m[32mINFO:[0m[97mSaving model to models/task0/model_best/nanodet_model_best.pth[0m
INFO:NanoDet:Saving model to models/task0/model_best/nanodet_model_best.pth
[1m[35m[NanoDet][0m[34m[02-05 17:14:43][0m[32mINFO:[0m[97mVal_metrics: {'mAP': 0.2752407590962705, 'AP_50': 0.4798011816465133, 'AP_75': 0.27091718438701745, 'AP_small': 0.015237976817805678, 'AP_m': 0.09492742060825167, 'AP_l': 0.378765832707061}[0m

cfg/task1.yml




creating index...
index created!


[1m[35m[NanoDet][0m[34m[02-05 17:15:48][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-05 17:15:48][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-05 17:15:48][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-05 17:15:48][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-05 17:15:48][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[02-05 17:15:48][0m[32mINFO:[0m[97mCreating model...[0m
INFO:NanoDet:Creating model...


creating index...
index created!
model size is  1.0x
init weights...
=> loading pretrained model https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth
Finish initialize NanoDet-Plus Head.


[1m[35m[NanoDet][0m[34m[02-05 17:15:48][0m[32mINFO:[0m[97mLoaded model weight from models/task0/model_best/model_best.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-05 17:15:48][0m[32mINFO:[0m[97mLoaded model weight from models/task0/model_best/model_best.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-05 17:15:48][0m[32mINFO:[0m[97mLoaded model weight from models/task0/model_best/model_best.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-05 17:15:48][0m[32mINFO:[0m[97mLoaded model weight from models/task0/model_best/model_best.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-05 17:15:48][0m[32mINFO:[0m[97mLoaded model weight from models/task0/model_best/model_best.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-05 17:15:48][0m[32mINFO:[0m[97mLoaded model weight from models/task0/model_best/model_best.ckpt[0m
INFO:NanoDet:Loaded model weight from models/task0/model_best/model_best.ckpt
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 I

cfg/task2.yml




creating index...
index created!


KeyboardInterrupt: 

In [ ]:
#PROBLEMS
#0. Model AP when training on new task goes immediately to 0
#   Looks like the model is learning pottedplant with the name "aeroplane"
#   The xml dataset returns a coco_dict; the coco_dict needs to report all the categories, FIXED
#1. The id of the new class needs to be consistent with its id in the validation dataset (sub-problem of 0.), FIXED
#2. The evaluator is not printing the per-class AP results to the txt file, FIXED
#3. Weights are loaded from the previous model while completely skipping the heads.
#   As the model does not always have the same number of classes, it loads its weights but not the heads, which means that the model
#   immediately forgets the previous task, FIXED
# 5. To check that the adaptation from one task to another is working, we can verify, before starting to learn the new task, that the new "adapted model" works on the previous tasks as before. -> WORKS BUT NOT PERFECTLY!!