In [1]:
import argparse
import os
import warnings

import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
import yaml
from pytorch_lightning.accelerators import find_usable_cuda_devices
from pytorch_lightning.callbacks import TQDMProgressBar
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
from torchvision.transforms import ToPILImage, ToTensor

from nanodet.data.collate import naive_collate
from nanodet.data.dataset import build_dataset
from nanodet.evaluator import build_evaluator
from nanodet.trainer.latent_dist_task import LatentDistTrainingTask
from nanodet.trainer.task import TrainingTask
from nanodet.util import (
    NanoDetLightningLogger,
    cfg,
    convert_old_model,
    env_utils,
    load_config,
    load_model_weight,
    mkdir,
)

#Notebook-wide setup: a NanoDet logger constructed with 'test' and one fixed global seed
SEED = 9
logger = NanoDetLightningLogger('test')
pl.seed_everything(SEED)

#Function to create the task configuration file required for training
def create_exp_cfg(yml_path, task):
    """Derive the configuration of one incremental task and write it to ``cfg/task<task>.yml``.

    The base YAML at ``yml_path`` is loaded and these entries are overwritten:

    * ``save_dir`` becomes ``models/task<task>``.
    * ``model.arch.head.num_classes`` is always set to the full 20 classes.
    * Task 0 trains and validates on the first 15 classes.
    * Task ``k`` (1..5) trains only on class number ``15 + k``, validates on the
      first ``15 + k`` classes, and sets ``schedule.load_model`` to the
      ``model_last.ckpt`` of task ``k - 1``.

    Args:
        yml_path: Path of the base YAML configuration file.
        task: Task index, from 0 up to and including 5.

    Returns:
        The path of the configuration file that was written.

    Raises:
        ValueError: If ``task`` is outside the supported range.
    """
    all_names = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
    num_base_classes = 15
    last_task = len(all_names) - num_base_classes
    #Reject out-of-range tasks early: a negative task would otherwise index all_names from the
    #wrong end and silently produce a wrong class list, a too large one fails with a bare IndexError
    if not 0 <= task <= last_task:
        raise ValueError(f"task must be between 0 and {last_task}, got {task!r}")
    #Load the YAML file
    with open(yml_path, 'r') as file:
        temp_cfg = yaml.safe_load(file)
    #Save dir of the model
    temp_cfg['save_dir'] = f'models/task{task}'
    #The head is always sized for all 20 classes, whatever the task (earlier variants: 15 / 15+task).
    #The matching aux_head assignment stays disabled:
    #temp_cfg['model']['arch']['aux_head']['num_classes'] = 20
    temp_cfg['model']['arch']['head']['num_classes'] = len(all_names)
    #If base task, training and testing classes are the same
    if task == 0:
        temp_cfg['data']['train']['class_names'] = all_names[:num_base_classes]
        temp_cfg['data']['val']['class_names'] = all_names[:num_base_classes]
    #Else, training only on the task specific class, and testing on all classes seen so far
    else:
        temp_cfg['data']['train']['class_names'] = [all_names[num_base_classes - 1 + task]]
        temp_cfg['data']['val']['class_names'] = all_names[:num_base_classes + task]
        temp_cfg['schedule']['load_model'] = f'models/task{task - 1}/model_last.ckpt'

    temp_cfg_name = f'cfg/task{task}.yml'
    print(temp_cfg_name)
    #Make sure the output directory exists, then save the new configuration file
    os.makedirs(os.path.dirname(temp_cfg_name), exist_ok=True)
    with open(temp_cfg_name, 'w') as file:
        yaml.safe_dump(temp_cfg, file)
    return temp_cfg_name

Global seed set to 9


In [3]:
###LEARNING STREAM###
#task 0: train on first 15 classes, test on 15 classes
#task 1: train on class n°16, test on 16 classes
#task 2: train on class n°17, test on 17 classes
#task 3: train on class n°18, test on 18 classes
#task 4: train on class n°19, test on 19 classes
#task 5: train on class n°20, test on 20 classes
#torch.set_printoptions(profile="full")
#opt_epochs = [60, 80, 40, 60 ,40]

#Run-level settings that take precedence over the YAML configuration
MAX_EPOCHS = 100
VAL_EVERY_N_EPOCHS = 10
#GPU ids given to the Trainer on the GPU path; set to None to use cfg.device.gpu_ids instead
GPU_IDS_OVERRIDE = [2]

#Only task 5 is run here. Every task > 0 loads models/task<task-1>/model_last.ckpt,
#so the previous task must have been trained first.
for task in range(5, 6):
    #NOTE(review): the recorded output shows every log message several times, which suggests that
    #re-creating the logger stacks handlers — confirm against NanoDetLightningLogger
    logger = NanoDetLightningLogger(f'run_logs/task{task}')
    logger.info(f"Starting task{task}")
    logger.info("Setting up data...")
    #Create the task configuration file based on the task number and load the configuration
    create_exp_cfg('cfg/VOC_dist1.yml', task)
    load_config(cfg, f'cfg/task{task}.yml')
    #Build datasets and dataloaders based on the task configuration file
    train_dataset = build_dataset(cfg.data.train, "train")
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.device.batchsize_per_gpu,
        shuffle=True,
        num_workers=cfg.device.workers_per_gpu,
        pin_memory=True,
        collate_fn=naive_collate,
        drop_last=True,
    )
    val_dataset = build_dataset(cfg.data.val, "test")
    val_dataloader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=cfg.device.batchsize_per_gpu,
        shuffle=False,
        num_workers=cfg.device.workers_per_gpu,
        pin_memory=True,
        collate_fn=naive_collate,
        drop_last=False,
    )
    evaluator = build_evaluator(cfg.evaluator, val_dataset)

    #Create the model based on the task configuration file
    logger.info("Creating models")
    if task == 0:
        TrainTask = TrainingTask(cfg, evaluator)
    else:
        TrainTask = LatentDistTrainingTask(cfg, evaluator)
        #Load the model weights if task is not 0; student and teacher start from the same checkpoint
        if "load_model" in cfg.schedule:
            #map_location="cpu" keeps loading independent of the device the checkpoint was saved from
            ckpt = torch.load(cfg.schedule.load_model, map_location="cpu")
            load_model_weight(TrainTask.model, ckpt, logger)
            load_model_weight(TrainTask.teacher, ckpt, logger)
            logger.info("Loaded model weight from {}".format(cfg.schedule.load_model))

    #Single location of this task's last checkpoint, used both for resuming and for the final save
    last_ckpt_path = os.path.join(cfg.save_dir, "model_last.ckpt")
    model_resume_path = last_ckpt_path if "resume" in cfg.schedule else None

    #Set the device to GPU if available
    if cfg.device.gpu_ids == -1:
        logger.info("Using CPU training")
        accelerator, devices, strategy, precision = (
            "cpu",
            None,
            None,
            cfg.device.precision,
        )
    else:
        accelerator, devices, strategy, precision = (
            "gpu",
            GPU_IDS_OVERRIDE if GPU_IDS_OVERRIDE is not None else cfg.device.gpu_ids,
            None,
            cfg.device.precision,
        )

    if devices and len(devices) > 1:
        strategy = "ddp"
        env_utils.set_multi_processing(distributed=True)

    trainer = pl.Trainer(
        default_root_dir=cfg.save_dir,
        max_epochs=MAX_EPOCHS,
        check_val_every_n_epoch=VAL_EVERY_N_EPOCHS,
        accelerator=accelerator,
        #Pass the devices selected above: a hard-coded [2] here broke the CPU branch
        #and made the ddp decision above disagree with what the Trainer actually used
        devices=devices,
        log_every_n_steps=cfg.log.interval,
        num_sanity_val_steps=0,
        callbacks=[TQDMProgressBar(refresh_rate=0)],# TrainTask.early_stop_callback],
        logger=logger,
        benchmark=cfg.get("cudnn_benchmark", True),
        gradient_clip_val=cfg.get("grad_clip", 0.0),
        strategy=strategy,
        precision=precision,
    )
    trainer.fit(TrainTask, train_dataloader, val_dataloader, ckpt_path=model_resume_path)

    #Keep only the student weights; this file is what the next task loads through schedule.load_model
    state_dict = TrainTask.model.state_dict()
    new_state_dict = {k: v for k, v in state_dict.items() if "teacher" not in k}

    os.makedirs(cfg.save_dir, exist_ok=True)
    torch.save({'state_dict': new_state_dict}, last_ckpt_path)

[1m[35m[NanoDet][0m[34m[02-21 09:58:27][0m[32mINFO:[0m[97mStarting task5[0m
[1m[35m[NanoDet][0m[34m[02-21 09:58:27][0m[32mINFO:[0m[97mStarting task5[0m
[1m[35m[NanoDet][0m[34m[02-21 09:58:27][0m[32mINFO:[0m[97mStarting task5[0m
[1m[35m[NanoDet][0m[34m[02-21 09:58:27][0m[32mINFO:[0m[97mSetting up data...[0m
[1m[35m[NanoDet][0m[34m[02-21 09:58:27][0m[32mINFO:[0m[97mSetting up data...[0m
[1m[35m[NanoDet][0m[34m[02-21 09:58:27][0m[32mINFO:[0m[97mSetting up data...[0m


cfg/task5.yml




Load 279 xml files and 367 boxes
creating index...
index created!


[1m[35m[NanoDet][0m[34m[02-21 09:58:28][0m[32mINFO:[0m[97mCreating models[0m
[1m[35m[NanoDet][0m[34m[02-21 09:58:28][0m[32mINFO:[0m[97mCreating models[0m
[1m[35m[NanoDet][0m[34m[02-21 09:58:28][0m[32mINFO:[0m[97mCreating models[0m
INFO:NanoDet:Creating models


Load 4952 xml files and 14976 boxes
creating index...
index created!
model size is  1.0x
init weights...
=> loading pretrained model https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth
Finish initialize NanoDet-Plus Head.
model size is  1.0x
init weights...
=> loading pretrained model https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth
Finish initialize NanoDet-Plus Head.


FileNotFoundError: [Errno 2] No such file or directory: 'models/task4/model_last.ckpt'

In [4]:
#Stand-alone setup for one task (no training): builds the loaders and the task object that the
#inspection cells below use
task = 1
logger = NanoDetLightningLogger(f'run_logs/task{task}')
logger.info(f"Starting task{task}")
logger.info("Setting up data...")
#Create the task configuration file based on the task number and load the configuration
create_exp_cfg('cfg/VOC_dist.yml', task)
load_config(cfg, f'cfg/task{task}.yml')
#Build datasets and dataloaders based on the task configuration file
train_dataset = build_dataset(cfg.data.train, "train")
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    #one image per batch here, unlike the training cell, which uses cfg.device.batchsize_per_gpu
    batch_size=1,
    shuffle=True,
    num_workers=cfg.device.workers_per_gpu,
    pin_memory=True,
    collate_fn=naive_collate,
    drop_last=True,
)
val_dataset = build_dataset(cfg.data.val, "test")
val_dataloader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=cfg.device.batchsize_per_gpu,
    shuffle=False,
    num_workers=cfg.device.workers_per_gpu,
    pin_memory=True,
    collate_fn=naive_collate,
    drop_last=False,
)
evaluator = build_evaluator(cfg.evaluator, val_dataset)

#Create the model based on the task configuration file
logger.info("Creating models")
if task == 0:
    TrainTask = TrainingTask(cfg, evaluator)
else:
    TrainTask = LatentDistTrainingTask(cfg, evaluator)
    #Load the model weights if task is not 0; student and teacher start from the same checkpoint
    if "load_model" in cfg.schedule:
        #map_location="cpu" keeps loading independent of the device the checkpoint was saved from
        ckpt = torch.load(cfg.schedule.load_model, map_location="cpu")
        load_model_weight(TrainTask.model, ckpt, logger)
        load_model_weight(TrainTask.teacher, ckpt, logger)
        logger.info("Loaded model weight from {}".format(cfg.schedule.load_model))


[1m[35m[NanoDet][0m[34m[02-20 15:03:17][0m[32mINFO:[0m[97mStarting task1[0m
[1m[35m[NanoDet][0m[34m[02-20 15:03:17][0m[32mINFO:[0m[97mStarting task1[0m
[1m[35m[NanoDet][0m[34m[02-20 15:03:17][0m[32mINFO:[0m[97mStarting task1[0m
[1m[35m[NanoDet][0m[34m[02-20 15:03:17][0m[32mINFO:[0m[97mStarting task1[0m
INFO:NanoDet:Starting task1
[1m[35m[NanoDet][0m[34m[02-20 15:03:17][0m[32mINFO:[0m[97mSetting up data...[0m
[1m[35m[NanoDet][0m[34m[02-20 15:03:17][0m[32mINFO:[0m[97mSetting up data...[0m
[1m[35m[NanoDet][0m[34m[02-20 15:03:17][0m[32mINFO:[0m[97mSetting up data...[0m
[1m[35m[NanoDet][0m[34m[02-20 15:03:17][0m[32mINFO:[0m[97mSetting up data...[0m
INFO:NanoDet:Setting up data...


cfg/task1.yml




Load 273 xml files and 625 boxes
creating index...
index created!


[1m[35m[NanoDet][0m[34m[02-20 15:03:18][0m[32mINFO:[0m[97mCreating models[0m
[1m[35m[NanoDet][0m[34m[02-20 15:03:18][0m[32mINFO:[0m[97mCreating models[0m
[1m[35m[NanoDet][0m[34m[02-20 15:03:18][0m[32mINFO:[0m[97mCreating models[0m
[1m[35m[NanoDet][0m[34m[02-20 15:03:18][0m[32mINFO:[0m[97mCreating models[0m
INFO:NanoDet:Creating models


Load 4530 xml files and 13606 boxes
creating index...
index created!
model size is  1.0x
init weights...
=> loading pretrained model https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth
Finish initialize NanoDet-Plus Head.
model size is  1.0x
init weights...
=> loading pretrained model https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth
Finish initialize NanoDet-Plus Head.


[1m[35m[NanoDet][0m[34m[02-20 15:03:18][0m[32mINFO:[0m[97mLoaded model weight from models/task0/model_last.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-20 15:03:18][0m[32mINFO:[0m[97mLoaded model weight from models/task0/model_last.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-20 15:03:18][0m[32mINFO:[0m[97mLoaded model weight from models/task0/model_last.ckpt[0m
[1m[35m[NanoDet][0m[34m[02-20 15:03:18][0m[32mINFO:[0m[97mLoaded model weight from models/task0/model_last.ckpt[0m
INFO:NanoDet:Loaded model weight from models/task0/model_last.ckpt


In [5]:
#Fetch one training batch and preprocess it for the inspection cells below.
#next(iter(...)) fails loudly (StopIteration) on an empty loader, where the previous
#loop-and-break silently left `batch` undefined.
batch = TrainTask._preprocess_batch_input(next(iter(train_dataloader)))

In [7]:
#Run the student backbone on the sampled batch and print the shape of each returned feature map
img = batch["img"]
feat = TrainTask.model.backbone(img)
for level_feat in feat:
    print(level_feat.shape)

torch.Size([1, 116, 28, 40])
torch.Size([1, 232, 14, 20])
torch.Size([1, 464, 7, 10])


In [8]:
#Same check for the teacher backbone: print the shape of each returned feature map
img = batch["img"]
feat = TrainTask.teacher.backbone(img)
for level_feat in feat:
    print(level_feat.shape)

torch.Size([1, 116, 28, 40])
torch.Size([1, 232, 14, 20])
torch.Size([1, 464, 7, 10])


In [6]:
#Compute the student backbone features in this cell: `stud_feat` was previously only defined by a
#later cell, so this cell failed on a fresh kernel
stud_feat = TrainTask.model.backbone(img)
#Print shapes only; printing the tensors themselves floods the output
for level_feat in stud_feat:
    print(level_feat.shape)
stud_fpn_feat = TrainTask.model.fpn(stud_feat)
#NOTE(review): the list keeps its original name because the next cell reads it, but it is built
#from the student model (TrainTask.model), not the teacher — confirm which one was intended
teach_int_head_out = []
for fpn_level, cls_convs in zip(stud_fpn_feat, TrainTask.model.head.cls_convs):
    #Apply only the cls_convs stack of this level and keep its output
    level_out = fpn_level
    for conv in cls_convs:
        level_out = conv(level_out)
    teach_int_head_out.append(level_out)

In [14]:
#Shape of the second entry (index 1) of teach_int_head_out
print(teach_int_head_out[1].size())

torch.Size([1, 96, 16, 20])


In [30]:
#Compare student and teacher backbone features computed on the same image batch
stud_feat = TrainTask.model.backbone(img)
teach_feat = TrainTask.teacher.backbone(img)
lista = []   #raw student feature maps
lista1 = []  #student feature maps after a softmax over dim=1
for tensor in stud_feat:
    tensor_prob = nn.functional.softmax(tensor, dim=1)
    lista.append(tensor)
    lista1.append(tensor_prob)
    print(tensor_prob.shape)
    print(tensor.shape)
mse_loss = nn.MSELoss()
#NOTE(review): the loop used to zip stud_feat with itself, which can only ever print 0.
#The teacher features are assumed to be the intended reference — confirm.
for stud_level, teach_level in zip(stud_feat, teach_feat):
    loss = mse_loss(stud_level, teach_level)
    print(loss)
    

torch.Size([1, 116, 32, 40])
torch.Size([1, 116, 32, 40])
torch.Size([1, 232, 16, 20])
torch.Size([1, 232, 16, 20])
torch.Size([1, 464, 8, 10])
torch.Size([1, 464, 8, 10])
tensor(0., grad_fn=<MseLossBackward0>)
tensor(0., grad_fn=<MseLossBackward0>)
tensor(0., grad_fn=<MseLossBackward0>)


In [27]:
def cls_conv_outputs(net, image):
    """Return, for each FPN level, the result of pushing ``net.fpn(net.backbone(image))`` through ``net.head.cls_convs``.

    Only the cls_convs stacks are applied; the head's final gfl_cls layer is not.
    """
    level_outputs = []
    for feat, cls_convs in zip(net.fpn(net.backbone(image)), net.head.cls_convs):
        for conv in cls_convs:
            feat = conv(feat)
        level_outputs.append(feat)
    return level_outputs


#Recomputed from `img` so the cell does not depend on `stud_fpn_feat` left over from another cell
outputs = cls_conv_outputs(TrainTask.model, img)
#assumes the teacher exposes the same backbone / fpn / head.cls_convs layout as the student — TODO confirm
teacher_outputs = cls_conv_outputs(TrainTask.teacher, img)

#NOTE(review): the MSE used to be taken between `outputs` and itself (always 0.0);
#the teacher outputs are assumed to be the intended reference — confirm
mse_list = [
    F.mse_loss(stud_out, teach_out).item()
    for stud_out, teach_out in zip(outputs, teacher_outputs)
]
print(mse_list)
#Remaining head steps, kept for reference. They used to sit in a bare string literal,
#which was echoed as cell output:
#    output = gfl_cls(feat)
#    outputs.append(output.flatten(start_dim=2))
#outputs = torch.cat(outputs, dim=2).permute(0, 2, 1)

[0.0, 0.0, 0.0, 0.0]


'\n    output = gfl_cls(feat)\n    outputs.append(output.flatten(start_dim=2))\noutputs = torch.cat(outputs, dim=2).permute(0, 2, 1)\n'

In [10]:
#Fetch the head state dicts of teacher and student, then show the second module of the
#teacher head's first cls_convs entry
teacher_head = TrainTask.teacher.head
teacher_state_dict = teacher_head.state_dict()
student_state_dict = TrainTask.model.head.state_dict()
print(teacher_head.cls_convs[0][1])

DepthwiseConvModule(
  (depthwise): Conv2d(96, 96, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=96, bias=False)
  (pointwise): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (dwnorm): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pwnorm): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act): LeakyReLU(negative_slope=0.1, inplace=True)
)


In [11]:
#Shorthand for the teacher network's head, used by the following cell
head = TrainTask.teacher.head

In [12]:
#Print each entry of the teacher head's cls_convs container in turn
for level_convs in head.cls_convs:
    print(level_convs)

ModuleList(
  (0): DepthwiseConvModule(
    (depthwise): Conv2d(96, 96, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=96, bias=False)
    (pointwise): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (dwnorm): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (pwnorm): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (1): DepthwiseConvModule(
    (depthwise): Conv2d(96, 96, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=96, bias=False)
    (pointwise): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (dwnorm): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (pwnorm): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): LeakyReLU(negative_slope=0.1, inplace=True)
  )
)
ModuleList(
  (0): DepthwiseConvModule(
    (depthwise): Conv2d(96

In [3]:
##################
# COPY HEAD TEST #
##################
#Number of leading entries along dim 0 of every gfl_cls tensor copied from teacher to student.
#NOTE(review): the earlier comment said "0 to 15" while the code sliced [:17]; the value used by
#the code is kept — confirm which one is intended
NUM_COPIED_CHANNELS = 17
COPY_TEST_CKPT = "models/task1/model_last.ckpt"

# Get the gfl_cls state dict of both models
teacher_state_dict = TrainTask.teacher.head.gfl_cls.state_dict()
student_state_dict = TrainTask.model.head.gfl_cls.state_dict()
# Overwrite the first NUM_COPIED_CHANNELS entries of every student tensor with the teacher's values
for name, param in teacher_state_dict.items():
    student_state_dict[name][:NUM_COPIED_CHANNELS] = param[:NUM_COPIED_CHANNELS]
# Load the updated state dict into the student model
TrainTask.model.head.gfl_cls.load_state_dict(student_state_dict)

# Save the modified student weights directly, in the same {'state_dict': ...} layout the training
# cell writes. `trainer` is only defined by the training cell and is not tied to the TrainTask
# modified here, so trainer.save_checkpoint could fail or store a different module.
student_weights = {k: v for k, v in TrainTask.model.state_dict().items() if "teacher" not in k}
os.makedirs(os.path.dirname(COPY_TEST_CKPT), exist_ok=True)
torch.save({'state_dict': student_weights}, COPY_TEST_CKPT)