In [1]:
import sys
sys.path.append("..")
import math
import torch
import torch as th
from PIL import Image
import numpy as np

import torchvision as thv
# import torchmetrics as thm
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

import pytorch_lightning as pl

from src.datamodule import BarcodeDM
from src.config import Config
from src.io import load_object
import albumentations as albu

import logging
logging.getLogger("lightning").setLevel(logging.DEBUG)

from collections import defaultdict

In [2]:
cfg = Config.from_yaml("../config/baseline_detect.yml")
data = BarcodeDM(cfg.data_config, task=cfg.task, dry_run=True)
data.prepare_data()
data.setup()

In [3]:
from clearml import Task

In [4]:
class DetectModel(pl.LightningModule):
    def __init__(self, cfg: Config):
        super().__init__()
        self.cfg = cfg
        self.model = thv.models.detection.fasterrcnn_resnet50_fpn(weights="COCO_V1")
        in_features = self.model.roi_heads.box_predictor.cls_score.in_features
        self.model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)
        self.val_map = MeanAveragePrecision()
        self.test_map = MeanAveragePrecision()
        self.train_loss_dict = defaultdict(list)
        self.params = self.cfg.dict()
        self.save_hyperparameters(self.params)
        self.task = Task.init(
            project_name=self.cfg.project_name, 
            task_name=self.cfg.task_name
        )
        self.clearml_log = self.task.get_logger()
        self.task.connect(self.params)

    
    def forward(self, x: torch.Tensor):
        print(f"forward:")
        return self.model(x)
    
    def configure_optimizers(self):
        return load_object(self.cfg.optimizer)(
            self.model.parameters(), lr=self.cfg.lr, **self.cfg.optimizer_kwargs,
        )

    
    def training_step(self, batch, batch_idx):
        images, targets = batch
        loss_dict = self.model(images, targets)
        loss = sum(loss for name, loss in loss_dict.items() if name != "loss_classifier")
        self.log("train_loss", loss, on_step=True, on_epoch=True)
        for k,v in loss_dict.items():
            self.train_loss_dict[k].append(v.item())
            self.log(f"{k}_train", v.item(), on_step=True, on_epoch=True)
        #     self.clearml_log.report_single_value(f"{k}_train", v.item())
        # self.clearml_log.report_single_value(f"loss_train", loss.item())
        return {
            'loss': loss, 
            'log': loss_dict, 
            'progress_bar': loss_dict
        }

    def validation_step(self, batch, batch_idx):
        # print(f"[VAL_STEP][{batch_idx=}] begin: {len(batch)=}")
        images, targets = batch
        with th.no_grad():
            pred = self.model(images)
        # print(f"[VAL_STEP][{batch_idx=}] {pred[0]['boxes'][:10]=}")
        # print(f"[VAL_STEP][{batch_idx=}] {targets[0]['boxes']=}")
        self.val_map.update(
            preds=pred,target=targets
        )
        self.log_dict(
            {f"{k}_val": v for k,v in self.val_map.compute().items()}, 
            on_step=False, 
            on_epoch=True,
            prog_bar=False
        )
        

    def test_step(self, batch, batch_idx):
        print(f"[TST_STEP][{batch_idx=}] begin:{len(batch)=} {batch_idx=}")
        images, targets = batch
        with th.no_grad():
            pred = self.model(images)
        # print(f"test_step:{pred[0]['boxes'][:10]=}")
        # print(f"test_step:{targets[0]['boxes']=}")
        self.test_map.update(preds=pred, target=targets)
        self.log_dict(
            {f"{k}_test": v for k,v in self.test_map.compute().items()}, 
            on_step=True, 
            on_epoch=True,
            prog_bar=False
        )


    def on_train_epoch_end(self)->None:
        losses_str = []
        for name, values in self.train_loss_dict.items():
            losses_str.append(f"{name}={np.mean(values)}")
            self.log(f"{name}_train_epoch_end", np.mean(values))
        # print(f"[TRAIN_END] train losses: {', '.join(losses_str)}")
    
    def on_validation_epoch_end(self) -> None:
        print(f"on_validation_epoch_end:begin")
        print(f"[VAL_END] val metrics: {self.val_map.compute()=}")
        self.val_map.reset()

    def on_test_epoch_end(self) -> None:
        print(f"{self.test_map.compute()}")
        self.test_map.reset()




In [5]:

model = DetectModel(cfg)
trainer = pl.Trainer(
    max_epochs=20,
    # accelerator=config.accelerator,
    # devices=[config.device],
    # callbacks=[
    #     checkpoint_callback,
    #     EarlyStopping(monitor=config.monitor_metric, patience=4, mode=config.monitor_mode),
    #     LearningRateMonitor(logging_interval='epoch'),
    # ],
)
trainer.fit(model=model, datamodule=data)

ClearML Task: created new task id=7af018603bbf46da90081e239ee67990
2023-06-16 00:43:27,800 - clearml.Task - INFO - Storing jupyter notebook directly as code
ClearML results page: https://app.clear.ml/projects/fe34d86eb64e454682a8a1885164edfe/experiments/7af018603bbf46da90081e239ee67990/output/log



Can't initialize NVML

GPU available: False, used: False


ClearML Monitor: GPU monitoring failed getting GPU reading, switching off GPU monitoring


TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name     | Type                 | Params
--------------------------------------------------
0 | model    | FasterRCNN           | 41.3 M
1 | val_map  | MeanAveragePrecision | 0     
2 | test_map | MeanAveragePrecision | 0     
--------------------------------------------------
41.1 M    Trainable params
222 K     Non-trainable params
41.3 M    Total params
165.197   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]


Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 3. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.



on_validation_epoch_end:begin
[VAL_END] val metrics: self.val_map.compute()={'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}



The number of training batches (3) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.



Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

on_validation_epoch_end:begin
[VAL_END] val metrics: self.val_map.compute()={'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}


Validation: 0it [00:00, ?it/s]

on_validation_epoch_end:begin
[VAL_END] val metrics: self.val_map.compute()={'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}


Validation: 0it [00:00, ?it/s]

on_validation_epoch_end:begin
[VAL_END] val metrics: self.val_map.compute()={'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}


Validation: 0it [00:00, ?it/s]

on_validation_epoch_end:begin
[VAL_END] val metrics: self.val_map.compute()={'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}


Validation: 0it [00:00, ?it/s]

on_validation_epoch_end:begin
[VAL_END] val metrics: self.val_map.compute()={'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}


Validation: 0it [00:00, ?it/s]

on_validation_epoch_end:begin
[VAL_END] val metrics: self.val_map.compute()={'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}


Validation: 0it [00:00, ?it/s]

on_validation_epoch_end:begin
[VAL_END] val metrics: self.val_map.compute()={'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}


Validation: 0it [00:00, ?it/s]

on_validation_epoch_end:begin
[VAL_END] val metrics: self.val_map.compute()={'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}


Validation: 0it [00:00, ?it/s]

on_validation_epoch_end:begin
[VAL_END] val metrics: self.val_map.compute()={'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}


Validation: 0it [00:00, ?it/s]

on_validation_epoch_end:begin
[VAL_END] val metrics: self.val_map.compute()={'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}


Validation: 0it [00:00, ?it/s]

on_validation_epoch_end:begin
[VAL_END] val metrics: self.val_map.compute()={'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}


Validation: 0it [00:00, ?it/s]

on_validation_epoch_end:begin
[VAL_END] val metrics: self.val_map.compute()={'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}


Validation: 0it [00:00, ?it/s]

on_validation_epoch_end:begin
[VAL_END] val metrics: self.val_map.compute()={'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}


Validation: 0it [00:00, ?it/s]

on_validation_epoch_end:begin
[VAL_END] val metrics: self.val_map.compute()={'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}


Validation: 0it [00:00, ?it/s]

on_validation_epoch_end:begin
[VAL_END] val metrics: self.val_map.compute()={'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}


Validation: 0it [00:00, ?it/s]

on_validation_epoch_end:begin
[VAL_END] val metrics: self.val_map.compute()={'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}


Validation: 0it [00:00, ?it/s]

on_validation_epoch_end:begin
[VAL_END] val metrics: self.val_map.compute()={'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}


Validation: 0it [00:00, ?it/s]

on_validation_epoch_end:begin
[VAL_END] val metrics: self.val_map.compute()={'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}


Validation: 0it [00:00, ?it/s]

on_validation_epoch_end:begin
[VAL_END] val metrics: self.val_map.compute()={'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}


Validation: 0it [00:00, ?it/s]

on_validation_epoch_end:begin
[VAL_END] val metrics: self.val_map.compute()={'map': tensor(0.), 'map_50': tensor(0.), 'map_75': tensor(0.), 'map_small': tensor(-1.), 'map_medium': tensor(-1.), 'map_large': tensor(0.), 'mar_1': tensor(0.), 'mar_10': tensor(0.), 'mar_100': tensor(0.), 'mar_small': tensor(-1.), 'mar_medium': tensor(-1.), 'mar_large': tensor(0.), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.)}


`Trainer.fit` stopped: `max_epochs=20` reached.


In [None]:
image, target = data.train_dataset[0]

In [None]:
model.model.eval()
model.model(image.unsqueeze(0))

In [None]:
target

In [None]:
p = {
        "boxes": th.FloatTensor([[542, 210, 685, 489], [1, 2, 3, 4]]),
        "labels": th.LongTensor([1,1]),
        "scores": th.FloatTensor([0.7,0.8])
    }

In [None]:
metric = MeanAveragePrecision()
metric.update([p],[target])

In [None]:
metric.compute()

In [None]:
from torchmetrics.detection.mean_ap import box_iou

In [None]:
box_iou?