In [61]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Data

In [62]:
from data_utils.data_loaders import FocusCNNLoader

In [63]:
# Single place to re-point the notebook at another checkout or machine; every
# path below is derived from it and resolves to the same string as before.
PROJECT_ROOT = "/home/ubuntu/focus-convolutional-neural-network"
TRAIN_SPLIT_DIR = f"{PROJECT_ROOT}/data/processed/COCO-2017/CocoFocusCNN/train"

# Data loader over the processed COCO-2017 FocusCNN training split.
loader = FocusCNNLoader(
    batch_size=8,
    shuffle=True,
    num_workers=1,
    images_dir=f"{TRAIN_SPLIT_DIR}/images",
    csv_path=f"{TRAIN_SPLIT_DIR}/labels.csv",
    # Class id (as a string key) -> class name.
    labels={
        "0": "none",
        "1": "person",
        "2": "car",
        "3": "bicycle"
    },
    tf_image_size=(640, 640),
    # The trailing slash is kept exactly as in the original path.
    save_out_dir=f"{PROJECT_ROOT}/res/focus_cnn/coco_focuscnn/trainer/labels/",
    validation_split=0.15,
    is_test=False
)

In [64]:
# Smoke-check the training loader: pull one batch and print the shapes of its
# first two elements.
dl = loader.get_train_loader()
x = next(iter(dl))
# In the recorded output x[0] is an 8 x 3 x 640 x 640 tensor and x[1] has one
# entry per sample. x[1] is presumably the image ids, since later cells pass
# data[1] as `img_ids` — TODO confirm against FocusCNNLoader.
print(f"{x[0].shape=}")
print(f"{x[1].shape=}")

x[0].shape=torch.Size([8, 3, 640, 640])
x[1].shape=torch.Size([8])


# Models

In [65]:
from models.focuscnn.FocusCNN import FocusCNN
from models.focus.ResFocusNetwork import ResFocusNetwork
from models.classifier.MultiClassifier import MultiClassifier
from collections import OrderedDict

In [66]:
import torch

In [67]:
from pipeline import pipeline_utils

In [68]:
# Classifier: 4 output classes on a ResNet-34 backbone, plus the checkpoint
# it is restored from later.
classifier = MultiClassifier(n_classes=4, backbone="resnet34")
classifier_path = "/home/ubuntu/focus-convolutional-neural-network/res/classifiers/coco_classifier_multi/trainer/0627_075340/models/checkpoint-epoch73.pth"


def _make_focus_network():
    """Return a new ResFocusNetwork with the configuration shared by all classes."""
    return ResFocusNetwork(
        loss_lambda_tr=1,
        loss_lambda_rot=1,
        loss_lambda_sc=1,
        backbone="resnet34",
        threshold=0.5,
        inp_img_size=(640, 640),
        loss_rot=False,
    )


# One independent focus network per class id, created in key order "1", "2", "3".
focus_networks = OrderedDict(
    (class_id, _make_focus_network()) for class_id in ("1", "2", "3")
)

# Checkpoint for each focus network, keyed by the same class ids
# (person, car and bicycle runs respectively, per the directory names).
focus_networks_paths = OrderedDict(
    [
        ("1", "/home/ubuntu/focus-convolutional-neural-network/res/focus/coco_focus_person/trainer/0628_212101_without_rotation/models/checkpoint-epoch63.pth"),
        ("2", "/home/ubuntu/focus-convolutional-neural-network/res/focus/coco_focus_car/trainer/0701_161142_without_rotation/models/checkpoint-epoch58.pth"),
        ("3", "/home/ubuntu/focus-convolutional-neural-network/res/focus/coco_focus_bicycle/trainer/0713_without_rot/models/checkpoint-epoch92.pth"),
    ]
)

In [69]:
# Assemble the combined FocusCNN from the classifier and the per-class focus
# networks. inp_img_size matches the loader's tf_image_size (640, 640).
# out_img_size=(300, 300) is presumably the size of the crops handed to the
# classifier — TODO confirm in FocusCNN.
model = FocusCNN(
    classifier_model=classifier,
    focus_models=focus_networks,
    inp_img_size=(640, 640),
    out_img_size=(300, 300)
)

In [70]:
# Hand the checkpoint paths to the model: one path for the classifier and an
# ordered mapping of class id -> path for the focus networks. Presumably this
# restores the trained weights of each sub-model — verify in FocusCNN.load_model.
model.load_model(
    classifier_model_path=classifier_path,
    focus_models_path=focus_networks_paths
)

In [88]:
# Train loader used for the manual forward/backward walkthrough below.
# NOTE(review): this cell's execution count (88) is later than the cells that
# follow it (72 onwards), so it was re-run out of order; confirm the notebook
# still works under Restart Kernel -> Run All.
dl_train = loader.get_train_loader()

In [72]:
# Draw the first two batches, in order, from a single iterator over the train loader.
iter_dl = iter(dl_train)
bn_1, bn_2 = next(iter_dl), next(iter_dl)

In [73]:
# Switch the model to training mode for the forward and backward pass in the
# following cells.
model.train()

In [74]:
# Move the model to the GPU; the input batch and targets are sent to "cuda"
# in the cells below as well.
model.to("cuda")

In [75]:
# Work with the first of the two batches drawn above.
data = bn_1

In [76]:
# Send element 0 of the batch to the GPU; it is passed to the model as `x`.
# Presumably the image tensor, as in the shape check on the earlier batch.
inp_img = pipeline_utils.to_device(data[0], "cuda")

In [89]:
# Send element 2 of the batch to the GPU as the forward-pass target. The
# recorded display of `target` further down shows a dict keyed by class id
# ("1", "2", "3"), each holding 'label' and 'transform' tensors.
# NOTE(review): execution count 89 is out of sequence. A later cell (In [84])
# rebinds `target` to the mAP targets, so this cell presumably had to be
# re-run to restore the dict — consider a distinct name for the mAP targets.
target = pipeline_utils.to_device(data[2], "cuda")

In [78]:
# Forward pass with targets supplied; the returned mapping is read for
# "loss" and "target_cls" in the cells below.
out = model(x=inp_img, target=target)

In [79]:
# Loss for this batch. The recorded repr carries grad_fn=<NllLossBackward0>,
# so the value is still attached to the autograd graph.
loss_val = out["loss"]
loss_val

tensor(0.9679, device='cuda:0', grad_fn=<NllLossBackward0>)

In [80]:
# Backpropagate to check that the backward pass runs; no optimizer step is
# taken in the cells shown here.
loss_val.backward()

In [90]:
# Class targets returned by the forward pass. The recorded output has 24
# entries, which match the three per-class 'label' vectors of `target`
# concatenated in key order ("1", "2", "3").
target_cls = out["target_cls"]
target_cls

tensor([1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0])

In [91]:
# Bring the model output and the targets back to the CPU before they are
# inspected and turned into predictions. Both names are rebound in place.
out = pipeline_utils.to_device(out, device="cpu")
target = pipeline_utils.to_device(target, device="cpu")

In [92]:
# Display the per-class targets (the recorded output below is cut off).
target

{'1': {'label': tensor([1, 1, 1, 1, 1, 0, 0, 1]),
  'transform': tensor([[ 0.1730, -0.0170, -0.0918],
          [-0.2036,  0.0398, -0.4686],
          [-0.1710, -0.2410, -1.4278],
          [-0.3457, -0.1821, -0.4380],
          [ 0.0599, -0.2799, -0.8261],
          [ 0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000],
          [ 0.0411,  0.0105, -1.4114]])},
 '2': {'label': tensor([0, 0, 0, 0, 0, 0, 2, 2]),
  'transform': tensor([[ 0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000],
          [ 0.2736,  0.5696, -1.2906],
          [-0.5880, -0.0714, -1.6666]])},
 '3': {'label': tensor([0, 0, 0, 0, 0, 0, 0, 0]),
  'transform': tensor([[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.],
          [0.

In [83]:
# Turn the model output into per-image detections, passing element 1 of the
# batch as the image ids. The recorded result is a list with one dict per
# image holding 'boxes', 'scores' and 'labels' tensors.
preds = model.get_prediction(output=out, img_ids=data[1])
preds

[{'boxes': tensor([[ 72.9087,  26.9309, 654.9722, 608.9945]]),
  'scores': tensor([1.]),
  'labels': tensor([1])},
 {'boxes': tensor([[ 40.5618, 121.0110, 450.6645, 531.1137]]),
  'scores': tensor([1.0000]),
  'labels': tensor([1])},
 {'boxes': tensor([[194.6780, 162.8454, 347.1106, 315.2781]]),
  'scores': tensor([1.0000]),
  'labels': tensor([1])},
 {'boxes': tensor([[ -7.0633,  50.0305, 400.6338, 457.7276]]),
  'scores': tensor([0.9997]),
  'labels': tensor([1])},
 {'boxes': tensor([[238.8703,  94.1497, 483.4637, 338.7432]]),
  'scores': tensor([0.9998]),
  'labels': tensor([1])},
 {'boxes': tensor([]),
  'scores': tensor([]),
  'labels': tensor([], dtype=torch.int64)},
 {'boxes': tensor([[305.4766, 428.2527, 469.9732, 592.7493]]),
  'scores': tensor([0.9896]),
  'labels': tensor([2])},
 {'boxes': tensor([[256.6360, 238.1834, 407.3045, 388.8519],
          [ 35.4372, 253.3440, 169.9179, 387.8247]]),
  'scores': tensor([1.0000, 0.6850]),
  'labels': tensor([1, 2])}]

In [84]:
# Build the two inputs for the mAP metric: predictions from the model output
# and targets from element 3 of the batch (passed as `target_bboxes`), both
# keyed by the image ids in element 1.
# NOTE(review): `preds` repeats the call made in the previous cell, and
# `target` is rebound from the forward-pass target dict to the mAP targets.
# Re-running the forward-pass cell after this one would therefore receive the
# wrong object — consider a separate name for the mAP targets.
preds = model.get_prediction(output=out, img_ids=data[1])
target = model.prepare_target_for_map(target_bboxes=data[3], target_image_ids=data[1])

In [85]:
# Show predictions and mAP targets together (the recorded output is cut off).
preds, target

([{'boxes': tensor([[ 72.9087,  26.9309, 654.9722, 608.9945]]),
   'scores': tensor([1.]),
   'labels': tensor([1])},
  {'boxes': tensor([[ 40.5618, 121.0110, 450.6645, 531.1137]]),
   'scores': tensor([1.0000]),
   'labels': tensor([1])},
  {'boxes': tensor([[194.6780, 162.8454, 347.1106, 315.2781]]),
   'scores': tensor([1.0000]),
   'labels': tensor([1])},
  {'boxes': tensor([[ -7.0633,  50.0305, 400.6338, 457.7276]]),
   'scores': tensor([0.9997]),
   'labels': tensor([1])},
  {'boxes': tensor([[238.8703,  94.1497, 483.4637, 338.7432]]),
   'scores': tensor([0.9998]),
   'labels': tensor([1])},
  {'boxes': tensor([]),
   'scores': tensor([]),
   'labels': tensor([], dtype=torch.int64)},
  {'boxes': tensor([[305.4766, 428.2527, 469.9732, 592.7493]]),
   'scores': tensor([0.9896]),
   'labels': tensor([2])},
  {'boxes': tensor([[256.6360, 238.1834, 407.3045, 388.8519],
           [ 35.4372, 253.3440, 169.9179, 387.8247]]),
   'scores': tensor([1.0000, 0.6850]),
   'labels': tensor([1

In [86]:
from torchmetrics.detection.mean_ap import MeanAveragePrecision

# Initialize the metric with its default settings
metric = MeanAveragePrecision()

# Accumulate the predictions and targets of this single batch
metric.update(preds, target)

# Compute the mAP/mAR summary over what has been accumulated (one batch here).
# Entries of -1 in the recorded output presumably mark values that were not
# computed for this batch or configuration — confirm in the torchmetrics docs.
metric.compute()

{'map': tensor(0.0926),
 'map_50': tensor(0.1683),
 'map_75': tensor(0.1122),
 'map_small': tensor(-1.),
 'map_medium': tensor(-1.),
 'map_large': tensor(0.0926),
 'mar_1': tensor(0.1333),
 'mar_10': tensor(0.1333),
 'mar_100': tensor(0.1333),
 'mar_small': tensor(-1.),
 'mar_medium': tensor(-1.),
 'mar_large': tensor(0.1333),
 'map_per_class': tensor(-1.),
 'mar_100_per_class': tensor(-1.),
 'classes': tensor([1, 2], dtype=torch.int32)}