In [None]:
# %load_ext autoreload
# %autoreload 2

import torchvision
import numpy as np
from matplotlib import pyplot as plt
import torchvision.transforms.v2 as transforms
import torch
from training.encoder import CenternetEncoder
from utils.tmp_visualizer import get_image_with_bboxes
from models.centernet import ModelBuilder
from data.dataset import Dataset

# Network input resolution; both sides are equal here.
input_height = 256
input_width = 256

print("GPU is available: ", torch.cuda.is_available())

# Figure size (in inches) used by the plots that follow.
plt.rcParams["figure.figsize"] = (12.0, 8.0)

In [None]:
# Pascal VOC 2007 validation split, read from a local copy (no download).
voc_val_raw = torchvision.datasets.VOCDetection(
    root="../VOC",
    year="2007",
    image_set="val",
    download=False,
)
# Wrap the dataset so its samples can be passed to torchvision.transforms.v2.
dataset_val = torchvision.datasets.wrap_dataset_for_transforms_v2(voc_val_raw)

# Indices (into dataset_val) of the 10 randomly selected VOC pictures that
# were used for model training.
trainingdata_indices = torch.tensor(
    [955, 1025, 219, 66, 1344, 222, 865, 2317, 86, 1409]
)

print(len(dataset_val))

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ModelBuilder(alpha=0.25, filters_size=[128, 64, 32]).to(device)

# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved on a GPU machine can still be loaded on a CPU-only one.
checkpoint_state = torch.load(
    "../models/checkpoints/tmp_trained_model.pt",
    map_location=device,
    weights_only=True,
)
model.load_state_dict(checkpoint_state)

In [None]:
# Inspect one of the images used in model training together with its
# annotated boxes.
image_index = 5  # 0 (car) is also a good choice

sample_idx = trainingdata_indices[image_index]
img, lbl = dataset_val[sample_idx]
print(lbl)

# Draw the annotated boxes on the image exactly as it comes from the dataset.
image_with_boxes = get_image_with_bboxes(img, lbl["boxes"], lbl["labels"])
plt.imshow(image_with_boxes)

In [None]:
# Resize expects `size` as (height, width); the order matters as soon as the
# network input is not square.
transform_resize = transforms.Compose(
    [transforms.Resize(size=(input_height, input_width))]
)
# Image, boxes and labels go through the transform together.
img_transformed, bboxes, labels = transform_resize(img, lbl["boxes"], lbl["labels"])

image_with_boxes = get_image_with_bboxes(img_transformed, bboxes, labels)
plt.imshow(image_with_boxes)

In [None]:
encoder = CenternetEncoder(input_height, input_width)
lbl_encoded = encoder(bboxes, labels)

# Print the maximum of each of the first 20 channels of the encoded label.
# `i` is the 1-based number that is printed; the channel index is i - 1.
for i in range(1, 21):
    hm_chosen_current = lbl_encoded[..., i - 1]
    print(f"i = {i}; np.amax(hm_chosen) = {np.amax(hm_chosen_current)}")
print()

In [None]:
# Pick the heatmap channel that belongs to the label of the first annotated
# object; label k is stored in channel k - 1.
i = lbl["labels"][0]  # corresponds to 'person'
hm_chosen = lbl_encoded[..., i - 1]
hm_peak = np.amax(hm_chosen)
print(f"i = {i}; np.amax(hm_chosen) = {hm_peak}")
print()

# Everything from channel 20 onwards, printed at each peak position below.
coors_channels = lbl_encoded[..., 20:]

# Visit every position where the chosen heatmap reaches its maximum.
for ind in np.argwhere(hm_chosen == hm_peak):
    # NOTE(review): the factor 4 is presumably the encoder's output stride
    # (heatmap cell -> input pixels) -- confirm against CenternetEncoder.
    print("rect center:", ind * 4)
    print("coors", coors_channels[ind[0], ind[1]])
    print()

plt.imshow(hm_chosen)

In [None]:
plt.rcParams["figure.figsize"] = (10.0, 8.0)

# Show heatmap channels 0..19 on a 4x5 grid, one subplot per channel.
# Subplot numbers are 1-based, channel indices are 0-based.
for plt_idx in range(1, 21):
    plt.subplot(4, 5, plt_idx)
    plt.imshow(lbl_encoded[..., plt_idx - 1])
    plt.axis("off")
plt.show()

In [None]:
# Collect the first `number_of_persons` images of the VOC split that contain
# at least one object with the person label (15).
number_of_persons = 12
person_label = 15

person_images = []
testdata_indexes = []

# Dedicated loop names keep the `img` / `lbl` globals of the exploration
# cells intact, so those cells can be re-run after this one.
for index, (candidate_img, candidate_lbl) in enumerate(dataset_val):
    if person_label in candidate_lbl["labels"]:
        testdata_indexes.append(index)
        person_images.append(
            {"index": index, "image": candidate_img, "lbl": candidate_lbl}
        )
        # Stop scanning as soon as enough images have been collected.
        if len(testdata_indexes) >= number_of_persons:
            break

print(f"Collected {len(person_images)} images containing a person")

# Visualize the collected images; they form my test data.
# The grid is sized from the number of images actually found, so fewer (or
# more) than 12 images no longer raises an IndexError / subplot error.
grid_cols = 4
grid_rows = -(-len(person_images) // grid_cols)  # ceiling division
for plt_idx, person in enumerate(person_images, start=1):
    plt.subplot(grid_rows, grid_cols, plt_idx)

    # Separate names: `img_transformed`, `bboxes` and `labels` of the
    # single-image exploration cells must not be overwritten here.
    person_img, person_bboxes, person_labels = transform_resize(
        person["image"],
        person["lbl"]["boxes"],
        person["lbl"]["labels"],
    )

    plt.imshow(get_image_with_bboxes(person_img, person_bboxes, person_labels))
    plt.axis("off")
plt.show()

In [None]:
# NOTE(review): this eval() call has no lasting effect -- train(True) right
# below switches the model back to training mode.
model.eval()

# I cannot get predictions without train(True)
# TODO: find out why the forward pass does not work in eval mode; as written,
# the loss computed later in this notebook is obtained in training mode.
model.train(True)

In [None]:
# Preprocessing applied to every sample: resize to the network input size,
# convert to an image tensor, then convert to float32 with value scaling.
# Resize expects `size` as (height, width).
transform = transforms.Compose(
    [
        transforms.Resize(size=(input_height, input_width)),
        transforms.ToImage(),
        transforms.ToDtype(torch.float32, scale=True),
    ]
)
torch_dataset = Dataset(dataset=dataset_val, transformation=transform, encoder=encoder)

training_data = torch.utils.data.Subset(torch_dataset, trainingdata_indices)
test_data = torch.utils.data.Subset(torch_dataset, testdata_indexes)

# Comment this line out when you need the calculations on the real test data.
test_data = training_data

# Here I am getting the loss of the trained model on the chosen data.

# todo (AA): I don't completely understand what this prediction contains
batch_generator = torch.utils.data.DataLoader(test_data, num_workers=4, batch_size=12)

# Only the loss value is reported, so no autograd graph is needed.
with torch.no_grad():
    for input_data, gt_data in batch_generator:
        input_contiguous = input_data.to(device).contiguous()
        gt_data_device = gt_data.to(device)

        # Call the module itself rather than .forward so that registered
        # hooks are run as well.
        result = model(input_contiguous, gt=gt_data_device)
        print(result["loss"])

        # It gives the same result as above:
        # result = model(input_contiguous)
        # print(model.loss(gt_data_device, result)['loss'])