# Example: executing the model module by module

In [14]:
import json
import os
import cv2
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import kornia as K
from kornia import augmentation as A
from kornia.augmentation import AugmentationSequential
import matplotlib.pyplot as plt

# Prefer the GPU whenever CUDA is available; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

# Folders holding the training images and their JSON annotations
images_path = '../dataset/train/images'
annotations_path = '../dataset/train/annotations'

cuda


Helper functions to convert annotations from the JSON format to the Kornia format, and from the Kornia format to the format expected by torchvision.


In [15]:
def convert_to_kornia_format(data):
    """
    Convert a JSON annotation dictionary to Kornia-style box and keypoint tensors.

    Parameters:
    - data (dict): Annotation with the keys 'bboxes', a list of [x1, y1, x2, y2] boxes,
      and 'keypoints', a list with one entry per box, each entry being a list of
      [x, y, visibility] keypoints.

    Returns:
    - tuple: (bbox_tensor, keypoint_tensor). bbox_tensor has shape (1, N, 4, 2) and holds
      the corners of each box ordered (x1, y1), (x2, y1), (x2, y2), (x1, y2).
      keypoint_tensor has shape (1, K, 2) and holds the (x, y) of every keypoint,
      flattened across boxes; the visibility flag is dropped.
    """
    # Expand every [x1, y1, x2, y2] box into its four corner points
    bbox_list = [
        [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]
        for x1, y1, x2, y2 in data['bboxes']
    ]
    # An explicit reshape (instead of unsqueeze) keeps the (1, N, 4, 2) layout
    # even when the annotation contains no boxes at all
    bbox_tensor = torch.tensor(bbox_list).reshape(1, len(bbox_list), 4, 2)

    # Flatten the per-box keypoints into a single list of (x, y) pairs
    keypoint_list = [
        [x, y]
        for kpts in data['keypoints']
        for x, y, _ in kpts
    ]
    keypoint_tensor = torch.tensor(keypoint_list).reshape(1, len(keypoint_list), 2)

    return bbox_tensor, keypoint_tensor

In [16]:
def kornia_to_torch_format(bbox_tensor, keypoint_tensor, labels=None, target_device=None):
    """
    Convert bbox_tensor and keypoint_tensor in Kornia format to torch's expected format.

    Parameters:
    - bbox_tensor (torch.Tensor): Bounding box tensor in Kornia format, shape (1, N, 4, 2)
    - keypoint_tensor (torch.Tensor): Keypoint tensor in Kornia format, shape (1, K, 2),
      with the keypoints of each box stored contiguously
    - labels (list[int]): List of class labels for each bounding box. If None, default to label=1 for all boxes.
    - target_device (str | torch.device): Device the returned tensors are moved to.
      If None, the notebook-level `device` is used.

    Returns:
    - dict: A dictionary with 'boxes' (N, 4), 'labels' (N,) and 'keypoints' (N, K // N, 3)
      in the format expected by torch.
    """
    # Only look up the notebook-level default when the caller did not choose a device
    if target_device is None:
        target_device = device

    if bbox_tensor.numel() == 0:
        # No boxes: return empty targets instead of failing on indexing / division by zero
        num_boxes = 0
        boxes = torch.zeros((0, 4))
    else:
        num_boxes = bbox_tensor.shape[1]
        # Corner 0 holds (x1, y1) and corner 2 holds (x2, y2) in the layout
        # produced by convert_to_kornia_format
        boxes = torch.stack(
            [bbox_tensor[0, :, 0, 0], bbox_tensor[0, :, 0, 1], bbox_tensor[0, :, 2, 0], bbox_tensor[0, :, 2, 1]],
            dim=1,
        )

    # If labels aren't provided, assume a default label of 1 for all bounding boxes
    if labels is None:
        labels = torch.ones((num_boxes,), dtype=torch.int64)
    else:
        labels = torch.tensor(labels, dtype=torch.int64)

    # Split the flat keypoint list evenly between the boxes. The number of keypoints per
    # box is derived from the tensor shapes rather than being hard-coded to 2.
    total_keypoints = keypoint_tensor.shape[1] if keypoint_tensor.numel() > 0 else 0
    kpts_per_box = total_keypoints // num_boxes if num_boxes > 0 else 0
    keypoints = torch.zeros((num_boxes, kpts_per_box, 3))
    if kpts_per_box > 0:
        # Trailing keypoints that do not fill a whole box are ignored
        used = num_boxes * kpts_per_box
        keypoints[:, :, :2] = keypoint_tensor[0, :used, :].reshape(num_boxes, kpts_per_box, 2)
        keypoints[:, :, 2] = 1  # setting visibility to 1

    return {
        "boxes": boxes.float().to(target_device),
        "labels": labels.to(target_device),
        "keypoints": keypoints.to(target_device),
    }



First, let's load an image and its annotations from the dataset.

In [17]:
# Collect every .jpg image in the folder. os.listdir returns entries in arbitrary
# order, so sort them to make "the first image" the same on every run.
image_filenames = sorted(filename for filename in os.listdir(images_path) if filename.endswith('.jpg'))

for image_filename in image_filenames:

    image_filepath = os.path.join(images_path, image_filename)
    image = cv2.imread(image_filepath)
    # cv2.imread reports failure by returning None instead of raising
    if image is None:
        raise OSError(f'Could not read image: {image_filepath}')

    # convert to tensor
    image_tensor: torch.Tensor = K.image_to_tensor(image).to(device)

    # bgr to rgb (OpenCV loads images in BGR channel order)
    image_tensor = K.color.bgr_to_rgb(image_tensor)

    # normalize with mean 0 and std 255, i.e. divide the pixel values by 255
    image_tensor = K.enhance.normalize(image_tensor, torch.tensor(0.), torch.tensor(255.)).to(device)

    # the annotation file has the same base name as the image, with a .json extension
    annotation_filename = os.path.join(annotations_path, image_filename[:-4] + '.json')
    with open(annotation_filename, 'r') as f:
        annotation = json.load(f)
    print('JSON:')
    print(annotation)
    print('-'*200)
    print('Kornia')
    bbox_tensor, keypoint_tensor = convert_to_kornia_format(annotation)
    print(bbox_tensor)
    print(keypoint_tensor)
    print('-'*200)
    print('Training:')
    targets = kornia_to_torch_format(bbox_tensor, keypoint_tensor)
    print(targets)
    # a single image is enough for this walkthrough
    break
    

JSON:
{'bboxes': [[692, 346, 904, 504], [1076, 364, 1236, 572]], 'keypoints': [[[881, 479, 1], [709, 372, 1]], [[1212, 387, 1], [1102, 552, 1]]]}
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Kornia
tensor([[[[ 692,  346],
          [ 904,  346],
          [ 904,  504],
          [ 692,  504]],

         [[1076,  364],
          [1236,  364],
          [1236,  572],
          [1076,  572]]]])
tensor([[[ 881,  479],
         [ 709,  372],
         [1212,  387],
         [1102,  552]]])
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Training:
{'boxes': tensor([[ 692.,  346.,  904.,  504.],
        [1076.,  364., 1236.,  572.]], device='cuda:0'), 'labels': tensor([1, 1], device='cuda:0

In [18]:
import torch
import torchvision
from torchvision.models.detection import KeypointRCNN_ResNet50_FPN_Weights
# Pick the GPU when CUDA is available, otherwise run on the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

# Release the GPU memory cached by PyTorch before loading the model
torch.cuda.empty_cache()

# Keypoint R-CNN with its default pretrained weights, moved to the selected
# device and switched to evaluation mode
model = torchvision.models.detection.keypointrcnn_resnet50_fpn(
    weights=KeypointRCNN_ResNet50_FPN_Weights.DEFAULT
)
model = model.to(device)
model.eval()

# Run only the model's pre-processing stage on a batch of one image and its targets
transform_result = model.transform(image_tensor[None], [targets])
print(transform_result)

cuda
(<torchvision.models.detection.image_list.ImageList object at 0x7f14fc0db850>, [{'boxes': tensor([[480.4354, 239.9574, 627.6208, 349.5333],
        [747.0355, 252.4407, 858.1188, 396.6926]], device='cuda:0'), 'labels': tensor([1, 1], device='cuda:0'), 'keypoints': tensor([[[611.6526, 332.1954,   1.0000],
         [492.2380, 257.9889,   1.0000]],

        [[841.4563, 268.3917,   1.0000],
         [765.0865, 382.8222,   1.0000]]], device='cuda:0')}])


In [19]:
# Show the architecture of the backbone before running it
print(model.backbone)

# The first element returned by model.transform is an ImageList; its `.tensors`
# attribute is the batched image tensor the backbone consumes
image_list = transform_result[0]
features = model.backbone(image_list.tensors)
print('0')

BackboneWithFPN(
  (body): IntermediateLayerGetter(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): FrozenBatchNorm2d(64, eps=0.0)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): FrozenBatchNorm2d(64, eps=0.0)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): FrozenBatchNorm2d(64, eps=0.0)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): FrozenBatchNorm2d(256, eps=0.0)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): FrozenBatchNorm2d(256, eps=0.0)
        )
      )
      (1): Bottleneck(
        (conv1): C

In [20]:
# Run the region proposal network on the backbone features.
# The targets passed here must be the ones returned by model.transform
# (transform_result[1]): they were rescaled together with the image, whereas the
# original `targets` are still expressed in the coordinates of the input image.
# NOTE: the model was put in eval mode, so proposal_losses is expected to be empty.
proposals, proposal_losses = model.rpn(transform_result[0], features, transform_result[1])
print('0')

0


In [21]:
# Run the RoI heads on the proposals. As for the RPN, the targets must be the ones
# returned by model.transform (transform_result[1]), which match the resized image;
# the original `targets` are still in the coordinates of the input image.
detections, detector_losses = model.roi_heads(features, proposals, transform_result[0].image_sizes, transform_result[1])
# Next stage of the full pipeline, kept for reference (maps detections back to the original image size):
# detections = model.transform.postprocess(detections,  transform_result[0].image_sizes, image_tensor.shape[-2:])

# End-to-end call for comparison. The model runs its own transform internally, so it
# receives the original image and targets.
# NOTE: the model is in eval mode, where torchvision detection models return detections
# rather than a dictionary of losses, despite the variable name.
model_detector_losses = model(image_tensor.unsqueeze(0), [targets])
print(detector_losses)

{}
