# Example of train with a KeypointRCNN_ResNet50_FPN pre-trained model

In the code below, we are going to use the KeypointRCNN_ResNet50_FPN_Weights model pre-trained on the COCO dataset to train on our own dataset.

Import libraries

In [47]:
import json
import os
import cv2
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import kornia as K
from kornia import augmentation as A
from kornia.augmentation import AugmentationSequential
import matplotlib.pyplot as plt

# if cuda is avaliable, use it
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

train_images_path = 'dataset/train/images'
train_annotations_path = 'dataset/train/annotations'
test_images_path = 'dataset/test'
test_annotations_path = 'dataset/test/annotations'

class CustomDataset(Dataset):
    def __init__(self, images_path, annotations_path, device):
        self.images_path = images_path
        self.annotations_path = annotations_path
        self.device = device
        self.image_filenames = [filename for filename in os.listdir(images_path) if filename.endswith('.jpg')]

       # Declare an augmentation pipeline
        self.transform =K.augmentation.AugmentationSequential(
            K.augmentation.RandomVerticalFlip(), 
            K.augmentation.RandomHorizontalFlip(),
            K.augmentation.RandomRotation(30),
            data_keys=["input", "bbox", "keypoints"])

    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, idx):
        image_filename = self.image_filenames[idx]
        image = self.load_image(os.path.join(self.images_path, image_filename))
        annotation_filename = os.path.join(self.annotations_path, image_filename[:-4] + '.json')
        with open(annotation_filename, 'r') as f:
            annotation = json.load(f)

        target = []      
        target.append({
            'boxes': torch.tensor(annotation['bboxes']).float().to(self.device),
            'labels': torch.as_tensor([1 for _ in annotation['bboxes']], dtype=torch.int64).to(self.device),
            'keypoints': torch.tensor(np.array(annotation['keypoints'])[:,:,:2] ).float().to(self.device)
        })

        # transform image and keypoints
        transformed = self.transform(image, target[0]['boxes'],  target[0]['keypoints'])

        # create target tensor
        target = torch.ones([transformed[1].shape[0],transformed[1].shape[1],3]).to(device)

        # assign keypoints and add visibility
        target[:,:,:2] = transformed[1]

        return transformed[0].squeeze(0), target.reshape(transformed[1].shape[0] * transformed[1].shape[1],3)

    def load_image(self, image_path: str)-> torch.Tensor:
        """
        Method to load image
        params:
            image_path: str = path of the image
        return:
            tensor: torch.Tensor = image tensor
        """

        # load image and convert to tensor
        image: np.ndarray = cv2.imread(image_path)

        # convert to tensor
        image_tensor: torch.Tensor = K.image_to_tensor(image)

        # bgr to rgb
        image_tensor = K.color.bgr_to_rgb(image_tensor)

        return K.enhance.normalize(image_tensor, torch.tensor(0.), torch.tensor(255.)).to(self.device)

def collate_fn(batch):
    images, annotations = zip(*batch)

    import torch

    keypoints = torch.tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])

    ones = torch.ones((keypoints.shape[0], 1))
    keypoints = torch.cat((keypoints, ones), dim=1)

    keypoints = keypoints.view(keypoints.shape[0], -1, 3)

    return  torch.stack(images),  torch.stack(annotations)

train_dataset = CustomDataset(train_images_path, train_annotations_path, device)
test_dataset = CustomDataset(test_images_path, test_annotations_path, device)

train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)

# Get a batch of images and annotations from the train dataloader
images, targets = next(iter(train_dataloader))

# Get the first image and its annotations from the batch
image = images[0].numpy().transpose((1, 2, 0)).copy().astype(np.uint8)
annotations = targets[0]

# Draw the bounding boxes and keypoints on the image using OpenCV
for bbox, keypoints in zip(annotations['boxes'], annotations['keypoints']):
    bbox = bbox.numpy().astype(int)
    keypoints = keypoints.numpy().reshape(-1, 3).astype(int)
    image = cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 3)
    for x, y, v in keypoints:
        if v > 0:
            image = cv2.circle(image, (x, y), 10, (0, 0, 255), 3)

# Convert the image to RGB and Show the image using Matplotlib 
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.show()

cuda


RuntimeError: stack expects each tensor to be equal size, but got [3, 1080, 1920] at entry 0 and [3, 1920, 1080] at entry 2

Create device

## Load Dataset

Dataset paths:

Create the custom dataset to load image tensor and annotations

Define data augmentation pipeline:

Load dataset:

Plot one image from the dataset