In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
#%pip install scikit-image
#%pip install torch torchvision

In [137]:
import numpy as np
from skimage import io
from skimage.transform import resize
import matplotlib.pyplot as plt
import random
import matplotlib.patches as patches
from utils import *
from model import *
import os
import xml.etree.ElementTree as ET

import torch
import torchvision
from torchvision import ops
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils.rnn import pad_sequence

## Load Images

The annotations should also contain the corresponding image path

In [138]:
class ObjectDetectionDataset(Dataset):
    '''
    A Pytorch Dataset class to load the images and their corresponding annotations.
    
    Returns
    ------------
    images: torch.Tensor of size (B, C, H, W)
    gt bboxes: torch.Tensor of size (B, max_objects, 4)
    gt classes: torch.Tensor of size (B, max_objects)
    '''
    def __init__(self, annotation_path, img_dir, img_size, name2idx):
        self.annotation_path = annotation_path
        self.img_dir = img_dir
        self.img_size = img_size
        self.name2idx = name2idx
        
        self.img_data_all, self.gt_bboxes_all, self.gt_classes_all = self.get_data()
        
    def __len__(self):
        return self.img_data_all.size(dim=0)
    
    def __getitem__(self, idx):
        return self.img_data_all[idx], self.gt_bboxes_all[idx], self.gt_classes_all[idx]
        
    def get_data(self):
        img_data_all = []
        gt_idxs_all = []
        
        gt_boxes_all, gt_classes_all, img_paths = parse_annotation(self.annotation_path, self.img_dir, self.img_size)
        
        for i, img_path in enumerate(img_paths):
            
            # skip if the image path is not valid
            if (not img_path) or (not os.path.exists(img_path)):
                continue
                
            # read and resize image
            img = io.imread(img_path)
            img = resize(img, self.img_size)
            
            # convert image to torch tensor and reshape it so channels come first
            img_tensor = torch.from_numpy(img).permute(2, 0, 1)
            
            # encode class names as integers
            gt_classes = gt_classes_all[i]
            gt_idx = torch.Tensor([self.name2idx[name] for name in gt_classes])
            
            img_data_all.append(img_tensor)
            gt_idxs_all.append(gt_idx)
        
        # pad bounding boxes and classes so they are of the same size
        gt_bboxes_pad = pad_sequence(gt_boxes_all, batch_first=True, padding_value=-1)
        gt_classes_pad = pad_sequence(gt_idxs_all, batch_first=True, padding_value=-1)
        
        # stack all images
        img_data_stacked = torch.stack(img_data_all, dim=0)
        
        return img_data_stacked.to(dtype=torch.float32), gt_bboxes_pad, gt_classes_pad


## Converting the GT xmls to the format that can be fed to Faster R-CNN

In [128]:
def merge_and_convert_to_cvat(img_path, xml_path, output_filename):
    # Create a root element for the merged XML
    merged_root = ET.Element('annotations')
    xml_files = [ os.path.join(xml_path, x) for x in os.listdir(xml_path)]

    # Loop through each XML file
    for xml_file in xml_files:
        # Parse the XML file
        with open(xml_file, 'r') as file:
            xml_content = file.read()

        xml_root = ET.fromstring(xml_content)
        image_filename = os.path.join(img_path, xml_file[-23:-4]+'.jpg')


        # Iterate over each 'space' element in the parsed XML and convert to CVAT format
        for space_element in xml_root.findall('.//space'):
            annotation = ET.Element('annotation')
            image_filename_elem = ET.SubElement(annotation, 'filename')
            image_filename_elem.text = image_filename

            # Convert 'space' element to CVAT format
            cvat_box = ET.SubElement(annotation, 'box')
            attributes = ET.SubElement(cvat_box, 'attributes')
            attribute = ET.SubElement(attributes, 'attribute', {'name': 'occupied'})
            attribute.text = space_element.get('occupied')

            points = space_element.findall('.//point')
            x_values = [int(point.get('x')) for point in points]
            y_values = [int(point.get('y')) for point in points]

            x_min, x_max = min(x_values), max(x_values)
            y_min, y_max = min(y_values), max(y_values)

            ET.SubElement(cvat_box, 'xtl').text = str(x_min)
            ET.SubElement(cvat_box, 'ytl').text = str(y_min)
            ET.SubElement(cvat_box, 'xbr').text = str(x_max)
            ET.SubElement(cvat_box, 'ybr').text = str(y_max)

            # Append the CVAT annotation to the merged XML
            merged_root.append(annotation)

    # Create the merged XML tree
    merged_tree = ET.ElementTree(merged_root)

    # Save the merged XML to a file
    with open(output_filename, 'wb') as file:
        merged_tree.write(file)


In [129]:
# Converting the .xmls in data/Sunny_most_empty/labels_xml
xml_folder_path = '../data/PKLot/Sunny_most_empty/labels_xml'
annotation_path = '../data/PKLot/Sunny_most_empty/sunny_data_frcnn.xml'
image_dir = '../data/PKLot/Sunny_most_empty/images'

merge_and_convert_to_cvat(image_dir, xml_folder_path, annotation_path)

In [139]:
img_width = 1280
img_height = 720


name2idx = {'empty_park': 0, 'occupied_park': 1}
idx2name = {v:k for k, v in name2idx.items()}

# Create Dataset and Dataloaders

In [140]:
od_dataset = ObjectDetectionDataset(annotation_path, image_dir, (img_height, img_width), name2idx)

RuntimeError: received an empty list of sequences