In [1]:
import cv2
import numpy as np
import os
from shapely.geometry import Polygon
import xml.etree.ElementTree as ET
import pathlib
from PIL import Image
import json
import uuid

In [2]:
def get_items(root, ids):
    images = []
    all_boxes = []
    all_masks = []
    all_labels = []

    for image_id in ids:
        image = _get_images(root, image_id)
        boxes, labels = _get_annotations(root, image_id)
        masks = _get_masks(root, image_id)

        images.append(image)
        all_boxes.append(boxes)
        all_masks.append(masks)
        all_labels.append(labels)

    return images, all_boxes, all_masks, all_labels


def _read_image_ids(image_sets_file):
    ids = []
    with open(image_sets_file) as f:
        for line in f:
            ids.append(line.rstrip())
    return ids


def _get_images(root, image_id):
    image_file = os.path.join(root, "JPEGImages", image_id + ".jpg")
    image = cv2.imread(str(image_file))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return image


def _get_masks(root, image_id):
    mask_file = os.path.join(root, "SegmentationClass", image_id + ".png")
    mask = np.array(Image.open(mask_file).convert("P"))

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    polygons = []
    for contour in contours:
        contour = np.squeeze(contour, axis=1)
        if len(contour) >= 4:
            polygon = Polygon(contour)
            polygons.append(polygon)

    return polygons


def _get_annotations(root, image_id):
    annotation_file = os.path.join(root, "Annotations", image_id + ".xml")
    objects = ET.parse(annotation_file).findall("object")
    boxes = []
    labels = []
    for object in objects:
        class_name = object.find("name").text.lower().strip()
        bbox = object.find("bndbox")
        x1 = float(bbox.find("xmin").text) - 1
        y1 = float(bbox.find("ymin").text) - 1
        x2 = float(bbox.find("xmax").text) - 1
        y2 = float(bbox.find("ymax").text) - 1
        boxes.append([x1, y1, x2, y2])
        labels.append(class_name)

    # return bbox y labels
    return (np.array(boxes, dtype=np.float32), np.array(labels))


# create the ids for images
root = pathlib.Path("/home/emanuele/Dottorato/dataset-vari/VOC2012")
images_file = os.path.join(root, "ImageSets", "Segmentation", "dataset.txt")
ids = _read_image_ids(images_file)
print(len(ids))

2913


In [3]:
images, boxes, polygons, labels = get_items(root, ids)

In [5]:
instances_voc12 = {
    "info": {
        "description": "VOC 2012 Dataset Annotations files",
        "version": "1.0",
        "year": 2024,
        "contributor": "CILAB",
        "date_created": "02/01/2024",
    },
    "images": [
        # {
        #     "file_name": "2007_000032",
        #     "url": "VOC2012/JPEGImages/2007_000032.jpg",
        #     "height": 281,
        #     "width": 500,
        #     "id": 0,
        # },
        # {...},
    ],
    "annotations": [
        # {
        #     "segmentation": [
        #         [
        #             [117.0, 89.0],
        #             [116.0, 90.0],
        #             [109.0, 90.0],
        #             [107.0, 92.0],
        #             [134.0, 171.0],
        #             [128.0, 171.0],
        #             [127.0, 170.0],
        #             [127.0, 137.0],
        #         ]
        #     ],
        #     "area": 20098.5,
        #     "image_id": 40,
        #     "bbox": [118.0, 176.0, 330.0, 277.0],
        #     "category_id": 9,
        #     "id": 64,
        # },
        # {...},
    ],
    "categories": [
        # {"id": 0, "name": "sheep"},
        # {"id": 1, "name": "bird"},
        # {"id": 2, "name": "bus"},
        # {"id": 3, "name": "cow"},
    ],
}
instances_voc12

{'info': {'description': 'VOC 2012 Dataset Annotations files',
  'version': '1.0',
  'year': 2024,
  'contributor': 'CILAB',
  'date_created': '02/01/2024'},
 'images': [],
 'annotations': [],
 'categories': []}

In [42]:
categories = [
    {"id": i, "name": name} for i, name in enumerate(set(np.concatenate(labels)))
]
category_to_id = {category["name"]: category["id"] for category in categories}

for enum, (box, polygon, label) in enumerate(zip(boxes, polygons, labels)):
    for b, p, l in zip(box, polygon, label):
        print(enum, b, list(p.exterior.coords), category_to_id[l])
    break

0 [103.  77. 374. 182.] [(28.0, 184.0), (25.0, 187.0), (25.0, 188.0), (24.0, 189.0), (24.0, 190.0), (20.0, 194.0), (20.0, 202.0), (22.0, 204.0), (22.0, 213.0), (24.0, 215.0), (24.0, 216.0), (28.0, 220.0), (28.0, 231.0), (29.0, 232.0), (29.0, 236.0), (33.0, 240.0), (37.0, 240.0), (41.0, 236.0), (41.0, 228.0), (40.0, 227.0), (40.0, 220.0), (43.0, 217.0), (43.0, 215.0), (45.0, 213.0), (45.0, 205.0), (44.0, 204.0), (44.0, 203.0), (43.0, 202.0), (43.0, 201.0), (42.0, 200.0), (42.0, 197.0), (41.0, 196.0), (41.0, 195.0), (40.0, 194.0), (40.0, 189.0), (36.0, 185.0), (34.0, 185.0), (33.0, 184.0), (28.0, 184.0)] 2
0 [132.  87. 196. 122.] [(23.0, 101.0), (22.0, 102.0), (18.0, 102.0), (17.0, 103.0), (12.0, 103.0), (11.0, 104.0), (7.0, 104.0), (6.0, 105.0), (3.0, 105.0), (3.0, 106.0), (2.0, 107.0), (2.0, 108.0), (0.0, 110.0), (0.0, 111.0), (22.0, 111.0), (23.0, 110.0), (46.0, 110.0), (46.0, 106.0), (45.0, 105.0), (45.0, 104.0), (44.0, 103.0), (35.0, 103.0), (34.0, 102.0), (25.0, 102.0), (24.0, 101.

In [6]:
def create_lvis_style_annotation(ids, images, boxes, polygons, labels, annotations):
    # generate set of categories
    annotations_images = []
    annotations_segmentations = []

    annotations_categories = [
        {"id": i, "name": name} for i, name in enumerate(set(np.concatenate(labels)))
    ]
    category_to_id = {
        category["name"]: category["id"] for category in annotations_categories
    }

    for enum, id_ in enumerate(ids):
        # print(ids[i])
        image = {
            "file_name": id_,  # This is the only field that is compulsory
            "url": f"JPEGImages/{id_}.jpg",
            "height": images[enum].shape[0],
            "width": images[enum].shape[1],
            "id": enum,
        }
        annotations_images.append(image)

    i = 0
    for enum, (box, polygon, label) in enumerate(zip(boxes, polygons, labels)):
        for b, p, l in zip(box, polygon, label):
            annotation = {
                "segmentation": [list(p.exterior.coords)],
                "area": p.area,
                "image_id": enum,
                "bbox": b.tolist(),  # Assuming box is a list/array of [x_min, y_min, x_max, y_max]
                "category_id": category_to_id[l],
                "id": i,
            }
            annotations_segmentations.append(annotation)
            i += 1

    annotations["images"] = annotations_images
    annotations["annotations"] = annotations_segmentations
    annotations["categories"] = annotations_categories
    return annotations

In [7]:
# generate file, if you want to use it in the future
annotations = create_lvis_style_annotation(ids, images, boxes, polygons, labels, instances_voc12)

In [8]:
with open('instances_voc12.json', 'w') as file:
    json.dump(annotations, file)

In [None]:
with open('annotations.json', 'r') as f:
    data = json.load(f)

In [4]:
data = {
    "license": 4,
    "file_name": "000000397133.jpg",
    "coco_url": "http://images.cocodataset.org/val2017/000000397133.jpg",
    "height": 427,
    "width": 640,
    "date_captured": "2013-11-14 17:02:52",
    "flickr_url": "http://farm7.staticflickr.com/6116/6255196340_da26cf2c9e_z.jpg",
    "id": 397133,
}
image = {
    "file_name": "2007_000042",
    "url": "JPEGImages/2007_000042.jpg",
    "height": 335,
    "width": 500,
    "id": 3,
}
dataset_path = "/home/emanuele/Dottorato/dataset-vari/VOC12"
(f'{dataset_path}/{image["url"]}')

'/home/emanuele/Dottorato/dataset-vari/VOC12/JPEGImages/2007_000042.jpg'