In [70]:
import os
from ote_sdk.entities.datasets import DatasetEntity, DatasetIterator
from ote_sdk.entities.label import Domain
from ote_sdk.entities.shapes.rectangle import Rectangle
from ote_sdk.entities.shapes.polygon import Polygon
from ote_sdk.entities.subset import Subset

from datumaro import Dataset as DMDataset
from datumaro import DatasetItem as DMDatasetItem
from datumaro import Bbox as DMBbox
from datumaro import Polygon as DMPolygon

In [71]:
# Build the annotation-file and image-folder paths of the COCO subset that is
# used to create a sample DatasetEntity.
# The dataset root can be overridden through the COCO_DATASET_ROOT environment
# variable; when it is unset the original location is used.
coco_dataset_root = os.environ.get("COCO_DATASET_ROOT", "/home/yunchu/data/coco")
print(os.path.exists(coco_dataset_root))
ann_train = "annotations/semi_supervised/instances_train2017.1_1.0.json"
imgs_train = "images/train2017"
ann_val = "annotations/semi_supervised/instances_val2017.1_0.1.json"
imgs_val = "images/val2017"

train_ann_file = os.path.join(coco_dataset_root, ann_train)
print(os.path.exists(train_ann_file))
train_img_root = os.path.join(coco_dataset_root, imgs_train)

val_ann_file = os.path.join(coco_dataset_root, ann_val)
print(os.path.exists(val_ann_file))
# Validation images live in their own folder (imgs_val), not in imgs_train.
val_img_root = os.path.join(coco_dataset_root, imgs_val)

True
True
True


In [77]:
# use existing code to create DatasetEntity from coco dataset
from detection_tasks.extension.datasets.data_utils import load_dataset_items_coco_format, get_classes_from_annotation

# Read the class names from the training annotation file. The previous
# `ann_file` name is not defined by any cell of this notebook, so the cell
# failed on a fresh kernel.
# NOTE(review): assumes the training annotations are the intended source of
# the class list - confirm.
classes = get_classes_from_annotation(train_ann_file)
print(classes)

# One entry per split: (annotation file, image root folder, subset tag).
coco_splits = [
    (train_ann_file, train_img_root, Subset.TRAINING),
    (val_ann_file, val_img_root, Subset.VALIDATION),
]

# Load every split with identical settings and collect all items in one list.
items = []
for split_ann_file, split_img_root, split_subset in coco_splits:
    items.extend(
        load_dataset_items_coco_format(
            ann_file_path=split_ann_file,
            data_root_dir=split_img_root,
            domain=Domain.INSTANCE_SEGMENTATION,
            subset=split_subset,
            # A new empty list is passed for every split, as before.
            # NOTE(review): if the loader registers labels in this list, the
            # two splits may not share label objects - verify.
            labels_list=[],
            with_mask=False,
        )
    )

dataset_entity = DatasetEntity(items=items)
# Compare the number of labels found in the dataset with the number of classes.
print(len(dataset_entity.get_labels()), len(classes))



['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
loading annotations into memory...
Done (t=0.05s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creat

In [73]:
# Convert every item of the OTE DatasetEntity into a Datumaro dataset item that
# carries the item's rectangle annotations as bounding boxes.

# Map each class name to the position of its first occurrence in `classes`
# (the index `classes.index` returns), so the lookup is not repeated as a
# linear scan for every annotation. An unknown label name raises KeyError.
label_to_index = {}
for class_index, class_name in enumerate(classes):
    label_to_index.setdefault(class_name, class_index)

dm_items = []
for item in DatasetIterator(dataset_entity):
    # The image file path is used as the Datumaro item id. It is read through
    # the name-mangled private attribute of the media object.
    # Stored as `item_id` so the builtin `id` is not rebound in the kernel.
    item_id = item.media._Image__file_path
    anns = []
    for ann in item.annotation_scene.annotations:
        if isinstance(ann.shape, Rectangle):
            # NOTE(review): the printed boxes of this notebook hold values
            # within [0, 1]; confirm whether Datumaro expects pixel
            # coordinates here.
            anns.append(
                DMBbox(
                    x=ann.shape.x1,
                    y=ann.shape.y1,
                    w=ann.shape.width,
                    h=ann.shape.height,
                    # Only the first label of the annotation is used.
                    label=label_to_index[ann.get_labels()[0].name],
                )
            )
        # TODO: convert Polygon shapes to DMPolygon; they are only reported.
        else:
            print(f"unexpected annotation type: {type(ann.shape)}")
    dm_items.append(
        DMDatasetItem(
            id=item_id,
            annotations=anns,
        )
    )
dset = DMDataset.from_iterable(dm_items, categories=classes)

In [74]:
# Show only the first few converted items; printing the whole dataset floods
# the notebook output.
PREVIEW_COUNT = 5
for item_index, item in enumerate(dset):
    if item_index >= PREVIEW_COUNT:
        break
    print(item.id, item.annotations)



/home/yunchu/data/coco/images/train2017/000000032907.jpg [Bbox(id=0, attributes={}, group=0, points=[0.0, 0.11999999731779099, 0.10999999940395355, 0.27000001072883606], label=58, z_order=0), Bbox(id=0, attributes={}, group=0, points=[0.5, 0.029999999329447746, 0.6299999952316284, 0.25], label=58, z_order=0), Bbox(id=0, attributes={}, group=0, points=[0.6499999761581421, 0.029999999329447746, 0.7400000095367432, 0.25999999046325684], label=58, z_order=0), Bbox(id=0, attributes={}, group=0, points=[0.7900000214576721, 0.0, 1.0, 0.17000000178813934], label=2, z_order=0), Bbox(id=0, attributes={}, group=0, points=[0.3400000035762787, 0.25999999046325684, 0.6499999761581421, 0.6800000071525574], label=61, z_order=0), Bbox(id=0, attributes={}, group=0, points=[0.7300000190734863, 0.0, 0.800000011920929, 0.05999999865889549], label=2, z_order=0)]
/home/yunchu/data/coco/images/train2017/000000281221.jpg [Bbox(id=0, attributes={}, group=0, points=[0.5199999809265137, 0.23000000417232513, 0.800