In [0]:
import glob
from os import path, makedirs, remove
import imgviz
import numpy as np
import labelme
import collections
import datetime
import json
import uuid
import pycocotools.mask
from pathlib import Path
from tqdm.auto import tqdm

In [0]:
def labelme2voc(input_jsons, output_dir, labels):
    """Export labelme annotations as VOC-style images and class-label PNGs.

    For every labelme JSON file the embedded image is written to
    ``<output_dir>/JPEGImages/<stem>.jpg`` and the class label map to
    ``<output_dir>/SegmentationClassPNG/<stem>.png``.  This function does not
    create those sub-directories itself.

    Args:
        input_jsons: Iterable of paths (``str`` or ``pathlib.Path``) to labelme
            JSON files.
        output_dir: Root directory of the dataset being generated.
        labels: Path to a text file with one class name per line.  The first
            line must be ``__ignore__``; each class id is the zero-based line
            index minus one, so ``__ignore__`` maps to -1.

    Raises:
        AssertionError: If the first line of ``labels`` is not ``__ignore__``.
    """
    class_name_to_id = {}
    # Use a context manager so the labels file is closed deterministically.
    with open(labels) as labels_file:
        for i, line in enumerate(labels_file):
            class_id = i - 1  # starts with -1
            class_name = line.strip()
            class_name_to_id[class_name] = class_id
            if class_id == -1:
                assert class_name == "__ignore__", (
                    "first line of the labels file must be '__ignore__'"
                )

    print("Generating voc dataset")
    for filename in tqdm(input_jsons):
        label_file = labelme.LabelFile(filename=filename)
        # Path() lets callers pass plain strings as well as Path objects.
        base = Path(filename).stem
        out_img_file = path.join(output_dir, "JPEGImages", base + ".jpg")
        out_clsp_file = path.join(
            output_dir, "SegmentationClassPNG", base + ".png"
        )

        img = labelme.utils.img_data_to_arr(label_file.imageData)
        imgviz.io.imsave(out_img_file, img)

        # Only the class label map is exported; the instance map is unused.
        cls, _ = labelme.utils.shapes_to_label(
            img_shape=img.shape,
            shapes=label_file.shapes,
            label_name_to_value=class_name_to_id,
        )

        # class label
        labelme.utils.lblsave(out_clsp_file, cls)


def labelme2coco(input_jsons, output_dir, labels):
    """Convert labelme annotations into a COCO-style ``annotations.json``.

    The resulting JSON document is written to
    ``<output_dir>/annotations.json``.  Image files are *not* written by this
    function; each image record only references ``<stem>.jpg`` relative to the
    annotation file's directory.

    Args:
        input_jsons: Iterable of paths (``str`` or ``pathlib.Path``) to labelme
            JSON files.  The position of a file in this iterable becomes its
            image id.
        output_dir: Directory that receives ``annotations.json``.
        labels: Path to a text file with one class name per line.  Blank lines
            are skipped.  Line 0 must be ``__ignore__`` and line 1 must be
            ``_background_``; neither becomes a category.  Every other class
            gets the id "zero-based line index minus one".

    Raises:
        AssertionError: If line 0 / line 1 of ``labels`` are non-blank and do
            not hold the reserved names above.
    """
    now = datetime.datetime.now()

    # NOTE(review): the COCO format names the image list ``images``; the key
    # ``images_paths`` is kept unchanged here because readers of this file may
    # rely on it -- confirm which key downstream consumers expect.
    data = dict(
        info=dict(
            description=None,
            url=None,
            version=None,
            year=now.year,
            contributor=None,
            date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f")),
        licenses=[dict(url=None, id=0, name=None, )],
        images_paths=[],
        type="instances",
        annotations=[],
        categories=[],
    )

    class_name_to_id = {}
    # Use a context manager so the labels file is closed deterministically.
    with open(labels) as labels_file:
        for i, line in enumerate(labels_file):
            class_id = i - 1  # starts with -1
            class_name = line.strip()
            if class_name == "":
                continue
            if class_id == -1:
                assert class_name == "__ignore__", (
                    "first line of the labels file must be '__ignore__'"
                )
                continue
            elif class_id == 0:
                assert class_name == "_background_", (
                    "second line of the labels file must be '_background_'"
                )
                continue
            class_name_to_id[class_name] = class_id
            data["categories"].append(
                dict(supercategory=None, id=class_id, name=class_name, )
            )

    out_ann_file = path.join(output_dir, "annotations.json")

    print("Generating coco dataset")
    for image_id, filename in enumerate(tqdm(input_jsons)):

        label_file = labelme.LabelFile(filename=filename)

        # Path() lets callers pass plain strings as well as Path objects.
        base = Path(filename).stem
        # NOTE(review): labelme2voc in this file saves images under
        # "JPEGImages/", which this path does not include -- confirm that the
        # consumer resolves file_name against the right image root.
        out_img_file = path.join(output_dir, base + ".jpg")

        img = labelme.utils.img_data_to_arr(label_file.imageData)
        data["images_paths"].append(
            dict(
                license=0,
                url=None,
                file_name=path.relpath(out_img_file,
                                       path.dirname(out_ann_file)),
                height=img.shape[0],
                width=img.shape[1],
                date_captured=None,
                id=image_id,
            )
        )

        masks = {}  # for area
        segmentations = collections.defaultdict(list)  # for segmentation
        for shape in label_file.shapes:
            points = shape["points"]
            label = shape["label"]
            group_id = shape.get("group_id")
            shape_type = shape.get("shape_type", "polygon")
            mask = labelme.utils.shape_to_mask(
                img.shape[:2], points, shape_type
            )

            # Shapes without a group id each form their own instance, so give
            # them a unique key; shapes sharing (label, group_id) are merged.
            if group_id is None:
                group_id = uuid.uuid1()

            instance = (label, group_id)

            if instance in masks:
                masks[instance] = masks[instance] | mask
            else:
                masks[instance] = mask

            if shape_type == "rectangle":
                # Expand the two corner points into a four-corner polygon.
                (x1, y1), (x2, y2) = points
                x1, x2 = sorted([x1, x2])
                y1, y2 = sorted([y1, y2])
                points = [x1, y1, x2, y1, x2, y2, x1, y2]
            else:
                points = np.asarray(points).flatten().tolist()

            segmentations[instance].append(points)
        # Freeze into a plain dict so a missing key raises instead of silently
        # creating an empty entry.
        segmentations = dict(segmentations)

        for instance, mask in masks.items():
            cls_name, group_id = instance
            # Labels that are not exported categories are left out.
            if cls_name not in class_name_to_id:
                continue
            cls_id = class_name_to_id[cls_name]

            # pycocotools.mask.encode is fed a Fortran-ordered uint8 array.
            mask = np.asfortranarray(mask.astype(np.uint8))
            mask = pycocotools.mask.encode(mask)
            area = float(pycocotools.mask.area(mask))
            bbox = pycocotools.mask.toBbox(mask).flatten().tolist()

            data["annotations"].append(
                dict(
                    # Annotation ids run consecutively across all images.
                    id=len(data["annotations"]),
                    image_id=image_id,
                    category_id=cls_id,
                    segmentation=segmentations[instance],
                    area=area,
                    bbox=bbox,
                    iscrowd=0,
                )
            )

    with open(out_ann_file, "w") as f:
        json.dump(data, f)


def labelall(input_jsons, output_dir, labels):
    """Build both the VOC and the COCO export of a labelme dataset.

    Ensures ``output_dir`` and its ``JPEGImages`` / ``SegmentationClassPNG``
    sub-directories exist, removes every ``.jpg`` / ``.png`` file found
    anywhere below ``output_dir``, then runs ``labelme2voc`` followed by
    ``labelme2coco`` with the same arguments.

    Args:
        input_jsons: labelme JSON files, forwarded to both converters.
        output_dir: Root directory of the dataset to (re)generate.
        labels: Path to the labels text file, forwarded to both converters.
    """
    print('')
    print("Creating dataset:", output_dir)

    # make dirs or delete existing files
    required_dirs = (
        path.join(output_dir),
        path.join(output_dir, "JPEGImages"),
        path.join(output_dir, "SegmentationClassPNG"),
    )
    for directory in required_dirs:
        makedirs(directory, exist_ok=True)

    # Collect the stale images first, then delete, so the directory tree is
    # not modified while it is still being walked.
    image_suffixes = (".jpg", ".png")
    stale_images = [
        candidate
        for candidate in Path(output_dir).glob('**/*')
        if candidate.suffix in image_suffixes
    ]
    for stale_image in stale_images:
        remove(stale_image)

    labelme2voc(input_jsons, output_dir, labels)
    labelme2coco(input_jsons, output_dir, labels)

    print('')
    print('#################')