*** IMPORTANT *** 
This code was written for training purposes. The code is shared only for knowledge sharing purposes. The code should not be used in any production environments.

## Create a custom COCO data set for object segmentation using Pascal VOC format

You can run this notebook to generate the coco json format for a dataset in Pascal VOC format. 

Then you can run the viwer.ipynb notebook to visualize the coco annotations. 


For further instructions on how to create your own datasets, read the [tutorial](https://www.dlology.com/blog/how-to-create-custom-coco-data-set-for-object-detection/).

This code is based on the git repo https://github.com/Tony607/voc2coco

In [2]:
import sys
import os
import json
import xml.etree.ElementTree as ET
import glob
from skimage import draw
import numpy as np

# Id given to the first annotation written by convert(); it is incremented by
# one for every further annotation.
START_BOUNDING_BOX_ID = 1
# Optional fixed category-name -> id mapping. When left as None, convert()
# builds the mapping from the xml files with get_categories().
PRE_DEFINE_CATEGORIES = None

In [3]:
def get(root, name):
    """Return the list of elements that ``root.findall(name)`` yields.

    Arguments:
        root {Element} -- Element to search in.
        name {str} -- Tag name (or path) passed straight to ``findall``.

    Returns:
        list -- The matching elements; empty when nothing matches.
    """
    return root.findall(name)

In [4]:
def get_and_check(root, name, length):
    """Look up ``name`` under ``root`` and validate how many matches exist.

    Arguments:
        root {Element} -- Element to search in.
        name {str} -- Tag name (or path) passed to ``findall``.
        length {int} -- Required number of matches; a value <= 0 disables
            the count check.

    Returns:
        Element or list -- The single element when ``length`` is 1,
        otherwise the list of matching elements.

    Raises:
        ValueError -- If nothing matches, or if ``length`` is positive and
            the number of matches differs from it.
    """
    matches = root.findall(name)
    found = len(matches)
    if not matches:
        raise ValueError("Can not find %s in %s." % (name, root.tag))
    if length > 0 and found != length:
        raise ValueError(
            "The size of %s is supposed to be %d, but is %d."
            % (name, length, found)
        )
    return matches[0] if length == 1 else matches

In [5]:
def get_filename_as_int(filename):
    """Parse the extension-less base name of ``filename`` as an integer.

    Backslashes are treated as path separators, so both ``a/b/12.jpg`` and
    ``a\\b\\12.jpg`` give ``12``.

    Arguments:
        filename {str} -- File name or path whose stem should be a number.

    Returns:
        int -- The numeric value of the file stem.

    Raises:
        ValueError -- If ``filename`` is not a string or its stem is not an
            integer. The message reports the name exactly as it was passed in.
    """
    try:
        normalized = filename.replace("\\", "/")
        stem = os.path.splitext(os.path.basename(normalized))[0]
        return int(stem)
    # Only conversion problems are translated; a bare ``except`` would also
    # swallow KeyboardInterrupt and SystemExit.
    except (AttributeError, TypeError, ValueError) as exc:
        raise ValueError(
            "Filename %s is supposed to be an integer." % (filename,)
        ) from exc

In [6]:
def get_categories(xml_files):
    """Generate category name to id mapping from a list of xml files.

    The category of an object is read from its ``<name>`` child, looked up by
    tag rather than by position, so the order of the children inside
    ``<object>`` does not matter.

    Arguments:
        xml_files {list} -- A list of xml file paths.

    Returns:
        dict -- category name to id mapping; ids are assigned from 0 in
        alphabetical order of the names.

    Raises:
        ValueError -- If an ``<object>`` element has no ``<name>`` child.
    """
    classes_names = set()
    for xml_file in xml_files:
        root = ET.parse(xml_file).getroot()
        for member in root.findall("object"):
            name_node = member.find("name")
            if name_node is None:
                raise ValueError(
                    "Can not find name in %s (%s)." % (member.tag, xml_file)
                )
            classes_names.add(name_node.text)
    return {name: i for i, name in enumerate(sorted(classes_names))}

In [7]:
# This function extracts the segmentation as polygons. It does not produce RLE values, because the annotated data is
# assumed to contain individual objects rather than crowds.

def get_segmentation(object):
    """Collect the polygons of one annotated object as flat coordinate lists.

    The expected layout is ``segment_polygons/polygon/point``. For every
    ``point`` the first two non-blank text fragments found inside it are read
    as integers and appended, in document order, to that polygon's list.
    Whitespace between child elements (pretty-printed xml) is ignored.

    Arguments:
        object {Element} -- The ``<object>`` element to read.

    Returns:
        list -- One flat list of ints per ``polygon``; empty when the object
        has no ``segment_polygons`` child.

    Raises:
        AssertionError -- If there is more than one ``segment_polygons`` child.
        ValueError -- If a point has fewer than two coordinates or a
            coordinate is not an integer.
    """
    segmentations = []
    containers = object.findall('segment_polygons')
    if not containers:
        return segmentations
    assert len(containers) == 1  # there should be only one segment_polygon
    for polygon in containers[0].findall('polygon'):
        x_y_points = []
        for point in polygon.findall('point'):
            # itertext() also yields the indentation/newlines between child
            # elements; drop those so only the coordinate texts remain.
            coords = [text.strip() for text in point.itertext() if text.strip()]
            if len(coords) < 2:
                raise ValueError(
                    "A point needs two coordinates, but %d found." % len(coords)
                )
            x_y_points.append(int(coords[0]))
            x_y_points.append(int(coords[1]))
        segmentations.append(x_y_points)
    return segmentations

In [8]:
def poly2mask(vertex_row_coords, vertex_col_coords, shape):
    """Rasterize a polygon into a boolean mask.

    Arguments:
        vertex_row_coords -- Row coordinates of the polygon vertices.
        vertex_col_coords -- Column coordinates of the polygon vertices.
        shape -- Shape of the mask to create; also passed to
            ``draw.polygon`` as its ``shape`` argument.

    Returns:
        numpy.ndarray -- Boolean array of ``shape`` that is True at the
        pixels returned by ``draw.polygon`` and False elsewhere.
    """
    fill_row_coords, fill_col_coords = draw.polygon(vertex_row_coords, vertex_col_coords, shape)
    # The ``np.bool`` alias was removed in NumPy 1.24; the builtin ``bool``
    # selects the same dtype on every NumPy version.
    mask = np.zeros(shape, dtype=bool)
    mask[fill_row_coords, fill_col_coords] = True
    return mask

In [9]:
def convert(xml_files, json_file):
    """Convert Pascal VOC xml annotations into one COCO-style json file.

    Every xml file produces one entry in "images"; image ids are assigned in
    list order starting at 0 and the image file name is the xml file's base
    name with a ".jpg" extension. Every ``<object>`` that carries polygon
    segmentation produces one entry in "annotations"; objects without
    segmentation are skipped.

    Arguments:
        xml_files {list} -- A list of xml file paths.
        json_file {str} -- Path of the json file to write. Missing parent
            directories are created.

    Raises:
        ValueError -- If a required element (size, width, height, name,
            bndbox, xmin, ymin, xmax, ymax) is missing or duplicated.
        AssertionError -- If a bounding box has non-positive width or height.
    """
    json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []}
    if PRE_DEFINE_CATEGORIES is not None:
        # Work on a copy so categories discovered below do not leak into the
        # module-level constant between calls.
        categories = dict(PRE_DEFINE_CATEGORIES)
    else:
        categories = get_categories(xml_files)
    bnd_id = START_BOUNDING_BOX_ID
    for image_id, xml_file in enumerate(xml_files):
        root = ET.parse(xml_file).getroot()
        # Derive the image name from the xml file name: drop the directory and
        # only the final extension, so dotted names keep their full stem.
        stem = os.path.splitext(os.path.basename(xml_file))[0]
        filename = stem + '.jpg'

        size = get_and_check(root, "size", 1)
        width = int(get_and_check(size, "width", 1).text)
        height = int(get_and_check(size, "height", 1).text)
        image = {
            "file_name": filename,
            "height": height,
            "width": width,
            "id": image_id,  # TODO This id is not universal.
        }
        json_dict["images"].append(image)

        for obj in get(root, "object"):
            category = get_and_check(obj, "name", 1).text
            if category not in categories:
                # Use max + 1 rather than len(): predefined ids need not start
                # at 0, and len() could then repeat an id already in use.
                categories[category] = max(categories.values(), default=-1) + 1
            category_id = categories[category]
            bndbox = get_and_check(obj, "bndbox", 1)
            # NOTE(review): the -1 presumably shifts VOC's 1-based pixel
            # coordinates to 0-based ones -- confirm against the data.
            xmin = int(get_and_check(bndbox, "xmin", 1).text) - 1
            ymin = int(get_and_check(bndbox, "ymin", 1).text) - 1
            xmax = int(get_and_check(bndbox, "xmax", 1).text)
            ymax = int(get_and_check(bndbox, "ymax", 1).text)
            assert xmax > xmin
            assert ymax > ymin
            o_width = abs(xmax - xmin)
            o_height = abs(ymax - ymin)
            o_segmentation = get_segmentation(obj)
            if len(o_segmentation) == 0:
                # Only objects with polygon segmentation are exported.
                continue
            ann = {
                # The area is that of the bounding box, not of the polygon.
                "area": o_width * o_height,
                "iscrowd": 0,
                "image_id": image_id,
                "bbox": [xmin, ymin, o_width, o_height],
                "category_id": category_id,
                "id": bnd_id,
                "ignore": 0,
                "segmentation": o_segmentation,
            }
            json_dict["annotations"].append(ann)
            bnd_id = bnd_id + 1

    for cate, cid in categories.items():
        cat = {"supercategory": "none", "id": cid, "name": cate}
        json_dict["categories"].append(cat)

    # os.makedirs("") raises, so only create the directory when the output
    # path actually has a directory component.
    out_dir = os.path.dirname(json_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # The context manager closes the file even if serialization fails.
    with open(json_file, "w") as json_fp:
        json_fp.write(json.dumps(json_dict))

In [10]:

#     import argparse

#     parser = argparse.ArgumentParser(
#         description="Convert Pascal VOC annotation to COCO format."
#     )
#     parser.add_argument("xml_dir", help="Directory path to xml files.", type=str)
#     parser.add_argument("json_file", help="Output COCO format json file.", type=str)
#     args = parser.parse_args()
#     print(args)

# Input directory holding the Pascal VOC xml files, and the COCO json file
# that convert() writes.
xml_dir = "./test-data/VOC/Annotations"
json_file = "./test-data/coco/output.json"

# glob returns matches in arbitrary, filesystem-dependent order, while
# convert() assigns image ids by position in the list; sorting keeps the ids
# identical from run to run.
xml_files = sorted(glob.glob(os.path.join(xml_dir, "*.xml")))

# If you want to do train/test split, you can pass a subset of xml files to convert function.
print("Number of xml files: {}".format(len(xml_files)))

convert(xml_files, json_file)
print("Success: {}".format(json_file))

Number of xml files: 2
Success: ./test-data/coco/output.json
