# Create custom COCO data set for image segmentation

In [6]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
from PIL import Image,ImageDraw
import json
import glob
import PIL.Image

In [25]:
# Parse the ground-truth text file into a per-image lookup table.
# Each line starts with the image file name; the comma-separated values up to
# the first '[' are stored as 'joints', and every following '['-delimited
# chunk becomes one entry of 'suits' (a list of float tuples, one per '(' group).
# Only lines that contain at least one '[' chunk produce an entry.
annotations = {}
with open('train/images/annotations/truth2d.txt') as f:
    for line in f:
        fields = line.rstrip().split(',')
        file = fields[0]
        annot_suits = []
        annot_joins = []
        if len(fields) <= 1:
            # Line holds nothing but the file name.
            continue
        # Re-join the remainder so it can be cut at the bracket boundaries.
        head, *groups = ",".join(fields[1:]).split('[')
        annot_joins = [float(token) for token in head.split(',') if token]
        for group in groups:
            annot_suits.append([
                tuple(
                    float(token)
                    for token in chunk.replace(')', "").replace(']', "").split(",")
                )
                for chunk in group.split('(')
                if chunk
            ])
        if annot_suits:
            annotations[file] = {'joints': annot_joins, 'suits': annot_suits}

In [26]:
# Sanity check: number of image files that received an entry in `annotations`.
len(annotations)

83

In [13]:
# Map each annotated image file name to just its 'suits' polygons.
anot_boxes = {
    image_name: record['suits'] for image_name, record in annotations.items()
}

In [33]:
def get_coordinates(suits):
    """Turn flat coordinate tuples into lists of ``[x, y]`` points.

    :param suits: iterable of suits, each an iterable of flat sequences
        ``(x0, y0, x1, y1, ...)``; ``None`` or an empty value is accepted.
    :return: one list per suit holding the ``[x, y]`` pairs of all of its
        sequences, concatenated in order. A trailing unpaired value in a
        sequence is dropped. Returns ``[]`` when ``suits`` is falsy.
    """
    if not suits:
        return []
    return [
        [
            [x, y]
            for flat in suit
            # Even positions are x values, odd positions the matching y values.
            for x, y in zip(flat[::2], flat[1::2])
        ]
        for suit in suits
    ]

In [34]:
def get_labelme_annotations(image_dir):
    """Write one labelme-style JSON file per annotated image in ``image_dir``.

    Every ``*.png`` / ``*.jpg`` file directly inside ``image_dir`` is looked up
    by file name in the module-level ``anot_boxes`` mapping. For images that
    have polygons, a JSON document with one ``"spacesuit"`` polygon shape per
    suit is written to ``anots/<image name without extension>.json``. Images
    without polygons are reported with ``print`` and skipped, as are images
    that OpenCV cannot decode.

    :param image_dir: directory that is searched (non-recursively) for images.
    :return: None
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs('anots', exist_ok=True)

    images = []
    for image_type in ['*.png', '*.jpg']:
        images.extend(glob.glob(os.path.join(image_dir, image_type)))

    for img_path in images:
        image_key = os.path.basename(img_path)
        pointers = get_coordinates(anot_boxes.get(image_key))
        if not pointers:
            print(image_key, pointers)
            continue

        # Decode only images that actually have annotations; imread returns
        # None (it does not raise) when the file cannot be read.
        img = cv2.imread(img_path, 1)
        if img is None:
            print(image_key, 'could not be read, skipping')
            continue
        height, width = img.shape[:2]

        label_me_obj = {
            "version": "4.0.0",
            "flags": {},
            "shapes": [
                {
                    "label": "spacesuit",
                    "points": pointer,
                    "group_id": None,
                    "shape_type": "polygon",
                    "flags": {},
                }
                for pointer in pointers
            ],
            "imagePath": img_path,
            "imageData": None,
            "imageHeight": height,
            "imageWidth": width,
        }

        # splitext strips only the final extension, so names such as
        # "a.b.jpg" keep their full stem instead of collapsing to "a".
        stem = os.path.splitext(image_key)[0]
        with open(os.path.join('anots', '%s.json' % stem), 'w') as fp:
            json.dump(label_me_obj, fp)

In [44]:
class labelme2coco(object):
    """Convert a list of labelme JSON files into a single COCO-style JSON file.

    The conversion runs as part of construction: creating an instance reads
    every input file, builds the ``images`` / ``categories`` / ``annotations``
    lists and writes them to ``save_json_path``.
    """

    def __init__(self, labelme_json=None, save_json_path="./coco.json"):
        """
        :param labelme_json: the list of all labelme json file paths
            (``None`` is treated as an empty list)
        :param save_json_path: the path to save new json
        """
        # None instead of a mutable [] default, which would be shared by calls.
        self.labelme_json = [] if labelme_json is None else labelme_json
        self.save_json_path = save_json_path
        self.images = []
        self.categories = []
        self.annotations = []
        self.label = []
        self.annID = 1
        # Size of the most recently processed image; read by getbbox().
        self.height = 0
        self.width = 0

        self.save_json()

    def data_transfer(self):
        """Read all labelme files and fill images, annotations and categories."""
        # Category identity is the first "_"-separated token of the label,
        # because that is the only part used as the category name.
        seen_names = {label[0] for label in self.label}
        for num, json_file in enumerate(self.labelme_json):
            with open(json_file, "r") as fp:
                data = json.load(fp)
            # image() must run before annotation(): it records the image size
            # that getbbox() uses to rasterize the polygons.
            self.images.append(self.image(data, num))
            for shapes in data["shapes"]:
                label = shapes["label"].split("_")
                if label[0] not in seen_names:
                    seen_names.add(label[0])
                    self.label.append(label)
                points = shapes["points"]
                self.annotations.append(self.annotation(points, label, num))
                self.annID += 1

        # Sort all text labels so they are in the same order across data splits.
        self.label.sort()
        for label in self.label:
            self.categories.append(self.category(label))
        # Annotations were created with the category name as a placeholder;
        # replace it with the numeric id now that all categories are known.
        for annotation in self.annotations:
            annotation["category_id"] = self.getcatid(annotation["category_id"])

    def image(self, data, num):
        """Build the COCO image record for one labelme document.

        :param data: parsed labelme JSON (uses ``imageHeight``, ``imageWidth``
            and ``imagePath``)
        :param num: value stored as the image ``id``
        :return: dict with ``height``, ``width``, ``id`` and ``file_name``
        """
        image = {}
        image["height"] = data['imageHeight']
        image["width"] = data['imageWidth']
        image["id"] = num
        # Accept both "/" and "\\" separators so files written on another
        # platform still reduce to the bare file name.
        image["file_name"] = os.path.basename(data["imagePath"].replace("\\", "/"))

        self.height = image["height"]
        self.width = image["width"]

        return image

    def category(self, label):
        """Build a COCO category record from a split label.

        :param label: list of label tokens; only ``label[0]`` is used
        :return: dict whose ``id`` is the number of categories created so far
        """
        category = {}
        category["supercategory"] = label[0]
        category["id"] = len(self.categories)
        category["name"] = label[0]
        return category

    def annotation(self, points, label, num):
        """Build a COCO annotation record for one polygon.

        :param points: sequence of ``[x, y]`` polygon vertices
        :param label: list of label tokens; ``label[0]`` is stored temporarily
            as ``category_id`` until data_transfer() maps it to a numeric id
        :param num: id of the image the polygon belongs to
        :return: annotation dict containing only JSON-serializable values
        """
        annotation = {}
        contour = np.array(points)
        x = contour[:, 0]
        y = contour[:, 1]
        # Shoelace formula for the polygon area.
        area = 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)))
        # tolist() yields plain Python numbers; NumPy integer scalars would
        # make json.dump fail.
        annotation["segmentation"] = [np.asarray(points).flatten().tolist()]
        annotation["iscrowd"] = 0
        annotation["area"] = float(area)
        annotation["image_id"] = num

        annotation["bbox"] = list(map(float, self.getbbox(points)))

        annotation["category_id"] = label[0]  # self.getcatid(label)
        annotation["id"] = self.annID
        return annotation

    def getcatid(self, label):
        """Return the numeric id of the category named ``label``.

        :raises ValueError: if no category with that name exists
        """
        for category in self.categories:
            if label == category["name"]:
                return category["id"]
        # Raise instead of calling exit(), which would terminate the whole
        # interpreter (or notebook kernel).
        raise ValueError(
            "label: {} not in categories: {}.".format(label, self.categories)
        )

    def getbbox(self, points):
        """Return ``[x, y, width, height]`` of the polygon rasterized onto a
        mask of the current ``self.height`` x ``self.width``."""
        polygons = points
        mask = self.polygons_to_mask([self.height, self.width], polygons)
        return self.mask2box(mask)

    def mask2box(self, mask):
        """Return ``[min_col, min_row, max_col - min_col, max_row - min_row]``
        over the set pixels of ``mask``."""
        index = np.argwhere(mask == 1)
        rows = index[:, 0]
        clos = index[:, 1]

        left_top_r = np.min(rows)  # y
        left_top_c = np.min(clos)  # x

        right_bottom_r = np.max(rows)
        right_bottom_c = np.max(clos)

        return [
            left_top_c,
            left_top_r,
            right_bottom_c - left_top_c,
            right_bottom_r - left_top_r,
        ]

    def polygons_to_mask(self, img_shape, polygons):
        """Rasterize one polygon into a boolean mask of shape ``img_shape``.

        :param img_shape: ``[height, width]`` of the mask
        :param polygons: sequence of ``[x, y]`` vertices
        """
        mask = np.zeros(img_shape, dtype=np.uint8)
        mask = PIL.Image.fromarray(mask)
        xy = list(map(tuple, polygons))
        PIL.ImageDraw.Draw(mask).polygon(xy=xy, outline=1, fill=1)
        mask = np.array(mask, dtype=bool)
        return mask

    def data2coco(self):
        """Assemble the top-level COCO dictionary from the collected lists."""
        data_coco = {}
        data_coco["images"] = self.images
        data_coco["categories"] = self.categories
        data_coco["annotations"] = self.annotations
        return data_coco

    def save_json(self):
        """Run the conversion and write the result to ``self.save_json_path``,
        creating the parent directory if needed."""
        print("save coco json")
        self.data_transfer()
        self.data_coco = self.data2coco()

        print(self.save_json_path)
        os.makedirs(
            os.path.dirname(os.path.abspath(self.save_json_path)), exist_ok=True
        )
        # Context manager guarantees the file is flushed and closed.
        with open(self.save_json_path, "w") as fp:
            json.dump(self.data_coco, fp, indent=4)


In [36]:
# Directory holding the training images to convert.
image_dir = 'train/images'

In [37]:
# Write the per-image labelme JSON files into 'anots'; images without any
# polygons are printed together with their (empty) point list.
get_labelme_annotations(image_dir)

626913.jpg []
952799.jpg []
204037.jpg []


In [38]:
# Collect every labelme JSON file found in the 'anots' directory.
labelme_json = glob.glob(os.path.join('anots',"*.json"))

In [45]:
# Constructing the converter runs the whole conversion and writes 'trainval.json'.
labelme2coco(labelme_json,'trainval.json')

save coco json
trainval.json


<__main__.labelme2coco at 0x1669b0cd0>