In [1]:
def clean_file_name(roboflow_format):
    real_name = roboflow_format.split('_', 1)[0] + ".jpg"
    return real_name

In [3]:
import os
import json
import subprocess
import numpy as np
import pandas as pd
from skimage.measure import find_contours
import shutil

class CocoDatasetHandler:

    def __init__(self, jsonpath, imgpath):
        #  CocoDatasetHandler('./test_0/_annotations.coco.json', 'test_0/')
        with open(jsonpath, 'r') as jsonfile:
            ann = json.load(jsonfile)

        images = pd.DataFrame.from_dict(ann['images']).set_index('id')
        images['clean_file_name'] = images['file_name'].apply(clean_file_name)
        annotations = pd.DataFrame.from_dict(ann['annotations']).set_index('id')
        categories = pd.DataFrame.from_dict(ann['categories']).set_index('id')

        # print(images)
        # print(annotations)
        # print(categories)
        annotations = annotations.merge(categories, left_on='category_id', right_index=True)
        # annotations = annotations.groupby('image_id').agg({'bbox': lambda x: add_sub_list(x), 'name': lambda x: add_sub_list(x)})

        annotations = annotations.merge(images, left_on='image_id', right_index=True)
        annotations = annotations.assign(
            shapes=annotations.apply(self.coco2shape, axis=1))


        self.annotations = annotations
        self.labelme = {}
        self.roboflow = []
        self.imgpath = imgpath
        tmp_images = pd.DataFrame.from_dict(ann['images'])
        tmp_images['clean_file_name'] = tmp_images['file_name'].apply(clean_file_name)
        self.images = tmp_images.set_index('clean_file_name')
    
    def coco2shape(self, row):
        
        if row.iscrowd == 1:
            shapes = self.rle2shape(row)
        elif row.iscrowd == 0:
            shapes = self.polygon2shape(row)
        return shapes

    def rle2shape(self, row):
        print("rle",row)
        rle, shape = row['segmentation']['counts'], row['segmentation']['size']
        mask = self._rle_decode(rle, shape)
        padded_mask = np.zeros(
            (mask.shape[0]+2, mask.shape[1]+2),
            dtype=np.uint8,
        )
        padded_mask[1:-1, 1:-1] = mask
        points = find_contours(mask, 0.5)
        shapes = [
            [[int(point[1]), int(point[0])] for point in polygon]
            for polygon in points
        ]
        return shapes

    def _rle_decode(self, rle, shape):
        mask = np.zeros([shape[0] * shape[1]], np.bool)
        for idx, r in enumerate(rle):
            if idx < 1:
                s = 0
            else:
                s = sum(rle[:idx])
            e = s + r
            if e == s:
                continue
            assert 0 <= s < mask.shape[0]
            assert 1 <= e <= mask.shape[0], "shape: {}  s {}  e {} r {}".format(shape, s, e, r)
            if idx % 2 == 1:
                mask[s:e] = 1
        # Reshape and transpose
        mask = mask.reshape([shape[1], shape[0]]).T
        return mask

    def polygon2shape(self, row):
        # print("poligon",row.image_id)
        # shapes: (n_polygons, n_points, 2)

        # coco: 
        # For object detection annotations, the format is "bbox" : [x,y,width,height]
        # Where:
        # x, y: the upper-left coordinates of the bounding box
        # width, height: the dimensions of your bounding box

        shapes = [
            [
                [round(row.bbox[0],2), round(row.bbox[1],2)],
                [round(row.bbox[0] + row.bbox[2], 2), round(row.bbox[1] + row.bbox[3], 2)]
            ]
        ]
        # print(shapes)
        return shapes

    def concat(self, coco):
        
        dfs = [coco.annotations, self.annotations]
        self.annotations = pd.concat(dfs)
        print(len(coco.images), len(self.images))
        imgs = [coco.images.reset_index(level=0), self.images.reset_index(level=0)]
        self.images = pd.concat(imgs).drop_duplicates(subset=['clean_file_name']).set_index('clean_file_name').drop(['id'], axis=1)
        # idx = self.images.index.drop_duplicates()
        # self.images = self.images.loc[idx]

    def coco2labelme(self):
        fillColor = [255, 0, 0, 128]
        lineColor = [0, 255, 0, 128]

        groups = self.annotations.groupby('clean_file_name')
        
        for file_idx, (clean_file_name, df) in enumerate(groups):
            
            record = {
                'imageData': None,
                'fillColor': fillColor,
                'lineColor': lineColor,
                "version": "5.0.5",
                "flags": {},
                'imagePath': clean_file_name,
                'imageHeight': int(self.images.loc[clean_file_name].height),
                'imageWidth': int(self.images.loc[clean_file_name].width),
            }
            record['shapes'] = []

            instance = {
                'line_color': None,
                'fill_color': None,
                'shape_type': "rectangle",
            }

            filename = self.images.loc[clean_file_name].file_name

            for inst_idx, (_, row) in enumerate(df.iterrows()):
                # print(row)
                for polygon in row.shapes:
                    copy_instance = instance.copy()
                    copy_instance.update({
                        'label': row['name'],
                        'group_id': None,
                        # 'group_id': inst_idx,
                        'points': polygon
                    })
                    record['shapes'].append(copy_instance)
            if filename not in self.roboflow:
                self.roboflow.append(filename)
            if clean_file_name not in self.labelme.keys():
                self.labelme[clean_file_name] = record

    def save_labelme(self, file_names, dirpath, save_json_only=False):
        # print(os.path.dirname(os.path.abspath(__file__)))
        
        # uncomment if dir not exists
        
        if not os.path.exists(dirpath):
            os.makedirs(dirpath)
        else:
            raise ValueError(f"{dirpath} has existed")

        for file in file_names:
            filename = os.path.basename(os.path.splitext(file)[0])
            print(filename)
            name = filename.split('_')[0]
            newfile = name+".jpg"

            with open(os.path.join(dirpath, name+'.json'), 'w') as jsonfile:
                json.dump(self.labelme[newfile], jsonfile, ensure_ascii=True, indent=2)

            if not save_json_only:
                print(os.path.join(self.imgpath, file))
                
                # shutil.copy2('/src/dir/file.ext', '/dst/dir/newname.ext') # complete target filename given
                # shutil.copy2('/src/file.ext', '/dst/dir') # target filename is /dst/dir/file.ext
                
                shutil.copy2(os.path.join(self.imgpath, file), dirpath) # target filename is /dst/dir/file.ext

                # subprocess.call(['cp', os.path.join(self.imgpath, file), dirpath])
                # comment if file name is clean (not from roboflow)
                os.rename(os.path.join(dirpath, file), os.path.join(dirpath, newfile))

In [4]:
ds = CocoDatasetHandler('./merged_data/_annotations.coco_test_0.json', './merged_data')
# print(ds.annotations)


In [5]:
ds1 = CocoDatasetHandler('./merged_data/_annotations.coco_test_1.json', './merged_data')
ds2 = CocoDatasetHandler('./merged_data/_annotations.coco_train_0.json', './merged_data')
ds3 = CocoDatasetHandler('./merged_data/_annotations.coco_train_1.json', './merged_data')
ds4 = CocoDatasetHandler('./merged_data/_annotations.coco_valid_0.json', './merged_data')
ds5 = CocoDatasetHandler('./merged_data/_annotations.coco_valid_1.json', './merged_data')

In [6]:
ds.concat(ds1)
ds.concat(ds2)
ds.concat(ds3)
ds.concat(ds4)
ds.concat(ds5)

10 58
467 66
70 528
56 553
20 599


In [12]:
ds5.annotations['clean_file_name'].drop_duplicates()

id
0      162.jpg
1      407.jpg
9      501.jpg
21     544.jpg
71     653.jpg
93     706.jpg
97     731.jpg
115    167.jpg
144    735.jpg
156    192.jpg
170    441.jpg
181    321.jpg
199    112.jpg
208    354.jpg
226     11.jpg
Name: clean_file_name, dtype: object

In [11]:
ds.annotations['clean_file_name'].drop_duplicates()

id
0      162.jpg
1      407.jpg
9      501.jpg
21     544.jpg
71     653.jpg
        ...   
266    575.jpg
273    234.jpg
277     82.jpg
285    485.jpg
292    303.jpg
Name: clean_file_name, Length: 598, dtype: object

In [13]:
ds.images['clean_file_name']

KeyError: 'clean_file_name'

In [7]:
ds.coco2labelme()


In [8]:
ds.save_labelme(ds.roboflow, 'labelme')

1_png.rf.d7c38eb8c45de551acd3f8327b4fb132
./merged_data\1_png.rf.d7c38eb8c45de551acd3f8327b4fb132.jpg
10_png.rf.159acb106ced442105e6e247517efcf1
./merged_data\10_png.rf.159acb106ced442105e6e247517efcf1.jpg
100_png.rf.22c8a74f3c5b442d3bccae4ef05e5b8a
./merged_data\100_png.rf.22c8a74f3c5b442d3bccae4ef05e5b8a.jpg
101_png.rf.33f6fbf8e430c78564227249fc11cf22
./merged_data\101_png.rf.33f6fbf8e430c78564227249fc11cf22.jpg
102_png.rf.3bc41792142408872102e8dcb97e5f38
./merged_data\102_png.rf.3bc41792142408872102e8dcb97e5f38.jpg
103_png.rf.d6173a5e0eaa65eec96541d08225c793
./merged_data\103_png.rf.d6173a5e0eaa65eec96541d08225c793.jpg
104_png.rf.f28d016ca0d8cc8c1a4d71dc8d65d93e
./merged_data\104_png.rf.f28d016ca0d8cc8c1a4d71dc8d65d93e.jpg
105_png.rf.b57710a3a751d057477cfcf4c11b814a
./merged_data\105_png.rf.b57710a3a751d057477cfcf4c11b814a.jpg
106_png.rf.6eaa9ca90d0d1062c6993e9a79d8e4e1
./merged_data\106_png.rf.6eaa9ca90d0d1062c6993e9a79d8e4e1.jpg
107_png.rf.6e6d4ff8094063afb49996b5ef7da94d
./merged