In [11]:
import cv2
import json
import numpy as np
import pandas as pd

In [12]:
# Load the JSON Lines annotation file, keeping one raw text line per entry.
# HEIGHT and WIDTH are used later as the (rows, cols) shape of each mask.
HEIGHT = WIDTH = 768
jsonPath = "../data/polygons.jsonl"
with open(jsonPath) as json_file:
    json_list = [line for line in json_file]

In [13]:
def coordinates_to_masks(coordinates, shape):
    """Rasterize each polygon in ``coordinates`` into its own binary mask.

    Args:
        coordinates: Iterable of polygons. Each polygon is a sequence of
            points convertible to a NumPy array (presumably ``[x, y]`` pixel
            pairs -- confirm against the annotation schema).
        shape: Shape of every output mask, passed unchanged to ``np.zeros``.

    Returns:
        A list with one ``np.uint8`` array per polygon; pixels inside the
        polygon are set to 1, all others stay 0.
    """
    masks = []
    for coord in coordinates:
        mask = np.zeros(shape, dtype=np.uint8)
        # cv2.fillPoly only accepts 32-bit signed integer points. Without an
        # explicit dtype, np.array() picks the platform default integer
        # (int64 on most systems), which OpenCV rejects.
        polygon = np.array(coord, dtype=np.int32)
        cv2.fillPoly(mask, [polygon], 1)
        masks.append(mask)
    return masks

def mask2enc(mask, n=1):
    """Run-length encode a label mask, one encoding per label 1..n.

    The mask is transposed and flattened, so pixels are visited column by
    column. For each label the encoding is a space-separated string of
    ``start length`` pairs, where ``start`` is the 1-based position of a run
    in the flattened array.

    Args:
        mask: 2-D array of integer labels.
        n: Highest label value to encode (labels 1..n are processed).

    Returns:
        A list of length ``n``. Each entry is the encoding string for that
        label, or ``np.nan`` when the label does not occur in the mask.
    """
    flat = mask.T.flatten()
    encodings = []
    for label in range(1, n + 1):
        hits = (flat == label).astype(np.int8)
        if hits.sum() == 0:
            # Label absent from the mask: record a missing value.
            encodings.append(np.nan)
            continue
        # Zero-pad both ends so runs touching the borders still produce a
        # start and an end transition.
        padded = np.concatenate([[0], hits, [0]])
        edges = np.where(padded[1:] != padded[:-1])[0] + 1
        # Even slots are run starts, odd slots are run ends; turn each end
        # into a length by subtracting its start.
        edges[1::2] -= edges[::2]
        encodings.append(' '.join(map(str, edges)))
    return encodings

In [14]:
# Flatten the JSONL entries into one table row per annotation.
records = []
for item_str in json_list:
    if not item_str.strip():
        # Tolerate blank lines (e.g. a trailing newline at the end of the
        # file), which json.loads would otherwise reject.
        continue
    item = json.loads(item_str)
    for ann in item['annotations']:
        # Only the first polygon of each annotation is turned into a mask.
        mask = coordinates_to_masks(ann["coordinates"], (HEIGHT, WIDTH))[0]
        records.append({
            "id": item["id"],
            "type": ann["type"],
            "coordinates": ann["coordinates"],
            "mask": mask,
            # mask2enc returns a list with one entry per label; with the
            # default n=1 this cell therefore stores a one-element list.
            # NOTE(review): confirm consumers expect the list wrapper.
            "rle": mask2enc(mask),
        })
dataset = pd.DataFrame(records, columns=["id", "type", "coordinates", "mask", "rle"])

In [15]:
# Display the first rows of the assembled annotation table.
dataset.head()

Unnamed: 0,id,type,coordinates,mask,rle
0,0006ff2aa7cd,glomerulus,"[[[167, 249], [166, 249], [165, 249], [164, 24...","[[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...",[2 105 769 107 1537 111 2305 113 3073 114 3841...
1,0006ff2aa7cd,blood_vessel,"[[[283, 109], [282, 109], [281, 109], [280, 10...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",[208982 10 209748 17 210514 22 211280 27 21204...
2,0006ff2aa7cd,blood_vessel,"[[[104, 292], [103, 292], [102, 292], [101, 29...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",[55540 4 56306 8 57073 11 57840 14 58608 15 59...
3,0006ff2aa7cd,blood_vessel,"[[[505, 442], [504, 442], [503, 442], [502, 44...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",[375967 11 376732 20 377497 26 378263 30 37902...
4,0006ff2aa7cd,blood_vessel,"[[[375, 477], [374, 477], [373, 477], [372, 47...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",[250808 5 251575 7 252342 10 253110 11 253878 ...


In [16]:
# Keep only the 'blood_vessel' annotations and renumber the rows from 0.
# Chaining reset_index() returns a fresh frame instead of mutating a
# boolean-filtered slice in place.
dataset = dataset.loc[dataset['type'] == 'blood_vessel'].reset_index(drop=True)

In [17]:
# Build a per-row key "<id>_<row number>" and use it as the index.
# Resetting to a positional index first keeps the cell idempotent: on a
# re-run the existing 'unique_id' index is dropped rather than being
# concatenated into the new ids.
dataset = (
    dataset
    .reset_index(drop=True)
    .assign(unique_id=lambda df: df['id'] + '_' + df.index.astype(str))
    .set_index('unique_id')
)
dataset.head()

Unnamed: 0_level_0,id,type,coordinates,mask,rle
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0006ff2aa7cd_0,0006ff2aa7cd,blood_vessel,"[[[283, 109], [282, 109], [281, 109], [280, 10...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",[208982 10 209748 17 210514 22 211280 27 21204...
0006ff2aa7cd_1,0006ff2aa7cd,blood_vessel,"[[[104, 292], [103, 292], [102, 292], [101, 29...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",[55540 4 56306 8 57073 11 57840 14 58608 15 59...
0006ff2aa7cd_2,0006ff2aa7cd,blood_vessel,"[[[505, 442], [504, 442], [503, 442], [502, 44...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",[375967 11 376732 20 377497 26 378263 30 37902...
0006ff2aa7cd_3,0006ff2aa7cd,blood_vessel,"[[[375, 477], [374, 477], [373, 477], [372, 47...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",[250808 5 251575 7 252342 10 253110 11 253878 ...
0006ff2aa7cd_4,0006ff2aa7cd,blood_vessel,"[[[368, 410], [367, 410], [366, 410], [365, 41...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",[257640 10 258405 18 259171 24 259938 28 26070...


In [18]:
# Write the filtered annotation table to labels.csv in the working directory.
# index=False leaves the DataFrame index out of the file.
# NOTE(review): the index was set to 'unique_id' in the preceding cell, so
# those ids are not written -- confirm that is intended.
# NOTE(review): the 'mask' column holds NumPy arrays; their CSV text form is
# presumably a truncated repr -- verify downstream readers do not need it.
dataset.to_csv('labels.csv', index=False)