In [None]:
!pip install git+git://github.com/waspinator/pycococreator.git@0.2.0
!pip install git+git://github.com/waspinator/coco.git@2.1.0

In [None]:
import datetime
import json
import os
import re
from glob import glob
import fnmatch
from PIL import Image
import numpy as np
from pycococreatortools import pycococreatortools
import pandas as pd

from skimage.io import imread
import matplotlib.pyplot as plt
from tqdm import tqdm


# Input locations of the Sartorius cell instance segmentation training data.
csv_train = '../input/sartorius-cell-instance-segmentation/train.csv'
dataset_train = '../input/sartorius-cell-instance-segmentation/train'

# Folder that is globbed for *.png images and read again for the visual check.
IMAGE_DIR = dataset_train

# Annotation table; its 'id' and 'annotation' columns are used further down.
df = pd.read_csv(filepath_or_buffer=csv_train)


In [None]:
# Dataset-level metadata stored under the "info" key of the COCO JSON.
INFO = {
    "description": "Kaggle Dataset",
    "url": "https://github.com/pmj110119",
    "version": "0.1.0",
    "year": 2021,
    "contributor": "pmj110119",
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # timestamp and drop its tzinfo so the string keeps the same
    # "YYYY-MM-DD HH:MM:SS[.ffffff]" layout, without a "+00:00" suffix.
    "date_created": datetime.datetime.now(datetime.timezone.utc)
                    .replace(tzinfo=None)
                    .isoformat(' '),
}

# Entries for the "licenses" section of the COCO JSON (a single license).
LICENSES = [
    dict(
        id=1,
        name="Attribution-NonCommercial-ShareAlike License",
        url="http://creativecommons.org/licenses/by-nc-sa/2.0/",
    ),
]

# Entries for the "categories" section of the COCO JSON: one 'cell' class, id 1.
CATEGORIES = [dict(id=1, name='cell', supercategory='cell')]

In [None]:
def rle_decode(mask_rle, shape=(520, 704), color=1):
    '''
    Decode a run-length-encoded mask string into an image-shaped array.

    mask_rle: run-length string formatted as "start length start length ...";
              starts are 1-based positions in the flattened (row-major) image
    shape: (height, width) or (height, width, depth) of the array to return
    color: value written into the masked pixels (a scalar, or a sequence of
           `depth` values when shape has three entries)
    Returns a float32 numpy array of the given shape: `color` on the mask,
    0 on the background
    Raises ValueError if mask_rle does not contain an even number of values
    '''
    # Split the string on whitespace, then convert the tokens to an integer array
    s = np.array(mask_rle.split(), dtype=int)

    # An odd token count means a start without a length. Without this check
    # numpy would broadcast the shorter array and silently draw wrong runs.
    if s.size % 2:
        raise ValueError(
            'mask_rle must hold (start, length) pairs, got %d values' % s.size)

    # Even positions are the 1-based starts, odd positions are the run lengths
    starts = s[0::2] - 1
    ends = starts + s[1::2]

    # Work on the flattened image, since RLE describes 1D runs
    if len(shape) == 3:
        h, w, d = shape
        img = np.zeros((h * w, d), dtype=np.float32)
    else:
        h, w = shape
        img = np.zeros((h * w,), dtype=np.float32)

    # Paint every run with the requested value
    for lo, hi in zip(starts, ends):
        img[lo:hi] = color

    # Restore the requested image shape
    return img.reshape(shape)

In [None]:
# Cap on the number of images converted. The notebook was written with a
# debugging limit of 5 images; set this to None to convert every image.
MAX_IMAGES = 5

# Dictionary that is finally written to the JSON file.
coco_output = {
    "info": INFO,
    "licenses": LICENSES,
    "categories": CATEGORIES,
    "images": [],   # placeholders, filled in by the loop below
    "annotations": []
}

image_id = 1
segmentation_id = 1

# Every annotation belongs to the single category with id 1 ('cell' in CATEGORIES).
class_id = 1
category_info = {'id': class_id, 'is_crowd': 0}

# glob returns files in arbitrary order; sort so image ids are reproducible.
image_paths = sorted(glob(os.path.join(IMAGE_DIR, '*.png')))
paths_to_convert = image_paths if MAX_IMAGES is None else image_paths[:MAX_IMAGES]

# Outer loop: one COCO "images" entry per picture.
for image_path in tqdm(paths_to_convert, total=len(paths_to_convert)):
    # Only the file name is stored, not the full path.
    image_name = os.path.basename(image_path)

    # Only the (width, height) is needed; the context manager closes the file.
    with Image.open(image_path) as image:
        image_size = image.size
    image_info = pycococreatortools.create_image_info(
        image_id, image_name, image_size)
    coco_output["images"].append(image_info)

    # Inner loop: all RLE masks whose 'id' equals the file name without extension.
    image_key = os.path.splitext(image_name)[0]
    rle_masks = df.loc[df['id'] == image_key, 'annotation'].tolist()
    for mask_rle in rle_masks:
        # PIL reports (width, height); rle_decode expects (height, width).
        binary_mask = rle_decode(mask_rle, shape=(image_size[1], image_size[0]))
        annotation_info = pycococreatortools.create_annotation_info(
            segmentation_id, image_id, category_info, binary_mask,
            image_size, tolerance=1)

        # create_annotation_info returns None for masks it cannot turn into an
        # annotation (e.g. empty ones). Appending that would put `null` entries
        # into the JSON and break loading it with pycocotools.
        if annotation_info is not None:
            coco_output["annotations"].append(annotation_info)

        # An id is consumed whether or not the annotation was written.
        segmentation_id = segmentation_id + 1

    image_id = image_id + 1

with open('instances_cell_train2021.json', 'w') as output_json_file:
    json.dump(coco_output, output_json_file, indent=4)


## Check the generated COCO annotation file

In [None]:
from pycocotools.coco import COCO

# Load the annotation file produced by the conversion cell back through the
# COCO API, to confirm that it parses.
annFile = 'instances_cell_train2021.json'
coco = COCO(annotation_file=annFile)

In [None]:
# Print the category list and the number of images / annotations that were loaded.
dataset = coco.dataset
for label, value in (
    ('categories:', dataset['categories']),
    ('image nums:', len(dataset['images'])),
    ('annotation nums:', len(dataset['annotations'])),
):
    print(label, value)

In [None]:
from skimage import io

# Draw one image id at random among the images annotated with category 1.
imgIds = coco.getImgIds(catIds=[1])
random_position = np.random.randint(len(imgIds))
img = coco.loadImgs(imgIds[random_position])[0]
print('file_name:', img['file_name'])

# First figure: the original image, read from IMAGE_DIR.
I = io.imread('%s/%s' % (IMAGE_DIR, img['file_name']))
plt.axis('off')
plt.imshow(I)
plt.show()

# Second figure: the same image with its instance annotations drawn on top.
plt.imshow(I)
plt.axis('off')
annIds = coco.getAnnIds(imgIds=img['id'], catIds=[1], iscrowd=None)
anns = coco.loadAnns(annIds)
coco.showAnns(anns)