In [1]:
import tensorflow as tf
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import os, sys
sys.path.append(os.path.abspath("./PythonAPI"))
from PythonAPI.pycocotools.coco import COCO
from PIL import Image
from utils import int64_feature, float_feature, bytes_feature ,ImageCoder, norm
# Create an empty matplotlib figure; `fig` is not referenced again in the cells shown here.
fig = plt.figure()

<matplotlib.figure.Figure at 0x7f992c4f1250>

In [2]:
# Root of the COCO dataset. The default is the original hard-coded location;
# set the COCO_DATASET_DIR environment variable to point the notebook somewhere
# else without editing this cell.
dataset_dir = os.environ.get("COCO_DATASET_DIR", "/data/dataset/coco")
# Directory holding the COCO annotation JSON files.
annotation_dir = os.path.join(dataset_dir, 'annotations')
# Image files live under the dataset root (the conversion loop appends the
# split sub-directory taken from each file name).
image_dir = dataset_dir
# Destination directory for the generated TFRecord shards.
record_dir = os.path.join(dataset_dir, 'records')
split_name = "train2014"
# Instance annotation file for the selected split.
annFile = os.path.join(annotation_dir, 'instances_%s.json' % (split_name))

In [3]:
# Load the annotation file and build the COCO index (the recorded run below took ~16 s).
coco = COCO(annFile)

loading annotations into memory...
Done (t=16.14s)
creating index...
index created!


In [4]:
# Category records for every category id present in the annotation file.
category_ids = coco.getCatIds()
cats = coco.loadCats(category_ids)
num_images = len(coco.imgs)
print ('%s has %d images' %(split_name, num_images))
# (image id, image info) pairs in a list, so images can be addressed by
# position when they are divided into shards.
imgs = list(coco.imgs.items())

train2014 has 82783 images


In [5]:
# Number of TFRecord shards to write.
num_to_split = 10
# Ceiling division: `num_to_split` shards of this size are enough to hold every
# image even when len(imgs) is not a multiple of num_to_split.
num_per_split = (len(imgs) + num_to_split - 1) // num_to_split

In [6]:
def _real_id_to_cat_id(catId):
    """Note coco has 80 classes, but the catId ranges from 1 to 90!"""
    real_id_to_cat_id = \
    {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17,
     17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 27, 26: 28, 27: 31, 28: 32, 29: 33, 30: 34,
     31: 35, 32: 36, 33: 37, 34: 38, 35: 39, 36: 40, 37: 41, 38: 42, 39: 43, 40: 44, 41: 46, 42: 47, 43: 48, 44: 49,
     45: 50, 46: 51, 47: 52, 48: 53, 49: 54, 50: 55, 51: 56, 52: 57, 53: 58, 54: 59, 55: 60, 56: 61, 57: 62, 58: 63,
     59: 64, 60: 65, 61: 67, 62: 70, 63: 72, 64: 73, 65: 74, 66: 75, 67: 76, 68: 77, 69: 78, 70: 79, 71: 80, 72: 81,
     73: 82, 74: 84, 75: 85, 76: 86, 77: 87, 78: 88, 79: 89, 80: 90}
    return real_id_to_cat_id[catId]

def _cat_id_to_real_id(readId):
    """Note coco has 80 classes, but the catId ranges from 1 to 90!"""
    cat_id_to_real_id = \
    {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16,
     18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 27: 25, 28: 26, 31: 27, 32: 28, 33: 29, 34: 30,
     35: 31, 36: 32, 37: 33, 38: 34, 39: 35, 40: 36, 41: 37, 42: 38, 43: 39, 44: 40, 46: 41, 47: 42, 48: 43, 49: 44,
     50: 45, 51: 46, 52: 47, 53: 48, 54: 49, 55: 50, 56: 51, 57: 52, 58: 53, 59: 54, 60: 55, 61: 56, 62: 57, 63: 58,
     64: 59, 65: 60, 67: 61, 70: 62, 72: 63, 73: 64, 74: 65, 75: 66, 76: 67, 77: 68, 78: 69, 79: 70, 80: 71, 81: 72,
     82: 73, 84: 74, 85: 75, 86: 76, 87: 77, 88: 78, 89: 79, 90: 80}
    return cat_id_to_real_id[readId]

In [7]:
def _convert_to_example(image_id, image_data, label_data,
                           height, width,
                           num_instances, gt_boxes, masks):
    """Wrap one image and its labels in a tf.train.Example, stored as raw values.

    The integer arguments go through `int64_feature`; `image_data`,
    `label_data`, `gt_boxes` and `masks` go through `bytes_feature`, so the
    caller is expected to pass them already serialised.
    """
    feature_map = {
        'image/img_id': int64_feature(image_id),
        'image/encoded': bytes_feature(image_data),
        'image/height': int64_feature(height),
        'image/width': int64_feature(width),
        # N, the number of annotated instances
        'label/num_instances': int64_feature(num_instances),
        # of shape (N, 5), (x1, y1, x2, y2, classid)
        'label/gt_boxes': bytes_feature(gt_boxes),
        # of shape (N, height, width)
        'label/gt_masks': bytes_feature(masks),
        # deprecated, this is used for pixel-level segmentation (whole map)
        'label/encoded': bytes_feature(label_data),
    }
    features = tf.train.Features(feature=feature_map)
    return tf.train.Example(features=features)

In [None]:
def _get_dataset_filename(dataset_dir, split_name, split_ind, num_to_split):
    output_filename = 'coco_%s_%05d-of-%05d.tfrecord' % (
      split_name, split_ind, num_to_split)
    return os.path.join(dataset_dir, output_filename)

def showLabel(image):
    """Render a 2-D label map as an RGB image with one random colour per label.

    Args:
        image: 2-D numpy array of labels. Entries that are not greater than
            zero are treated as unlabelled.

    Returns:
        A uint8 array of shape (image.shape[0], image.shape[1], 3). Unlabelled
        pixels are black and all pixels with the same positive label share one
        colour. Colours are drawn from `np.random`, so they change between
        calls unless the global seed is fixed.
    """
    # np.unique deduplicates in compiled code; building a Python set would
    # visit every labelled pixel one by one.
    labels = np.unique(image[image > 0])
    # Paint straight into a zero-initialised uint8 canvas so that anything
    # which is not a positive label stays black.
    rgb = np.zeros((image.shape[0], image.shape[1], 3), dtype=np.uint8)
    for label in labels:
        # Each channel lies in [0, 254], the same range as int(random() * 255).
        colour = np.random.randint(0, 255, size=3).astype(np.uint8)
        rgb[image == label] = colour
    return rgb

def _get_coco_masks(coco, img_id, height, width, img_name):
    annIds = coco.getAnnIds(imgIds=[img_id], iscrowd=None)
    # assert  annIds is not None and annIds > 0, 'No annotaion for %s' % str(img_id)
    anns = coco.loadAnns(annIds)
    # assert len(anns) > 0, 'No annotaion for %s' % str(img_id)
    masks = []
    classes = []
    bboxes = []
    mask = np.zeros((height, width), dtype=np.float32)
    segmentations = []
    for ann in anns:
        m = coco.annToMask(ann) # zero one mask
        assert m.shape[0] == height and m.shape[1] == width, \
                'image %s and ann %s dont match' % (img_id, ann)
        masks.append(m)
        cat_id = _cat_id_to_real_id(ann['category_id'])
        classes.append(cat_id)
        bboxes.append(ann['bbox'])
        m = m.astype(np.float32) * cat_id
        mask[m > 0] = m[m > 0]

    masks = np.asarray(masks)
    classes = np.asarray(classes)
    bboxes = np.asarray(bboxes)
    # to x1, y1, x2, y2
    if bboxes.shape[0] <= 0:
        bboxes = np.zeros([0, 4], dtype=np.float32)
        classes = np.zeros([0], dtype=np.float32)
        print ('None Annotations %s' % img_name)
    bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2]
    bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3]
    gt_boxes = np.hstack((bboxes, classes[:, np.newaxis]))
    gt_boxes = gt_boxes.astype(np.float32)
    masks = masks.astype(np.uint8)
    mask = mask.astype(np.uint8)
    assert masks.shape[0] == gt_boxes.shape[0], 'Shape Error'

    return gt_boxes, masks, mask

In [None]:
# Convert every image of the split into `num_to_split` TFRecord shards.
# Each record stores the raw RGB pixels, the whole-image class map, the boxes
# and the per-instance masks, all serialised with ndarray.tobytes().

# The writer does not create missing directories, so make sure it exists.
if not os.path.isdir(record_dir):
    os.makedirs(record_dir)

for split_ind in range(num_to_split):
    start_id = min(split_ind * num_per_split, len(imgs))
    if split_ind == num_to_split - 1:
        # The last shard also takes whatever is left over when len(imgs) is
        # not a multiple of num_to_split, so no image is silently skipped.
        end_id = len(imgs)
    else:
        end_id = min((split_ind + 1) * num_per_split, len(imgs))
    record_filename = _get_dataset_filename(record_dir, split_name, split_ind, num_to_split)
    print(record_filename)
    with tf.python_io.TFRecordWriter(record_filename) as tfrecord_writer:
        for i in range(start_id, end_id):
            img_id, img_info = imgs[i]
            file_name = img_info['file_name']
            # The second '_'-separated field of the file name is used as the
            # sub-directory (e.g. COCO_train2014_... -> train2014).
            split = file_name.split('_')[1]
            img_name = os.path.join(image_dir, split, file_name)
            height, width = img_info['height'], img_info['width']
            if str(img_id) == '320612':
                # bad image
                continue
            # gt_boxes => bounding box, masks => instance mask , mask => whole image mask
            gt_boxes, masks, mask = _get_coco_masks(coco, img_id, height, width, img_name)
            # To debug, display Image.open(img_name) next to showLabel(mask)
            # for a few values of i.

            # Save Image to tfrecord. The context manager closes the file
            # handle as soon as the pixels have been copied out.
            with Image.open(img_name) as pil_img:
                img = np.array(pil_img)
            if img.ndim == 2:
                # No RGB channel: replicate the single channel three times.
                print ('Gray Image %s' % str(img_id))
                img = np.repeat(img[:, :, np.newaxis], 3, axis=2)

            img = img.astype(np.uint8)
            assert img.size == width * height * 3, '%s' % str(img_id)

            # tobytes() is the supported spelling; tostring() is deprecated
            # and no longer exists in NumPy 2.0.
            img_raw = img.tobytes()
            mask_raw = mask.tobytes()

            example = _convert_to_example(
              img_id,
              img_raw,
              mask_raw,
              height, width, gt_boxes.shape[0],
              gt_boxes.tobytes(), masks.tobytes())

            tfrecord_writer.write(example.SerializeToString())


/data/dataset/coco/records/coco_train2014_00000-of-00010.tfrecord
None Annotations /data/dataset/coco/train2014/COCO_train2014_000000262184.jpg
None Annotations /data/dataset/coco/train2014/COCO_train2014_000000362257.jpg
Gray Image 86
None Annotations /data/dataset/coco/train2014/COCO_train2014_000000000250.jpg
Gray Image 131366
None Annotations /data/dataset/coco/train2014/COCO_train2014_000000262623.jpg
None Annotations /data/dataset/coco/train2014/COCO_train2014_000000000508.jpg
None Annotations /data/dataset/coco/train2014/COCO_train2014_000000524927.jpg
None Annotations /data/dataset/coco/train2014/COCO_train2014_000000525020.jpg
Gray Image 263002
None Annotations /data/dataset/coco/train2014/COCO_train2014_000000394126.jpg
Gray Image 525513
None Annotations /data/dataset/coco/train2014/COCO_train2014_000000371863.jpg
Gray Image 394547
None Annotations /data/dataset/coco/train2014/COCO_train2014_000000263478.jpg
Gray Image 1350
None Annotations /data/dataset/coco/train2014/COCO_t



None Annotations /data/dataset/coco/train2014/COCO_train2014_000000265207.jpg
Gray Image 3293
None Annotations /data/dataset/coco/train2014/COCO_train2014_000000004481.jpg
None Annotations /data/dataset/coco/train2014/COCO_train2014_000000003692.jpg
Gray Image 134918
None Annotations /data/dataset/coco/train2014/COCO_train2014_000000003941.jpg
None Annotations /data/dataset/coco/train2014/COCO_train2014_000000397187.jpg
None Annotations /data/dataset/coco/train2014/COCO_train2014_000000401670.jpg
None Annotations /data/dataset/coco/train2014/COCO_train2014_000000266274.jpg
None Annotations /data/dataset/coco/train2014/COCO_train2014_000000004308.jpg
None Annotations /data/dataset/coco/train2014/COCO_train2014_000000266518.jpg
None Annotations /data/dataset/coco/train2014/COCO_train2014_000000004517.jpg
None Annotations /data/dataset/coco/train2014/COCO_train2014_000000135849.jpg
Gray Image 821
None Annotations /data/dataset/coco/train2014/COCO_train2014_000000267353.jpg
None Annotation