In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pathlib
import pickle
import pycocotools.mask as RLE
import skimage
import skimage.io 


In [2]:
# Pick one image / label-array pair from the training directory.
train_root = pathlib.Path('./train')
# First PNG (in sorted order) whose file name does not contain 'label'.
train_image = list(filter(lambda p: 'label' not in p.name, sorted(train_root.glob("*.png"))))[0]
# First .npy file (in sorted order).
train_labels = sorted(train_root.glob("*.npy"))[0]

In [3]:
# Load the .npy label array, then read the selected image as grayscale.
labels = np.load(train_labels)
img = skimage.io.imread(train_image, as_gray=True)

In [5]:
def split(data, n_row=2, n_col=2):
    """
    Splits array into (approximately) even sub-arrays.
    Used in this example to generate a dataset with multiple smaller images
    from a single larger image.

    Parameters
    ----------
    data: ndarray
        Array with at least 2 dimensions. It is split along its first two
        axes (rows, columns); any trailing axes are kept whole.
    n_row, n_col: int
        Number of pieces along the row and column axes.

    Returns
    -------
    splits: list of ndarray
        The n_row * n_col sub-arrays (slices of `data`), ordered row by row.
    row_idx: list of [r1, r2]
        Start (inclusive) and stop (exclusive) row of each sub-array.
    col_idx: list of [c1, c2]
        Start (inclusive) and stop (exclusive) column of each sub-array.
    """
    r, c = data.shape[:2]

    # Use the builtin int: the np.int alias was deprecated in NumPy 1.20
    # and removed in 1.24. Fractional boundaries are truncated, which is
    # why the pieces are only approximately even.
    row_splits = np.linspace(0, r, n_row + 1, dtype=int)
    col_splits = np.linspace(0, c, n_col + 1, dtype=int)

    splits = []
    row_idx = []
    col_idx = []
    for r1, r2 in zip(row_splits[:-1], row_splits[1:]):
        for c1, c2 in zip(col_splits[:-1], col_splits[1:]):
            splits.append(data[r1:r2, c1:c2])
            row_idx.append([r1, r2])
            col_idx.append([c1, c2])

    return splits, row_idx, col_idx
    

In [7]:
im_processed_root = pathlib.Path('train_images_preprocessed')
# Create the output directory up front so the writes below do not fail
# on a fresh checkout.
im_processed_root.mkdir(parents=True, exist_ok=True)

# NOTE(review): not used in the visible cells; kept in case later cells rely on it.
label_paths = []

# Split the label array into tiles and save the annotations for each tile
# (RLE-encoded masks + bounding boxes) as a pickle file.
for im, row_idx, col_idx in zip(*split(labels)):
    # Every distinct non-zero value in the tile is treated as one instance;
    # 0 is skipped (treated as background).
    unique = np.unique(im)
    unique = unique[unique != 0]
    masks_rle = []
    bbox_xyxy_abs = []
    for i in unique:
        mask = im == i
        yy, xx = np.where(mask)
        # NOTE(review): max() is the last pixel *inside* the instance, so the
        # box corner is inclusive -- confirm this is what consumers expect.
        bbox_xyxy_abs.append([xx.min(), yy.min(), xx.max(), yy.max()])
        masks_rle.append(RLE.encode(np.asfortranarray(mask)))
    # reshape keeps an (n, 4) layout even when the tile has no instances.
    bbox_xyxy_abs = np.asarray(bbox_xyxy_abs).reshape(-1, 4)

    results = {'mask': masks_rle,
               'boxes': bbox_xyxy_abs}

    # The tile's row/column bounds are encoded in the file name.
    fname = '{}_r1r2c1c2_{:03}-{:03}-{:03}-{:03}.pickle'.format(train_labels.stem, *row_idx, *col_idx)
    with open(im_processed_root / fname, 'wb') as f:
        pickle.dump(results, f)

    
    
    

In [8]:
# Split the image into tiles, convert each tile to unsigned 8-bit and save
# it as a PNG whose name encodes the tile's row/column bounds.
png_name_template = '{}_r1r2c1c2_{:03}-{:03}-{:03}-{:03}.png'
for im, row_idx, col_idx in zip(*split(img)):
    (r1, r2), (c1, c2) = row_idx, col_idx
    fname = png_name_template.format(train_image.stem, r1, r2, c1, c2)
    im = skimage.img_as_ubyte(im)
    skimage.io.imsave(im_processed_root.joinpath(fname), im)

In [43]:
from detectron2.structures import BoxMode
def get_data_dicts(data_root):
    """
    Builds a list of dataset records from a directory of image tiles and
    their pickled annotations.

    Every '*.png' file in `data_root` is paired (in sorted order) with a
    '*.pickle' file. The pickle must hold a dict with the keys 'mask'
    (sequence of masks) and 'boxes' (sequence of XYXY boxes).

    Parameters
    ----------
    data_root: pathlib.Path
        Directory containing the png/pickle pairs. File stems must end in
        '_<r1>-<r2>-<c1>-<c2>' (tile row/column bounds).

    Returns
    -------
    dataset_dicts: list of dict
        One record per image with the keys 'file_name', 'height', 'width',
        'image_id', 'mask_format', 'annotations' and 'num_instances'.

    Raises
    ------
    ValueError
        If the number of png files differs from the number of pickle files.
    AssertionError
        If a png/pickle pair does not share the same stem (ignoring 'label_').
    """
    png_files = sorted(data_root.glob('*.png'))
    pickle_files = sorted(data_root.glob('*.pickle'))
    # zip() would silently drop the unmatched files, so check the counts first.
    if len(png_files) != len(pickle_files):
        raise ValueError(
            'found {} png files but {} pickle files in {}'.format(
                len(png_files), len(pickle_files), data_root))

    dataset_dicts = []
    for i, (img_root, ann_root) in enumerate(zip(png_files, pickle_files)):
        assert img_root.stem == ann_root.stem.replace('label_','')

        # The tile bounds are the last '_'-separated field of the file stem.
        r1, r2, c1, c2 = [int(x) for x in img_root.stem.split('_')[-1].split('-')]

        record = {}
        record['file_name'] = str(img_root)
        record['height'] = r2-r1
        record['width'] = c2-c1
        record['image_id'] = i

        record['mask_format'] = 'bitmask'

        # NOTE: pickle.load can execute arbitrary code; only use this on
        # annotation files you generated yourself.
        with open(ann_root, 'rb') as f:
            annos = pickle.load(f)

        objs = []
        # Look the entries up by key instead of relying on dict ordering.
        for mask, box in zip(annos['mask'], annos['boxes']):
            obj = {'bbox': box,
                   'bbox_mode': BoxMode.XYXY_ABS,
                   # NOTE(review): mask is wrapped in a one-element list, as in
                   # the original code -- confirm the consumer expects this.
                   'segmentation': [mask],
                   'category_id': 0
                  }
            objs.append(obj)

        record['annotations'] = objs
        record['num_instances'] = len(objs)
        dataset_dicts.append(record)
    return dataset_dicts
        
        
        

In [44]:
# Build the dataset records from the preprocessed tiles.
ddicts = get_data_dicts(data_root=im_processed_root)

In [52]:
# Number of annotations in each dataset record.
[len(record['annotations']) for record in ddicts]

[323, 301, 324, 348]