In [None]:
import os
import glob

import numpy as np
import pandas as pd

import cv2
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm

In [None]:
class FLAGS:
    """Notebook-wide path configuration, accessed as class attributes."""
    
    # Root of the input dataset; train.csv and train/*.png are read from here.
    data_dir = '/kaggle/input/sartorius-cell-instance-segmentation'
    # Output directory; one '<image_id>.png' mask is written here per training image.
    save_dir = '/kaggle/working'

In [None]:
def rle2mask(rle, shape):
    """Decode a run-length-encoded string into a binary mask.

    Parameters
    ----------
    rle : str
        Whitespace-separated integers read as alternating
        ``start length`` pairs, where ``start`` is a 1-based index into the
        flattened (row-major) mask.
    shape : tuple
        Shape of the returned mask.

    Returns
    -------
    numpy.ndarray
        Integer array of the given shape, 1 inside every run and 0 elsewhere.
    """
    flat = np.zeros(shape, dtype=int).ravel()

    tokens = np.asarray(rle.split(), dtype=int)

    # Even positions hold 1-based starts, odd positions hold run lengths.
    begins = tokens[0::2] - 1
    stops = begins + tokens[1::2]

    for begin, stop in zip(begins, stops):
        flat[begin:stop] = 1

    return flat.reshape(shape)

In [None]:
# Build one uint16 label map per training image and save it as '<image_id>.png'.
# Pixel value 0 is background; the k-th annotation row of an image (1-based)
# is written with value k, so every instance keeps a distinct, non-zero label.
df = pd.read_csv(f'{FLAGS.data_dir}/train.csv')
df = df.set_index('id')

files = sorted(glob.glob(f'{FLAGS.data_dir}/train/*.png'))
print(f'Number of files: {len(files)}')

# Make sure the output directory exists before the first write.
os.makedirs(FLAGS.save_dir, exist_ok=True)

max_label = np.iinfo(np.uint16).max

for path in tqdm(files):

    # The file stem is the image id used as the index of train.csv.
    image_id = os.path.splitext(os.path.basename(path))[0]
    # A list indexer always returns a DataFrame, even when the image has a
    # single annotation row (a scalar indexer would return a Series there).
    image_df = df.loc[[image_id]]

    annotations = image_df['annotation'].values
    shape = (int(image_df['height'].iloc[0]), int(image_df['width'].iloc[0]))

    if len(annotations) > max_label:
        raise ValueError(
            f'{image_id}: {len(annotations)} instances do not fit in a uint16 label map'
        )

    # Paint labels straight into a single map instead of stacking one full-size
    # mask per instance; labels start at 1 so the first instance is not merged
    # into the background (which np.argmax over the stack did).
    mask = np.zeros(shape, dtype=np.uint16)
    for label, rle in enumerate(annotations, start=1):
        inst_mask = rle2mask(rle, shape)
        # Only fill still-empty pixels: on overlap the earlier instance wins.
        mask[(inst_mask == 1) & (mask == 0)] = label

    save_path = f'{FLAGS.save_dir}/{image_id}.png'
    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(save_path, mask):
        raise IOError(f'Failed to write mask: {save_path}')

print(f'Processing complete')

## Visualize Masks (sanity check)

In [None]:
# Colourise `mask` (the label map left over from the last iteration of the
# processing loop) into an RGB image: every non-zero label gets one random
# colour, label 0 is skipped as background and stays black.
rng = np.random.default_rng(0)  # fixed seed so the figure is reproducible

# Paint into one preallocated uint8 canvas rather than stacking a full
# H x W x 3 array per instance and summing them.
colored_mask = np.zeros(mask.shape + (3,), dtype=np.uint8)

for val in np.unique(mask):

    if val == 0:
        continue

    colored_mask[mask == val] = rng.integers(0, 256, size=3)

In [None]:
# Show the image at `path` (the last file handled by the processing loop)
# followed by its colourised instance mask.
image = cv2.imread(path, cv2.IMREAD_UNCHANGED)
# cv2.imread signals failure by returning None instead of raising.
if image is None:
    raise FileNotFoundError(f'Could not read image: {path}')

panels = [
    ('Image', image, 'gray'),
    ('Instance mask', colored_mask, None),
]

for title, data, cmap in panels:
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(data, cmap=cmap)
    ax.set_title(title)
    ax.axis("off")
    plt.show()