In [None]:
!pip install pycocotools

# Introduction

In this visualization, we use pycocotools to decode the RLEs. To do so, we first convert the segmentation to a COCO-compatible RLE, then we use maskUtils in pycocotools to encode and decode the RLEs into masks. This can be useful if you are converting data to COCO format. 
In addition, we also visualize all annotated classes overlaid on a single slice for a better understanding of the dataset.

References:

1. https://www.kaggle.com/code/fabiendaniel/image-with-masks-quick-overview
2. https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/mask.py

In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
from glob import glob
import pycocotools.mask as maskUtils

# Some Basic Statistics of the Dataset

In [None]:
# Load the training annotations and print a few headline numbers about them.
train_df = pd.read_csv('../input/uw-madison-gi-tract-image-segmentation/train.csv')
class_column = train_df['class']
class_names = class_column.unique()

print('=> Shape of the train dataframe {}'.format(train_df.shape))
print('=> Number of unique classes = {}'.format(class_column.nunique()))
print('=> Unique classes = {}'.format(class_names))
print('=> Unique ids = {}'.format(train_df.id.nunique()))

# One line per class: how many rows of train.csv carry that class label.
for cat in class_names:
    rows_with_class = (class_column == cat).sum()
    print('=> Number of {} in train = {}'.format(cat, rows_with_class))

# Every scan image found under the train directory.
list_images = glob('../input/uw-madison-gi-tract-image-segmentation/train/*/*/scans/*.png')
print('=> Number of images in train set = {}'.format(len(list_images)))

# Last expression of the cell: rendered by the notebook as a table.
train_df.head()

In [None]:
# Build one row per scan image from its path: the full path, the directory three
# levels up from the end of the path (stored as 'case_day') and the file name.
image_properties = pd.DataFrame(
    [(path, path.split('/')[-3], path.split('/')[-1]) for path in list_images],
    columns=['file_path', 'case_day', 'file'],
)

# The file name is '_'-separated; field 1 is the slice number and fields 2 and 3
# are integers.
# NOTE(review): fields 2 and 3 are stored as 'height' and 'width' respectively -
# confirm that order against the dataset's file-naming description.
file_names = image_properties['file']
image_properties['slice'] = file_names.apply(lambda name: 'slice_' + name.split('_')[1])
image_properties['height'] = file_names.apply(lambda name: int(name.split('_')[2]))
image_properties['width'] = file_names.apply(lambda name: int(name.split('_')[3]))

# Same key format as the 'id' column of train_df, so the two frames can be joined.
image_properties['id'] = image_properties['case_day'] + '_' + image_properties['slice']

# Left join keeps every annotation row, whether or not an image was matched.
train_df_merged = train_df.merge(image_properties, on='id', how='left')

# Keep only the rows that actually have a segmentation string.
has_annotation = train_df_merged['segmentation'].notnull()
X = train_df_merged[has_annotation]
print('=> Images with Annotations : {}'.format(X.shape))
X.head()

# COCO-Compatible RLE and Mask Generation

RLE is a simple yet efficient format for storing binary masks. RLE first divides a vector (or vectorized image) into a series of piecewise constant regions and then for each piece simply stores the length of that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] (note that the odd counts are always the numbers of zeros). Instead of storing the counts directly, additional compression is achieved with a variable bitrate representation based on a common scheme called LEB128. The segmentation strings in this dataset instead store (start, length) pairs with 1-based starts, so they are first rewritten as COCO-style counts before being decoded.

In [None]:
def get_mask(segmentation,height,width):
    """Decode a ``"start length start length ..."`` RLE string into a binary mask.

    The string is first rewritten as COCO-style uncompressed counts (alternating
    run lengths of 0s and 1s, beginning with a run of 0s) and then decoded with
    ``pycocotools.mask``.

    Parameters
    ----------
    segmentation : str
        Space-separated integers forming (start, length) pairs. Starts are
        1-based positions in the flattened mask and the pairs must be in
        increasing, non-overlapping order. An empty string or a non-string
        value (e.g. NaN for "no annotation") yields an all-zero mask.
    height, width : int
        Passed to pycocotools as ``size=[height, width]``; ``height * width``
        is the total number of pixels.

    Returns
    -------
    numpy.ndarray
        2-D array of shape ``(width, height)``: the decoded ``(height, width)``
        mask, transposed.

    Raises
    ------
    ValueError
        If the string has an odd number of values, or the runs overlap, are out
        of order, have a negative length, or extend past ``height * width``.
    """
    # No annotation: return an empty mask with the same shape as the normal result.
    if not isinstance(segmentation, str) or not segmentation.strip():
        return np.zeros((width, height), dtype=np.uint8)

    values = list(map(int, segmentation.split()))
    if len(values) % 2:
        raise ValueError(
            'segmentation must hold (start, length) pairs, got {} values'.format(len(values))
        )

    total = height * width
    counts_01 = []
    covered = 0  # number of pixels already accounted for by earlier runs
    for start, length in zip(values[0::2], values[1::2]):
        # Starts are 1-based, so (start - 1) pixels precede the run.
        gap = (start - 1) - covered
        if gap < 0 or length < 0:
            raise ValueError('segmentation runs must be ordered and non-overlapping')
        counts_01.append(gap)      # run of 0s before this segment
        counts_01.append(length)   # run of 1s
        covered = (start - 1) + length

    # Trailing 0s. The previous formula, total - (start + length), ignored the
    # 1-based start and came out one short, so the counts summed to total - 1
    # and the last pixel of the decoded buffer was never written.
    trailing = total - covered
    if trailing < 0:
        raise ValueError('segmentation runs extend past height * width')
    counts_01.append(trailing)

    rle = maskUtils.frPyObjects([{'counts':counts_01,'size':[height,width]}], height,width)
    m = maskUtils.decode(rle)
    m = np.squeeze(m)  # drop the trailing "number of masks" axis
    # pycocotools lays pixels out column by column; transposing turns that into
    # a row-by-row reading of the same flat sequence.
    return m.T

In [None]:

def plot_(image,masks,cols,cats):
    """Show an image, the union of its masks, and per-class colour overlays.

    Draws a 1x3 figure: the image itself, a white-on-black picture of every
    pixel that equals 1 in any mask, and the image again with each mask drawn
    on top as a half-transparent layer in its own colour.

    Parameters
    ----------
    image : array-like
        Image passed directly to ``imshow``.
    masks : sequence of 2-D arrays
        Binary masks; pixels equal to 1 are treated as foreground. May be
        empty, in which case the middle panel is blank and no overlay is drawn.
    cols : sequence of RGB triples
        One colour per mask, paired with ``masks`` by position. Values may be
        given on a 0-255 scale or a 0-1 scale.
    cats : object
        Shown in the title of the overlay panel (typically the class names).

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    fig, ax = plt.subplots(1,3, figsize=(12,16))

    ax[0].set_title('Image')
    ax[0].imshow(image)

    ax[1].set_title('Mask')
    # Size the union canvas from the first mask, or from the image when there
    # are no masks at all (previously masks[0] raised IndexError).
    rows, columns = masks[0].shape[:2] if len(masks) else np.shape(image)[:2]
    union = np.zeros((rows, columns, 3))
    for mask in masks:
        union[mask == 1] = 1
    ax[1].imshow(union)

    ax[2].set_title('overlays {}'.format(cats))
    ax[2].imshow(image)

    for mask,col in zip(masks,cols):
        rgb = np.asarray(col, dtype=float)[:3]
        # imshow expects float RGB(A) in [0, 1]. Feeding 0-255 values relied on
        # matplotlib clipping them (with a warning) and distorted any colour
        # that was not fully saturated, so rescale them here.
        if rgb.max() > 1.0:
            rgb = rgb / 255.0
        overlay = np.empty((mask.shape[0], mask.shape[1], 4))
        overlay[..., :3] = rgb
        overlay[..., 3] = mask * 0.5  # alpha: 0.5 inside the mask, 0 elsewhere
        ax[2].imshow(overlay)

    plt.show()
    



# Overlay colour (RGB on a 0-255 scale) for each annotated class.
color_dict = {
    'large_bowel':[255,255,0], #yellow
    'small_bowel':[255, 0, 0], #red
    'stomach':[  0, 0, 255 ] #blue
}

# Show ten randomly chosen annotated slices, each with all of its class masks.
for i in range(0,10):
    # np.random.randint excludes the upper bound, so X.shape[0] (not
    # X.shape[0] - 1) is needed for the last row to be eligible.
    idx = np.random.randint(0, X.shape[0])
    sample = X.iloc[idx]

    # Every annotated row (one per class) that belongs to the same slice.
    # Selecting from X directly avoids feeding index labels to a positional
    # .iloc lookup on train_df_merged.
    slice_rows = X[X['id'] == sample['id']]

    masks = []
    cols = []
    cats = []
    for _, slice_ in slice_rows.iterrows():
        mask = get_mask(slice_['segmentation'],int(slice_['height']),int(slice_['width']))
        masks.append(mask)
        cols.append(color_dict[slice_['class']])
        cats.append(slice_['class'])

    # All rows share the sampled slice's image; read it via `sample` rather than
    # the leftover inner-loop variable, and close the file once it is loaded.
    with Image.open(sample['file_path']) as scan:
        image = np.array(scan)
    plot_(image,masks,cols,cats)