In [None]:
from pathlib import Path
import random
from PIL import Image as Img
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from skimage import color

# Load data

In [None]:
# Root folder of the competition data; its 'train' sub-directory is used as the data directory.
home_dir = Path('/kaggle/input/uw-madison-gi-tract-image-segmentation/')
data_dir = home_dir.joinpath('train')

# Read train.csv into a DataFrame and preview three randomly chosen rows.
df_train = pd.read_csv(home_dir.joinpath('train.csv'))
df_train.sample(n=3)

Extract image properties (e.g. height and width) from the file names.

In [None]:
# Collect the path of every entry four levels below the data directory.
# Materialised as a list (the original lazy map was consumed after one pass).
list_images = [str(p) for p in data_dir.rglob('*/*/*/*')]

# One row per path: the full path, the third-from-last path component
# (used as the case/day key) and the file name itself.
image_properties = pd.DataFrame(
    [(c, c.split('/')[-3], c.split('/')[-1]) for c in list_images],
    columns=['whole_path', 'case_day', 'file'],
)

# Split each file name on '_' once and reuse the tokens instead of
# re-splitting the name in three separate row-wise apply() calls.
name_tokens = image_properties['file'].str.split('_')
image_properties['slice'] = 'slice_' + name_tokens.str[1]
# NOTE(review): tokens 2 and 3 are stored as height and width respectively;
# confirm this order against the dataset's file-naming convention. The order
# is deliberately left unchanged because the mask decoding relies on it.
image_properties['height'] = name_tokens.str[2].astype(int)
image_properties['width'] = name_tokens.str[3].astype(int)
image_properties['id'] = image_properties['case_day'] + '_' + image_properties['slice']

# Re-running this cell used to merge the property columns a second time,
# producing '*_x' / '*_y' duplicates and breaking later look-ups such as
# sample['height']. Dropping previously merged columns first keeps the cell
# idempotent; on a fresh kernel nothing is dropped.
property_columns = image_properties.columns.difference(['id'])
df_train = pd.merge(
    df_train.drop(columns=property_columns, errors='ignore'),
    image_properties,
    on='id',
    how='left',
)
df_train.sample(3)

# Images overview

In [None]:
# Gather every scan path so that a random handful can be previewed.
list_images = list(data_dir.rglob('*/*/*/*'))

nx, ny = 6, 2
fig, axes = plt.subplots(ny, nx, figsize=(24, 9))
images = random.choices(list_images, k=nx*ny)

# axes.flat walks the grid row by row, which is the same placement the
# explicit (num // nx, num % nx) indexing produced.
for ax, img in zip(axes.flat, images):
    # Load the pixels inside a context manager so the file handle is closed.
    with Img.open(img) as scan:
        image = np.array(scan)

    ax.axis('off')
    # Title is the directory two levels above the file. The previous
    # str(...).strip('/scans') treated its argument as a *set of characters*
    # to trim from both ends, not as a suffix, and only happened to work
    # because the directory names end in digits.
    ax.set_title(img.parent.parent.name, color='red')

    # Scale to [0, 1] for display; an all-zero scan is drawn unscaled to avoid 0/0.
    peak = image.max()
    ax.imshow(image / peak if peak > 0 else image)

plt.subplots_adjust(wspace=0.05, hspace=0.05)
plt.show()

# Overlay images with masks

In [None]:
def rle_decode(rle, height, width , fill=255):
    """Decode a run-length-encoded string into a 2-D uint8 mask.

    The string holds whitespace-separated ``start length`` pairs, where
    ``start`` is a 1-based index into the flattened mask. The flat buffer is
    reshaped to ``(width, height)`` and transposed, so flat index ``k`` ends
    up at ``mask[k % height, k // height]`` (column-major order).

    Parameters
    ----------
    rle : str or None or float NaN
        Run-length encoding. ``None``, NaN (e.g. an empty CSV cell read by
        pandas) or an empty string decode to an all-zero mask instead of
        raising.
    height, width : int
        Shape of the returned mask is ``(height, width)``.
    fill : int, default 255
        Value written into the encoded runs; must fit in uint8.

    Returns
    -------
    numpy.ndarray
        C-contiguous uint8 array of shape ``(height, width)``.
    """
    mask = np.zeros(height * width, dtype=np.uint8)

    # A missing annotation has no runs to paint.
    if rle is None or (isinstance(rle, float) and np.isnan(rle)):
        tokens = []
    else:
        tokens = rle.split()

    # Even-positioned tokens are 1-based starts, odd-positioned ones are lengths.
    starts = np.asarray(tokens[0::2], dtype=int) - 1
    lengths = np.asarray(tokens[1::2], dtype=int)
    for begin, run in zip(starts, lengths):
        mask[begin:begin + run] = fill

    # The transpose yields a non-contiguous view; hand back a contiguous copy.
    return np.ascontiguousarray(mask.reshape(width, height).T)

In [None]:
# Keep only the rows that carry a segmentation string.
X = df_train[df_train['segmentation'].notnull()]

nx, ny = 3, 4
fig, axes = plt.subplots(ny, nx, figsize=(20, 15))

for i in range(ny):
    # randrange excludes its upper bound. The previous randint(0, len) is
    # inclusive on both ends and could index one past the last row.
    sample = X.iloc[random.randrange(X.shape[0])]

    # Decode straight to a 0/1 integer mask, then transpose it as before so
    # that it lines up with the image array.
    mask = rle_decode(sample['segmentation'], sample['height'], sample['width'], 1)
    mask = mask.astype(int).T

    # Read the scan once and close the file handle right away.
    with Img.open(sample['whole_path']) as scan:
        image = np.array(scan)

    # Scale to [0, 1] for the overlay; an all-zero scan is left at zero to avoid 0/0.
    peak = image.max()
    image_scaled = image / peak if peak > 0 else image.astype(float)

    # Column titles are only drawn on the first row.
    if i == 0:
        axes[i, 0].set_title("mask", color='red')
        axes[i, 1].set_title("image", color='red')
        axes[i, 2].set_title("mask_image overlay", color='red')

    axes[i, 0].set_ylabel(sample['id'])
    axes[i, 0].imshow(mask)

    axes[i, 1].imshow(image)

    result_image = color.label2rgb(mask, image_scaled)
    axes[i, 2].imshow(result_image)

plt.show()

