In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from PIL import Image
tqdm.pandas()
from IPython.display import FileLink
from IPython import display

# Prepare Dataframe

In [None]:
class CFG:
    """Static configuration: filesystem locations of the training data."""
    # Root directory under which the per-case scan folders are looked up.
    train_dir = "/kaggle/input/uw-madison-gi-tract-image-segmentation/train"
    # CSV file that is read with pd.read_csv to build train_df.
    train_df_csv_path = "/kaggle/input/uw-madison-gi-tract-image-segmentation/train.csv"
# Load the training table from the CSV configured in CFG.
train_df = pd.read_csv(CFG.train_df_csv_path)

# Each id is underscore-separated: split it once, then keep the first token
# as the case number and the second token as the day number.
_id_tokens = train_df["id"].apply(lambda raw_id: str(raw_id).strip().split("_"))
train_df["case"] = _id_tokens.apply(lambda tokens: tokens[0])
train_df["day"] = _id_tokens.apply(lambda tokens: tokens[1])
# path to scans dir
def return_scans_dir(row):
    """Build the scans directory path for one dataframe row.

    Args:
        row: Mapping-like object providing the "case" and "day" entries.

    Returns:
        The path ``<CFG.train_dir>/<case>/<case>_<day>/scans`` assembled
        with os.path.join.
    """
    case, day = row["case"], row["day"]
    return os.path.join(CFG.train_dir, case, f"{case}_{day}", "scans")

# Row-wise: compute the scans directory of every row.
train_df["scans_dir"] = train_df.apply(return_scans_dir, axis=1)
# Sorted directory listings already read by return_slice_path, keyed by
# scans_dir. Many rows share one directory, so each one is listed only once.
# NOTE: entries are never refreshed; clear this dict if the files on disk change.
_SCANS_LISTING_CACHE = {}

def return_slice_path(row):
    """Find the scan file that belongs to one dataframe row.

    The slice identifier is the last "_"-separated token of ``row["id"]``.
    A file whose name (without extension) has a "_"-separated token exactly
    equal to that identifier is preferred, so that e.g. identifier "1" does
    not resolve to "slice_11_...". If no file matches exactly, the first file
    whose name merely contains the identifier is used.

    Directory listings are sorted, which makes the result deterministic
    (os.listdir returns entries in arbitrary order).

    Args:
        row: Mapping-like object providing the "scans_dir" and "id" entries.

    Returns:
        Full path of the matching file, or np.nan when no file name in the
        directory contains the identifier.

    Raises:
        OSError: Propagated from os.listdir when scans_dir cannot be listed.
    """
    scans_dir = row["scans_dir"]
    slide_id = str(row["id"]).strip().split("_")[-1]

    files = _SCANS_LISTING_CACHE.get(scans_dir)
    if files is None:
        files = sorted(os.listdir(scans_dir))
        _SCANS_LISTING_CACHE[scans_dir] = files

    # First pass: exact token match.
    for file in files:
        if slide_id in os.path.splitext(file)[0].split("_"):
            return os.path.join(scans_dir, file)
    # Second pass: plain substring match, for names that do not follow the
    # "_"-separated layout.
    for file in files:
        if slide_id in file:
            return os.path.join(scans_dir, file)
    return np.nan

# Resolve the slice file of every row (progress_apply shows a tqdm bar), then
# keep only the part after the last "/". Rows without a match hold NaN in
# slice_path, which str() turns into the name "nan".
train_df["slice_path"] = train_df.progress_apply(return_slice_path, axis=1)
train_df["slice_name"] = train_df["slice_path"].apply(
    lambda path: str(path).strip().rsplit("/", 1)[-1]
)
def return_slice_shape(name):
    """Parse the two integer size fields out of a slice file name.

    ".png" is removed from the name, the rest is split on "_", and the
    tokens at positions 2 and 3 are converted to int.

    Args:
        name: Slice file name such as "slice_0001_266_266_1.50_1.50.png".

    Returns:
        Tuple ``(int(token 2), int(token 3))``.
    """
    tokens = name.replace(".png", "").strip().split("_")
    return tuple(int(tokens[position]) for position in (2, 3))

def return_slice_spacing(name):
    """Parse the two floating-point spacing fields out of a slice file name.

    ".png" is removed from the name, the rest is split on "_", and the
    tokens at positions 4 and 5 are converted to float.

    Args:
        name: Slice file name such as "slice_0001_266_266_1.50_1.50.png".

    Returns:
        Tuple ``(float(token 4), float(token 5))``.
    """
    tokens = name.replace(".png", "").strip().split("_")
    return tuple(float(tokens[position]) for position in (4, 5))
    
# Derive the size and spacing tuples from every slice file name.
train_df["size"] = train_df["slice_name"].apply(return_slice_shape)
train_df["spacing"] = train_df["slice_name"].apply(return_slice_spacing)
train_df = train_df.reset_index(drop=True)

In [None]:
# Show the prepared dataframe as this cell's output.
train_df

# RLE Decoder

In [None]:
def rle2mask(rle, shape, fill=1):
    """Decode a run-length-encoded string into a 2-D uint8 mask.

    Args:
        rle: Whitespace-separated integers read as alternating
            (start, length) pairs; starts are 1-based positions into the
            flattened mask.
        shape: Pair of ints. The flat buffer holds ``shape[0] * shape[1]``
            elements and is reshaped to ``(shape[1], shape[0])`` -- note the
            swapped order.
            NOTE(review): the notebook passes the tuple parsed from the file
            name, presumably (width, height), which would make the result
            (height, width) -- confirm.
        fill: Value written at every position covered by a run.

    Returns:
        C-contiguous np.uint8 array of shape ``(shape[1], shape[0])``;
        positions not covered by any run are 0.
    """
    dim_first, dim_second = shape
    tokens = rle.split()
    # Even positions are run starts (shifted to 0-based), odd ones run lengths.
    run_starts = np.asarray(tokens[0::2], dtype=int) - 1
    run_lengths = np.asarray(tokens[1::2], dtype=int)

    flat = np.zeros(dim_first * dim_second, dtype=np.uint8)
    for begin, count in zip(run_starts, run_lengths):
        flat[begin:begin + count] = fill

    return np.ascontiguousarray(flat.reshape(dim_second, dim_first))

# Plot scans with segmentation masks

In [None]:
def plot_images(df, columns=8, size=2.75):
    '''
    Plots every scan listed in df on a grid, overlays its segmentation mask
    where one exists, saves the figure and shows a download link.

    Args:
        df: Dataframe with the columns 'id', 'slice_path', 'segmentation'
            and 'size'. A 'segmentation' value is decoded with rle2mask only
            when it is a string; any other value (e.g. NaN) means "no mask".
        columns: Number of grid columns (default 8).
        size: Edge length, in inches, of each grid cell (default 2.75).

    Returns:
        None. The figure is written to '<id of first row>.png' in the
        current working directory. An empty df prints a notice and returns
        without creating a figure.

    Raises:
        ValueError: If columns is smaller than 1.
    '''
    if columns < 1:
        raise ValueError("columns must be at least 1")

    df = df.reset_index(drop=True)
    n_images = len(df)
    if n_images == 0:
        # No first row to name the output file after, and nothing to draw.
        print("plot_images: nothing to plot (empty dataframe).")
        return

    # Ceiling division so a partially filled last row is still drawn.
    rows = (n_images + columns - 1) // columns
    fig = plt.figure(figsize=(size * columns, size * rows))

    for idx in range(n_images):
        ax = fig.add_subplot(rows, columns, idx + 1)
        ax.axis('off')

        # Load scan and plot it; the context manager closes the file handle.
        with Image.open(df.loc[idx, 'slice_path']) as scan:
            image = np.array(scan).astype("uint16")
        peak = image.max()
        # Scale to [0, 1]; an all-zero scan is left as zeros instead of 0/0.
        image = image / peak if peak > 0 else image.astype(float)
        ax.imshow(image, cmap='gray', interpolation='none')

        # If segmentation exists, then plot it
        segmentation = df.loc[idx, 'segmentation']
        if isinstance(segmentation, str):
            mask = rle2mask(segmentation, df.loc[idx, 'size'])
            ax.imshow(mask, cmap='Spectral_r', interpolation='none', alpha=0.35)

    # Save before showing: some backends release the figure once it is shown.
    out_path = str(df.loc[0, 'id']) + '.png'
    fig.savefig(out_path)
    plt.show()
    # Link to download the plot
    display.display(FileLink(out_path))

In [None]:
# Visualize the full scan of case41_day0 with its mask of 'stomach'
case, day, label = 'case41', 'day0', 'stomach'

# Keep only the rows of that case, day and class, then plot them.
plot_images(
    train_df.loc[
        train_df['case'].eq(case)
        & train_df['day'].eq(day)
        & train_df['class'].eq(label)
    ]
)