# UW-Madison GI Tract Image Segmentation

In [None]:
# libraries
import os
import random
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
tqdm.pandas()

import cv2
from pathlib import Path
from PIL import Image
import matplotlib.pyplot as plt
from skimage import color

In [None]:
class CFG:
    """Filesystem locations of the training data used throughout this notebook."""
    # Root directory of the training scans; return_scans_dir joins the case
    # and day sub-directories onto it.
    train_dir = "/kaggle/input/uw-madison-gi-tract-image-segmentation/train"
    # CSV file that is loaded into the `train` DataFrame.
    train_csv_path = "/kaggle/input/uw-madison-gi-tract-image-segmentation/train.csv"

In [None]:
# Load the training annotations. dropna() discards every row that has a
# missing value in any column, so only fully populated rows remain.
train = pd.read_csv(CFG.train_csv_path).dropna()
# Preview the first rows.
train.head()

In [None]:
# Number of remaining rows for each distinct value of the "class" column.
train["class"].value_counts()

In [None]:
# Split each id on "_": the first field becomes the "case" column and the
# second field becomes the "day" column.
train["case"] = train["id"].apply(lambda raw_id: str(raw_id).strip().split("_")[0])
train["day"] = train["id"].apply(lambda raw_id: str(raw_id).strip().split("_")[1])

# path to scans dir
def return_scans_dir(row):
    """Build the path of the ``scans`` directory for one row.

    The resulting layout is ``<CFG.train_dir>/<case>/<case>_<day>/scans``.

    Args:
        row: Mapping-like row providing the ``"case"`` and ``"day"`` entries.

    Returns:
        The directory path as a string.
    """
    case_id, day_id = row["case"], row["day"]
    return os.path.join(CFG.train_dir, case_id, f"{case_id}_{day_id}", "scans")

# Row-wise: store the scans directory computed by return_scans_dir.
train["scans_dir"] = train.apply(return_scans_dir, axis=1)

def return_slice_path(row):
    """Locate the scan file that belongs to one annotation row.

    The slice identifier is the last ``_``-separated field of ``row["id"]``.
    Files in ``row["scans_dir"]`` are searched in two passes:

    1. an exact match of the identifier against the second ``_``-separated
       field of the file name (the ``slice_<id>_...`` layout that
       ``return_slice_shape`` / ``return_slice_spacing`` also rely on);
    2. if nothing matched exactly, the first file name that merely contains
       the identifier as a substring.

    Matching the exact field first prevents an identifier from accidentally
    matching digits that belong to another part of a file name.

    Args:
        row: Mapping-like row providing the ``"scans_dir"`` and ``"id"``
            entries.

    Returns:
        The full path of the matching file, or ``np.nan`` when no file in the
        directory matches.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``scans_dir`` cannot be
            listed (for example, it does not exist).
    """
    scans_dir = row["scans_dir"]
    slide_id = str(row["id"]).strip().split("_")[-1]
    # os.listdir returns entries in arbitrary order; sorting makes the chosen
    # file deterministic when several names could match.
    files = sorted(os.listdir(scans_dir))

    # Pass 1: exact match on the slice-number field.
    for file in files:
        fields = file.split("_")
        if len(fields) > 1 and fields[1] == slide_id:
            return os.path.join(scans_dir, file)

    # Pass 2: loose substring match, kept for file names that do not follow
    # the "slice_<id>_..." layout.
    for file in files:
        if slide_id in file:
            return os.path.join(scans_dir, file)
    return np.nan

# Resolve the scan file of every row (with a progress bar), then keep only the
# final "/"-separated component as the file name. A row for which no file was
# found holds NaN in "slice_path" and therefore the string "nan" in "slice_name".
train["slice_path"] = train.progress_apply(return_slice_path, axis=1)
train["slice_name"] = train["slice_path"].apply(lambda path: str(path).strip().split("/")[-1])

In [None]:
def return_slice_shape(name):
    """Parse the two integer size fields out of a slice file name.

    After removing ``".png"`` the name is split on ``"_"``; the third and
    fourth fields (indices 2 and 3) are converted to ``int``.

    Args:
        name: File name such as ``"slice_0001_266_266_1.50_1.50.png"``.

    Returns:
        A 2-tuple of ints, in the order the fields appear in the name.
    """
    stem = name.replace(".png", "").strip()
    fields = stem.split("_")
    first_dim = int(fields[2])
    second_dim = int(fields[3])
    return (first_dim, second_dim)

def return_slice_spacing(name):
    """Parse the two float spacing fields out of a slice file name.

    After removing ``".png"`` the name is split on ``"_"``; the fifth and
    sixth fields (indices 4 and 5) are converted to ``float``.

    Args:
        name: File name such as ``"slice_0001_266_266_1.50_1.50.png"``.

    Returns:
        A 2-tuple of floats, in the order the fields appear in the name.
    """
    stem = name.replace(".png", "").strip()
    fields = stem.split("_")
    first_spacing = float(fields[4])
    second_spacing = float(fields[5])
    return (first_spacing, second_spacing)
    
# Derive the per-slice size and spacing tuples from each file name.
train["size"] = train["slice_name"].apply(return_slice_shape)
train["spacing"] = train["slice_name"].apply(return_slice_spacing)

In [None]:
# Preview the frame again, now including the derived columns
# (case, day, scans_dir, slice_path, slice_name, size, spacing).
train.head()

In [None]:
def rle2mask(mask_rle, shape):
    """Decode a run-length-encoded string into a binary mask.

    ``mask_rle`` is a whitespace-separated sequence of ``start length`` pairs.
    Starts are 1-based positions into the flattened mask; each run sets
    ``length`` consecutive positions to 1.

    Args:
        mask_rle: The encoded string. An empty or whitespace-only string
            yields an all-zero mask.
        shape: Two-element sequence. A flat buffer of
            ``shape[0] * shape[1]`` elements is filled, reshaped to ``shape``
            and then transposed.

    Returns:
        A ``uint8`` array of shape ``(shape[1], shape[0])`` holding 1 inside
        the encoded runs and 0 elsewhere.

    Raises:
        ValueError: If the number of tokens is odd (a start without a length)
            or a token is not an integer.
    """
    # split() without arguments tolerates repeated, leading and trailing
    # whitespace and returns [] for an empty string.
    tokens = mask_rle.split()
    if len(tokens) % 2:
        raise ValueError(
            f"RLE string must contain start/length pairs, got {len(tokens)} tokens"
        )
    starts = np.asarray(tokens[0::2], dtype=int)
    lengths = np.asarray(tokens[1::2], dtype=int)

    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    # Starts are 1-based, so shift by one to get the 0-based slice start.
    for low, length in zip(starts - 1, lengths):
        img[low : low + length] = 1
    return img.reshape(shape).T

In [None]:
def plot_slices(num=2):
    """Plot randomly chosen slices next to their segmentation masks.

    Draws ``num`` distinct rows of the global ``train`` frame. Each figure row
    shows three panels: the grayscale slice, the decoded mask, and the slice
    with the masked pixels brightened.

    Args:
        num: Number of rows to draw. ``np.random.choice`` samples without
            replacement, so it raises ``ValueError`` when ``num`` exceeds the
            number of rows in ``train``.
    """
    # squeeze=False keeps `axs` two-dimensional even for num == 1, so the
    # axs[i, j] indexing below always works.
    _fig, axs = plt.subplots(num, 3, figsize=(12, 4 * num + 2), squeeze=False)

    # Sample over every row of the frame.
    row_ids = np.random.choice(len(train), size=num, replace=False)
    for i, row_id in enumerate(row_ids):
        img = cv2.imread(train.slice_path.values[row_id], cv2.IMREAD_GRAYSCALE)
        mask = rle2mask(train.segmentation.values[row_id], shape=train["size"].values[row_id])
        class_name = train['class'].values[row_id]

        axs[i, 0].imshow(img)
        axs[i, 1].imshow(mask)
        # Add 1.5x the brightest pixel value wherever the mask is set so the
        # segmented region stands out against the slice.
        mask_overlay = img + 1.5 * img.max() * mask
        axs[i, 2].imshow(mask_overlay)

        axs[i, 0].set_title(f"slice: {class_name}")
        axs[i, 1].set_title(f"mask: {class_name}")
        axs[i, 2].set_title("slice+mask")
    plt.show()
    
# Show five randomly chosen slices with their masks.
plot_slices(num=5)