In [None]:
import matplotlib.pyplot as plt
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from glob import glob
import cv2
import os

In [None]:
# https://www.kaggle.com/mushfirat rle decode
def rle2mask(mask_rle, shape):
    """Decode a run-length-encoded string into a binary mask, filling column by column.

    Args:
        mask_rle (str): whitespace-separated "start length" pairs. Starts are
            1-indexed positions into the flattened mask. An empty or
            whitespace-only string decodes to an all-zero mask.
        shape (tuple of ints): shape of the array to return; only the first
            two entries are used.

    Returns:
        np.ndarray: uint8 array of shape (shape[0], shape[1]) with 1 inside
        the encoded runs and 0 elsewhere.

    Raises:
        ValueError: if a token is not an integer, or if the tokens do not
            form complete (start, length) pairs.
    """
    # split() with no argument tolerates empty strings, repeated spaces and
    # leading/trailing whitespace; split(" ") would yield "" tokens that
    # cannot be converted to int.
    tokens = np.asarray(mask_rle.split(), dtype=int)
    if tokens.size % 2:
        # Without this check numpy may silently broadcast mismatched
        # starts/lengths arrays and produce a wrong mask.
        raise ValueError("mask_rle must contain an even number of values (start/length pairs)")

    starts = tokens[0::2] - 1  # convert 1-indexed starts to 0-indexed
    lengths = tokens[1::2]

    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for low, high in zip(starts, starts + lengths):
        flat[low:high] = 1

    # The runs are laid out over a (shape[1], shape[0]) grid and then
    # transposed, so consecutive run positions go down the columns of the
    # returned (shape[0], shape[1]) array.
    return flat.reshape((shape[1], shape[0])).T

# https://www.kaggle.com/dschettler8845 rle decode
def rle_decode(mask_rle, shape, color=1):
    """Turn a run-length-encoded string into a dense mask array.

    Args:
        mask_rle (str): whitespace-separated "start length" pairs, where each
            start is a 1-indexed position into the flattened image
        shape (tuple of ints): (height, width) or (height, width, depth) of
            the array to return
        color: value written into every encoded position (defaults to 1)

    Returns:
        Mask (np.array, float32) of the requested shape
            - `color` inside the encoded runs
            - 0 everywhere else

    """
    # Tokens alternate start, length, start, length, ...
    tokens = np.array(mask_rle.split(), dtype=int)
    run_starts = tokens[0::2] - 1          # shift to 0-indexed positions
    run_stops = run_starts + tokens[1::2]  # exclusive end of each run

    # Runs index a flattened image, so height and width are collapsed into one
    # axis (any depth axis is kept as-is) while the runs are painted.
    if len(shape) == 3:
        height, width, depth = shape
        flat_shape = (height * width, depth)
    else:
        height, width = shape
        flat_shape = (height * width,)
    canvas = np.zeros(flat_shape, dtype=np.float32)

    for begin, stop in zip(run_starts, run_stops):
        canvas[begin:stop] = color

    # Restore the caller's requested shape
    return canvas.reshape(shape)


In [None]:
# Get shapes and filepaths and other necessary stuff
def get_filepath_from_partial_identifier(_ident, file_list):
    """Return the first entry of `file_list` that contains `_ident`.

    Raises IndexError when no entry contains `_ident`.
    """
    matching_paths = list(filter(lambda path: _ident in path, file_list))
    return matching_paths[0]

def df_preprocessing(df, globbed_file_list, is_test=False):
    """Derive per-slice metadata columns and collapse to one row per slice id.

    Args:
        df (pd.DataFrame): must have an "id" column formatted as
            "<case>_<day>_<slice>" (e.g. "case123_day20_slice_0001"). Unless
            `is_test` is set it must also have "class" and "segmentation".
        globbed_file_list (list of str): non-empty list of scan paths shaped
            like ".../<case>/<case>_<day>/scans/<slice>_<a>_<b>_<c>_<d>.png".
            The four trailing filename fields are read as slice_h, slice_w,
            px_spacing_h and px_spacing_w, in that order.
        is_test (bool): when True, skip the per-class RLE merge and keep the
            "class" column instead.

    Returns:
        pd.DataFrame: a new frame with the derived columns in a fixed order.
        Rows whose id has no matching file are dropped (inner merge). The
        input `df` is left unmodified.
    """
    # Work on a copy: the column assignments below would otherwise be written
    # onto the caller's DataFrame.
    df = df.copy()

    # 1-3. Split the id into case / day / slice (at most two splits, so the
    #      slice part keeps its own underscore, e.g. "slice_0001").
    id_parts = df["id"].str.split("_", n=2)
    df["case_id_str"] = id_parts.str[0]
    df["case_id"] = df["case_id_str"].str.replace("case", "", regex=False).astype(int)
    df["day_num_str"] = id_parts.str[1]
    df["day_num"] = df["day_num_str"].str.replace("day", "", regex=False).astype(int)
    df["slice_id"] = id_parts.str[2]

    # 4. Get full file paths for the representative scans.
    #    The dataset root is taken from the first globbed path by stripping its
    #    last four components (<case>/<case>_<day>/scans/<file>).
    train_root = globbed_file_list[0].rsplit("/", 4)[0]
    df["_partial_ident"] = (train_root + "/" +
                            df["case_id_str"] + "/" +                           # .../case###/
                            df["case_id_str"] + "_" + df["day_num_str"] +       # .../case###_day##/
                            "/scans/" + df["slice_id"])                         # .../slice_####
    scan_lookup = pd.DataFrame({
        # Drop the four trailing "_"-separated filename fields to get the join key
        "_partial_ident": [path.rsplit("_", 4)[0] for path in globbed_file_list],
        "f_path": globbed_file_list,
    })
    df = df.merge(scan_lookup, on="_partial_ident").drop(columns=["_partial_ident"])

    # 5-6. Slice dimensions (int) and pixel spacing (float) from the filename,
    #      with the 4-character extension stripped first.
    # NOTE(review): the first size field is stored as slice_h and the second as
    # slice_w - confirm this matches the dataset's documented field order.
    name_fields = df["f_path"].str[:-4].str.rsplit("_", n=4)
    df["slice_h"] = name_fields.str[1].astype(int)
    df["slice_w"] = name_fields.str[2].astype(int)
    df["px_spacing_h"] = name_fields.str[3].astype(float)
    df["px_spacing_w"] = name_fields.str[4].astype(float)

    if not is_test:
        # 7. Merge the per-class rows into a single row per id (the
        #    segmentation RLE is the only information that differs between them).
        class_prefixes = (("large_bowel", "lb"), ("small_bowel", "sb"), ("stomach", "st"))
        # Build all per-class frames before merging so each is taken from the
        # same pre-merge rows.
        per_class_frames = [
            df.loc[df["class"] == class_name, ["id", "segmentation"]]
              .rename(columns={"segmentation": prefix + "_seg_rle"})
            for class_name, prefix in class_prefixes
        ]
        for class_frame in per_class_frames:
            df = df.merge(class_frame, on="id", how="left")
        df = df.drop_duplicates(subset=["id"]).reset_index(drop=True)

        for _, prefix in class_prefixes:
            df[prefix + "_seg_flag"] = df[prefix + "_seg_rle"].notna()
        df["n_segs"] = sum(df[prefix + "_seg_flag"].astype(int) for _, prefix in class_prefixes)

    # 8. Reorder columns (drops class and segmentation as no longer necessary);
    #    columns that were not created above are skipped.
    new_col_order = ["id", "f_path", "n_segs",
                     "lb_seg_rle", "lb_seg_flag",
                     "sb_seg_rle", "sb_seg_flag",
                     "st_seg_rle", "st_seg_flag",
                     "slice_h", "slice_w", "px_spacing_h",
                     "px_spacing_w", "case_id_str", "case_id",
                     "day_num_str", "day_num", "slice_id"]
    if is_test:
        new_col_order.insert(1, "class")
    new_col_order = [_c for _c in new_col_order if _c in df.columns]

    return df[new_col_order]

# Load the raw annotations under their own name so this cell runs on a fresh
# kernel and can be re-run: feeding an already-processed frame back into
# df_preprocessing would fail because "class"/"segmentation" are dropped.
# NOTE(review): assumes train.csv sits at the root of the dataset directory
# used by the glob below - confirm against the mounted input.
DATA_DIR = "../input/uw-madison-gi-tract-image-segmentation"
train_raw_df = pd.read_csv(DATA_DIR + "/train.csv")

all_train_images = glob(DATA_DIR + "/train/**/*.png", recursive=True)
train_df = df_preprocessing(train_raw_df, all_train_images)
train_df

In [None]:
CASE_AND_DAY = "case36_day0"
N_RLES = 10
SEED = 42  # fixed so the same slices are sampled on every run

# (column prefix, panel label) for each annotated organ
ORGANS = (("lb", "large bowel"), ("sb", "small bowel"), ("st", "stomach"))

# Anchor the match on "<case>_<day>_": a plain substring search for
# CASE_AND_DAY would also select ids such as "case136_day0_...".
candidate_df = train_df[train_df["id"].str.startswith(CASE_AND_DAY + "_") & (train_df["n_segs"] > 0)]

# Never ask for more rows than exist, otherwise sample() raises.
demo_df = (
    candidate_df
    .sample(n=min(N_RLES, len(candidate_df)), random_state=SEED)
    .reset_index(drop=True)
)

for _, row in demo_df.iterrows():
    # The two decoders are called with opposite axis orders on purpose, to
    # compare their outputs side by side.
    mask_shape_1 = (row["slice_h"], row["slice_w"])
    mask_shape_2 = (row["slice_w"], row["slice_h"])

    # Top row: rle2mask, bottom row: rle_decode; one column per organ.
    fig, axes = plt.subplots(2, 3, figsize=(20, 20))
    for col, (prefix, organ) in enumerate(ORGANS):
        rle = row[prefix + "_seg_rle"]
        if pd.isna(rle):
            # No annotation for this organ on this slice: show an empty mask
            # of the same shape each decoder would have been asked for.
            mask_1 = np.zeros(mask_shape_1, dtype=np.uint8)
            mask_2 = np.zeros(mask_shape_2, dtype=np.uint8)
        else:
            mask_1 = rle2mask(rle, shape=mask_shape_1)
            mask_2 = rle_decode(rle, shape=mask_shape_2)

        axes[0, col].imshow(mask_1)
        axes[0, col].set_title("rle2mask - " + organ)
        axes[1, col].imshow(mask_2)
        axes[1, col].set_title("rle_decode - " + organ)

    fig.suptitle(row["id"])
    fig.tight_layout()
    plt.show()
    plt.close(fig)  # release the figure; up to N_RLES are created in this loop