In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
'''for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))'''

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install -q timm
!pip install -q tqdm
!pip install -q pytorch-lightning

In [None]:
import timm
from tqdm.notebook import tqdm
import pytorch_lightning as pl

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torch.utils.data import random_split

import torchvision
from torchvision.utils import make_grid
from torchvision import transforms

import cv2
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

# Sklearn
from sklearn.model_selection import StratifiedKFold, KFold, StratifiedGroupKFold

from PIL import Image

In [None]:
# Root directory of the competition dataset; every other input path is derived from it.
BASE_PATH  = '../input/uw-madison-gi-tract-image-segmentation'

# Build the CSV location from BASE_PATH so the dataset root is declared only once.
df = pd.read_csv(os.path.join(BASE_PATH, 'train.csv'))

# Preview the first rows instead of rendering the whole frame.
df.head()

In [None]:
class CFG:
    """Namespace of experiment settings, accessed as class attributes (``CFG.lr`` ...).

    The attributes are evaluated once, top to bottom, when the class body runs,
    so later entries may reuse earlier ones (``valid_bs``, ``T_max`` and
    ``n_accumulate`` are derived from ``train_bs`` / ``epochs``).
    NOTE(review): none of these values are read in the cells visible here;
    the meanings given below are inferred from the names - confirm against
    the training code.
    """
    seed          = 101
    debug         = False # set debug=False for Full Training
    exp_name      = 'Baselinev2'
    comment       = 'unet-efficientnet_b1-224x224-aug2-split2'
    model_name    = 'Unet'
    backbone      = 'efficientnet-b1'
    train_bs      = 128
    # Twice the training batch size (256 with the value above).
    valid_bs      = train_bs*2
    img_size      = [224, 224]
    epochs        = 15
    lr            = 2e-3
    scheduler     = 'CosineAnnealingLR'
    min_lr        = 1e-6
    # int(30000 / 128 * 15) + 50 == 3565 with the values above.
    # NOTE(review): 30000 presumably approximates the number of training
    # samples, making this a step count - TODO confirm.
    T_max         = int(30000/train_bs*epochs)+50
    T_0           = 25
    warmup_epochs = 0
    wd            = 1e-6
    # 32 // 128 == 0, so this evaluates to 1 for the current train_bs.
    n_accumulate  = max(1, 32//train_bs)
    n_fold        = 5
    num_classes   = 3
    # First CUDA device when torch reports one available, otherwise the CPU.
    device        = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
from collections import OrderedDict

# Split every id of the form "case<case>_day<day>_slice_<slice>" into its
# numeric parts in one vectorised pass. A regular expression is used instead
# of str.strip("case") / str.strip("day"), which strip a *set of characters*
# rather than a prefix, and instead of a Python-level loop over all rows.
id_parts = df["id"].str.extract(
    r"^case(?P<case>\d+)_day(?P<day>\d+)_slice_(?P<slice>\d+)$"
)

# Fail loudly on ids that do not follow the expected layout rather than
# carrying NaN values into the integer columns.
malformed = id_parts.isna().any(axis=1)
if malformed.any():
    raise ValueError(
        "Unexpected id format, e.g. {!r}".format(df.loc[malformed, "id"].iloc[0])
    )

for part in ("case", "day", "slice"):
    df[part] = id_parts[part].astype(int)

# Placeholder columns; they are filled in later from the image files on disk.
df["height"] = ""
df["width"] = ""
df["file_path"] = ""

# A missing segmentation becomes an empty string so that the RLE length
# (and therefore the `empty` flag) is defined for every row.
df['segmentation'] = df.segmentation.fillna('')
df['rle_len'] = df.segmentation.str.len() # length of each rle mask
df['empty'] = (df.rle_len==0) # empty masks

df.head()

In [None]:
# Number of rows with and without an annotation (empty == True means no RLE mask).
ax = df['empty'].value_counts().plot.bar(rot=0, title="Rows with an empty segmentation")
ax.set_xlabel("empty")
ax.set_ylabel("number of rows");

In [None]:
# Index every training PNG on disk: one record per image holding the case,
# day and slice numbers, the two size fields encoded in the file name and the
# path of the file itself.
IMAGE_COLUMNS = ["case", "day", "slice", "height", "width", "file_path"]

image_records = []
for dirname, _, filenames in os.walk(BASE_PATH):
    if "train" not in dirname:
        continue

    for filename in filenames:
        if not filename.endswith(".png"):
            continue

        # The directory two levels above the file is named "case<N>_day<M>".
        case_day = os.path.basename(os.path.dirname(dirname))
        case_token, day_token = case_day.split("_")[:2]

        # Drop only the extension (str.strip(".png") would strip a set of
        # characters from both ends), then split e.g.
        # "slice_0001_266_266_1.50_1.50" into its underscore-separated fields.
        fields = os.path.splitext(filename)[0].split("_")

        image_records.append({
            "case": int(case_token[len("case"):]),
            "day": int(day_token[len("day"):]),
            "slice": int(fields[1]),
            # NOTE(review): the competition's data description appears to list
            # the width before the height in the file name; the field order
            # used here is kept as it was - confirm on a non-square image.
            "height": int(fields[2]),
            "width": int(fields[3]),
            "file_path": os.path.join(dirname, filename),
        })

train_img_df = pd.DataFrame(image_records, columns=IMAGE_COLUMNS)
train_img_df.head()

In [None]:
# Attach height / width / file_path to every annotation row with a single
# merge on (case, day, slice) instead of running one DataFrame.query per row.
join_keys = ["case", "day", "slice"]
image_columns = ["height", "width", "file_path"]

# Remember the current column layout so the merge does not reorder the frame.
column_order = list(df.columns) + [c for c in image_columns if c not in df.columns]

# Keep the first image found for each key, as the row-wise lookup did.
image_lookup = (
    train_img_df[join_keys + image_columns]
    .drop_duplicates(subset=join_keys, keep="first")
)

df = (
    df.drop(columns=image_columns, errors="ignore")
      .merge(image_lookup, on=join_keys, how="left")
)[column_order]

# The row-wise lookup failed with an IndexError when an annotation had no
# image on disk; keep failing (with the same exception type) instead of
# silently leaving NaN values behind.
unmatched = df["file_path"].isna()
if unmatched.any():
    raise IndexError(
        "No image file found for {} annotation row(s), e.g. id {!r}".format(
            int(unmatched.sum()), df.loc[unmatched, "id"].iloc[0]
        )
    )

In [None]:
# Preview the first rows only; rendering the whole frame adds nothing here.
df.head()

# Utility
## Mask

In [None]:
def id2mask(id_):
    """Build the 3-channel mask of one slice from the module-level ``df``.

    Parameters
    ----------
    id_ : value compared against ``df['id']`` to select the rows of the slice.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(height, width, 3)``; channel 0 is
        ``large_bowel``, channel 1 ``small_bowel`` and channel 2 ``stomach``.
        Pixels decoded from a row's RLE string are 1, all others 0.

    Raises
    ------
    IndexError
        If no row of ``df`` has the requested id.

    Notes
    -----
    Relies on the global ``df`` (columns ``id``, ``height``, ``width``,
    ``class``, ``segmentation``) and on ``rle_decode`` being defined by the
    time the function is called.
    """
    idf = df[df['id'] == id_]
    wh = idf[['height', 'width']].iloc[0]
    # The size columns may be stored with object dtype, so cast explicitly.
    shape = (int(wh.height), int(wh.width), 3)
    mask = np.zeros(shape, dtype=np.uint8)
    for i, class_ in enumerate(['large_bowel', 'small_bowel', 'stomach']):
        # Iterate over the matching rows rather than squeezing them to a
        # scalar: squeeze() returns a Series when a class has several rows,
        # which made the pd.isna() check ambiguous.
        for rle in idf.loc[idf['class'] == class_, 'segmentation']:
            # Skip missing (NaN) and empty annotations - nothing to decode.
            if isinstance(rle, str) and rle.strip():
                mask[..., i] |= rle_decode(rle, shape[:2])
    return mask

def rgb2gray(mask):
    """Collapse an (H, W, C) channel mask into an (H, W) label map.

    A zero-filled channel is prepended along the last axis so that a pixel
    with no channel set resolves to label 0, while channel ``k`` of the input
    resolves to label ``k + 1``.
    """
    with_background = np.pad(mask, ((0, 0), (0, 0), (1, 0)))
    return np.argmax(with_background, axis=-1)

def gray2rgb(mask):
    """Expand a label map (values 0..3) into a 3-channel one-hot mask.

    Inverse of ``rgb2gray``: label 0 (background) maps to all zeros and label
    ``k`` (1..3) sets channel ``k - 1``. Implemented with NumPy only, because
    TensorFlow (previously used via ``tf.keras.utils.to_categorical``) is not
    imported anywhere in this notebook.

    Parameters
    ----------
    mask : numpy.ndarray
        Array of labels in ``{0, 1, 2, 3}``. As with ``to_categorical``, a
        trailing axis of length 1 is dropped before encoding.

    Returns
    -------
    numpy.ndarray
        Array of shape ``labels.shape + (3,)`` with the dtype of ``mask``.

    Raises
    ------
    IndexError
        If a label lies outside ``0..3``.
    """
    labels = np.asarray(mask)
    if labels.ndim > 1 and labels.shape[-1] == 1:
        labels = labels[..., 0]
    # Row k of the 4x4 identity matrix is the one-hot code of label k.
    one_hot = np.eye(4, dtype=labels.dtype)[labels.astype(np.intp)]
    # Drop the background channel so that only the three classes remain.
    return one_hot[..., 1:]

In [None]:
# ref: https://www.kaggle.com/code/awsaf49/uwmgi-unet-train-pytorch#%F0%9F%93%92-Notebooks

def load_img(path):
    """Read the image stored at ``path`` and return it as a float32 array.

    The file is opened in a context manager so that the underlying file
    handle is released as soon as the pixel data has been copied out.
    """
    with Image.open(path) as im:
        return np.array(im).astype(np.float32)

def load_msk(path):
    """Load the ``.npy`` mask stored at ``path`` as float32, scaled by 255."""
    return np.load(path).astype('float32') * 255.0
    

def show_img(img, mask=None):
    """Draw ``img`` on the current matplotlib axes, optionally with a mask overlay.

    Parameters
    ----------
    img : image data accepted by ``plt.imshow``.
    mask : optional array drawn on top of ``img`` at 50% opacity after being
        multiplied by 255 (presumably so that 0/1 masks render at full
        intensity - TODO confirm the expected value range). When given, a
        legend naming the three organ classes is added.
    """
    # The CLAHE object that used to be created here was never applied to the
    # image, so it has been removed.
    plt.imshow(img)#, cmap='bone')

    if mask is not None:
        plt.imshow(mask*255, alpha=0.5)
        # One coloured patch per class for the legend (red, green, blue).
        handles = [Rectangle((0,0),1,1, color=_c) for _c in [(0.667,0.0,0.0), (0.0,0.667,0.0), (0.0,0.0,0.667)]]
        labels = ["Large Bowel", "Small Bowel", "Stomach"]
        plt.legend(handles,labels)
    plt.axis('off')

## RLE

In [None]:
# ref: https://www.kaggle.com/paulorzp/run-length-encode-and-decode
def rle_decode(mask_rle, shape):
    '''
    Decode a run-length string into a binary mask.

    mask_rle: string of whitespace-separated "start length" pairs, where
              start is a 1-based index into the flattened mask
    shape: (height, width) of the array to return
    Returns a uint8 numpy array of that shape, 1 - mask, 0 - background
    '''
    tokens = mask_rle.split()
    run_starts = np.asarray(tokens[0::2], dtype=int) - 1  # to 0-based offsets
    run_lengths = np.asarray(tokens[1::2], dtype=int)
    run_stops = run_starts + run_lengths

    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, stop in zip(run_starts, run_stops):
        flat[begin:stop] = 1
    return flat.reshape(shape)


# ref.: https://www.kaggle.com/stainsby/fast-tested-rle
def rle_encode(img):
    '''
    Encode a binary mask as a run-length string.

    img: numpy array, 1 - mask, 0 - background
    Returns the "start length" pairs (1-based starts into the flattened
    array) joined by single spaces
    '''
    # Pad with a zero on both sides so runs touching either end are delimited.
    padded = np.concatenate([[0], img.flatten(), [0]])
    # Positions where the value changes mark, alternately, run starts and run ends.
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn every run end into a run length.
    edges[1::2] -= edges[::2]
    return ' '.join(map(str, edges))

In [None]:
# Preview the first rows only; rendering the whole frame adds nothing here.
df.head()

# Create Mask Dataset

In [None]:
# Write one ".mask.npy" file per image under ./train_masks/, mirroring the
# image's last two directory levels, and record its location in df['mask_path'].
dest_path = "./train_masks/"

for index, row in tqdm(df.iterrows(), total=len(df)):
    file_path = row["file_path"]

    # Use the two directories directly above the file. Indexing from the end
    # keeps this independent of how many components the dataset root has
    # (the previous fixed index [5:-1] assumed a root exactly three levels deep).
    dest_mask_path = os.path.join(dest_path, *file_path.split("/")[-3:-1])
    os.makedirs(dest_mask_path, exist_ok=True)

    mask_name = os.path.basename(file_path).replace(".png", ".mask.npy")
    mask_path = os.path.join(dest_mask_path, mask_name)

    # Rows that share an image map to the same file; build the mask only once.
    if not os.path.exists(mask_path):
        np.save(mask_path, id2mask(row["id"]))

    df.at[index, 'mask_path'] = mask_path

In [None]:
# Visual sanity check: overlay the saved mask of one annotated slice on its scan.
annotated_rows = df.query("empty == False")
file_path, mask_path = annotated_rows.iloc[10]["file_path"], annotated_rows.iloc[10]["mask_path"]

# load_img performs the same open-and-convert-to-float32 step that was inlined
# here. The mask is read as saved with np.load because show_img applies its
# own *255 scaling to the overlay.
im = load_img(file_path)
mask = np.load(mask_path)
show_img(im, mask)

In [None]:
# Persist the enriched annotation table (without the index column) to the working directory.
df.to_csv(
    './train_processed.csv',
    encoding='utf-8',
    index=False,
)

In [None]:
# Visual sanity check: overlay the saved mask of one annotated slice on its scan.
annotated_rows = df.query("empty == False")
file_path, mask_path = annotated_rows.iloc[10]["file_path"], annotated_rows.iloc[10]["mask_path"]

# load_img performs the same open-and-convert-to-float32 step that was inlined
# here. The mask is read as saved with np.load because show_img applies its
# own *255 scaling to the overlay.
im = load_img(file_path)
mask = np.load(mask_path)
show_img(im, mask)