In [None]:
#!pip install segmentation_models_pytorch

In [None]:
import numpy as np
import pandas as pd
from glob import glob
import os, shutil # shutil not used
from tqdm import tqdm
#tqdm.pandas()
import time
import copy
import joblib
#from collections import defaultdict
#import gc
from pathlib import Path

import cv2

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

from sklearn.model_selection import StratifiedKFold, KFold, StratifiedGroupKFold # only StratifiedGroupKFold used

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader, random_split
from torch.cuda import amp

#import timm # not used

import albumentations as A

#import rasterio
from joblib import Parallel, delayed

#from colorama import Fore, Back, Style
#c_  = Fore.GREEN
#sr_ = Style.RESET_ALL

#import warnings
#warnings.filterwarnings("ignore")

#os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

#import segmentation_models_pytorch as smp

In [None]:
class CFG:
    """Namespace of experiment constants for this notebook.

    Attributes are evaluated once, top to bottom, when the class body runs, so
    derived values (``valid_bs``, ``T_max``, ``n_accumulate``) are frozen from the
    ``train_bs`` / ``epochs`` values defined above them and do NOT update if those
    are reassigned later.
    """
    # --- run identification -------------------------------------------------
    seed = 101  # seed value; no seeding call is visible in this part of the notebook
    debug = False  # debug switch; not read in the visible cells
    exp_name = 'Baseline'
    comment = 'unet-timm-mobilenetv3_large_100-224x224'  # free-text run label
    # --- model --------------------------------------------------------------
    model_name = 'Unet'
    backbone = 'timm-mobilenetv3_large_100'
    # --- data / batching ----------------------------------------------------
    train_bs = 32
    valid_bs = train_bs  # validation batch size tied to the training batch size
    img_size = [224, 224]  # presumably [height, width] of the network input — TODO confirm order
    # --- optimisation -------------------------------------------------------
    epochs = 15
    lr = 2e-3
    scheduler = 'CosineAnnealingLR'
    min_lr = 1e-6
    # 30000/train_bs*epochs truncated to int, plus 50 (14112 with the values above).
    # Presumably the total number of scheduler steps for ~30000 training samples,
    # with 50 steps of slack — TODO confirm where 30000 comes from.
    T_max = int(30000/train_bs*epochs)+50
    T_0 = 25  # presumably the restart period for a warm-restart scheduler — confirm
    warmup_epochs = 0
    wd = 1e-6  # presumably weight decay — confirm against the optimizer setup
    # Integer ratio 32//train_bs, floored at 1 (evaluates to 1 for train_bs = 32).
    # Presumably the number of gradient-accumulation steps — confirm.
    n_accumulate = max(1, 32//train_bs)
    # --- validation / output ------------------------------------------------
    n_fold = 5
    num_classes = 3  # presumably the three organ channels built by id2mask — confirm
    # CUDA device when torch reports one is available, otherwise CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    thr = 0.45  # presumably the probability threshold for binarising predictions — confirm

In [None]:
def id2mask(id_, df=None):
    """Build the 3-channel mask of one image id from its RLE annotations.

    Parameters
    ----------
    id_ : object
        Value matched against ``df['id']``.
    df : pandas.DataFrame
        Annotation table with the columns ``id``, ``class``, ``segmentation``,
        ``height`` and ``width``. Required in practice; the ``None`` default is
        kept only so the signature stays unchanged.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(height, width, 3)``. Channel order is
        large_bowel, small_bowel, stomach. A channel stays all-zero when its
        class has no row or only missing (NaN/None) segmentations.

    Raises
    ------
    ValueError
        If ``df`` is not given or contains no row for ``id_``.
    """
    if df is None:
        raise ValueError("id2mask() needs the annotation DataFrame passed as `df`")

    idf = df[df['id'] == id_]
    if idf.empty:
        raise ValueError(f"id {id_!r} not found in df")

    # Height/width are taken from the first matching row.
    wh = idf[['height', 'width']].iloc[0]
    shape = (int(wh['height']), int(wh['width']), 3)
    mask = np.zeros(shape, dtype=np.uint8)

    for i, class_ in enumerate(['large_bowel', 'small_bowel', 'stomach']):
        # dropna() skips missing annotations; iterating (instead of squeeze())
        # also copes with several rows for the same class by taking their union.
        rles = idf.loc[idf['class'] == class_, 'segmentation'].dropna()
        for rle in rles:
            mask[..., i] |= rle_decode(rle, shape[:2])
    return mask

In [None]:
def get_metadata(row):
    """Parse ``row['id']`` into integer ``case``, ``day`` and ``slice`` fields.

    The id is split on ``'_'``: the first token minus the text ``case``, the
    second token minus the text ``day``, and the last token are each converted
    with ``int``. The three values are written back into ``row`` (mutated in
    place, only after all three parsed successfully) and ``row`` is returned.
    """
    tokens = row['id'].split('_')
    parsed = (
        ('case', int(tokens[0].replace('case', ''))),
        ('day', int(tokens[1].replace('day', ''))),
        ('slice', int(tokens[-1])),
    )
    for field, value in parsed:
        row[field] = value
    return row

def path2info(row):
    """Derive image metadata from the ``'/'``-separated ``row['image_path']``.

    From the file name (last path component, split on ``'_'``): token 1 is the
    slice number, token 2 the width, token 3 the height. From the component
    three levels from the end (also split on ``'_'``): token 0 minus ``case``
    is the case number, token 1 minus ``day`` is the day. All are stored as
    ``int`` in ``row`` under ``height``, ``width``, ``case``, ``day`` and
    ``slice``; ``row`` is mutated in place and returned.
    """
    parts = row['image_path'].split('/')

    file_fields = parts[-1].split('_')
    slice_no = int(file_fields[1])

    folder_fields = parts[-3].split('_')
    case_no = int(folder_fields[0].replace('case', ''))
    day_no = int(folder_fields[1].replace('day', ''))

    width_px = int(file_fields[2])
    height_px = int(file_fields[3])

    extracted = {
        'height': height_px,
        'width': width_px,
        'case': case_no,
        'day': day_no,
        'slice': slice_no,
    }
    for field, value in extracted.items():
        row[field] = value
    return row

In [None]:
def load_img(path):
    """Read an image and return it as a 3-channel float32 array scaled by its max.

    The file is read with ``cv2.IMREAD_UNCHANGED``, a trailing axis is added and
    repeated three times, the result is cast to ``float32`` and divided by its
    maximum value unless that maximum is zero.

    Parameters
    ----------
    path : str
        Location of the image file.

    Returns
    -------
    numpy.ndarray
        ``float32`` array; ``(H, W, 3)`` for a 2-D input image.
        NOTE(review): assumes the file decodes to a single-channel 2-D array —
        a multi-channel file would come out with an extra axis; confirm inputs.

    Raises
    ------
    OSError
        If OpenCV cannot read the file (missing or not decodable).
    """
    img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of a later TypeError.
        raise OSError(f"could not read image (missing or not decodable): {path!r}")

    img = np.tile(img[..., None], [1, 1, 3]).astype('float32')
    mx = np.max(img)
    if mx:  # skip the division for an all-zero image
        img /= mx
    return img

def load_msk(path):
    """Load an array with ``np.load`` and return it as float32 divided by 255.

    NOTE(review): the 255 divisor suggests masks are stored as 0/255 values —
    confirm against the code that writes these files.
    """
    scaled = np.load(path).astype('float32')
    np.divide(scaled, 255.0, out=scaled)
    return scaled

def show_img(img, mask=None):
    """Draw ``img`` after CLAHE, optionally overlaying ``mask`` with a legend.

    ``img`` is passed through a CLAHE object (clip limit 2.0, 8x8 tiles) and
    shown with the ``bone`` colormap on the current matplotlib axes. When
    ``mask`` is given it is drawn on top at 50% opacity together with a
    three-entry legend. Axes are switched off in both cases.

    NOTE(review): ``img`` must be something ``cv2`` CLAHE accepts — presumably
    a single-channel integer image rather than the float output of load_img;
    confirm at the call sites.
    """
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    plt.imshow(equalizer.apply(img), cmap='bone')

    if mask is not None:
        plt.imshow(mask, alpha=0.5)
        legend_colors = ((0.667, 0.0, 0.0), (0.0, 0.667, 0.0), (0.0, 0.0, 0.667))
        legend_names = ["Large Bowel", "Small Bowel", "Stomach"]
        swatches = []
        for rgb in legend_colors:
            swatches.append(Rectangle((0, 0), 1, 1, color=rgb))
        plt.legend(swatches, legend_names)

    plt.axis('off')

In [None]:
def rle_decode(mask_rle, shape):
    """Decode a run-length string into a uint8 array of the given shape.

    Parameters
    ----------
    mask_rle : str
        Whitespace-separated integers alternating ``start length``; starts are
        1-based positions in the flattened array.
    shape : sequence
        ``(rows, cols)`` of the returned array (only the first two entries
        are used to size the buffer).

    Returns
    -------
    numpy.ndarray
        Array of ``shape`` holding 1 inside every run and 0 elsewhere, filled
        in flattened (row-major) order.
    """
    tokens = mask_rle.split()
    begins = np.asarray(tokens[::2], dtype=int) - 1   # to 0-based offsets
    counts = np.asarray(tokens[1::2], dtype=int)
    stops = begins + counts

    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, stop in zip(begins, stops):
        flat[begin:stop] = 1
    return flat.reshape(shape)


def rle_encode(img):
    """Encode an array as a run-length string of ``start length`` pairs.

    The array is flattened and padded with a zero on both ends; every position
    where the value changes is recorded (1-based), and each second position is
    turned into a run length by subtracting the preceding start.

    NOTE(review): pairs only line up as start/length when ``img`` is a binary
    mask — presumably 0/1 values; confirm at the call sites.

    Returns
    -------
    str
        Space-separated integers; empty string for an all-zero input.
    """
    padded = np.concatenate([[0], img.flatten(), [0]])
    change_points = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Every second entry is the end of a run: convert it to a length in place.
    change_points[1::2] -= change_points[::2]
    return ' '.join(map(str, change_points))

In [None]:
# Root directory of the competition dataset under /kaggle/input.
BASE_PATH = '/kaggle/input/uw-madison-gi-tract-image-segmentation'

In [None]:
# Number of leading rows of train.csv that are kept.
# NOTE(review): every row past this cap is dropped before any further
# processing — confirm the cap is intended outside of quick experiments.
MAX_TRAIN_ROWS = 10000

# Read the annotation table from the dataset root (BASE_PATH) instead of
# repeating the literal path, then add integer case/day/slice columns parsed
# from each id.
df = pd.read_csv(f'{BASE_PATH}/train.csv').iloc[:MAX_TRAIN_ROWS]
df = df.apply(get_metadata, axis=1) # progress_apply
df.head()

In [None]:
# Collect every file four directory levels below <BASE_PATH>/train.
paths = glob(f'{BASE_PATH}/train/*/*/*/*')
if not paths:
    # Without this check an empty glob only surfaces later as an obscure
    # KeyError inside merge().
    raise FileNotFoundError(f'no training images found under {BASE_PATH}/train')

path_df = pd.DataFrame(paths, columns=['image_path'])
path_df = path_df.apply(path2info, axis=1) # progress_apply

merge_keys = ['case', 'day', 'slice']
# Re-running this cell would otherwise merge a second time and leave
# image_path_x/_y, height_x/_y, width_x/_y columns; drop whatever a previous
# run already attached so the cell is idempotent.
stale_cols = [c for c in path_df.columns if c in df.columns and c not in merge_keys]
df = df.drop(columns=stale_cols).merge(path_df, on=merge_keys)
df.head()

In [None]:
#sub = df[['id','class', 'segmentation']]
#sub.columns = ['id','class','predicted']
#sub.to_csv('submission.csv',index=False)

In [None]:
TEST_DIR = f'{BASE_PATH}/test/'
SS_CSV   = f'{BASE_PATH}/sample_submission.csv'
ss_df = pd.read_csv(SS_CSV)

if len(ss_df) > 0:
    # Sample submission has rows: emit one blank prediction per (id, class).
    # assign() returns a new frame, so no value is set on a column slice
    # (the original triggered SettingWithCopyWarning here).
    sub = ss_df[['id', 'class']].assign(predicted="")
else:
    # Sample submission is empty: fall back to the training annotations,
    # with `segmentation` renamed to the expected `predicted` column.
    # NOTE(review): presumably a placeholder submission — confirm intent.
    sub = df[['id', 'class', 'segmentation']].rename(columns={'segmentation': 'predicted'})
sub.to_csv('submission.csv', index=False)
# Get all testing images if there are any
#all_test_images = glob(os.path.join(TEST_DIR, "**", "*.png"), recursive=True)