In [None]:
# List the contents of the attached offline package directory.
! ls ../input/segmentation-pytorchmodel/segmentation_models_pytorch

In [None]:
# Copy the bundled efficientnet_pytorch and pretrainedmodels trees to /tmp/st;
# the pip-install cells that follow install from that location.
!cp -r ../input/segmentation-pytorchmodel/segmentation_models_pytorch/efficientnet_pytorch-0.6.3/ /tmp/st
!cp -r ../input/segmentation-pytorchmodel/segmentation_models_pytorch/pretrainedmodels-0.7.4/ /tmp/st

In [None]:
# Install efficientnet_pytorch from the local copy made above.
!pip install /tmp/st/efficientnet_pytorch-0.6.3/

In [None]:
# Install pretrainedmodels from the local copy (note the nested directory of the same name).
!pip install /tmp/st/pretrainedmodels-0.7.4/pretrainedmodels-0.7.4

In [None]:
# Install segmentation_models_pytorch without touching the network (--no-index),
# resolving packages from the attached dataset directory only.
# NOTE(review): the package is never imported explicitly in this notebook; presumably it
# must be installed so torch.load can unpickle the saved model - confirm.
!pip install segmentation_models_pytorch --no-index --find-links=../input/segmentation-pytorchmodel/segmentation_models_pytorch


In [None]:
# Install monai without touching the network (--no-index), resolving it from the attached
# dataset directory. `monai` is listed twice on the command line; the repeat is redundant.
! pip install monai --no-index --find-links ../input/monai-uwmadison/monai monai

In [None]:
# Import libraries
# Operating system libraries
from glob import glob
import os
import time
import copy
import monai
import gc
import cupy as cp

# linear algebra and data processing
import numpy as np
import pandas as pd
from collections import defaultdict

# visualization
import cv2
import plotly.express as px
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib import animation, rc
rc('animation', html='jshtml')
import seaborn as sns

# Progress bars to know cell progress in pandas apply
from tqdm import tqdm
from tqdm.notebook import tqdm_notebook
tqdm_notebook.pandas()

# PyTorch deep learning semantic segmentation
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp

# Albumentations for image augmentations
import albumentations as A

In [None]:
# Pre-process every test scan into ./test_updated, mirroring the
# <case>/<case>_<day>/scans layout of the competition test folder.
# os.makedirs(..., exist_ok=True) creates any missing parent folders, so a single
# call per output folder replaces the chain of isdir()/makedirs() checks.
os.makedirs('test_updated', exist_ok=True)
case_day_all = glob('../input/uw-madison-gi-tract-image-segmentation/test/*/*')
clahe_transform_6 = A.Compose([A.CLAHE(clip_limit=6,p=1.0)])

for case_day in case_day_all:
    print(case_day)
    case, day = (case_day.split('/')[-1]).split('_')
    scans_out_dir = './test_updated/'+case+'/'+case+'_'+day+'/scans'
    os.makedirs(scans_out_dir, exist_ok=True)

    images = glob('../input/uw-madison-gi-tract-image-segmentation/test/'+case+'/'+case+'_'+day+'/scans/*')

    # Pass 1: find the brightest raw pixel over the whole case/day volume so that
    # every slice of the volume is scaled by the same divisor.
    # NOTE(review): the maximum is measured BEFORE the 12000 clip applied in pass 2,
    # so clipped volumes end up below 255 after scaling - presumably this mirrors
    # the preprocessing of the training data; confirm before changing.
    max_pixel = 0
    for image_file in images:
        raw = cv2.imread(image_file, cv2.IMREAD_UNCHANGED)
        if raw.max() > max_pixel:
            max_pixel = raw.max()
    #print("Max Pixel Value:",max_pixel)
    NormalizeIntensity = monai.transforms.NormalizeIntensity(subtrahend=0, divisor=max_pixel/255)
    #HistogramNormalize = monai.transforms.HistogramNormalize(num_bins=64)

    # Pass 2: clip, scale, apply CLAHE and write the slice under its original file name.
    for image_file in images:
        image_name = image_file.split('/')[-1]
        image = cv2.imread(image_file, cv2.IMREAD_UNCHANGED)
        image[image>12000] = 12000
        image_normalized = NormalizeIntensity(image)
        image_clahe = clahe_transform_6(image=image_normalized)['image']
        #image_histogram_normalized = HistogramNormalize(image_normalized)
        cv2.imwrite(scans_out_dir+'/'+image_name, image_clahe)

In [None]:
# Competition dataset folder; train.csv and sample_submission.csv are read from here.
DATASET_FOLDER = "/kaggle/input/uw-madison-gi-tract-image-segmentation"
# df_train = pd.read_csv(os.path.join(DATASET_FOLDER, "train.csv"))
# display(df_train.head())
# Folder of the pre-normalised training images (not referenced again in the visible cells).
TRAIN_DIR = "../input/normalized-dataset/Final Normalized Data"
TRAIN_CSV = os.path.join(DATASET_FOLDER, "train.csv")
train_df = pd.read_csv(TRAIN_CSV)

# NOTE: df_pred is reassigned further down by the sub_df.merge(path_df, ...) cell, so
# this frame is effectively only used to compute WITH_SUBMISSION.
df_pred = pd.read_csv(os.path.join(DATASET_FOLDER, "sample_submission.csv"))
# True when sample_submission.csv contains at least one row.
WITH_SUBMISSION = not df_pred.empty

In [None]:
# ref: https://www.kaggle.com/paulorzp/run-length-encode-and-decode
# modified from: https://www.kaggle.com/inversion/run-length-decoding-quick-start
def rle_decode(mask_rle, shape, color=1):
    """ Decode a run-length string into a dense float32 mask.

    Args:
        mask_rle (str): space separated "start length start length ..." pairs,
            where starts are 1-based positions in the flattened mask
        shape (tuple of ints): (height, width) or (height, width, depth) of
            the array to return
        color (number): value written into the pixels covered by a run

    Returns:
        Mask (np.array of float32, reshaped to `shape`)
            - `color` indicating mask
            - 0 indicating background

    """
    tokens = np.array(mask_rle.split(), dtype=int)

    # Tokens alternate start, length, start, length, ...; starts become 0-based.
    run_starts = tokens[0::2] - 1
    run_stops = run_starts + tokens[1::2]

    # Work on a buffer flattened over height*width; a 3-element shape keeps
    # its last axis so a run fills whole rows of that axis.
    if len(shape) == 3:
        h, w, d = shape
        flat = np.zeros((h * w, d), dtype=np.float32)
    else:
        h, w = shape
        flat = np.zeros((h * w,), dtype=np.float32)

    for begin, end in zip(run_starts, run_stops):
        flat[begin:end] = color

    return flat.reshape(shape)


# https://www.kaggle.com/namgalielei/which-reshape-is-used-in-rle
def rle_decode_top_to_bot_first(mask_rle, shape):
    """ Decode a run-length string into a uint8 mask of shape (shape[0], shape[1]).

    Args:
        mask_rle (str): space separated "start length ..." pairs, starts 1-based
        shape (tuple of ints): (height,width) of array to return

    Returns:
        Mask (np.array of uint8)
            - 1 indicating mask
            - 0 indicating background

    """
    tokens = mask_rle.split()
    run_starts = np.asarray(tokens[0::2], dtype=int) - 1  # 1-based -> 0-based
    run_lengths = np.asarray(tokens[1::2], dtype=int)
    run_stops = run_starts + run_lengths

    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, end in zip(run_starts, run_stops):
        flat[begin:end] = 1

    # Column-major reshape to (shape[1], shape[0]) followed by a transpose.
    transposed = flat.reshape((shape[1], shape[0]), order='F')
    return transposed.T


# ref.: https://www.kaggle.com/stainsby/fast-tested-rle
def rle_encode(img):
    """ Run-length encode a mask.

    Args:
        img (np.array):
            - 1 indicating mask
            - 0 indicating background

    Returns:
        str: space separated "start length" pairs with 1-based starts over the
        flattened array (empty string when the mask has no foreground)
    """
    flat = img.flatten()
    # Zero sentinels on both ends make runs touching the borders show up as changes.
    padded = np.concatenate([[0], flat, [0]])
    change_points = np.where(padded[1:] != padded[:-1])[0] + 1
    # Change points alternate run start / run end; turn each end into a length.
    change_points[1::2] -= change_points[::2]
    return ' '.join(map(str, change_points))


def flatten_l_o_l(nested_list):
    """ Flatten one nesting level: concatenate the items of every sub-iterable into one list """
    flat = []
    for sublist in nested_list:
        flat.extend(sublist)
    return flat


def load_json_to_dict(json_path):
    """ Read a JSON file and return its parsed content.

    Args:
        json_path (str): path of the JSON file to read

    Returns:
        The decoded JSON document (a dict for a top-level JSON object)
    """
    # `json` is not part of the notebook's import cell, so it is imported here;
    # without this the function raised NameError on every call.
    import json

    with open(json_path) as json_file:
        data = json.load(json_file)
    return data


def tf_load_png(img_path):
    """ Read the file at `img_path` and decode it as a 3-channel PNG via `tf.image.decode_png`.

    NOTE(review): `tf` is not imported in the notebook's visible import cell, so
    calling this raises NameError unless TensorFlow is imported somewhere else.
    """
    return tf.image.decode_png(tf.io.read_file(img_path), channels=3)


def open_gray16(_path, normalize=True, to_rgb=False):
    """ Read an image with cv2.IMREAD_ANYDEPTH, optionally scaled and replicated to 3 channels.

    Args:
        _path (str): image file to read
        normalize (bool): divide the pixel values by 65535.
        to_rgb (bool): append a trailing axis and repeat it 3 times

    Returns:
        np.array: the (optionally scaled / 3-channel) image
    """
    img = cv2.imread(_path, cv2.IMREAD_ANYDEPTH)
    if normalize:
        img = img / 65535.
    if to_rgb:
        img = np.tile(np.expand_dims(img, axis=-1), 3)
    return img


def load_img(path):
    """ Load a grayscale scan as a 3-channel float32 image scaled by its own maximum.

    Args:
        path (str): image file to read (read with cv2.IMREAD_UNCHANGED)

    Returns:
        np.array: float32 array of shape (H, W, 3); divided by the image maximum
        when that maximum is non-zero, returned unscaled otherwise

    Raises:
        FileNotFoundError: if OpenCV cannot read the file
    """
    img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None; fail
        # with the offending path instead of an opaque TypeError further down.
        raise FileNotFoundError(f"Could not read image: {path!r}")
    img = np.tile(img[..., None], [1, 1, 3])  # gray to rgb
    img = img.astype('float32')  # original is uint16
    mx = np.max(img)
    if mx:
        img /= mx  # scale image to [0, 1]
    return img


def load_msk(path):
    """ Load a mask stored as a NumPy file and return it as float32. """
    return np.load(path).astype('float32')

In [None]:
class constants:
    """ Notebook-wide settings: number of classes and the torch device used for inference. """
    num_classes   = 3
    # First CUDA device when one is available, CPU otherwise.
    device        = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
def get_metadata(row):
    """ Parse `row['id']` and store the integer case, day and slice numbers on the row.

    The id is split on '_': the first token carries the case number (prefix
    'case'), the second the day number (prefix 'day') and the last token is
    the slice number. The row is modified in place and returned.
    """
    tokens = row['id'].split('_')
    parsed = (
        int(tokens[0].replace('case','')),
        int(tokens[1].replace('day','')),
        int(tokens[-1]),
    )
    for key, value in zip(('case', 'day', 'slice'), parsed):
        row[key] = value
    return row

def path2info(row):
    """ Parse `row['image_path']` and store height, width, case, day and slice on the row.

    The file name (last path component) is split on '_': token 1 is the slice
    number, token 2 the width and token 3 the height. The folder two levels
    above the file is split on '_' into a 'case<N>' and a 'day<N>' token.
    The row is modified in place and returned.
    """
    segments = row['image_path'].split('/')
    name_fields = segments[-1].split('_')
    slice_ = int(name_fields[1])
    folder_fields = segments[-3].split('_')
    case = int(folder_fields[0].replace('case',''))
    day = int(folder_fields[1].replace('day',''))
    width = int(name_fields[2])
    height = int(name_fields[3])

    # Assignment order is kept (height, width, case, day, slice) because it
    # determines the column order when this is used with DataFrame.apply.
    for key, value in (('height', height), ('width', width), ('case', case),
                       ('day', day), ('slice', slice_)):
        row[key] = value
    return row

In [None]:
# An empty sample_submission.csv means there is no test set to predict, so the
# notebook falls back to a debug run on the first 1000*3 rows of train.csv.
sub_df = pd.read_csv('../input/uw-madison-gi-tract-image-segmentation/sample_submission.csv')
debug = len(sub_df) == 0
if debug:
    sub_df = pd.read_csv('../input/uw-madison-gi-tract-image-segmentation/train.csv')[:1000*3]
    sub_df = sub_df.drop(columns=['class','segmentation']).drop_duplicates()
else:
    sub_df = sub_df.drop(columns=['class','predicted']).drop_duplicates()
# Add integer case / day / slice columns parsed from the id.
sub_df = sub_df.progress_apply(get_metadata,axis=1)

In [None]:
# Debug runs read the raw training PNGs; submission runs read the pre-processed
# copies written to ./test_updated by the earlier cell.
paths = glob(
    '/kaggle/input/uw-madison-gi-tract-image-segmentation/train/**/*png'
    if debug else
    './test_updated/**/*png',
    recursive=True,
)
#     paths = sorted(paths)
path_df = pd.DataFrame(paths, columns=['image_path']).progress_apply(path2info, axis=1)
path_df.head()

In [None]:
# Attach the image path and size columns of path_df to each sub_df row, matched on
# (case, day, slice). how='left' keeps every sub_df row; a row without a matching
# file gets NaN in the path_df columns.
df_pred = sub_df.merge(path_df, on=['case','day','slice'], how='left')
df_pred.head()

In [None]:
# sfolder = "test" if WITH_SUBMISSION else "train"
# ls_images = glob(os.path.join(DATASET_FOLDER, sfolder, "**", "*.png"), recursive=True)
# ls_images = [p.replace(DATASET_FOLDER + os.path.sep, "") for p in ls_images]
# case_day = [os.path.dirname(p).split(os.path.sep)[-2] for p in ls_images]
# df_pred = pd.DataFrame({'Case_Day': case_day, 'image_path': ls_images})

# if not WITH_SUBMISSION:
#     df_pred = df_pred[df_pred["Case_Day"].str.startswith("case123_day")]
# display(df_pred.head())

In [None]:
# df_pred['image_path']=DATASET_FOLDER+"/"+df_pred['image_path']
# df_pred["slice_h"] = df_pred["image_path"].apply(lambda x: int(x[:-4].rsplit("_", 4)[1]))
# df_pred["slice_w"] = df_pred["image_path"].apply(lambda x: int(x[:-4].rsplit("_", 4)[2]))
# df_pred["slice_id"] = df_pred["image_path"].apply(lambda x: x.split("_", 3)[2])
# df_pred['id']=df_pred["Case_Day"]+"_slice_"+df_pred["slice_id"]


In [None]:
# df_pred['case'] = df_pred['Case_Day'].apply(lambda x:x.split('_')[0])
# df_pred['day'] = df_pred['Case_Day'].apply(lambda x:x.split('_')[1])
# df_pred

In [None]:
class BuildDataset(torch.utils.data.Dataset):
    """ Dataset over a DataFrame of image paths (and optionally mask paths).

    Args:
        df (pd.DataFrame): needs the columns 'image_path' and 'id'; mask paths
            are taken from 'mask_path' or, failing that, 'msk_path'
        label (bool): when True each item is (image, mask); otherwise each
            item is (image, id, original height, original width)
        transforms: optional callable invoked as transforms(image=..., mask=...)
            (or transforms(image=...) without labels) that returns a dict with
            the key 'image' (and 'mask')

    Raises:
        ValueError: if label=True but `df` has no mask path column
    """
    def __init__(self, df, label=False, transforms=None):
        self.df         = df
        self.label      = label
        self.img_paths  = df['image_path'].tolist()
        self.ids        = df['id'].tolist()
        # The original tested for 'msk_path' but then read 'mask_path', which
        # either raised KeyError or silently ignored the masks. Accept either
        # spelling, preferring 'mask_path' (the column that was actually read).
        mask_column = next((col for col in ('mask_path', 'msk_path') if col in df.columns), None)
        self.msk_paths = df[mask_column].tolist() if mask_column is not None else None
        if label and self.msk_paths is None:
            raise ValueError("label=True requires a 'mask_path' or 'msk_path' column in df")
        self.transforms = transforms

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        img_path  = self.img_paths[index]
        id_       = self.ids[index]
        img = load_img(img_path)
        # Size before any transform, returned so predictions can be mapped back.
        h, w = img.shape[:2]
        if self.label:
            msk_path = self.msk_paths[index]
            msk = load_msk(msk_path)
            if self.transforms:
                data = self.transforms(image=img, mask=msk)
                img  = data['image']
                msk  = data['mask']
            # channels-last -> channels-first
            img = np.transpose(img, (2, 0, 1))
            msk = np.transpose(msk, (2, 0, 1))
            return torch.tensor(img), torch.tensor(msk)
        else:
            if self.transforms:
                data = self.transforms(image=img)
                img  = data['image']
            img = np.transpose(img, (2, 0, 1))
            return torch.tensor(img), id_, h, w

In [None]:
# class BuildDataset(torch.utils.data.Dataset):
#     def __init__(self, df, label=True, transforms=None,_style = 'multilabel'):
#         self.df         = df
#         self.label      = label
#         self.transforms = transforms
        
#     def __len__(self):
#         return len(self.df)
    
#     def __getitem__(self, index):
#         img_path=self.df['image_path'].iloc[index]
#         img = []
#         img = load_img(img_path)
#         data = self.transforms(image=img)
#         img  = data['image']
#         img = np.transpose(img, (2, 0, 1))
#         return img_path,torch.tensor(img)

In [None]:
# The debug and submission branches were byte-for-byte duplicates of each
# other, so the pre-processing is defined once for both kinds of run.
# Every image is resized to IMAGE_SHAPE (height, width) with nearest-neighbour
# interpolation before it is fed to the model.
IMAGE_SHAPE = SEG_SHAPE = (256,256)
submission_transforms = {
    "test": A.Compose([
        A.Resize(*IMAGE_SHAPE, interpolation=cv2.INTER_NEAREST),
    ], p=1.0)
}

In [None]:
# Inference dataset over df_pred. `label` keeps its default (False), so each item is
# (image tensor, id, original height, original width).
test_dataset = BuildDataset(df_pred, transforms=submission_transforms['test'])
# shuffle=False keeps the batches in df_pred row order.
test_loader  = DataLoader(test_dataset, batch_size=32,num_workers=2, shuffle=False, pin_memory=False)

In [None]:
# Load the pickled model object from the attached dataset.
# NOTE(review): infer() loads its own model(s) from the paths it is given and does not
# appear to use this module-level `model`.
model = torch.load("../input/unet-plusplus-normmodel/unetPlusPlusNorm_v2")

In [None]:
def path_to_shape(path):
    """ Return the two integers found at positions 1 and 2 after dropping the
    last 4 characters of `path` and splitting from the right on '_' (at most 4 splits).
    """
    fields = path[:-4].rsplit("_", 4)
    return int(fields[1]), int(fields[2])

In [None]:
def reshape_to_original(paths,masks):
    """ Resize each predicted mask back to the size encoded in its file name.

    Fixes over the previous version, which could not run: `pd.dataframe`
    does not exist, `path[i]` referred to an undefined name (now `paths[i]`),
    `DataFrame.append(..., inplace=True)` is not a valid call, and the
    collected frame was discarded in favour of `return 0`.

    Args:
        paths (sequence of str): image paths, one per mask in the batch
        masks (torch.Tensor): CPU tensor of shape (batch, channels, H, W)

    Returns:
        pd.DataFrame: the per-image frames returned by `image_to_submission`,
        concatenated (empty frame for an empty batch)

    NOTE(review): `image_to_submission` is not defined in the visible part of
    this notebook; it must exist before this function can be called.
    """
    masks = masks.numpy()
    frames = []
    for i in range(masks.shape[0]):
        # path2info in this notebook reads the same file-name fields as
        # (width, height), while A.Resize expects (height, width).
        # NOTE(review): confirm the field order against the dataset description.
        width, height = path_to_shape(paths[i])
        # The target size only depends on the image, so build the resizer once per image.
        re_size_mask = A.Resize(height, width, interpolation=cv2.INTER_NEAREST, always_apply=True)
        channels = [re_size_mask(image=channel)['image'] for channel in masks[i]]
        resized_mask = np.stack(channels, axis=-1)  # (height, width, channels)
        frames.append(image_to_submission(paths[i], resized_mask))

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


In [None]:
@torch.no_grad()
def infer(model_paths, _d_loader, num_log=1, thr=0.5):
    """ Run (ensemble) inference over a data loader and RLE-encode the thresholded masks.

    Args:
        model_paths (list of str): checkpoints readable by torch.load; their
            sigmoid outputs are averaged
        _d_loader: loader yielding (images, ids, heights, widths) batches
        num_log (int): number of leading batches whose first 10 images and
            masks are kept for inspection
        thr (float): probability threshold applied to the averaged prediction

    Returns:
        tuple: (pred_strings, pred_ids, pred_classes, imgs, msks)
    """
    # Load every checkpoint once, up front, instead of re-reading it from disk
    # for every batch. map_location lets GPU-saved checkpoints load on whatever
    # device is configured; eval() puts dropout / batch-norm in inference mode.
    # This keeps all ensemble members in memory at the same time.
    # SECURITY: torch.load unpickles arbitrary Python objects - only pass
    # checkpoints from a trusted source.
    models = []
    for path in model_paths:
        model = torch.load(path, map_location=constants.device)
        model.eval()
        models.append(model)

    msks = []
    imgs = []
    pred_strings = []
    pred_ids = []
    pred_classes = []
    # Iterate the loader that was passed in (the previous version ignored
    # `_d_loader` and always read the global `test_loader`).
    for idx, (img, ids, heights, widths) in enumerate(tqdm(_d_loader, total=len(_d_loader), desc='Infer ')):
        img = img.to(constants.device, dtype=torch.float)
        size = img.size()
        # Running average of the per-model probabilities, 3 output channels.
        msk = torch.zeros((size[0], 3, size[2], size[3]), device=constants.device, dtype=torch.float32)
        for model in models:
            msk += torch.sigmoid(model(img)) / len(models)
        msk = (msk.permute((0,2,3,1))>thr).to(torch.uint8).cpu().detach().numpy() # shape: (n, h, w, c)
        result = masks2rles(msk, ids, heights, widths)
        pred_strings.extend(result[0])
        pred_ids.extend(result[1])
        pred_classes.extend(result[2])
        if idx<num_log:
            img = img.permute((0,2,3,1)).cpu().detach().numpy()
            imgs.append(img[:10])
            msks.append(msk[:10])
        gc.collect()
        torch.cuda.empty_cache()
    return pred_strings, pred_ids, pred_classes, imgs, msks

In [None]:
# @torch.no_grad()
# def infer(df_pred,model_path="../input/effnet-v7/Effnet_b7v_07", device="cuda", thr=0.5):
#     test_dataset = BuildDataset(df_pred, transforms=submission_transforms['test'])
#     test_loader  = DataLoader(test_dataset, batch_size=32,num_workers=2, shuffle=False, pin_memory=False)
    
#     pred_strings = []
#     pred_ids = []
#     pred_classes = []

#     for idx, (paths, imgs) in enumerate(tqdm(test_loader, total=len(test_loader), desc='Infer ')):
#         size_3d = imgs.size()
#         imgs = imgs.to(constants.device, dtype=torch.float)
#         masks_3d = torch.zeros((size_3d[0],size_3d[1], size_3d[2], size_3d[3]), device=device, dtype=torch.float32)
#         model = torch.load(model_path)
#         out_3d = model(imgs)
#         out_3d = torch.nn.Sigmoid()(out_3d)
#         masks_3d += out_3d / 1

#         # Remove batch dim
#         masks_3d = torch.squeeze(masks_3d) 
        
#         ######
# #         1. Mask to original shape -> to result(mask RLE ) -> return DF same as submission
# #     2. append DF to main df ( merge dfs of different batches into one)
    
        
        
        
        
#         #######
#         masks = (masks_3d.permute((0, 2, 3, 1)) > thr).to(torch.uint8).cpu().detach().numpy()
        
#         # shape: (n, h, w, c)
#         result = masks2rles(masks, paths, height=256, width=256)
#         pred_strings.extend(result[0])
#         pred_ids.extend(result[1])
#         pred_classes.extend(result[2])
        
#     pred_df = pd.DataFrame({"id": pred_ids, "class": pred_classes, "predicted": pred_strings})

#     return pred_df

In [None]:
def masks2rles(msks, ids, heights, widths):
    """ Map a batch of predicted masks back to their original size and RLE-encode them.

    The inference transform in this notebook is a plain resize to the model
    input size, so the inverse is a resize back to (height, width). The
    previous version padded the mask with a constant border instead, which
    mis-aligned the mask, mixed up the mask's height/width axes, came out one
    pixel short for odd size differences and failed (negative border) for
    images smaller than the model input.

    Args:
        msks (np.array): uint8 masks of shape (n, h, w, 3)
        ids (sequence): one id per mask
        heights, widths (sequence): original image sizes, one per mask
            (tensor elements or plain integers)

    Returns:
        tuple of lists: (pred_strings, pred_ids, pred_classes) with three
        consecutive entries per mask, in the order large_bowel, small_bowel, stomach
    """
    pred_strings = []
    pred_ids = []
    pred_classes = []
    for idx in range(msks.shape[0]):
        height = int(heights[idx])
        width = int(widths[idx])
        # cv2.resize takes the target size as (width, height); nearest-neighbour
        # keeps the mask binary.
        msk = cv2.resize(msks[idx], (width, height), interpolation=cv2.INTER_NEAREST)
        rle = [mask2rle(msk[..., midx]) for midx in range(3)]
        pred_strings.extend(rle)
        pred_ids.extend([ids[idx]]*len(rle))
        pred_classes.extend(['large_bowel', 'small_bowel', 'stomach'])
    return pred_strings, pred_ids, pred_classes

In [None]:
def mask2rle(msk, thr=0.5):
    '''
    Run-length encode a binary mask using CuPy.

    msk: array, 1 - mask, 0 - background
    thr: unused, kept for backward compatibility
    Returns run length as string formated ("start length ..." pairs with
    1-based starts over the flattened mask; empty string for an empty mask)
    '''
    msk    = cp.array(msk)
    pixels = msk.flatten()
    pad    = cp.array([0])
    pixels = cp.concatenate([pad, pixels, pad])
    runs   = cp.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    # Copy the result to the host once; iterating the CuPy array directly
    # performs a separate device-to-host transfer for every element.
    return ' '.join(str(x) for x in cp.asnumpy(runs))

In [None]:
# Run inference with a single checkpoint over the test loader. The result holds the RLE
# strings, ids and class names, plus the images/masks kept for the logged batches.
pred_strings, pred_ids, pred_classes, imgs, msks =infer(['../input/unet-plusplus-normmodel/unetPlusPlusNorm_v2'], test_loader)

In [None]:
# Long-format predictions: one row per (id, class) with its RLE string.
pred_df = pd.DataFrame({
    "id":pred_ids,
    "class":pred_classes,
    "predicted":pred_strings
})

In [None]:
# Re-read the row template so the submission has exactly the expected rows in
# the expected order, then attach the predictions to it.
if debug:
    sub_df = pd.read_csv('../input/uw-madison-gi-tract-image-segmentation/train.csv')[:1000*3]
    del sub_df['segmentation']
else:
    sub_df = pd.read_csv('../input/uw-madison-gi-tract-image-segmentation/sample_submission.csv')
    del sub_df['predicted']

# how='left' keeps every template row even when no prediction was produced for
# it (the default inner merge silently dropped such rows, yielding a
# submission with missing ids); those rows get an empty RLE string.
sub_df = sub_df.merge(pred_df, on=['id','class'], how='left')
sub_df['predicted'] = sub_df['predicted'].fillna('')
sub_df.to_csv('submission.csv',index=False)
display(sub_df.head(5))

In [None]:
# sub_idsList = []
# for i in range(len(pred_df)):
#     sub_ids = ("_").join(pred_df['id'][i].split(os.sep)[6:7] + pred_df['id'][i].split(os.sep)[6:10][-1].split("_")[:2])
#     sub_idsList.append(sub_ids)

In [None]:
# pred_df["Testid"] = sub_idsList
# first_column = pred_df.pop('Testid')
# pred_df.insert(0, 'Testid', first_column)

In [None]:
# pred_df.drop('id', axis=1, inplace=True)
# pred_df.rename(columns = {'Testid':'id'}, inplace = True)
# pred_df.head(2)

In [None]:
# pred_df.head(5)

In [None]:
# sub_df = pd.read_csv('../input/uw-madison-gi-tract-image-segmentation/sample_submission.csv')
# del sub_df['predicted']

# sub_df = sub_df.merge(pred_df, on=['id', 'class'])
# sub_df = sub_df.fillna('')


In [None]:
# sub_df.to_csv('submission.csv',index=False)

In [None]:
# sub_df = pd.read_csv('../input/uw-madison-gi-tract-image-segmentation/sample_submission.csv')

# empty_df = pd.DataFrame(columns=['id','class','predicted'])
# empty_df.to_csv('submission.csv',index=False)

# if len(pred_df) != len(sub_df):
#     print(wrong)

# sample_sub_length = len(sub_df)
# actual_length = len(pred_df)

# del sub_df['predicted']
# sub_df = sub_df.merge(pred_df, on=['id','class'],how='left')
# sub_df = sub_df.fillna('')
# sub_df.to_csv('submission.csv',index=False)