In [None]:
import sys
# Put the source checkouts that live under ../input on the import path,
# in the same order as before.
sys.path.extend([
    "../input/segmentation-models-pytorch/segmentation_models.pytorch-master",
    "../input/timm-pytorch-image-models/pytorch-image-models-master",
    "../input/pretrainedmodels/pretrainedmodels-0.7.4",
    "../input/efficientnet-pytorch/EfficientNet-PyTorch-master",
])

In [None]:
import pandas as pd
import numpy as np
import os
from glob import glob
import gc
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T
import torchvision
import torch.nn.functional as F
from torch.autograd import Variable
from PIL import Image
import cv2
import albumentations as A
import time
import os
from tqdm import tqdm
import torch.nn.functional as F
import segmentation_models_pytorch as smp
from torch.autograd import Variable
import torch.nn.functional as F
import numba
import numpy as np
from math import sqrt
from scipy.spatial.distance import directed_hausdorff
from scipy.ndimage import convolve
from scipy.ndimage.morphology import distance_transform_edt as edt
from torch.optim.lr_scheduler import _LRScheduler
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import KFold
import math

In [None]:
def prepare_nd_df_test(df, WINDOW_SIZE=12, base_path_image="", base_path_mask=""):
    """Turn a per-slice frame into one row per sliding window of slices.

    Rows are grouped by ``case_id`` and then by ``day_num`` (each in order of
    first appearance).  Inside a group a window of ``WINDOW_SIZE`` consecutive
    rows is moved forward one row at a time; as soon as a window would be
    shorter than ``WINDOW_SIZE`` the group is finished.

    Args:
        df: Frame providing the ``id``, ``case_id``, ``day_num``, ``f_path``,
            ``slice_h`` and ``slice_w`` columns.
        WINDOW_SIZE: Number of consecutive slices in each window.
        base_path_image: Not used by this function.
        base_path_mask: Not used by this function.

    Returns:
        A DataFrame with one row per window and the columns ``id``,
        ``case_id``, ``day_num``, ``nd_images``, ``nd_masks``,
        ``stomach_rles``, ``small_rles``, ``large_rles``, ``slices_h``,
        ``slices_w`` and ``fold``.  ``id`` and ``nd_masks`` are taken from the
        window entry at position 2, the three RLE columns hold five ``None``
        placeholders, and ``fold`` is always -1.
    """
    # Output column name -> values collected so far (insertion order is the
    # final column order).
    collected = {
        "id": [],
        "case_id": [],
        "day_num": [],
        "nd_images": [],
        "nd_masks": [],
        "stomach_rles": [],
        "small_rles": [],
        "large_rles": [],
        "slices_h": [],
        "slices_w": [],
        "fold": [],
    }

    for case in df['case_id'].unique():
        case_rows = df[df['case_id'] == case].reset_index(drop=True)
        for day in case_rows['day_num'].unique():
            day_rows = case_rows[case_rows['day_num'] == day].reset_index(drop=True)

            for start in range(0, day_rows.shape[0]):
                window = day_rows[start:start + WINDOW_SIZE]
                if window.shape[0] != WINDOW_SIZE:
                    # The window ran past the end of this scan; later starts
                    # can only be shorter still.
                    break

                # Read everything from the window before touching the output
                # lists so a failing lookup leaves them untouched.
                window_id = window['id'].values[2]
                paths = window['f_path'].values.tolist()
                mask_path = paths[2]
                heights = window['slice_h'].values.tolist()
                widths = window['slice_w'].values.tolist()
                case_ids = window['case_id'].values.tolist()
                day_nums = window['day_num'].values.tolist()

                collected["id"].append(window_id)
                collected["case_id"].append(case_ids[0])
                collected["day_num"].append(day_nums[0])
                collected["nd_images"].append(paths)
                collected["nd_masks"].append(mask_path)
                # No ground truth at inference time: fresh placeholder lists.
                collected["stomach_rles"].append([None] * 5)
                collected["small_rles"].append([None] * 5)
                collected["large_rles"].append([None] * 5)
                collected["slices_h"].append(heights)
                collected["slices_w"].append(widths)
                collected["fold"].append(-1)

    nndf = pd.DataFrame()
    for column_name, values in collected.items():
        nndf[column_name] = values
    return nndf


def get_1d_transformations(img_size):
    """Build the albumentations pipelines used for training and validation.

    Args:
        img_size: Side length, in pixels, that every image is resized to
            (the output is square).

    Returns:
        dict with two ``A.Compose`` pipelines:
            - ``"train"``: resize followed by random flips, a small
              shift/scale/rotate, an occasional grid or elastic distortion
              and coarse dropout.
            - ``"valid"``: resize only.
    """
    # One of the two geometric distortions is applied a quarter of the time.
    distortion = A.OneOf([
        A.GridDistortion(num_steps=5, distort_limit=0.05, p=1.0),
        A.ElasticTransform(alpha=1, sigma=50, alpha_affine=50, p=1.0)
    ], p=0.25)

    # Dropout holes are capped at 1/20 of the image side.
    hole_side = img_size // 20
    dropout = A.CoarseDropout(max_holes=8, max_height=hole_side, max_width=hole_side,
                              min_holes=5, fill_value=0, mask_fill_value=0, p=0.5)

    train_pipeline = A.Compose([
        A.Resize(img_size, img_size, interpolation=cv2.INTER_NEAREST),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.05, rotate_limit=10, p=0.5),
        distortion,
        dropout,
    ], p=1.0)

    valid_pipeline = A.Compose(
        [A.Resize(img_size, img_size, interpolation=cv2.INTER_NEAREST)],
        p=1.0,
    )

    return {"train": train_pipeline, "valid": valid_pipeline}


def get_prepare_df(filename, sep='\\', TRAIN_DIR='../train'):
    """Load the competition CSV and attach one image path (plus metadata) per slice id.

    The CSV has several rows per slice ``id`` (one per ``class``).  The result
    keeps a single row per ``id``, without the ``class`` column, enriched with
    the image path found under ``TRAIN_DIR`` and the values encoded in the
    image file name.

    Args:
        filename: Path of the CSV to read.  It must have an ``id`` column of
            the form ``<case>_<day>_<slice id>`` and a ``class`` column.
        sep: Path separator used in the globbed image paths.  The default is a
            backslash; pass ``'/'`` (or ``os.sep``) on POSIX systems.
        TRAIN_DIR: Directory searched recursively for ``*.png`` slices.  The
            images are expected four path components below it
            (``<case>/<case>_<day>/scans/<file>.png``).

    Returns:
        DataFrame with one row per matched ``id`` and the added columns
        ``case_id_str``, ``case_id``, ``day_num_str``, ``day_num``,
        ``slice_id``, ``f_path``, ``slice_h``, ``slice_w``, ``px_spacing_h``
        and ``px_spacing_w``.  Ids with no image on disk are dropped.

    Raises:
        FileNotFoundError: If no ``*.png`` file exists under ``TRAIN_DIR``.
        KeyError: If the CSV has no ``class`` column.
    """
    df = pd.read_csv(filename)

    # 1. Case as a column (string form and integer form)
    df["case_id_str"] = df["id"].apply(lambda x: x.split("_", 2)[0])
    df["case_id"] = df["id"].apply(lambda x: int(x.split("_", 2)[0].replace("case", "")))

    # 2. Day as a column (string form and integer form)
    df["day_num_str"] = df["id"].apply(lambda x: x.split("_", 2)[1])
    df["day_num"] = df["id"].apply(lambda x: int(x.split("_", 2)[1].replace("day", "")))

    # 3. Slice identifier as a column
    df["slice_id"] = df["id"].apply(lambda x: x.split("_", 2)[2])

    # 4. Locate every image and join it to its id through the path prefix
    #    "<root>/<case>/<case>_<day>/scans/<slice id>".
    all_train_images = glob(os.path.join(TRAIN_DIR, "**", "*.png"), recursive=True)
    if not all_train_images:
        raise FileNotFoundError(f"No .png images found under {TRAIN_DIR!r}")

    # Strip the last four path components of any image to recover the root
    # exactly as glob spelled it.
    root = all_train_images[0].rsplit(sep, 4)[0]
    df["_partial_ident"] = [
        os.path.join(root, case, case + "_" + day, "scans", slice_id)
        for case, day, slice_id in zip(df["case_id_str"].values,
                                       df["day_num_str"].values,
                                       df["slice_id"].values)
    ]

    # The file name ends in four "_"-separated metadata fields; everything
    # before them is the join key.
    _tmp_merge_df = pd.DataFrame({
        "_partial_ident": [str(path.rsplit("_", 4)[0]) for path in all_train_images],
        "f_path": all_train_images,
    })
    df = df.merge(_tmp_merge_df, on="_partial_ident").drop(columns=["_partial_ident"])

    def _name_field(path, position):
        # Drop the ".png" suffix, then take one of the trailing metadata fields.
        return path[:-4].rsplit("_", 4)[position]

    # 5. Slice dimensions from the file name (int, pixels)
    df["slice_h"] = df["f_path"].apply(lambda x: int(_name_field(x, 1)))
    df["slice_w"] = df["f_path"].apply(lambda x: int(_name_field(x, 2)))

    # 6. Pixel spacing from the file name (float)
    df["px_spacing_h"] = df["f_path"].apply(lambda x: float(_name_field(x, 3)))
    df["px_spacing_w"] = df["f_path"].apply(lambda x: float(_name_field(x, 4)))

    # 7. Collapse the per-class rows to one row per id.  Keeping the first row
    #    of every id does not depend on the rows arriving in aligned triples.
    df = (
        df.drop_duplicates(subset="id", keep="first")
          .drop(columns=["class"])
          .reset_index(drop=True)
    )
    gc.collect()
    return df

# ref.: https://www.kaggle.com/stainsby/fast-tested-rle
def rle_encode(img):
    """Run-length encode a binary mask.

    The mask is flattened in row-major order and described as alternating
    ``start length`` pairs, where ``start`` is a 1-based position in the
    flattened array.

    Args:
        img (np.array):
            - 1 indicating mask
            - 0 indicating background

    Returns:
        The run lengths as a space-separated string; an empty string when the
        mask has no foreground.
    """
    # Pad with background on both ends so a run touching either border still
    # produces a start edge and an end edge.
    padded = np.concatenate([[0], img.flatten(), [0]])

    # 1-based positions at which the value changes.
    edges = np.where(padded[1:] != padded[:-1])[0] + 1

    # Every second edge closes a run: turn its position into the run length.
    edges[1::2] -= edges[::2]
    return ' '.join(map(str, edges))


def open_gray16(_path, normalize_f=True, normalize=False, to_rgb=False):
    """ Helper to open competition specific files from path

    Args:
        _path (str): Path to the image on the LOCAL file system
        normalize_f (bool, optional): Min-max scale the image to [0, 255] and
            return it as uint8 with a trailing channel axis.  Takes precedence
            over ``normalize``.  A constant image (max == min) is returned as
            all zeros instead of dividing by zero.
        normalize (bool, optional): Whether or not to coerce image to be between 0-1
            (divides by 65535; only consulted when ``normalize_f`` is False)
        to_rgb (bool, optional): Whether or not to tile the grayscale image to produce a pseudo RGB image

    Returns:
        The image as a numpy array

    Raises:
        FileNotFoundError: If OpenCV cannot read ``_path`` (``cv2.imread``
            returns ``None`` for missing or undecodable files).
    """
    # The min-max branch reads the file unchanged, the other branches with
    # ANYDEPTH, as before.
    read_flag = cv2.IMREAD_UNCHANGED if normalize_f else cv2.IMREAD_ANYDEPTH
    img = cv2.imread(_path, read_flag)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {_path!r}")

    if normalize_f:
        img = img.astype('float32')
        low, high = img.min(), img.max()
        if high > low:
            img = (img - low) / (high - low) * 255.0  # scale image to [0, 255]
        else:
            # Constant image: the min-max range is zero, so there is nothing
            # to stretch and the division would produce NaNs.
            img = np.zeros_like(img)
        img = img.astype('uint8')
        # Always add a channel axis here; repeat it three times for pseudo RGB.
        return np.tile(np.expand_dims(img, axis=-1), 3 if to_rgb else 1)

    if normalize:
        img = img / 65535.
    if to_rgb:
        return np.tile(np.expand_dims(img, axis=-1), 3)
    return img

In [None]:
def get_1d_model(model_type='unet', encoder_name='timm-efficientnet-b4', encoder_weights='noisy-student',
                 in_channels=1, classes=3, activation=None):
    """Instantiate a segmentation_models_pytorch architecture by short name.

    Args:
        model_type: One of ``'unet'``, ``'fpn'``, ``'unetplusplus'``,
            ``'linknet'``, ``'deeplabv3'``, ``'deeplabv3plus'`` or ``'pspnet'``.
        encoder_name: Encoder name forwarded to the architecture.
        encoder_weights: Encoder weights identifier forwarded to the
            architecture (``None`` skips loading encoder weights).
        in_channels: Number of input channels.
        classes: Number of output channels.
        activation: Activation forwarded to the architecture.

    Returns:
        The constructed model.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    # Short name -> class name inside segmentation_models_pytorch.  The class
    # is looked up only after the name has been validated.
    architectures = {
        'unet': 'Unet',
        'fpn': 'FPN',
        'unetplusplus': 'UnetPlusPlus',
        'linknet': 'Linknet',
        'deeplabv3': 'DeepLabV3',
        'deeplabv3plus': 'DeepLabV3Plus',
        'pspnet': 'PSPNet',
    }
    if model_type not in architectures:
        raise ValueError(
            f"Unknown model_type {model_type!r}; expected one of {sorted(architectures)}"
        )

    model_cls = getattr(smp, architectures[model_type])
    return model_cls(encoder_name=encoder_name, encoder_weights=encoder_weights,
                     in_channels=in_channels, classes=classes, activation=activation)


In [None]:
df = pd.read_csv("../input/uw-madison-gi-tract-image-segmentation/sample_submission.csv")
# A sample submission without any rows switches the notebook into debug mode.
DEBUG = len(df.index) == 0

In [None]:
# Run configuration.  Keys marked "not read here" are not used by any cell
# visible in this notebook -- presumably leftovers from the training setup
# (TODO confirm before removing).
CFG = {
    'fold': 0,  # not read here
    'bs': 1,  # DataLoader batch size; test_1d_epoch takes the crop metadata of sample 0 of each batch
    'n_workers': 2,  # DataLoader worker processes
    'init_lr': 1e-3,  # not read here
    'warmup_factor': 10,  # not read here
    'warmup_epochs': 2,  # not read here
    'n_epochs': 50,  # not read here
    'img_size': 384,  # side length passed to get_1d_transformations
    'device': torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    'debug': DEBUG,  # True when the sample submission has no rows
    'loss_name': 'dice',  # not read here
    'model_type': 'unet',  # architecture name passed to get_1d_model
    'encoder_name': 'timm-efficientnet-b4',
    'in_channels': 5,  # model input channels; also the slice window size given to prepare_nd_df_test
    'num_classes': 3,  # model output channels
    }

In [None]:
def crop_get_mask(img, tol=15):
    """Index that crops a 2-D image to the rows/columns holding a pixel above ``tol``.

    Args:
        img: 2-D numpy array.
        tol: Intensity threshold; only pixels strictly greater count.

    Returns:
        The ``np.ix_`` open mesh ``(row_index, col_index)`` selecting every row
        and every column that contains at least one pixel above ``tol``.  When
        no pixel exceeds ``tol`` the mesh selects the whole image, so callers
        never receive an empty crop.
    """
    mask = img > tol
    if not mask.any():
        # Nothing above the threshold: keep the full frame rather than
        # returning an empty selection that cannot be resized downstream.
        mask = np.ones_like(mask, dtype=bool)
    return np.ix_(mask.any(1), mask.any(0))

In [None]:
class TractDatasetNSeg(Dataset):
    """Inference dataset yielding one multi-slice (2.5D) input per window.

    Each row of ``df`` lists the image paths of one window of slices
    (``nd_images``) together with the per-slice sizes (``slices_h``,
    ``slices_w``).  The slices are stacked as channels, cropped to the
    above-threshold region of the reference slice, optionally transformed,
    and scaled by their maximum.
    """

    # Position inside a window of the slice the prediction belongs to; it is
    # the same position prepare_nd_df_test takes the row ``id`` from.
    CENTER = 2

    def __init__(self, df, transform=None):
        """
        Args:
            df: Frame with ``nd_images``, ``slices_h`` and ``slices_w`` columns.
            transform: Optional callable invoked as ``transform(image=img)``
                and returning a mapping with an ``'image'`` entry.
        """
        self.df = df
        self.image_path = df['nd_images']
        self.sh = df['slices_h']
        self.sw = df['slices_w']
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Load window ``idx`` (a position, not an index label).

        Returns:
            A pair ``(image, meta)``: ``image`` is a float tensor with the
            slices on the first axis, and ``meta`` is the list
            ``[slice_h, slice_w, crop_index, crop_rows, crop_cols]`` needed to
            paste a prediction back into the full-size slice.
        """
        # Positional access: works whether or not the frame's index was reset.
        img_paths = self.image_path.iloc[idx]
        slack = np.stack([open_gray16(path, to_rgb=False)[:, :, 0] for path in img_paths])
        slack = np.transpose(slack, (1, 2, 0))  # slices become the last axis

        m = crop_get_mask(slack[:, :, self.CENTER], 15)
        img = slack[m]
        prev_shape = (img.shape[0], img.shape[1])

        if self.transform is not None:
            img = self.transform(image=img)['image']

        peak = img.max()
        if peak > 0:
            img = img / peak
        else:
            # All-zero crop: nothing to scale, and dividing would give NaNs.
            img = img.astype(np.float32)
        img = np.transpose(img, (2, 0, 1))  # slices first, as the model expects channels
        meta = [self.sh.iloc[idx][self.CENTER], self.sw.iloc[idx][self.CENTER],
                m, prev_shape[0], prev_shape[1]]
        return torch.tensor(img, dtype=torch.float), meta

In [None]:
def test_1d_epoch(model, loader, device):
    """Run inference over ``loader`` and RLE-encode the thresholded predictions.

    For every sample the first three output channels are passed through a
    sigmoid, thresholded at 0.5, resized back to the size of the crop the
    dataset took, pasted into an all-zero mask of the full slice size at the
    crop position, and run-length encoded.

    Args:
        model: Segmentation model returning logits with the output channels
            on axis 1.
        loader: DataLoader yielding ``(data, meta)`` batches where ``meta`` is
            ``[slice_h, slice_w, crop_index, crop_rows, crop_cols]`` as
            produced by ``TractDatasetNSeg``.
        device: Device the input batch is moved to.

    Returns:
        Three lists of RLE strings, one entry per sample in loader order, for
        output channels 0, 1 and 2 (returned as ``stomach``, ``smalltract``,
        ``largetract``).
    """
    model.eval()
    activation = nn.Sigmoid()
    stomach = []
    smalltract = []
    largetract = []
    # Output channel k is appended to per_channel[k].
    per_channel = (stomach, smalltract, largetract)

    with torch.no_grad():
        for (data, xd) in tqdm(loader, total=len(loader)):
            data = data.to(device)
            output = activation(model(data))
            output = (output > 0.5).to(torch.uint8).cpu().numpy()
            sh, sw, m, prev_shape0, prev_shape1 = xd

            for idx in range(0, len(sh)):
                root_shape = (int(sw[idx]), int(sh[idx]), 1)
                # cv2.resize takes its target size as (width, height).
                prev_shape = (int(prev_shape1[idx]), int(prev_shape0[idx]))
                # Must be a tuple: NumPy no longer treats a list of index
                # arrays as a multi-dimensional index.
                crop_index = (m[0][idx].numpy(), m[1][idx].numpy())

                for channel, rles in enumerate(per_channel):
                    resized = cv2.resize(output[idx, channel, :, :], prev_shape,
                                         interpolation=cv2.INTER_NEAREST)
                    pred_arr = np.zeros(shape=root_shape)
                    pred_arr[crop_index] = resized[:, :, np.newaxis]
                    rles.append(rle_encode(pred_arr[:, :, 0]))

    return stomach, smalltract, largetract

In [None]:
# Build the network without encoder weights; the weights come from the
# checkpoint loaded afterwards.
model = get_1d_model(
    model_type=CFG['model_type'],
    encoder_name=CFG['encoder_name'],
    encoder_weights=None,
    in_channels=CFG['in_channels'],
    classes=CFG['num_classes'],
    activation=None,
)
model = model.to(CFG['device'])

In [None]:
model_file = '../input/unet-2-5d-2folds-384-50-epochs/2_5d_v1_crop_best_fold_0.pth'
# map_location places the checkpoint tensors on the configured device, so a
# checkpoint saved on a GPU also loads when only a CPU is available.
model.load_state_dict(torch.load(model_file, map_location=CFG['device']))

In [None]:
transforms = get_1d_transformations(CFG['img_size'])

# Debug mode runs on the training images; otherwise the real test set is used.
if CFG['debug']:
    tr_csv_f = "../input/uw-madison-gi-tract-image-segmentation/train.csv"
    image_dir = '../input/uw-madison-gi-tract-image-segmentation/train'
else:
    tr_csv_f = "../input/uw-madison-gi-tract-image-segmentation/sample_submission.csv"
    image_dir = '../input/uw-madison-gi-tract-image-segmentation/test'

df = get_prepare_df(tr_csv_f, sep='/', TRAIN_DIR=image_dir)
if CFG['debug']:
    # Keep the debug run small: a single case.
    df = df[df['case_id'] == 30].reset_index(drop=True)

pdf = prepare_nd_df_test(df, WINDOW_SIZE=CFG['in_channels'])
test_set = TractDatasetNSeg(pdf.reset_index(drop=True), transforms["valid"])

In [None]:
# shuffle=False keeps the batches in the row order of pdf; the submission cell
# pairs the predictions with pdf['id'] by position.
test_loader = DataLoader(test_set, batch_size=CFG['bs'], shuffle=False, num_workers=CFG['n_workers'])

In [None]:
# Run the model over the whole test loader; each returned list holds one RLE
# string per window, for output channels 0, 1 and 2 respectively.
stomach, smalltract, largetract = test_1d_epoch(model, test_loader, CFG['device'])

In [None]:
# Build the submission: three rows per slice id, in the class order
# large_bowel, small_bowel, stomach.  Slices that were never the reference
# slice of a window (pdf has no row for them) get an empty prediction.
pids = pdf['id'].values
assert len(pids) == len(largetract) == len(smalltract) == len(stomach), \
    "expected exactly one prediction per window"

# id -> (large_bowel, small_bowel, stomach) RLEs.  A dict gives constant-time
# lookups and does not rely on df and pdf listing the ids in the same order.
pred_by_id = dict(zip(pids, zip(largetract, smalltract, stomach)))

# The encoding of an all-background mask does not depend on the mask size,
# so it is computed once.
blank = rle_encode(np.zeros(shape=(1, 1)))
blank_rles = (blank, blank, blank)

ids = []
classes = []
rles = []
for slice_id in tqdm(df['id'].values, total=df.shape[0]):
    ids.extend([slice_id] * 3)
    classes.extend(['large_bowel', 'small_bowel', 'stomach'])
    rles.extend(pred_by_id.get(slice_id, blank_rles))

# Release the per-class prediction lists before writing the file.
del largetract, smalltract, stomach, pred_by_id
gc.collect()

xxdf = pd.DataFrame({'id': ids, 'class': classes, 'predicted': rles})
xxdf.to_csv("submission.csv", index=False)