In [None]:
from os import listdir, makedirs, getcwd, remove
from os.path import isfile, join, abspath, exists, isdir, expanduser

In [None]:
# Show what is available under the modellibs/libs input directory.
!ls /kaggle/input/modellibs/libs

In [None]:
# Copy the efficientnet_pytorch 0.6.3 source tree from the input directory to /tmp
# (presumably because the install step needs a writable tree -- TODO confirm).
!cp -r /kaggle/input/modellibs/libs/efficientnet_pytorch-0.6.3/efficientnet_pytorch-0.6.3/ /tmp/efficientnet_pytorch-0.6.3

In [None]:
# Run the package's own setup.py install from the /tmp copy (-q keeps the output quiet).
!cd /tmp/efficientnet_pytorch-0.6.3 && python setup.py -q install

In [None]:
# Also copy the package directory straight into the Python 3.7 site-packages.
# NOTE(review): looks redundant with the setup.py install above and hard-codes the
# interpreter version in the path -- confirm it is still needed.
!cp -r /tmp/efficientnet_pytorch-0.6.3/efficientnet_pytorch /opt/conda/lib/python3.7/site-packages/

In [None]:
# Copy the pytorch-image-models (timm) 0.4.12 source tree from the input directory to /tmp.
!cp -R /kaggle/input/modellibs/libs/pytorch-image-models-0.4.12/pytorch-image-models-0.4.12/ /tmp/pytorch-image-models-0.4.12

In [None]:
# Run the package's own setup.py install from the /tmp copy (-q keeps the output quiet).
!cd /tmp/pytorch-image-models-0.4.12/ && python setup.py -q install

In [None]:
# Also copy the `timm` package directory straight into the Python 3.7 site-packages.
# NOTE(review): looks redundant with the setup.py install above and hard-codes the
# interpreter version in the path -- confirm it is still needed.
!cp -r /tmp/pytorch-image-models-0.4.12/timm /opt/conda/lib/python3.7/site-packages/

In [None]:
# Copy the pretrained-models.pytorch source tree from the input directory to /tmp.
!cp -R /kaggle/input/modellibs/libs/pretrained-models.pytorch/ /tmp/pretrained-models.pytorch

In [None]:
# Run the package's own setup.py install from the /tmp copy (-q keeps the output quiet).
!cd /tmp/pretrained-models.pytorch && python setup.py -q install

In [None]:
# Also copy the `pretrainedmodels` package directory straight into the Python 3.7 site-packages.
# NOTE(review): looks redundant with the setup.py install above and hard-codes the
# interpreter version in the path -- confirm it is still needed.
!cp -r /tmp/pretrained-models.pytorch/pretrainedmodels /opt/conda/lib/python3.7/site-packages/

In [None]:
# Install segmentation_models_pytorch 0.2.0 from the wheel file stored in the
# seg-model-pytorch input directory (pip is pointed at the local file, -q keeps it quiet).
!cd /kaggle/input/seg-model-pytorch && pip install -q segmentation_models_pytorch-0.2.0-py3-none-any.whl

In [None]:
#Download some important libraries
# !pip install segmentation-models-pytorch

import pandas as pd
from PIL import Image
from matplotlib import pyplot as plt
import os
import torch
from glob import glob
from pandarallel import pandarallel
from tqdm.notebook import tqdm
from torch.utils.data import Dataset,DataLoader
from typing import List,Tuple
import numpy as np
from torchvision import transforms as T
from torchvision.utils import make_grid
import cv2
import copy
from matplotlib.patches import Rectangle
from sklearn.model_selection import train_test_split
from collections import defaultdict
import segmentation_models_pytorch as smp
from torch.cuda import amp
import torch.nn as nn
import torch.optim as optim
import time
import gc
from torch.optim import lr_scheduler
# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

import warnings
# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# For descriptive error messages
# (CUDA_LAUNCH_BLOCKING=1 asks CUDA to run kernel launches synchronously, so a failure
# is reported at the call that triggered it rather than at a later, unrelated call).
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
# Register tqdm's progress_* methods on pandas objects; `progress_apply` is used further down.
tqdm.pandas()

# Start pandarallel with progress bars enabled.
# NOTE(review): no parallel_* call is visible in this notebook -- confirm this is still needed.
pandarallel.initialize(progress_bar=True)

In [None]:
class CFG:
    """Static settings for the notebook, stored as plain class attributes.

    Attributes are read directly off the class (``CFG.thr``, ``CFG.device`` ...).
    """

    # --- general ---
    seed = 42
    debug = False
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # --- model ---
    model_name = "UNET"
    encoder_name = "resnet50"
    encoder_weights = "imagenet"
    num_classes = 3
    img_size = (224, 224)

    # --- batching / data split ---
    train_batch_size = 32
    val_batch_size = 32
    val_split_percentage = 0.2
    # Number of batches to accumulate; at least 1.
    n_accumulate = max(1, 32 // train_batch_size)

    # --- optimisation ---
    epochs = 22
    lr = 2e-3
    min_lr = 1e-6
    weight_decay = 1e-6
    scheduler = 'CosineAnnealingLR'
    # Max iterations for the scheduler.
    # NOTE(review): 30000 is presumably the approximate number of training samples -- confirm.
    T_max = int(30000 / train_batch_size * epochs) + 50

    # --- inference ---
    # Probability threshold used as the default `thr` of infer().
    thr = 0.45

In [None]:
# Inference section: input locations, relative to the notebook's working directory.
# Every competition file is derived from BASE_PATH so the dataset root is spelled once.
BASE_PATH = '../input/uw-madison-gi-tract-image-segmentation'
TRAIN_METADATA_FILE = f"{BASE_PATH}/train.csv"
TRAIN_DIR = f"{BASE_PATH}/train/"
SAMPLE_SUBMISSION_CSV_PATH = f"{BASE_PATH}/sample_submission.csv"
# Alias kept because later cells refer to the training CSV under this name.
TRAIN_FILE_CSV_PATH = TRAIN_METADATA_FILE
# Directory searched for the `best_epoch*.bin` checkpoints.
CKPT_DIR = '../input/res50augupdated2iter'

In [None]:
def get_metadata(df):
    """Add integer ``case``, ``day`` and ``slice`` columns parsed from ``df["id"]``.

    Each id is split on underscores: ``case`` is the number following the text
    "case" in the first piece, ``day`` the number following "day" in the second
    piece, and ``slice`` the last piece converted to int.

    The columns are written onto ``df`` itself, and that same object is returned.
    """
    def _case_number(sample_id):
        return int(sample_id.split("_")[0].split("case")[1])

    def _day_number(sample_id):
        return int(sample_id.split("_")[1].split("day")[1])

    def _slice_number(sample_id):
        return int(sample_id.split("_")[-1])

    column_parsers = (
        ("case", _case_number),
        ("day", _day_number),
        ("slice", _slice_number),
    )
    for column, parser in column_parsers:
        df[column] = df["id"].apply(parser)
    return df

def path2info(row):
    """Fill ``row`` with metadata parsed from its ``'image_path'`` value.

    The path is split on '/'. From the file name (last component, split on '_')
    the 2nd field becomes ``slice``, the 3rd ``width`` and the 4th ``height``.
    From the directory three levels from the end (split on '_') the first field
    minus the text 'case' becomes ``case`` and the second minus 'day' becomes ``day``.
    All five values are ints. ``row`` is modified in place and returned.
    """
    parts = row['image_path'].split('/')
    file_name, study_dir = parts[-1], parts[-3]

    slice_number = int(file_name.split('_')[1])
    case_number = int(study_dir.split('_')[0].replace('case', ''))
    day_number = int(study_dir.split('_')[1].replace('day', ''))
    pixel_width = int(file_name.split('_')[2])
    pixel_height = int(file_name.split('_')[3])

    # Keys are assigned in this order so the resulting column order stays the same.
    parsed = (
        ('height', pixel_height),
        ('width', pixel_width),
        ('case', case_number),
        ('day', day_number),
        ('slice', slice_number),
    )
    for key, value in parsed:
        row[key] = value
    return row

In [None]:
def load_img(path):
    """Read an image with OpenCV (unchanged bit depth) and scale it by its maximum.

    Args:
        path: location of the image file, passed to ``cv2.imread``.

    Returns:
        A float32 array with a new axis inserted at position 2 (so a 2-D scan
        becomes ``(H, W, 1)``), divided by its maximum value when that maximum is
        non-zero. An all-zero image is returned as zeros.

    Raises:
        FileNotFoundError: if OpenCV cannot read the file. ``cv2.imread`` signals
            a missing or unreadable file by returning ``None`` instead of raising.
    """
    img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if img is None:
        raise FileNotFoundError(f"image is missing or unreadable: {path!r}")
    # The original author's note says the source scans are uint16.
    img = np.expand_dims(img, axis=2).astype('float32')
    peak = np.max(img)
    if peak:
        img /= peak  # scale image to [0, 1]
    return img

def load_image(path):
    """Open ``path`` with PIL and return a copy converted to RGB.

    The source file is opened in a ``with`` block so its handle is closed as
    soon as the conversion is done, instead of whenever PIL or the garbage
    collector gets around to it. The returned image is the converted copy.
    """
    with Image.open(path) as source:
        return source.convert("RGB")

def load_msk(path):
    """Load a mask saved with NumPy and return it as float32 divided by 255.

    ``path`` is handed to ``np.load``; the loaded array is cast to float32 and
    scaled in place on that float32 copy.
    """
    mask = np.load(path).astype('float32')
    np.divide(mask, 255.0, out=mask)
    return mask

def show_img(img, mask=None):
    """Draw ``img`` after CLAHE contrast equalisation, optionally with a mask overlay.

    The image is shown with the 'bone' colormap. When ``mask`` is given it is
    drawn on top at 50% opacity and a three-entry legend (Large Bowel, Small
    Bowel, Stomach) is added. Axes are hidden. Draws on the current matplotlib
    axes and returns nothing.
    """
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    plt.imshow(equalizer.apply(img), cmap='bone')

    if mask is not None:
        plt.imshow(mask, alpha=0.5)
        # One legend swatch per organ, in the same order as the labels below.
        swatch_colors = [(0.667,0.0,0.0), (0.0,0.667,0.0), (0.0,0.0,0.667)]
        handles = []
        for rgb in swatch_colors:
            handles.append(Rectangle((0,0),1,1, color=rgb))
        plt.legend(handles, ["Large Bowel", "Small Bowel", "Stomach"])

    plt.axis('off')

In [None]:
# Start from the sample submission. When it has no rows, switch to debug mode and
# use the first 3000 rows of the training CSV instead.
sub_df = pd.read_csv(SAMPLE_SUBMISSION_CSV_PATH)
debug = len(sub_df) == 0
if debug:
    sub_df = pd.read_csv(TRAIN_FILE_CSV_PATH)[:1000*3]
# Keep one row per id: drop the per-class columns, then de-duplicate.
sub_df = sub_df.drop(
    columns=['class', 'segmentation' if debug else 'predicted']
).drop_duplicates()
sub_df = get_metadata(sub_df)

In [None]:
# Gather every PNG under train/ (debug) or test/ (submission run), then parse
# case/day/slice/width/height out of each path.
paths = glob(
    (BASE_PATH+'/train/**/*png') if debug else (BASE_PATH+'/test/**/*png'),
    recursive=True,
)
path_df = pd.DataFrame({'image_path': paths}).progress_apply(path2info, axis=1)
path_df.head()

In [None]:
sub_df.head()

In [None]:
# Attach the image path and size to every submission id; the left join keeps ids
# that have no matching file.
test_df = pd.merge(sub_df, path_df, how='left', on=['case','day','slice'])
test_df.head()

In [None]:
class TestDataset(Dataset):
    """Dataset that yields RGB images (and optionally masks) listed in a DataFrame.

    Args:
        df: DataFrame with ``'image_path'`` and ``'id'`` columns. Mask paths are
            read from a ``'mask_path'`` column, or ``'msk_path'`` if that is the
            name present.
        transforms: optional callable invoked as ``transforms(image=..., mask=...)``
            (or with only ``image=``) that returns a dict with ``'image'`` and
            ``'mask'`` entries.
        label: truthy to return ``(image, mask)`` pairs; falsy (the default) to
            return ``(image, id, height, width)`` for inference.

    Raises:
        ValueError: if ``label`` is truthy but ``df`` has no mask-path column.
    """

    # Accepted spellings of the mask-path column, in order of preference.
    _MASK_COLUMNS = ('mask_path', 'msk_path')

    def __init__(self,df,transforms=None,label=None):
        self.df         = df
        self.label      = label
        self.img_paths  = df['image_path'].tolist()
        self.ids        = df['id'].tolist()
        # Check and read the SAME column name (the two used to disagree).
        mask_column = next((col for col in self._MASK_COLUMNS if col in df.columns), None)
        self.msk_paths  = df[mask_column].tolist() if mask_column is not None else None
        if self.label and self.msk_paths is None:
            raise ValueError(
                "label is set but df has no mask path column "
                f"(expected one of {self._MASK_COLUMNS})"
            )
        self.transforms = transforms

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        """Return one sample as channel-first tensors.

        With ``label`` set: ``(image, mask)``. Otherwise ``(image, id, h, w)``,
        where ``h`` and ``w`` are the image size before any transform is applied.
        """
        img_path  = self.img_paths[index]
        id_       = self.ids[index]
        img = np.array(load_image(img_path))
        h, w = img.shape[:2]
        if self.label:
            msk = load_msk(self.msk_paths[index])
            if self.transforms:
                data = self.transforms(image=img, mask=msk)
                img  = data['image']
                msk  = data['mask']
            # Move the last (channel) axis to the front.
            img = np.transpose(img, (2, 0, 1))
            msk = np.transpose(msk, (2, 0, 1))
            return torch.tensor(img), torch.tensor(msk)
        else:
            if self.transforms:
                data = self.transforms(image=img)
                img  = data['image']
            img = np.transpose(img, (2, 0, 1))
            return torch.tensor(img), id_, h, w

In [None]:
# Normalisation statistics passed to A.Normalize below (given on a 0-1 scale;
# max_pixel_value=255 tells Normalize how to rescale the input).
COLOR_MEAN: float = 0.349977
COLOR_STD: float = 0.215829

# Inference-time pipeline: centre crop to CFG.img_size, then normalise.
test_transforms = {
    "test": A.Compose(
        [
            A.CenterCrop(height=CFG.img_size[0], width=CFG.img_size[1]),
            A.Normalize(mean=COLOR_MEAN, std=COLOR_STD, max_pixel_value=255),
        ],
        p=1.0,
    )
}

In [None]:
def build_model():
    """Create an ``smp.Unet`` with the encoder and class count taken from ``CFG``.

    No pretrained encoder weights are requested (``encoder_weights=None``), and
    the network takes 3-channel input.
    """
    unet_kwargs = dict(
        encoder_name=CFG.encoder_name,
        encoder_weights=None,
        in_channels=3,              # model input channels (1 for gray-scale images, 3 for RGB, etc.)
        classes=CFG.num_classes,    # model output channels (number of classes in your dataset)
    )
    return smp.Unet(**unet_kwargs)

def load_model(path):
    """Build the network and load a saved ``state_dict`` into it.

    Args:
        path: checkpoint location (anything ``torch.load`` accepts).

    Returns:
        The model in eval mode, with its weights on the CPU. Callers move it to
        the target device themselves.

    Note:
        ``torch.load`` unpickles the file, so only load checkpoints you trust.
    """
    model = build_model()
    # map_location='cpu' lets a checkpoint saved from a GPU load on a CPU-only host.
    state_dict = torch.load(path, map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()
    return model

In [None]:
# import cupy as cp

def mask2rle(msk, thr=0.5):
    '''
    Run-length encode a binary mask.

    msk: array-like, 1 - mask, 0 - background; it is flattened in row-major order.
    thr: accepted for interface compatibility; the body does not use it.
    Returns the runs as a space-separated string of "start length" pairs, with
    1-based start positions. An all-background mask gives an empty string.
    '''
    flat = np.array(msk).flatten()
    # Zero sentinels on both ends so runs touching the borders still produce an edge.
    padded = np.concatenate(([0], flat, [0]))
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    starts = edges[0::2]
    lengths = edges[1::2] - starts
    return ' '.join(f'{start} {length}' for start, length in zip(starts, lengths))

def masks2rles(msks, ids, heights, widths):
    """Zero-pad each predicted mask back to its image size and run-length encode it.

    Args:
        msks: array of shape (n, h, w, 3); the last axis holds one binary mask per class.
        ids: sequence of n sample ids.
        heights, widths: per-sample original image sizes (anything ``int()`` accepts,
            e.g. the tensors produced by a DataLoader).

    Returns:
        ``(pred_strings, pred_ids, pred_classes)``: three parallel lists holding,
        for each sample, one entry per class in the order large_bowel,
        small_bowel, stomach.

    Raises:
        ValueError: from ``np.pad`` if a mask is larger than its target size.

    Padding is split as floor(diff / 2) before and the remainder after, per axis.
    NOTE(review): this assumes the mask came from a centre crop that starts at
    floor(diff / 2) -- confirm if the test-time transform changes.
    """
    class_names = ['large_bowel', 'small_bowel', 'stomach']
    pred_strings = []; pred_ids = []; pred_classes = []
    for idx in range(msks.shape[0]):
        msk = msks[idx]
        height = int(heights[idx])
        width = int(widths[idx])
        # Axis 0 is rows (height) and axis 1 is columns (width).
        msk_height, msk_width = msk.shape[:2]
        top = (height - msk_height)//2
        bottom = height - msk_height - top
        left = (width - msk_width)//2
        right = width - msk_width - left
        msk = np.pad(msk, ((top, bottom), (left, right), (0, 0)),
                     mode='constant', constant_values=0)
        rle = [mask2rle(msk[..., midx]) for midx in range(len(class_names))]
        pred_strings.extend(rle)
        pred_ids.extend([ids[idx]]*len(rle))
        pred_classes.extend(class_names)
    return pred_strings, pred_ids, pred_classes

In [None]:

@torch.no_grad()
def infer(model_paths, test_loader, num_log=1, thr=CFG.thr):
    """Run ensemble inference over ``test_loader`` and RLE-encode the predictions.

    Args:
        model_paths: checkpoint paths; the sigmoid outputs of all models are averaged.
        test_loader: yields ``(images, ids, heights, widths)`` batches.
        num_log: number of leading batches whose images/masks are kept for plotting
            (at most 10 samples from each).
        thr: probability threshold applied to the averaged prediction.

    Returns:
        ``(pred_strings, pred_ids, pred_classes, imgs, msks)``: the three parallel
        prediction lists from ``masks2rles`` plus the logged image and mask arrays.

    Raises:
        ValueError: if ``model_paths`` is empty.

    Every checkpoint is loaded once up front and kept on ``CFG.device`` for the
    whole run, so all ensemble members must fit in device memory together.
    """
    model_paths = list(model_paths)
    if not model_paths:
        raise ValueError("infer() received no checkpoint paths; check the checkpoint directory and glob pattern")
    # Loading is loop-invariant: do it once instead of once per batch per model.
    models = [load_model(path).to(CFG.device) for path in model_paths]

    msks = []; imgs = []
    pred_strings = []; pred_ids = []; pred_classes = []
    for idx, (img, ids, heights, widths) in enumerate(tqdm(test_loader, total=len(test_loader), desc='Infer ')):
        img = img.to(CFG.device, dtype=torch.float)
        msk = None
        for model in models:
            # Each model contributes an equal share of the averaged probability.
            prob = torch.sigmoid(model(img)) / len(models)
            msk = prob if msk is None else msk + prob
        msk = (msk.permute((0,2,3,1))>thr).to(torch.uint8).cpu().detach().numpy() # shape: (n, h, w, c)
        result = masks2rles(msk, ids, heights, widths)
        pred_strings.extend(result[0])
        pred_ids.extend(result[1])
        pred_classes.extend(result[2])
        if idx<num_log:
            img = img.permute((0,2,3,1)).cpu().detach().numpy()
            imgs.append(img[:10])
            msks.append(msk[:10])
        del img, msk, prob, result
        gc.collect()
        torch.cuda.empty_cache()
    # Release the ensemble before returning.
    del models
    gc.collect()
    torch.cuda.empty_cache()
    return pred_strings, pred_ids, pred_classes, imgs, msks

In [None]:
# Wire the test frame into a DataLoader, collect the checkpoints and run inference.
test_dataset = TestDataset(df=test_df, transforms=test_transforms['test'])
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=CFG.val_batch_size,
    shuffle=False,
    num_workers=4,
    pin_memory=False,
)
model_paths = glob(f'{CKPT_DIR}/best_epoch*.bin')
pred_strings, pred_ids, pred_classes, imgs, msks = infer(model_paths, test_loader)

In [None]:
# Show the first five logged samples as three panels: image, mask, and overlay.
for img, msk in zip(imgs[0][:5], msks[0][:5]):
    plt.figure(figsize=(12, 7))

    plt.subplot(1, 3, 1)
    plt.imshow(img, cmap='bone')
    plt.axis('OFF')
    plt.title('image')

    plt.subplot(1, 3, 2)
    plt.imshow(msk*255)
    plt.axis('OFF')
    plt.title('mask')

    plt.subplot(1, 3, 3)
    plt.imshow(img, cmap='bone')
    plt.imshow(msk*255, alpha=0.4)
    plt.axis('OFF')
    plt.title('overlay')

    plt.tight_layout()
    plt.show()

In [None]:
# Drop the logged preview arrays now that they have been plotted, then run a
# garbage-collection pass.
del imgs, msks
gc.collect()

In [None]:
# Collect the predictions into a frame keyed by (id, class).
pred_df = pd.DataFrame(
    {
        "id": pred_ids,
        "class": pred_classes,
        "predicted": pred_strings,
    }
)

# Re-read the template rows (train subset in debug mode, sample submission otherwise)
# without their original target column, so the merge supplies `predicted`.
if debug:
    sub_df = pd.read_csv(BASE_PATH+'/train.csv')[:1000*3].drop(columns='segmentation')
else:
    sub_df = pd.read_csv(BASE_PATH+'/sample_submission.csv').drop(columns='predicted')

sub_df = sub_df.merge(pred_df, on=['id','class'])
sub_df.to_csv('submission.csv',index=False)
print(sub_df.head(5))

In [None]:
# pred_df.to_csv("submission.csv", index=False)