In [None]:
# Copy the pytorch-segmentation-models-lib input folder into the current
# directory; the install cell below installs the packages from ./ .
!cp -r ../input/pytorch-segmentation-models-lib/ ./

In [None]:
# Set pip's global `disable-pip-version-check` option to true.
!pip config set global.disable-pip-version-check true

In [None]:
# Install the segmentation stack from the local copy made above (-q = quiet).
# Presumably the first three packages are dependencies of
# segmentation_models_pytorch and must be installed first - TODO confirm.
!pip install -q ./pytorch-segmentation-models-lib/pretrainedmodels-0.7.4/pretrainedmodels-0.7.4
!pip install -q ./pytorch-segmentation-models-lib/efficientnet_pytorch-0.6.3/efficientnet_pytorch-0.6.3
!pip install -q ./pytorch-segmentation-models-lib/timm-0.4.12-py3-none-any.whl
!pip install -q ./pytorch-segmentation-models-lib/segmentation_models_pytorch-0.2.0-py3-none-any.whl

In [16]:
import pandas as pd
import os
from glob import glob
import torch
import random
import numpy as np
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
import time
import datetime

# PyTorch 
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp

In [17]:
# --- Settings read by the cells below ---------------------------------------
# Size every image is resized to before it is fed to the model.
IMG_SIZE = (128, 128)
# Number of images per DataLoader batch.
BATCH_SIZE = 64

# --- Settings not referenced in the visible part of this notebook -----------
# Presumably carried over from the training notebook - TODO confirm.
SEED = 4321
VAL_SIZE = 0.2
LEARNING_RATE = 2e-3
N_EPOCHS = 15

In [18]:
# ref.: https://www.kaggle.com/stainsby/fast-tested-rle
def rle_encode(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns the runs as one space-separated string of "start length" pairs,
    where starts are 1-based positions in the flattened array. An all-zero
    mask gives an empty string.
    '''
    # Pad with a zero on each side so runs touching either end still produce
    # both an opening and a closing transition.
    padded = np.concatenate([[0], img.flatten(), [0]])
    # 1-based positions where the value changes: start, end, start, end, ...
    runs = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn every "end" entry into a run length, in place.
    np.subtract(runs[1::2], runs[::2], out=runs[1::2])
    return ' '.join(map(str, runs))

In [19]:
class GIDataset(torch.utils.data.Dataset):
    """Dataset of resized image slices and, for training, their masks.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a `path` column. The 'train' subset additionally reads
        `width`, `height` and the RLE columns listed in `CLASS_COLUMNS`.
    subset : str
        'train' makes `__getitem__` return `(image, masks)`; any other value
        makes it return only the image.

    Items
    -----
    image : float32 tensor of shape (3, *IMG_SIZE), values in [0, 1]; the three
        channels are copies of the same grayscale slice.
    masks : float32 tensor with one channel per entry of `CLASS_COLUMNS`.
    """

    # Mask channel order.
    CLASS_COLUMNS = ("large_bowel", "small_bowel", "stomach")

    def __init__(self, df, subset="train"):
        self.df = df
        self.subset = subset

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        img = self.__load_img(self.df['path'].iloc[index])
        img = torch.tensor(img.transpose(2, 0, 1))  # HWC -> CHW
        if self.subset != 'train':
            return img

        w = self.df['width'].iloc[index]
        h = self.df['height'].iloc[index]
        masks = np.zeros((IMG_SIZE[0], IMG_SIZE[1], 3), dtype=np.float32)
        for k, column in enumerate(self.CLASS_COLUMNS):
            rles = self.df[column].iloc[index]
            # NOTE(review): rle_decode is not defined in the visible part of
            # this notebook; the 'train' subset needs it - confirm it exists.
            mask = rle_decode(rles, shape=(h, w, 1))
            masks[:, :, k] = cv2.resize(mask, IMG_SIZE)
        return img, torch.tensor(masks.transpose(2, 0, 1))

    def __load_img(self, img_path):
        """Read one image and return it as float32 HxWx3 in [0, 1].

        Raises
        ------
        FileNotFoundError
            If OpenCV cannot read `img_path` (cv2.imread returns None instead
            of raising).
        """
        img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = self._scale_to_255(img)
        # NOTE(review): cv2.resize takes (width, height) while the mask buffer
        # above uses IMG_SIZE as (rows, cols); the two only agree for a square
        # IMG_SIZE.
        img = cv2.resize(img, IMG_SIZE)
        img = np.tile(img[..., None], [1, 1, 3])  # gray to rgb
        img = img.astype(np.float32) / 255.
        return img

    @staticmethod
    def _scale_to_255(img):
        """Min-max scale `img` to the range [0, 255] as floating point.

        A constant image (max == min) has no contrast to stretch; it is mapped
        to all zeros instead of dividing by zero, which would fill it with NaN.
        """
        lo, hi = img.min(), img.max()
        if hi == lo:
            return np.zeros(img.shape, dtype=np.float64)
        return (img - lo) / (hi - lo) * 255.0

In [20]:

def _image_id_from_path(path):
    """Rebuild a submission-style id from an image path.

    NOTE(review): assumes the competition layout
    '.../<case>/<case>_<day>/scans/slice_<num>_<w>_<h>_<sx>_<sy>.png', which
    maps to the id '<case>_<day>_slice_<num>' - confirm against the dataset.
    """
    case_day = os.path.basename(os.path.dirname(os.path.dirname(path)))
    slice_name = '_'.join(os.path.basename(path).split('_')[:2])
    return f'{case_day}_{slice_name}'


sub_df = pd.read_csv('../input/uw-madison-gi-tract-image-segmentation/sample_submission.csv')
# An empty sample submission means there is nothing to predict on, so fall
# back to a small slice of the training set as a smoke test.
debug = len(sub_df) == 0
if debug:
    sub_df = pd.read_csv('../input/uw-madison-gi-tract-image-segmentation/train.csv', nrows=1000*3)
    sub_df = sub_df.drop(columns=['class','segmentation']).drop_duplicates()
    image_glob = '/kaggle/input/uw-madison-gi-tract-image-segmentation/train/**/*png'
else:
    sub_df = sub_df.drop(columns=['class','predicted']).drop_duplicates()
    image_glob = '/kaggle/input/uw-madison-gi-tract-image-segmentation/test/**/*png'

# glob returns files in arbitrary order, so taking "the first N" files (or
# relying on their order) does not give the images of the ids in sub_df.
# Look each id up instead, so that paths[i] is the image for sub_df row i.
path_by_id = {_image_id_from_path(p): p for p in glob(image_glob, recursive=True)}
missing_ids = [image_id for image_id in sub_df['id'] if image_id not in path_by_id]
if missing_ids:
    raise FileNotFoundError(
        f'No image found for {len(missing_ids)} of {len(sub_df)} ids, e.g. {missing_ids[0]}'
    )
paths = [path_by_id[image_id] for image_id in sub_df['id']]

print('DEBUG: ',debug)

In [21]:
# Display the id frame built in the previous cell.
sub_df

In [None]:
CLASS_COLUMNS = ["large_bowel", "small_bowel", "stomach"]

# One row per image. sub_df had its class/prediction columns dropped and was
# de-duplicated when it was loaded, so its ids are used as they are (no [::3]
# striding, no 'predicted' column to read).
test_df = sub_df[["id"]].drop_duplicates().reset_index(drop=True)

# Empty RLE placeholders; GIDataset only reads these for the 'train' subset.
for class_column in CLASS_COLUMNS:
    test_df[class_column] = ""

# The splits below expect ids of the form 'case<N>_day<M>_slice_<K>'.
test_df["case"] = test_df["id"].apply(lambda x: int(x.split("_")[0].replace("case", "")))
test_df["day"] = test_df["id"].apply(lambda x: int(x.split("_")[1].replace("day", "")))
test_df["slice"] = test_df["id"].apply(lambda x: x.split("_")[3])


def _path_to_id(path):
    """Rebuild an image id from its path.

    NOTE(review): assumes '.../<case>_<day>/scans/slice_<num>_<w>_<h>_....png'
    - confirm against the dataset layout.
    """
    case_day = os.path.basename(os.path.dirname(os.path.dirname(path)))
    slice_name = "_".join(os.path.basename(path).split("_")[:2])
    return f"{case_day}_{slice_name}"


# Match images to ids by name rather than by position: `paths` comes from
# glob, whose ordering carries no relation to the order of the ids.
path_by_id = {_path_to_id(p): p for p in paths}
test_df["path"] = test_df["id"].map(path_by_id)

has_image = test_df["path"].notna()
if not has_image.all():
    n_missing = int((~has_image).sum())
    if not debug:
        # A real submission must contain every id, so do not continue silently.
        raise FileNotFoundError(f"{n_missing} of {len(test_df)} ids have no matching image in `paths`")
    print(f"DEBUG: skipping {n_missing} of {len(test_df)} ids without a matching image")
    test_df = test_df[has_image].reset_index(drop=True)
if test_df.empty:
    raise ValueError("No id could be matched to an image path")

# The file name (extension stripped) ends in ..._<width>_<height>_<a>_<b>.
test_df["width"] = test_df["path"].apply(lambda x: int(x[:-4].rsplit("_", 4)[1]))
test_df["height"] = test_df["path"].apply(lambda x: int(x[:-4].rsplit("_", 4)[2]))

test_dataset = GIDataset(test_df, subset='test')
# shuffle=False keeps predictions in the same order as the rows of test_df.
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=False)


In [22]:
def predict(model_path, test_dataloader):
    """Run a saved model over every batch and return the stacked outputs.

    Parameters
    ----------
    model_path : str
        File holding a complete pickled model object (not just a state dict):
        the loaded object is called directly.
    test_dataloader : iterable with a length
        Yields batches of image tensors.

    Returns
    -------
    numpy.ndarray
        Raw model outputs for all batches, concatenated along dimension 0 in
        iteration order. No activation or threshold is applied here.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # SECURITY: loading a full model unpickles arbitrary objects; only load
    # files from a trusted source.
    # map_location lets a model saved on GPU load on a CPU-only machine.
    # weights_only=False is needed on PyTorch versions whose default refuses
    # to unpickle full model objects; older versions reject the keyword.
    try:
        model = torch.load(model_path, map_location=device, weights_only=False)
    except TypeError:
        model = torch.load(model_path, map_location=device)
    model.to(device)  # inputs are moved to `device`, so the model must be too
    model.eval()

    preds = []
    pbar = tqdm(test_dataloader, total=len(test_dataloader), desc='Test ')
    with torch.no_grad():
        for images in pbar:
            images = images.to(device, dtype=torch.float)
            # Move each batch back to the CPU right away so outputs do not
            # accumulate in device memory.
            preds.append(model(images).cpu())
    return torch.cat(preds, dim=0).numpy()

In [23]:
# Run the saved model over the whole test loader; `preds` holds the raw
# outputs, one entry per image in loader order.
preds = predict(
    model_path='/kaggle/input/modelgitract/trained_unet.pt',
    test_dataloader=test_dataloader,
)

In [None]:

# Channel j of a prediction belongs to class_names[j]; this is the channel
# order GIDataset uses when it builds training masks.
class_names = ["large_bowel", "small_bowel", "stomach"]

records = []
for i in range(len(preds)):
    image_id = test_df['id'].iloc[i]
    # cv2.resize expects dsize as (width, height) of plain Python ints.
    original_size = (int(test_df['width'].iloc[i]), int(test_df['height'].iloc[i]))
    for j, class_name in enumerate(class_names):
        # resize the prediction back to the original image shape
        pred_img = cv2.resize(preds[i, j, :, :], original_size, interpolation=cv2.INTER_NEAREST)
        # NOTE(review): this thresholds the raw model output at 0.5. If the
        # model emits logits rather than probabilities a sigmoid is needed
        # first - confirm against how the model was trained.
        pred_img = (pred_img > 0.5).astype(dtype='uint8')    # classify
        # Encode straight away so only one full-size mask is alive at a time.
        records.append((image_id, class_name, rle_encode(pred_img)))

# One row per (id, class). Build a fresh frame instead of assigning into
# sub_df: sub_df has one row per image while there are three masks per image,
# so an index-aligned assignment would silently keep the wrong masks.
pred_df = pd.DataFrame(records, columns=["id", "class", "predicted"])
pred_df.to_csv("submission.csv", index=False)