In [None]:
# Install segmentation_models_pytorch 0.1.3 from the local ../input directory
# (no package index is referenced). The three packages installed before it are
# presumably its dependencies -- confirm against the package's requirements.
!pip install ../input/segmentation-models-pytorch-0-1-3/pretrainedmodels-0.7.4/pretrainedmodels-0.7.4
!pip install ../input/segmentation-models-pytorch-0-1-3/efficientnet_pytorch-0.6.3/efficientnet_pytorch-0.6.3
!pip install ../input/segmentation-models-pytorch-0-1-3/timm-0.3.2-py3-none-any.whl
!pip install ../input/segmentation-models-pytorch-0-1-3/segmentation_models.pytorch.0.1.3/segmentation_models.pytorch.0.1.3

In [None]:
import os
import gc
import cv2
import pdb
import glob
import pytz
import warnings
import pickle
import random
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau, ExponentialLR
from sklearn.model_selection import KFold
import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader, Dataset, sampler
from albumentations.pytorch import ToTensorV2
import segmentation_models_pytorch as smp

import tifffile as tiff
import rasterio
from rasterio.windows import Window

In [None]:
from albumentations import (
    Compose,
    CenterCrop,
    CLAHE,
    Resize,
    Normalize
)

In [None]:
# Size each tile is resized to before being fed to the network (see get_transforms).
height, width = 1024, 1024
# NOTE(review): `reduce` is not referenced anywhere else in the visible notebook;
# presumably the downscale factor window/height -- confirm or remove.
reduce = 2
# Probability cutoff used to binarise each tile prediction.
THRESHOLD = 0.40
# Side length (in full-resolution pixels) of the tiles cut from each image.
window = 2048
# Minimum overlap, in pixels, requested between neighbouring tiles (passed to make_grid).
min_overlap = 256
# Directory holding the test .tiff images.
DATA = '../input/hubmap-kidney-segmentation/test/'
# Checkpoint files to load; predictions are averaged over every model in this list.
MODELS = ["../input/hubmap-unet-effnetb4-fold0/model_HuBMAP_Unet_timm_EffNetB4_NS_fold0.pth"]
# Sample submission; its `id` column drives which images are predicted.
df_sample = pd.read_csv('../input/hubmap-kidney-segmentation/sample_submission.csv')
# Number of tiles per forward pass.
batch_size = 8
# Use the GPU when one is available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Mask to RLE and RLE to Mask

In [None]:
#functions to convert encoding to mask and mask to encoding
def enc2mask(encs, shape):
    """Decode a list of run-length encodings into one label mask.

    Args:
        encs: sequence of RLE strings ("start length start length ...", with
            1-based starts). An entry that is a float NaN marks a missing
            encoding and is skipped.
        shape: two-element sequence; the flat buffer has shape[0]*shape[1]
            pixels and is reshaped to `shape` before being transposed.

    Returns:
        np.uint8 array of shape (shape[1], shape[0]) in which the pixels of
        the m-th encoding (0-based) hold the value m + 1 and the rest are 0.
    """
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for m, enc in enumerate(encs):
        # `np.float` was removed from NumPy (1.24); test against the builtin
        # float and NumPy floating scalars instead so NaN entries are skipped
        # on every NumPy version.
        if isinstance(enc, (float, np.floating)) and np.isnan(enc):
            continue
        tokens = enc.split()
        # Tokens alternate start, length; zip drops a trailing unpaired token,
        # matching the original len(s)//2 pair count.
        for start_tok, length_tok in zip(tokens[0::2], tokens[1::2]):
            start = int(start_tok) - 1  # RLE starts are 1-based
            length = int(length_tok)
            img[start:start+length] = 1 + m
    return img.reshape(shape).T

def mask2enc(mask, n=1):
    """Run-length encode each label 1..n of a label mask.

    The mask is transposed and flattened before encoding. For every label a
    string "start length start length ..." with 1-based starts is produced;
    labels that do not occur in the mask yield np.nan instead.

    Returns:
        list of length n holding one encoding (str or np.nan) per label.
    """
    flat = mask.T.flatten()
    encodings = []
    for label in range(1, n + 1):
        hits = (flat == label).astype(np.int8)
        if hits.sum() == 0:
            encodings.append(np.nan)
            continue
        # Pad with zeros on both sides so every run has a rising and a falling edge.
        padded = np.concatenate([[0], hits, [0]])
        edges = np.where(padded[1:] != padded[:-1])[0] + 1
        # Turn each (start, end) pair into (start, length).
        edges[1::2] -= edges[::2]
        encodings.append(' '.join(map(str, edges)))
    return encodings

#https://www.kaggle.com/bguberfain/memory-aware-rle-encoding
#with transposed mask
def rle_encode_less_memory(img):
    """Run-length encode a binary mask as "start length start length ...".

    The mask is transposed and flattened first; starts are 1-based. The
    flattened copy has its first and last pixel forced to zero, which this
    simplified edge-detection scheme requires; the input array is not modified.
    """
    flat = img.T.flatten()  # flatten() copies, so the caller's array stays intact

    # Guarantee that every run starts and ends inside the buffer.
    flat[[0, -1]] = 0
    change_idx = np.where(flat[1:] != flat[:-1])[0] + 2
    # Convert alternating (start, end) positions into (start, length).
    change_idx[1::2] -= change_idx[::2]

    return ' '.join(map(str, change_idx))

In [None]:
# ImageNet per-channel statistics: mean and standard deviation (not variance),
# passed to Normalize in get_transforms.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
# Identity affine transform handed to rasterio.open in the inference loop.
# NOTE(review): presumably supplied because the TIFFs carry no georeferencing -- confirm.
identity = rasterio.Affine(1, 0, 0, 0, 1, 0)

def get_transforms(mean, std):
    """Build the inference-time preprocessing pipeline.

    The pipeline resizes to the module-level (height, width) with area
    interpolation, normalises with the given per-channel mean/std, and
    converts the result to a tensor.

    Args:
        mean: per-channel means forwarded to Normalize.
        std: per-channel standard deviations forwarded to Normalize.

    Returns:
        An albumentations Compose object.
    """
    return Compose([
        Resize(height=height, width=width, interpolation=cv2.INTER_AREA, p=1.0),
        Normalize(mean=mean, std=std, p=1.0),
        ToTensorV2(),
    ])

def _tile_starts(length, window, min_overlap):
    """Return the int64 start offsets of the tiles covering one axis of size `length`."""
    if length <= window:
        # A single tile anchored at 0 covers the whole axis; this avoids the
        # negative start (length - window < 0) and duplicate tiles.
        return np.zeros(1, dtype=np.int64)
    count = length // (window - min_overlap) + 1
    starts = np.linspace(0, length, num=count, endpoint=False, dtype=np.int64)
    # Pin the last tile to the far edge so the axis is covered completely.
    starts[-1] = length - window
    return starts


def make_grid(shape, window=256, min_overlap=32):
    """
        Return an int64 array of size (N, 4), where N is the number of tiles and
        the 2nd axis holds the slice bounds x1, x2, y1, y2 of each tile.

        `x` indexes the first axis of `shape` and `y` the second. Tiles are
        `window` long along each axis and evenly spaced so that neighbours
        overlap by at least `min_overlap`; the last tile on an axis is pinned
        to the far edge. When an axis is not longer than `window`, a single
        tile clipped to that axis is produced.

        Raises:
            ValueError: if `window` is not strictly greater than `min_overlap`.
    """
    if window <= min_overlap:
        raise ValueError("window must be greater than min_overlap")
    x, y = shape
    x1 = _tile_starts(x, window, min_overlap)
    x2 = (x1 + window).clip(0, x)
    y1 = _tile_starts(y, window, min_overlap)
    y2 = (y1 + window).clip(0, y)

    # Cartesian product of the x and y tiles, x-major (same order as a nested
    # `for i in x: for j in y` loop).
    gx1, gy1 = np.meshgrid(x1, y1, indexing='ij')
    gx2, gy2 = np.meshgrid(x2, y2, indexing='ij')
    slices = np.stack([gx1, gx2, gy1, gy2], axis=-1).astype(np.int64)
    return slices.reshape(-1, 4)

class HuBMAPDataset(Dataset):
    """Tile-wise view over one opened rasterio image.

    Each item is a (tile tensor, tile bounds) pair, where the bounds are the
    x1, x2, y1, y2 values produced by make_grid for the image's shape using
    the module-level `window` and `min_overlap`.
    """

    def __init__(self, data):
        """
        Args:
            data: an open rasterio dataset; `count`, `shape`, `subdatasets`
                and `read` are used.
        """
        self.data = data
        # Always defined so callers can inspect/close the layers regardless of
        # the band layout.
        self.layers = []
        if self.data.count != 3:
            # The image does not expose 3 bands directly; open each
            # sub-dataset and use it as one channel.
            self.layers = [rasterio.open(subdataset) for subdataset in self.data.subdatasets]
        self.shape = self.data.shape
        self.mask_grid = make_grid(self.data.shape, window=window, min_overlap=min_overlap)
        self.transforms = get_transforms(mean, std)

    def __len__(self):
        """Number of tiles in the grid."""
        return len(self.mask_grid)

    def __getitem__(self, idx):
        """Read tile `idx`, apply the transforms and return (image, bounds)."""
        x1, x2, y1, y2 = (int(v) for v in self.mask_grid[idx])
        tile = Window.from_slices((x1, x2), (y1, y2))
        if self.data.count == 3:
            # Read from the dataset owned by this instance (not from a
            # same-named notebook global) and move bands to the last axis.
            img = self.data.read([1,2,3], window=tile)
            img = np.moveaxis(img, 0, -1)
        else:
            # Buffer sized to the tile itself, so tiles clipped at the image
            # border (smaller than `window`) are handled as well.
            img = np.zeros((x2 - x1, y2 - y1, 3), dtype=np.uint8)
            for i, layer in enumerate(self.layers):
                # NOTE(review): assumes each layer holds a single band so the
                # read result broadcasts into one channel -- confirm.
                img[:,:,i] = layer.read(window=tile)
        augmented = self.transforms(image=img)
        img = augmented['image']
        vertices = torch.tensor([x1, x2, y1, y2])
        return img, vertices

## Initialize models and load checkpoints

In [None]:
# Build one U-Net per checkpoint listed in MODELS; predictions are later
# averaged over this list.
models = []
for path in MODELS:
    # Load weights onto the CPU first; the model is moved to `device` below.
    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
    state_dict = torch.load(path, map_location=torch.device('cpu'))
    # encoder_weights=None: no pretrained encoder weights are requested here;
    # all weights come from the checkpoint loaded on the next line.
    model = smp.Unet('timm-efficientnet-b4', classes=1, encoder_weights=None)
    model.load_state_dict(state_dict)
    # Inference mode.
    model.eval()
    model.to(device)
    models.append(model)

# Drop the reference to the last loaded state dict.
# NOTE(review): raises NameError if MODELS is empty.
del state_dict
print(len(models))

In [None]:
def Make_prediction(img, tta = True):
    """Average the sigmoid outputs of every model in `models` for a batch.

    Args:
        img: input batch tensor fed directly to each model.
        tta: when True, each model is additionally run on the batch flipped
            along the last axis, the second-to-last axis, and both; the
            outputs are flipped back and averaged with the un-flipped one.

    Returns:
        Tensor of probabilities averaged over flips (if enabled) and models.
    """
    flip_dims = [[-1], [-2], [-2, -1]]
    ensemble = None
    with torch.no_grad():
        for net in models:
            # Un-flipped prediction starts the per-model accumulator.
            prob = torch.sigmoid(net(img)).detach()
            if tta:
                #x,y,xy flips as TTA
                for dims in flip_dims:
                    logits = net(torch.flip(img, dims))
                    # Undo the flip so the output aligns with the original batch.
                    logits = torch.flip(logits, dims)
                    prob += torch.sigmoid(logits).detach()
                prob /= (1 + len(flip_dims))
            if ensemble is None:
                ensemble = prob
            else:
                ensemble += prob
        ensemble /= len(models)
    return ensemble

In [None]:
# Predict every image listed in the sample submission, tile by tile, and
# collect one RLE string per image.
names, predictions = [],[]
for idx, row in tqdm(df_sample.iterrows(),total=len(df_sample)):
    imageId = row['id']
    # Context manager closes the image file once its tiles have been read.
    with rasterio.open(os.path.join(DATA, imageId+'.tiff'), transform = identity, num_threads='all_cpus') as data:
        # Per-pixel count of tiles that voted "foreground".
        preds = np.zeros(data.shape, dtype=np.uint8)
        dataset = HuBMAPDataset(data)
        dataloader = DataLoader(dataset, batch_size, num_workers=0, shuffle=False, pin_memory=True)
        try:
            for i, (img, vertices) in enumerate(dataloader):
                img = img.to(device)
                pred = Make_prediction(img)
                # Drop only the channel axis (models are built with classes=1).
                # A bare squeeze() would also drop the batch axis when the last
                # batch holds a single tile, and the zip below would then
                # iterate over image rows instead of tiles.
                pred = pred.squeeze(1).cpu().numpy()
                vertices = vertices.numpy()
                for p, vert in zip(pred, vertices):
                    x1, x2, y1, y2 = (int(v) for v in vert)
                    # cv2.resize takes (width, height); x indexes rows and y
                    # columns here, so the tile's own extent is (y2-y1, x2-x1).
                    p = cv2.resize(p, (y2 - y1, x2 - x1))
                    preds[x1:x2,y1:y2] += (p > THRESHOLD).astype(np.uint8)
        finally:
            # Close any sub-dataset handles the dataset opened for itself.
            for layer in getattr(dataset, 'layers', []):
                layer.close()
    # A pixel is foreground if any overlapping tile marked it.
    preds = (preds > 0.5).astype(np.uint8)
    #convert to rle
    rle = rle_encode_less_memory(preds)
    names.append(imageId)
    predictions.append(rle)
    del preds, dataset, dataloader
    gc.collect()

In [None]:
# Assemble the submission table (one row per image: id and RLE string) and
# write it to submission.csv in the working directory, without the index column.
df = pd.DataFrame({'id':names,'predicted':predictions})
df.to_csv('submission.csv', index=False)