In [None]:
pip -q install ../input/smp-packages/pretrainedmodels-0.7.4-py3-none-any.whl

In [None]:
pip -q install ../input/smp-packages/efficientnet_pytorch-0.6.3-py2.py3-none-any.whl

In [None]:
pip -q install ../input/smp-packages/timm-0.3.2-py3-none-any.whl

In [None]:
pip -q install ../input/smp-packages/segmentation_models_pytorch-0.1.3-py3-none-any.whl

In [None]:
import numpy as np
import pandas as pd
import os
import gc
import torch
import pathlib
import rasterio
from rasterio.windows import Window
import segmentation_models_pytorch as smp
from tqdm.notebook import tqdm
import cv2
import albumentations as albu
from albumentations.pytorch import ToTensorV2

In [None]:
# Side length (in pixels) of the square tiles cut from each test image;
# passed to make_grid and used when pasting predictions back.
WINDOW = 1024
# Side length each tile is resized to by `transform` before it is fed to the models.
image_size = 512
# Minimum overlap between neighbouring tiles, passed to make_grid.
MIN_OVERLAP = 128
# Cut-off applied to the averaged sigmoid output (and reused on the vote map).
THRESHOLD = 0.3
# Number of non-black tiles stacked into one forward pass.
BATCH_SIZE = 8

In [None]:
# One checkpoint per cross-validation fold; predictions are averaged over all of them.
# NOTE(review): this list was originally labelled "best dice", but every file is
# named best_loss_* — confirm which checkpoint selection is intended.
model_pths = [
    '../input/hubmap-exp007/best_loss_fold0.pth',
    '../input/hubmap-exp007/best_loss_fold1.pth',
    '../input/hubmap-exp007/best_loss_fold2.pth',
    '../input/hubmap-exp007/best_loss_fold3.pth',
    '../input/hubmap-exp007/best_loss_fold4.pth'
]

# Functions

In [None]:
def rle_encode_less_memory(img):
    """Run-length encode a 2-D binary mask, scanning it column by column.

    The result is a space-separated string of alternating 1-based start
    positions and run lengths. The first and last pixels of the flattened
    copy are forced to 0 so that every run has both an opening and a closing
    transition.
    """
    # flatten() returns a copy, so the caller's mask is not modified below.
    flat = img.T.flatten()
    flat[0] = flat[-1] = 0
    # Positions (1-based) where the value changes: run starts and run ends alternate.
    runs = np.flatnonzero(flat[1:] != flat[:-1]) + 2
    # Turn each end position into a length by subtracting its start.
    runs[1::2] -= runs[::2]
    return ' '.join(map(str, runs))

def make_grid(shape, window=256, min_overlap=32):
    """
        Return Array of size (N,4), where N - number of tiles,
        2nd axis represents slices: x1,x2,y1,y2

        shape       - (x, y) extent of the area to tile
        window      - side length of a tile
        min_overlap - minimum overlap between neighbouring tiles

        Tiles are evenly spread along each axis and the last tile is aligned
        with the far edge. If an axis is shorter than `window`, a single tile
        covering [0, length) is returned for that axis instead of a negative
        start coordinate.

        Raises ValueError if `window` is not larger than `min_overlap`.
    """
    if window <= min_overlap:
        raise ValueError(
            f'window ({window}) must be larger than min_overlap ({min_overlap})'
        )

    def _axis_bounds(length):
        # Start/stop coordinates of the tiles along one axis.
        count = length // (window - min_overlap) + 1
        starts = np.linspace(0, length, num=count, endpoint=False, dtype=np.int64)
        # Align the last tile with the far edge; never let it start before 0.
        starts[-1] = max(length - window, 0)
        stops = (starts + window).clip(0, length)
        return starts, stops

    x, y = shape
    x1, x2 = _axis_bounds(x)
    y1, y2 = _axis_bounds(y)
    nx, ny = len(x1), len(y1)

    # Cartesian product of the per-axis tiles, x-major (row i*ny + j is tile (i, j)).
    slices = np.zeros((nx, ny, 4), dtype=np.int64)
    slices[..., 0] = x1[:, None]
    slices[..., 1] = x2[:, None]
    slices[..., 2] = y1[None, :]
    slices[..., 3] = y2[None, :]
    return slices.reshape(nx * ny, 4)

In [None]:
def read_from_slice(dataset, x1, x2, y1, y2):
    """Read the window [x1:x2, y1:y2] of an open rasterio dataset as an HxWxC array.

    A 3-band dataset is read directly (bands 1-3) and moved to channels-last.
    Otherwise each entry of `dataset.subdatasets` is opened and its first band
    becomes one channel of a uint8 array.

    Raises ValueError if the dataset neither has 3 bands nor any sub-datasets
    (previously this fell through to an UnboundLocalError).
    """
    has_rgb_bands = dataset.count == 3
    subdatasets = [] if has_rgb_bands else dataset.subdatasets
    if not has_rgb_bands and len(subdatasets) == 0:
        raise ValueError(
            f'dataset has {dataset.count} band(s) and no sub-datasets; cannot read image'
        )

    window = Window.from_slices((x1, x2), (y1, y2))
    if has_rgb_bands:
        image = dataset.read([1, 2, 3], window=window)
        return np.moveaxis(image, 0, -1)

    # Size the buffer from the requested window rather than a global tile size,
    # so clipped (smaller) tiles are handled as well.
    image = np.zeros((x2 - x1, y2 - y1, len(subdatasets)), dtype=np.uint8)
    for i, subdataset in enumerate(subdatasets):
        with rasterio.open(subdataset) as layer:
            image[:, :, i] = layer.read(1, window=window)
    return image

In [None]:
def load_model(model_pths):
    """Build one U-Net per checkpoint path and load its weights.

    Every model is an `smp.Unet` with a `timm-efficientnet-b4` encoder,
    3 input channels and a single output channel without activation (raw
    logits). Models are returned in eval mode, in the order of `model_pths`.

    Checkpoints are deserialized onto the CPU so loading does not depend on
    the device they were saved from; callers move the models to the GPU.
    """
    models = []
    for model_pth in model_pths:
        model = smp.Unet(
            'timm-efficientnet-b4',
            encoder_weights=None,  # weights come from the checkpoint below
            in_channels=3,
            classes=1,
            activation=None,
            decoder_use_batchnorm=True,
        )
        state = torch.load(model_pth, map_location='cpu')
        model.load_state_dict(state)
        model.eval()
        models.append(model)
    return models

In [None]:
# Preprocessing applied to every tile before inference:
# resize to image_size x image_size, normalize with albumentations' default
# Normalize() settings, then convert to a torch tensor via ToTensorV2.
# NOTE(review): presumably this mirrors the training-time preprocessing — confirm.
transform = albu.Compose([
    albu.Resize(image_size, image_size),
    albu.Normalize(),
    ToTensorV2()
])

In [None]:
def infer(model, image):
    """Run `model` on `image` without gradients and return sigmoid outputs as a NumPy array.

    The raw model output is moved to the CPU first and the sigmoid is applied there.
    """
    with torch.no_grad():
        logits = model(image).detach().cpu()
        return torch.sigmoid(logits).numpy()

# Inference

In [None]:
p = pathlib.Path('../input/hubmap-kidney-segmentation')
identity = rasterio.Affine(1, 0, 0, 0, 1, 0)

ids = []
predictions = []
# Move every fold model to the GPU once instead of on every batch.
models = [model.cuda() for model in load_model(model_pths)]


def _predict_batch(batch_images_list, batch_cords_list, preds):
    """Run the fold ensemble on the queued tiles and add their binary votes into `preds`."""
    batch_images = torch.stack(batch_images_list).cuda()
    pred = None
    for model in models:
        fold_pred = infer(model, batch_images)
        pred = fold_pred if pred is None else pred + fold_pred
    pred = pred / len(models)  # average probability over the folds

    for (tx1, tx2, ty1, ty2), prd in zip(batch_cords_list, pred):
        # cv2.resize takes (width, height): columns span y, rows span x.
        prd = cv2.resize(prd.squeeze(), (int(ty2 - ty1), int(tx2 - tx1)))
        preds[tx1:tx2, ty1:ty2] += (prd > THRESHOLD).astype(np.uint8)

    del batch_images
    torch.cuda.empty_cache()


for i, filename in enumerate(p.glob('test/*.tiff')):
    print(f'{i+1} Predicting {filename.stem}')

    # The context manager closes the raster even if inference fails midway.
    with rasterio.open(filename.as_posix(), transform=identity) as dataset:
        slices = make_grid(dataset.shape, window=WINDOW, min_overlap=MIN_OVERLAP)
        preds = np.zeros(dataset.shape, dtype=np.uint8)

        batch_images_list = []
        batch_cords_list = []
        for x1, x2, y1, y2 in tqdm(slices):
            image = read_from_slice(dataset, x1, x2, y1, y2)
            if image.sum() == 0:  # filter out black images
                continue
            batch_images_list.append(transform(image=image)['image'])
            batch_cords_list.append((x1, x2, y1, y2))
            if len(batch_images_list) == BATCH_SIZE:
                _predict_batch(batch_images_list, batch_cords_list, preds)
                batch_images_list = []
                batch_cords_list = []

        # Flush the final partial batch. Doing this after the loop means queued
        # tiles are still predicted when the last grid tile happens to be black.
        if batch_images_list:
            _predict_batch(batch_images_list, batch_cords_list, preds)

    # `preds` holds integer vote counts from overlapping tiles; with the configured
    # THRESHOLD of 0.3 this keeps every pixel that received at least one vote.
    preds = (preds > THRESHOLD).astype(np.uint8)
    ids.append(filename.stem)
    predictions.append(rle_encode_less_memory(preds))

    # Free the full-resolution mask before the next (large) image is processed.
    del slices
    del preds
    gc.collect();

# Making submission

In [None]:
# One row per test image: its id (file stem) and the RLE-encoded predicted mask.
submission = pd.DataFrame({'id': ids, 'predicted': predictions})

In [None]:
# Write the submission file without the index column, then preview the first rows.
submission.to_csv('submission.csv', index=False)
submission.head()