This submission topped the first place solution on the private leaderboard (score: *0.9516*), even though it fared quite modestly on the public leaderboard (score: *0.9166*).

The approach taken in this notebook is:

1. Use a single FPN model with an EfficientNet-B7 encoder (PyTorch, using [segmentation models PyTorch](https://github.com/qubvel/segmentation_models.pytorch)), working on tiles of *1536*×*1536* pixels that are resized to *768*×*768* before being fed to the model. 
2. Perform inference on three tile grids, each using the same *1536*-pixel tiles resized to *768* pixels, but with a different minimum overlap between neighbouring tiles: *[32, 128, 256]*
3. For each image, average the three predicted binary masks pixel by pixel and mark a pixel as foreground in the final mask when that average is above *0.49* (with three masks, this means at least two of them agree).


In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Print the full path of every file found under the Kaggle input directory.
input_file_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

In [None]:
# Install efficientnet_pytorch 0.6.3 from the sources shipped in the attached dataset:
# copy them into the working directory, install in editable mode, and symlink the
# package directory so that `import efficientnet_pytorch` resolves from the cwd.
!mkdir efficientnet_pytorch-0.6.3
!cp -R /kaggle/input/pytorch-segmentation-models-git/efficientnet_pytorch-0.6.3/efficientnet_pytorch-0.6.3/* efficientnet_pytorch-0.6.3
!pip install -e efficientnet_pytorch-0.6.3
!ln -s efficientnet_pytorch-0.6.3/efficientnet_pytorch efficientnet_pytorch

In [None]:
# Remove any previous copy of the pretrainedmodels sources so the next cell's mkdir/cp starts clean.
!rm -rf pretrained-models.pytorch-master

In [None]:
# Install pretrainedmodels 0.7.4 from the attached dataset (copy, editable install,
# then symlink the package directory into the cwd so it can be imported directly).
!mkdir pretrained-models.pytorch-master
!cp -R /kaggle/input/pytorch-segmentation-models-git/pretrainedmodels-0.7.4/pretrainedmodels-0.7.4/* pretrained-models.pytorch-master
!pip install -e pretrained-models.pytorch-master
!ln -s pretrained-models.pytorch-master/pretrainedmodels pretrainedmodels

In [None]:
# Install timm 0.3.2 from the wheel bundled in the attached dataset.
!pip install /kaggle/input/pytorch-segmentation-models-git/timm-0.3.2-py3-none-any.whl

In [None]:
# Alternative kept for reference: install the prebuilt wheel instead of the source tree.
# !pip install --no-index /kaggle/input/pytorch-segmentation-models-git/segmentation_models_pytorch-0.1.3-py3-none-any.whl
# Install segmentation_models_pytorch 0.1.3 from the sources in the attached dataset
# (copied into a directory with a trailing underscore, then installed in editable mode).
!mkdir segmentation_models_pytorch_
!cp -R /kaggle/input/pytorch-segmentation-models-git/segmentation-models-pytorch-0.1.3/segmentation-models-pytorch/* segmentation_models_pytorch_
!pip install -e segmentation_models_pytorch_

In [None]:
# Symlink the package directory into the cwd so `import segmentation_models_pytorch`
# resolves, then list it as a quick check that the link points at real files.
!ln -s segmentation_models_pytorch_/segmentation_models_pytorch segmentation_models_pytorch
!ls segmentation_models_pytorch

In [None]:
import pretrainedmodels

In [None]:
from pathlib import Path

from tqdm.notebook import tqdm

import sys, os, random, time, glob
import numba, cv2, gc
import pickle

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as D

import torchvision
from torchvision import transforms as T
from segmentation_models_pytorch import Unet
from segmentation_models_pytorch import FPN

In [None]:
import rasterio
from rasterio.windows import Window

import albumentations as A

In [None]:
def set_seeds(seed = 42):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices) and
    switches cuDNN to deterministic mode so repeated runs give the same numbers.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Only affects Python processes started after this point; the hash seed of the
    # running interpreter is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run; pin it off
    # explicitly instead of relying on whatever value an earlier cell left behind.
    torch.backends.cudnn.benchmark = False

set_seeds()

In [None]:
# Show the contents of the input dataset from which the model checkpoints are copied below.
!ls /kaggle/input/hubmap-fpn-efficientnetb7-1536/

In [None]:
# Recreate /kaggle/working/models from scratch and copy every .pth checkpoint of the
# FPN / EfficientNet-B7 dataset into it.
!rm -rf /kaggle/working/models
!mkdir /kaggle/working/models
!cp /kaggle/input/hubmap-fpn-efficientnetb7-1536/*.pth /kaggle/working/models
# Alternative checkpoint source kept for reference:
# !cp /kaggle/input/fpn-with-10-epochs//*.pth /kaggle/working/models

In [None]:
# Confirm which checkpoint files ended up in the working models directory.
!ls /kaggle/working/models

In [None]:
# Root of the competition data; the test images are read from its `test` sub-directory.
DATA_PATH = Path('../input/hubmap-kidney-segmentation')
assert DATA_PATH.exists()

# Directory holding the model checkpoints (.pth files) that were copied into the
# working directory by the cells above.
PATH_FOLD_MODELS = Path('/kaggle/working/models')
assert PATH_FOLD_MODELS.exists()

### Loading data

In [None]:
@numba.njit()
def rle_numba(pixels):
    """Run-length encode a flat binary sequence.

    Walks ``pixels`` once and returns a flat list ``[start, length, start, length, ...]``
    holding one pair per run of 1-valued entries; starts are 1-based positions.

    NOTE(review): ``pixels[0]`` and ``pixels[-1]`` are read unconditionally, so an
    empty input is not supported; the logic also presumes the values are only 0 or 1
    -- confirm against callers.
    """
    size = len(pixels)
    points = []
    # A run that begins on the very first element starts at 1-based position 1.
    if pixels[0] == 1: points.append(1)
    for i in range(1, size):
        if pixels[i] != pixels[i-1]:
            # Even length: every earlier run is closed, so this transition opens a run.
            if len(points) % 2 == 0:
                points.append(i+1)
            else:
                # Odd length: a start is pending; close it by storing the run length.
                points.append(i+1 - points[-1])
    # A run still open at the end extends through the last element.
    if pixels[-1] == 1: points.append(size-points[-1]+1)    
    return points

def rle_numba_encode(image):
    """Return the run-length encoding of ``image`` as a space-separated string.

    The array is flattened in column-major (Fortran) order before being passed
    to ``rle_numba``; the resulting numbers are joined with single spaces.
    """
    column_major = image.flatten(order = 'F')
    runs = rle_numba(column_major)
    return ' '.join(map(str, runs))

def _tile_bounds(length, window, min_overlap):
    """Return ``(starts, ends)`` of the tiles covering ``[0, length)`` along one axis."""
    count = length // (window - min_overlap) + 1
    starts = np.linspace(0, length, num=count, endpoint=False, dtype=np.int64)
    # Anchor the last tile to the far edge so the whole axis is covered. The clamp
    # keeps the start non-negative when the axis is shorter than one window.
    starts[-1] = max(length - window, 0)
    ends = (starts + window).clip(0, length)
    return starts, ends


def make_grid(shape, window=256, min_overlap=32):
    """
        Return an array of size (N, 4), where N is the number of tiles and the
        2nd axis holds the slice bounds of each tile: x1, x2, y1, y2.

        Tiles are ``window`` long along each axis (shorter only when the axis
        itself is shorter than ``window``); tile starts are spread evenly and
        the last tile of each axis is aligned with the far edge.

        Args:
            shape: (x, y) extent of the area to tile.
            window: side length of a tile.
            min_overlap: minimum overlap requested between neighbouring tiles;
                must be smaller than ``window``.

        Raises:
            ValueError: if ``window`` is not larger than ``min_overlap``.
    """
    if window <= min_overlap:
        raise ValueError(
            f'window ({window}) must be larger than min_overlap ({min_overlap})')

    x, y = shape
    x1, x2 = _tile_bounds(x, window, min_overlap)
    y1, y2 = _tile_bounds(y, window, min_overlap)
    nx, ny = len(x1), len(y1)

    # Broadcast the per-axis bounds over the (nx, ny) grid instead of looping in Python.
    slices = np.zeros((nx, ny, 4), dtype=np.int64)
    slices[..., 0] = x1[:, None]
    slices[..., 1] = x2[:, None]
    slices[..., 2] = y1[None, :]
    slices[..., 3] = y2[None, :]
    return slices.reshape(nx*ny, 4)

In [None]:
class HuBMAPModel(nn.Module):
    """Thin wrapper around a single-class segmentation network.

    Builds either an ``FPN`` or a ``Unet`` on top of the requested encoder, with
    one output class and no final activation, and forwards inputs straight
    through it.
    """

    def __init__(self, is_fpn=False, encoder_name='efficientnet-b7', encoder_weights='imagenet'):
        """
        Args:
            is_fpn: build an ``FPN`` when true, otherwise a ``Unet``.
            encoder_name: encoder identifier passed to the architecture.
            encoder_weights: encoder weights identifier passed to the architecture.
        """
        super().__init__()
        print(f'encoder name: {encoder_name}')
        architecture = FPN if is_fpn else Unet
        self.model = architecture(
            encoder_name=encoder_name,
            encoder_weights=encoder_weights,
            classes=1,
            activation=None,
        )

    def forward(self, images):
        """Return the wrapped network's output for ``images``."""
        return self.model(images)

In [None]:
def get_model(is_fpn=False, encoder_name='efficientnet-b7'):
    """Build a ``HuBMAPModel`` for the given architecture flag and encoder name."""
    return HuBMAPModel(is_fpn, encoder_name=encoder_name)

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Copy the EfficientNet-B7 weight files into the torch hub checkpoint cache --
# presumably so that building the encoder with pretrained weights finds them
# locally instead of trying to download them (TODO confirm).
!mkdir -p /root/.cache/torch/hub/checkpoints/
!cp /kaggle/input/pytorch-segmentation-models-git/efficientnet-b7-dcc49843.pth /root/.cache/torch/hub/checkpoints/
!cp /kaggle/input/pytorch-segmentation-models-git/tf_efficientnet_b7_ra-6c08e654.pth /root/.cache/torch/hub/checkpoints/

In [None]:
# fold_models_paths = glob.glob(os.path.join(PATH_FOLD_MODELS, '*timm*.pth'))
# File-name pattern of the checkpoints to use. It must stay relative: os.path.join
# silently drops PATH_FOLD_MODELS when the second component is an absolute path.
FOLD_MODEL_PATTERN = '*_best_model_fpn_efficientnetb7_1536_768_double_shift_efficientnet-b7-12b.pth'
# Sorted so that the order of the models is the same on every run.
fold_models_paths = sorted(glob.glob(os.path.join(PATH_FOLD_MODELS, FOLD_MODEL_PATTERN)))
# fold_models_paths.append('/kaggle/working/models/0_best_model_fpn_efficientnetb7_1536_768_double_shift_efficientnet-b7.pth')
fold_models_paths

In [None]:
# Load every checkpoint listed in fold_models_paths into an eval-mode model on DEVICE.
fold_models = []

for path in fold_models_paths:
    try:
        # map_location lets checkpoints saved on a GPU load on a CPU-only session too.
        state_dict = torch.load(path, map_location=DEVICE)
        # Architecture and encoder are inferred from the file name only, so that
        # directory names cannot influence the choice.
        checkpoint_name = os.path.basename(path)
        model = get_model('fpn' in checkpoint_name, encoder_name='timm-efficientnet-b7' if 'timm' in checkpoint_name else 'efficientnet-b7')
        # Checkpoints are either a bare state dict or a dict wrapping it
        # under 'model_state_dict'.
        if 'model_state_dict' in state_dict:
            model.load_state_dict(state_dict['model_state_dict'])
        else:
            model.load_state_dict(state_dict)
        model.float()
        model.to(DEVICE)
        model.eval()

        fold_models.append(model)
    except Exception as e:
        # Best effort per checkpoint: report the failure and try the next file.
        print(f'Failed to load {path}', e)

# Without any model the inference below would only produce empty masks, so stop here.
if not fold_models:
    raise RuntimeError(f'No model could be loaded from {len(fold_models_paths)} checkpoint path(s)')

In [None]:
# Number of checkpoints that were loaded successfully.
len(fold_models)

In [None]:
# Identity affine transform, passed to rasterio.open when reading the test images.
identity = rasterio.Affine(1, 0, 0, 0, 1, 0)
# identity = None

def read_from_slice(dataset, x1, x2, y1, y2):
    """Read bands 1-3 of ``dataset`` for the window (x1:x2, y1:y2).

    Returns the data with the band axis moved to the last position.
    """
    tile_window = Window.from_slices((x1, x2), (y1, y2))
    bands = dataset.read([1, 2, 3], window=tile_window)
    return np.moveaxis(bands, 0, -1)

In [None]:
WINDOW = 1536 # tile size: side length in pixels of each square tile read from an image
MIN_OVERLAP = 32 # NOTE(review): the inference cell iterates its own `overlaps` list, so this constant looks unused there
NEW_SIZE = 768 # side length the tiles are resized to before they are fed to the model

### Predict

In [None]:
# Test-time-augmentation (TTA) transforms, each applied with probability 1.
# NOTE(review): the inference loop visible in this notebook does not reference
# tta_augs / tta_deaugs -- confirm whether TTA is meant to be used.
horizontal_flip = A.HorizontalFlip(p = 1.0)
vertical_flip = A.VerticalFlip(p = 1.0)
rotate_cw = A.Rotate(limit = (-90, -90), p = 1.0)
rotate_acw = A.Rotate(limit = (90, 90), p = 1.0)

# List of augmentations for TTA
tta_augs = [horizontal_flip,
            vertical_flip,
            rotate_cw,
            rotate_acw]

# List of de-augmentations, index-aligned with tta_augs: each flip is paired with
# itself and each rotation with the rotation in the opposite direction.
tta_deaugs = [horizontal_flip,
              vertical_flip,
              rotate_acw,
              rotate_cw]

# Alternative, smaller TTA sets kept for reference (flips only):
# # List of augmentations for TTA
# tta_augs = [horizontal_flip,
#             vertical_flip]

# # List of deaugmentations corresponding to the above aug list
# tta_deaugs = [horizontal_flip,
#               vertical_flip]

# Horizontal flip only:
# List of augmentations for TTA
# tta_augs = [horizontal_flip]

# # List of deaugmentations corresponding to the above aug list
# tta_deaugs = [horizontal_flip]

In [None]:
# List the files in the test directory of the competition data.
!ls {DATA_PATH/'test'}

In [None]:

def get_preprocessing():
    """Return the albumentations pipeline applied to each tile before inference.

    The pipeline consists of a single per-channel normalisation step with fixed
    mean / std values, for pixel values of at most 255.
    """
    normalize = A.Normalize(
        mean=[0.6276, 0.4468, 0.6769],
        std=[0.1446, 0.2113, 0.1233],
        max_pixel_value=255.0,
        always_apply=True,
        p=1.0,
    )
    return A.Compose([normalize])

In [None]:
from scipy import stats
from scipy.stats import logistic

In [None]:
def rle_decode(mask_rle, shape=(256, 256)):
    '''
    Decode a run-length encoded mask.

    mask_rle: run-length string formatted as "start length start length ..."
              with 1-based starts
    shape: (height, width) of the array to return
    Returns a uint8 numpy array, 1 - mask, 0 - background

    '''
    tokens = mask_rle.split()
    # Even positions hold the 1-based starts, odd positions the run lengths.
    run_starts = np.asarray(tokens[0::2], dtype=int) - 1
    run_lengths = np.asarray(tokens[1::2], dtype=int)
    run_ends = run_starts + run_lengths

    flat_mask = np.zeros(shape[0] * shape[1], dtype='uint8')
    for begin, end in zip(run_starts, run_ends):
        flat_mask[begin:end] = 1
    # The encoding runs down the columns, hence the Fortran-order reshape.
    return flat_mask.reshape(shape, order='F')

In [None]:
%%time

# Tiled inference: for every overlap setting, predict each test image tile by tile
# and store the resulting binary mask as a run-length encoded string.
preprocess_input = get_preprocessing()

p = Path(DATA_PATH)

submissions = []
overlaps = [32, 128, 256]

# Averaging over zero models would only yield NaNs (and therefore empty masks).
if not fold_models:
    raise RuntimeError('No fold models are loaded; cannot run inference')

# Enumerate the test images once, in a fixed order, instead of globbing twice per overlap.
test_files = sorted(p.glob('test/*.tiff'))

with torch.no_grad():
    for overlap in overlaps:
        subm = {}
        for i, filename in tqdm(enumerate(test_files), total = len(test_files)):
            print(filename)

            layers = []
            # The context manager and the finally-clause close the image and any
            # sub-dataset handles once the image has been processed.
            with rasterio.open(filename.as_posix(), transform = identity) as dataset:
                try:
                    slices = make_grid(dataset.shape, window=WINDOW, min_overlap=overlap)

                    preds = np.zeros(dataset.shape, dtype=np.uint8)
                    if dataset.count != 3:
                        # The colour channels are stored as separate sub-datasets.
                        print(f'Image file ({filename}) with subdatasets as channels')
                        for subd in dataset.subdatasets:
                            layers.append(rasterio.open(subd))

                    for (x1,x2,y1,y2) in tqdm(slices, total = len(slices)):
                        if dataset.count == 3:
                            image = read_from_slice(dataset, x1, x2, y1, y2)
                        else:
                            image = np.zeros((WINDOW, WINDOW, 3), dtype=np.uint8)
                            for fl in range(3):
                                image[:,:,fl] = layers[fl].read(window=Window.from_slices((x1,x2),(y1,y2)))

                        # Normalise, shrink to the model input size and move channels first.
                        image = preprocess_input(image = image)['image']
                        image = cv2.resize(image, (NEW_SIZE, NEW_SIZE))
                        image = np.moveaxis(image, -1, 0)
                        # Build the 1-image batch once; it is the same for every model.
                        batch = torch.from_numpy(image).float().to(DEVICE)[None]
                        pred = np.zeros([len(fold_models), WINDOW, WINDOW])
                        for j, fold_model in enumerate(fold_models):
                            score = fold_model(batch)
                            score = score.squeeze().cpu().numpy()
                            # Scale the prediction back up to the tile size.
                            pred[j] = cv2.resize(score, (WINDOW, WINDOW))
                        pred = np.mean(pred, axis=0)
                        # The models are built without a final activation, so a mean
                        # score above 0 is the decision boundary. Later tiles
                        # overwrite earlier ones where they overlap.
                        preds[x1:x2,y1:y2] = (pred > 0).astype(np.uint8)
                finally:
                    for layer in layers:
                        layer.close()

            subm[i] = {'id':filename.stem, 'predicted': rle_numba_encode(preds), 'shape': preds.shape}
        submissions.append(subm)

In [None]:
# One dictionary of predictions is expected per overlap setting.
assert len(submissions) == len(overlaps)

In [None]:
from collections import defaultdict

# Regroup the per-overlap predictions by image id: each id maps to the list of
# encoded masks (one per overlap setting) together with the mask shape.
grouped_submissions = defaultdict(list)

for per_overlap in submissions:
    for entry in per_overlap.values():
        grouped_submissions[entry['id']].append(
            {'predicted': entry['predicted'], 'shape': entry['shape']}
        )

In [None]:
# The first image id must have collected exactly one mask per entry of `submissions`.
assert len(grouped_submissions[list(grouped_submissions.keys())[0]]) == len(submissions)

In [None]:
%%time

# Combine the masks of every image: decode them, average them pixel by pixel and
# keep the pixels whose average is above 0.49 as the final prediction.
subm = {}
for position, (image_id, encoded_masks) in enumerate(grouped_submissions.items()):
    n_masks = len(encoded_masks)
    height, width = encoded_masks[0]['shape'][0], encoded_masks[0]['shape'][1]
    vote_mask = np.zeros([height, width], dtype=np.float16)
    print(f'{position + 1}. Processing {image_id} with {n_masks} masks')
    for entry in encoded_masks:
        vote_mask += rle_decode(entry['predicted'], entry['shape'])
    print(f'Finished adding {n_masks} masks')
    # Rebind the same name at each step so the previous full-size array can be freed.
    vote_mask = vote_mask / n_masks
    vote_mask = (vote_mask > 0.49).astype(np.uint8)
    subm[position] = {'id': image_id, 'predicted': rle_numba_encode(vote_mask)}
    print(f'Finished encoding average')

In [None]:
# Build the submission table: one row per image with its `id` and `predicted` columns.
submission = pd.DataFrame.from_dict(subm, orient='index')
submission

In [None]:
# Write the submission file without the DataFrame index column.
submission.to_csv('submission.csv', index=False)

In [None]:
# Report the size of the submission file (human-readable, then in kilobytes).
!du -h submission.csv
!du -k submission.csv

In [None]:
# Delete the copied checkpoints -- presumably so they are not kept among the notebook's output files.
!rm /kaggle/working/models/*.pth