In [None]:
import sys
sys.path.append("../input/segmentation-models-pytorch-install")
import segmentation_models_pytorch as smp

from torch.utils.data import Dataset, DataLoader
import cv2
from pathlib import Path
import rasterio
from rasterio.windows import Window
import pandas as pd
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np
import torch
from tqdm import tqdm
import math
import gc
from typing import List, Dict
from dataclasses import dataclass

## Unpack weights

In [None]:
!ls -l ../input/hubmap-folds-2
!mkdir model_1
!tar -xvf ../input/hubmap-folds-2/1024_avg_last.tar -C model_1
!mkdir model_2
!tar -xvf ../input/hubmap-folds-2/1024_512_pseudo_v1_avg.tar -C model_2

## Main pipeline config

In [None]:
# Number of crops per DataLoader batch.
BATCH_SIZE = 1
# Number of DataLoader worker processes.
NUM_WORKERS = 0
# Side length (pixels) of the square window read from each TIFF.
CROP_SIZE = 1024 * 4
# Stride (pixels) between neighbouring windows; half of CROP_SIZE, so windows overlap.
STEP = 1024 * 2
# Blended probability above which a pixel is marked as foreground within a crop.
THR = 0.5
# A pixel ends up in the final mask when more than VOTE overlapping crops marked it.
VOTE = 0
# When True, crops the dataset flags as background are not run through the models.
SKIP_BACKGROUND = True
# When True, inference only runs if the submission table has more than 5 rows
# (more than 4 rows when False).
SKIP_COMMIT = True
# Width (pixels) of the border that is zeroed in every crop prediction before it
# is written into the full-size mask ("ramka" presumably means "frame" — confirm).
RAMKA = 512

# Submission template: one row per test image; the "predicted" column is filled in below.
df_sub = pd.read_csv("../input/hubmap-kidney-segmentation/sample_submission.csv")

## Model configs

In [None]:
@dataclass
class ModelConfig:
    """Settings for one architecture of the ensemble, covering all of its fold checkpoints."""

    # Encoder (backbone) name passed to `smp.Unet`.
    encoder: str
    # Checkpoint files, one per fold; one model is loaded for each path.
    weights_path: List[str]
    # Side length (pixels) the crop is resized to before being fed to these models.
    img_size: int
    # Weight applied to this group's fold-averaged prediction in the final blend.
    weight_blend: float

# Ensemble members: one entry per architecture, each backed by five fold
# checkpoints. The three blend weights add up to 1.
my_models = [
    ModelConfig(
        encoder="resnet34",
        weights_path=[
            "./model_1/fold0_avg_0.9238.pth",
            "./model_1/fold1_avg_0.9162.pth",
            "./model_1/fold2_avg_0.9303.pth",
            "./model_1/fold3_avg_0.9336.pth",
            "./model_1/fold4_avg_0.9407.pth",
        ],
        img_size=4096,
        weight_blend=0.35,
    ),
    ModelConfig(
        encoder="timm-efficientnet-b3",
        weights_path=[
            "../input/hubmap5/exp_25_fold_0.pt",
            "../input/hubmap5/exp_24_fold_1.pt",
            "../input/hubmap5/exp_23_fold_2.pt",
            "../input/hubmap5/exp_22_fold_3.pt",
            "../input/hubmap5/exp_21_fold_4.pt",
        ],
        img_size=1024,
        weight_blend=0.3,
    ),
    ModelConfig(
        encoder="se_resnext50_32x4d",
        weights_path=[
            "./model_2/fold0_avg_0.9457.pth",
            "./model_2/fold1_avg_0.9566.pth",
            "./model_2/fold2_avg_0.9379.pth",
            "./model_2/fold3_avg_0.9095.pth",
            "./model_2/fold4_avg_0.9338.pth",
        ],
        img_size=2048,
        weight_blend=0.35,
    ),
]

# Reference resolution for the whole ensemble: the largest input size among the
# models. Predictions made at a smaller size are upsampled to it before blending.
TRAIN_IMG_SIZE = max(cfg.img_size for cfg in my_models)
TRAIN_IMG_SIZE

In [None]:
# import sys
# sys.path.append("../input/segmentation-models-pytorch-install")
# import segmentation_models_pytorch as smp
# import torch
# model = smp.FPN("timm-efficientnet-b1", upsampling=1, encoder_weights=None).cuda()
# x = torch.rand(1, 3, 1344, 1344).cuda()
# out = model(x)
# print(out.shape)

## Helper functions

In [None]:
def rle_encode_less_memory(img):
    """Run-length encode a binary mask, scanning it column by column.

    The mask is transposed and flattened, so pixels are numbered down each
    column first. The result is a space-separated list of ``start length``
    pairs with 1-based starts.

    Runs that touch the very first or very last pixel are encoded as they are;
    the border pixels are no longer forced to zero, so no set pixel is dropped.

    Args:
        img: 2-D array-like mask whose non-zero entries are foreground. It is
            expected to hold a single foreground value (bool or 0/1).

    Returns:
        The RLE string; ``''`` for an empty or all-zero mask.
    """
    # Column-major scan order; nothing below writes into `pixels`.
    pixels = np.asarray(img).T.ravel()
    if pixels.size == 0:
        return ''

    # 1-based positions where the value differs from the previous pixel.
    runs = np.flatnonzero(pixels[1:] != pixels[:-1]) + 2
    # A run already open at pixel 1 has no preceding change: add its start.
    if pixels[0]:
        runs = np.concatenate(([1], runs))
    # A run still open at the last pixel has no closing change: add its end.
    if pixels[-1]:
        runs = np.concatenate((runs, [pixels.size + 1]))
    # Entries now alternate start, end(exclusive); turn each end into a length.
    runs[1::2] -= runs[::2]

    return ' '.join(str(x) for x in runs)

def valid_transform(img_size):
    """Build the inference-time preprocessing pipeline for one target size.

    The pipeline resizes the image to ``img_size`` x ``img_size``, normalizes
    it per channel and converts it to a tensor.

    Args:
        img_size: side length of the square output image.

    Returns:
        An ``A.Compose`` object; call it as ``pipeline(image=img)['image']``.
    """
    steps = [
        A.Resize(img_size, img_size),
        # NOTE(review): these look like the usual ImageNet mean/std — confirm
        # they match what the models were trained with.
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]
    return A.Compose(steps)

def read_from_layers(layers, window):
    """Read a three-channel crop from the opened raster handles.

    Args:
        layers: opened raster handles. A single handle supplies bands 1, 2 and 3;
            otherwise the first three handles each supply their band 1.
        window: window passed unchanged to every ``read`` call.

    Returns:
        The three bands stacked along axis 2.
    """
    if len(layers) == 1:
        source = layers[0]
        channels = [source.read(band, window=window) for band in (1, 2, 3)]
    else:
        channels = [layers[position].read(1, window=window) for position in range(3)]
    return np.stack(channels, axis=2)

## PyTorch dataset class

In [None]:
def _check_background(img, crop_size) -> bool:
    """Report whether a crop contains enough content to be worth predicting on.

    Despite the name, the result is True for a crop that is NOT background
    (the dataset stores it under the "not_background" key) and False otherwise.

    A crop counts as background when too few of its pixels exceed the
    saturation threshold, or when the sum of all its pixel values is tiny.

    Args:
        img: three-channel crop as returned by ``read_from_layers``.
        crop_size: side length of the crop; the pixel-count threshold scales
            with ``(crop_size // 256) ** 2``.
    """
    saturation_floor = 40  # saturation blanking threshold
    min_count = 1000 * (crop_size // 256) ** 2
    saturation = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))[1]

    # Too few saturated pixels: treated as background.
    if (saturation > saturation_floor).sum() <= min_count:
        return False
    # Almost all-black crop: treated as background.
    if img.sum() <= min_count:
        return False
    return True

# Dataset class returns multiple instances of image with different sizes
class SingleTiffDataset(Dataset):
    """Sliding-window reader over one TIFF that returns each crop at several sizes.

    The image is covered by square windows of ``crop_size`` pixels placed every
    ``step`` pixels. A window that would run past the right or bottom edge is
    shifted back so that it ends exactly at the edge.

    Each item is a dict holding:
      * one entry per size in ``all_img_sizes`` (key = the size, value = the
        crop after ``valid_transform(size)``),
      * ``"crop_names"``: ``"{x}_{y}"``, the window's top-left corner,
      * ``"not_background"``: the result of ``_check_background`` for the crop.

    Args:
        tiff_path: path of the TIFF to read.
        all_img_sizes: iterable of target side lengths.
        crop_size: side length of the window read from the TIFF.
        step: stride between neighbouring windows.
    """

    def __init__(self, tiff_path, all_img_sizes, crop_size=1024, step=512):
        self.crop_size = crop_size
        self.all_img_sizes = all_img_sizes
        self.step = step
        # Build every preprocessing pipeline once instead of once per crop.
        self._transforms = {size: valid_transform(size) for size in all_img_sizes}

        dataset = rasterio.open(tiff_path, num_threads="all_cpus")
        self.h = dataset.height
        self.w = dataset.width
        self.row_count = 1 + math.ceil((self.h - self.crop_size) / self.step)
        self.col_count = 1 + math.ceil((self.w - self.crop_size) / self.step)

        if dataset.count == 3:
            # The three bands live in this handle: reuse it instead of opening
            # the same file a second time and leaking the first handle.
            self.layers = [dataset]
        else:
            # Channels are stored as separate subdatasets (possibly none).
            self.layers = [
                rasterio.open(subdataset, num_threads="all_cpus")
                for subdataset in dataset.subdatasets
            ]
            # The container handle was only needed for size and subdataset names.
            dataset.close()

    def __len__(self):
        return self.row_count * self.col_count

    def __getitem__(self, idx):
        row, col = divmod(idx, self.col_count)
        # Shift windows that would cross the right/bottom edge back inside.
        x = min(col * self.step, self.w - self.crop_size)
        y = min(row * self.step, self.h - self.crop_size)
        window = Window(x, y, self.crop_size, self.crop_size)
        img = read_from_layers(self.layers, window=window)

        # One transformed copy of the crop per requested size, keyed by the size.
        sample = {
            img_size: transform(image=img)['image']
            for img_size, transform in self._transforms.items()
        }
        sample["crop_names"] = f"{x}_{y}"
        sample["not_background"] = _check_background(img, self.crop_size)
        return sample

    def close(self):
        """Close every raster handle held by this dataset."""
        for layer in self.layers:
            layer.close()

## TIFF inference function

In [None]:
    
def _group_device(model_group):
    """Return the device of the first model parameter in the group (CUDA if none is found)."""
    for model in model_group:
        parameters = getattr(model, "parameters", None)
        if callable(parameters):
            first = next(iter(parameters()), None)
            if first is not None:
                return first.device
    return torch.device("cuda")


def inference(data_loader, models_with_config, crop_size):
    """Predict the full-size mask of one TIFF with the ensemble and RLE-encode it.

    For every batch of crops, each model group predicts on the crop resized to
    its own ``img_size``. Fold predictions are averaged within a group, scaled
    by the group's ``weight_blend`` and summed over groups. The blend is
    thresholded at ``THR`` and, after zeroing a ``RAMKA``-wide border, added
    into a vote map. Pixels with more than ``VOTE`` votes form the final mask.

    Args:
        data_loader: loader over a dataset exposing ``h`` and ``w`` and yielding
            dicts with one entry per image size plus ``"crop_names"`` and
            ``"not_background"``.
        models_with_config: list of ``(config, [models])`` pairs.
        crop_size: side length (pixels) of the window each crop was read from.

    Returns:
        The RLE string of the predicted mask (empty when nothing is predicted).
    """
    img_size = (data_loader.dataset.h, data_loader.dataset.w)
    mask_pred = np.zeros(img_size, dtype=np.uint8)

    for batch in tqdm(data_loader, ncols=70, leave=True):
        # One flag per crop; works for any batch size, not only batch size 1.
        has_tissue = torch.as_tensor(batch["not_background"]).reshape(-1).bool()
        # Skip the forward passes when no crop in the batch contains tissue.
        if SKIP_BACKGROUND is True and not bool(has_tissue.any()):
            continue

        with torch.no_grad():
            pred_total = None  # weighted sum over model groups
            for cfg, model_group in models_with_config:
                # Put the input on the device the group's models live on.
                image = batch[cfg.img_size].to(_group_device(model_group))
                pred_group = None  # sum over the folds of this group

                for model in model_group:
                    pred = model(image).sigmoid()
                    if cfg.img_size != TRAIN_IMG_SIZE:
                        # align_corners=False is the default behaviour; passing
                        # it explicitly avoids the warning some versions emit.
                        pred = torch.nn.functional.interpolate(
                            pred, size=TRAIN_IMG_SIZE, mode='bilinear', align_corners=False
                        )
                    if pred_group is None:
                        pred_group = pred
                    else:
                        pred_group += pred

                # Drop the channel axis but keep the batch axis, even for batch size 1.
                pred_group = pred_group.squeeze()
                if pred_group.dim() == 2:
                    pred_group = pred_group.unsqueeze(0)
                del image
                pred_group = cfg.weight_blend * (pred_group / len(model_group))
                if pred_total is None:
                    pred_total = pred_group
                else:
                    pred_total += pred_group

            pred_total = (pred_total.cpu().numpy() > THR).astype(np.uint8)

        for predict_single, crop_name, keep in zip(
            pred_total, batch['crop_names'], has_tissue.tolist()
        ):
            # A background crop that shared a batch with tissue crops is still not written.
            if SKIP_BACKGROUND is True and not keep:
                continue
            x_str, y_str = crop_name.split("_")[-2:]
            x, y = int(x_str), int(y_str)

            if crop_size != TRAIN_IMG_SIZE:
                predict_single = cv2.resize(predict_single, (crop_size, crop_size))

            # Discard the crop border. The guard matters: with RAMKA == 0 the
            # slice [-0:] selects the whole array and would erase the prediction.
            # NOTE(review): crops on the image edge lose their border too, so
            # the outermost RAMKA pixels of the image appear never to be
            # predicted — confirm this is intended.
            if RAMKA > 0:
                predict_single[0:RAMKA] = 0
                predict_single[-RAMKA:] = 0
                predict_single[:, 0:RAMKA] = 0
                predict_single[:, -RAMKA:] = 0

            mask_pred[y : y + crop_size, x : x + crop_size] += predict_single

    mask_pred = (mask_pred > VOTE)
    mask_rle = rle_encode_less_memory(mask_pred)
    # Only names that are always bound are deleted here; `pred` does not exist
    # when every crop was skipped.
    del mask_pred
    gc.collect()
    return mask_rle

## Initialization of models and loading them to GPU

In [None]:
# Distinct input sizes used by the ensemble; the dataset produces one resized
# copy of every crop per size.
all_img_sizes = {cfg.img_size for cfg in my_models}

# (config, [one model per fold]) pairs; every model is on the GPU in eval mode.
models_with_config = []
for cfg in my_models:
    models_group = []
    for w_path in cfg.weights_path:
        model = smp.Unet(cfg.encoder, encoder_weights=None).cuda()
        # NOTE(review): torch.load unpickles the file — load trusted checkpoints only.
        model.load_state_dict(torch.load(w_path))
        models_group.append(model.eval())
    models_with_config.append((cfg, models_group))

## Iterating over test tiffs

In [None]:
# Inference runs only when the submission table has more rows than the
# threshold: 5 with SKIP_COMMIT on, 4 otherwise.
count_thr = 5 if SKIP_COMMIT is True else 4
if len(df_sub) > count_thr:
    for idx, row in df_sub.iterrows():
        test_ds = SingleTiffDataset(
            tiff_path=f"../input/hubmap-kidney-segmentation/test/{row['id']}.tiff",
            all_img_sizes=all_img_sizes,
            crop_size=CROP_SIZE,
            step=STEP,
        )
        try:
            test_loader = DataLoader(
                dataset=test_ds,
                batch_size=BATCH_SIZE,
                shuffle=False,
                num_workers=NUM_WORKERS,
                pin_memory=True,
            )
            rle = inference(test_loader, models_with_config, CROP_SIZE)
        finally:
            # Release this image's raster handles before the next TIFF is
            # opened, instead of leaving them to the garbage collector.
            for layer in test_ds.layers:
                layer.close()
        df_sub.loc[idx, "predicted"] = rle

In [None]:
# Delete the unpacked checkpoint folders; the models were loaded from them in an earlier cell.
!rm -rf model_1
!rm -rf model_2
# !rm -rf model_3

In [None]:
# Write the submission table to submission.csv without the index column.
df_sub.to_csv("submission.csv", index=False)

In [None]:
# Display the submission table as the cell output.
df_sub