## Original kernel
* https://www.kaggle.com/wrrosa/hubmap-tf-with-tpu-efficientunet-512x512-subm
## Tensorflow HuBMAP - Hacking the Kidney competition starter kit:
* https://www.kaggle.com/wrrosa/hubmap-tf-with-tpu-efficientunet-512x512-tfrecs (how to create training and inference tfrecords)
* https://www.kaggle.com/wrrosa/hubmap-tf-with-tpu-efficientunet-512x512-train (training pipeline)
* this notebook (inference with submission)


# References:
* https://www.kaggle.com/joshi98kishan/hubmap-keras-pipeline-training-inference
* https://www.kaggle.com/bguberfain/memory-aware-rle-encoding/
* https://www.kaggle.com/leighplt/pytorch-fcn-resnet50
* https://www.kaggle.com/c/hubmap-kidney-segmentation/discussion/224883#1233186

# Parameters
Read parameters from the notebook output; actually only **DIM** is used:

In [None]:
# Directory of a saved-models dataset. In the visible cells it is only
# referenced from commented-out code (params/metrics loading, glob of *.h5).
mod_path = '../input/hubmap-tensorflow-models-256x256/'
import yaml
import pprint
# Disabled: read and pretty-print the params.yaml stored next to the models.
# with open(mod_path+'params.yaml') as file:
#     P = yaml.load(file, Loader=yaml.FullLoader)
#     pprint.pprint(P)
    
# --- Tiling -----------------------------------------------------------------
WINDOW = 1024      # side of the square tile cut from the full image
NEW_SIZE = 512     # side the tile is resized to before prediction
MIN_OVERLAP = 300  # minimum overlap between neighbouring tiles

# --- Decision thresholds ----------------------------------------------------
THRESHOLD = 0.4    # a tile pixel is positive when prediction > THRESHOLD
VOTERS = 0.5       # final mask keeps pixels whose summed tile votes >= VOTERS
SUM_PRED = 128     # only used by a commented-out filter in the inference loop

# --- Run mode ---------------------------------------------------------------
# 'PUBLIC_TFREC': use the tfrecords created for the public test set
#                 (MIN_OVERLAP = 300, tiling 1024-512) and ignore other
#                 (private test) data.
# 'FULL':         do not use tfrecords, just run the full submission.
SUBMISSION_MODE = 'FULL'

# When True, print the mask sum of every predicted image.
CHECKSUM = False


# Metrics

In [None]:
# import json

# with open(mod_path + 'metrics.json') as json_file:
#     M = json.load(json_file)
# print('Model run datetime: '+M['datetime'])
# print('OOF val_dice_coe: ' + str(M['oof_dice_coe']))

# Packages

In [None]:
! pip install ../input/kerasapplications/keras-team-keras-applications-3b180cb -f ./ --no-index -q
! pip install ../input/efficientnet/efficientnet-1.1.0/ -f ./ --no-index -q
import numpy as np
import pandas as pd
import os
import glob
import gc

import rasterio
from rasterio.windows import Window

import pathlib
from tqdm.notebook import tqdm
import cv2

import tensorflow as tf
import efficientnet as efn
import efficientnet.tfkeras

# Functions

In [None]:
def rle_encode_less_memory(img):
    """Run-length encode a binary mask without padding the pixel array.

    The mask is traversed in column-major order (``img.T`` flattened) and
    encoded as space-separated ``start length`` pairs with 1-based starts.

    Unlike a version that overwrites the first and last pixel with 0, runs
    touching either end of the flattened mask are kept: only the (small)
    array of run boundaries is patched, never the full-size pixel array.

    Args:
        img: 2-D array whose non-zero entries mark the foreground. It is
            expected to be binary (a single foreground value).

    Returns:
        The RLE string; ``''`` for an empty or all-background mask.
    """
    pixels = img.T.ravel()
    if pixels.size == 0:
        return ''

    # 1-based positions at which the value differs from the previous pixel.
    runs = np.flatnonzero(pixels[1:] != pixels[:-1]) + 2

    # A foreground run that starts on the very first pixel has no preceding
    # background pixel, so its start has to be added explicitly.
    if pixels[0]:
        runs = np.concatenate(([1], runs))
    # Likewise a run reaching the last pixel needs an explicit (exclusive) end.
    if pixels[-1]:
        runs = np.concatenate((runs, [pixels.size + 1]))

    # Convert (start, end) pairs into (start, length) pairs.
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

def _axis_bounds(length, window, min_overlap):
    """Return (starts, stops) of the tiles covering one axis of size `length`."""
    count = length // (window - min_overlap) + 1
    starts = np.linspace(0, length, num=count, endpoint=False, dtype=np.int64)
    # Pin the last tile to the far edge; clamp at 0 so an axis shorter than
    # `window` yields one tile [0, length) instead of a negative start.
    starts[-1] = max(length - window, 0)
    stops = (starts + window).clip(0, length)
    return starts, stops


def make_grid(shape, window=1024, min_overlap=32):
    """
        Return Array of size (N,4), where N - number of tiles,
        2nd axis represents slices: x1,x2,y1,y2

        shape       - (x, y) size of the area to tile
        window      - side of a tile
        min_overlap - value subtracted from `window` when computing the
                      number of tiles per axis; must be smaller than `window`

        Tiles are ordered with the first axis varying slowest. If an axis is
        shorter than `window`, a single tile spanning the whole axis is
        returned for it.

        Raises ValueError if min_overlap >= window.
    """
    if min_overlap >= window:
        raise ValueError('min_overlap must be smaller than window')

    x, y = shape
    x1, x2 = _axis_bounds(x, window, min_overlap)
    y1, y2 = _axis_bounds(y, window, min_overlap)
    nx, ny = len(x1), len(y1)

    # Broadcast the per-axis bounds onto the (nx, ny) grid of tiles.
    slices = np.empty((nx, ny, 4), dtype=np.int64)
    slices[..., 0] = x1[:, None]
    slices[..., 1] = x2[:, None]
    slices[..., 2] = y1[None, :]
    slices[..., 3] = y2[None, :]
    return slices.reshape(nx * ny, 4)

# Models

In [None]:
# Checkpoint files that make up the ensemble: every uncommented path is loaded
# into `fold_models` and the predictions of all loaded models are averaged.
# Commented-out entries are alternative checkpoints that are currently disabled;
# uncomment a line to add that model to the ensemble.
models_path_list = [
#     '../input/hubmap-tensorflow-models-256x256/densenet121_FPN_model_fold_0_ex.h5',
#     '../input/hubmap-tensorflow-models-256x256/densenet121_FPN_model_fold_3_ex.h5',
#     '../input/hubmap-tensorflow-models-256x256/efficientnetb4_Linknet_model_fold_0_ex.h5',
#     '../input/hubmap-tensorflow-models-256x256/efficientnetb4_Linknet_model_fold_2_ex.h5',
#     '../input/hubmap-tensorflow-models-256x256/efficientnetb4_Unet_model_fold_0_ex.h5',
#     '../input/hubmap-tensorflow-models-256x256/efficientnetb4_Unet_model_fold_3_ex.h5',
#     '../input/hubmap-tensorflow-models-256x256/resnet50_Unet_model_fold_0_ex.h5',
#     '../input/hubmap-tensorflow-models-256x256/resnet50_Unet_model_fold_3_ex.h5',
#     '../input/hubmap-tensorflow-models-256x256/efficientnetb7_Unet_model_fold_0_ex.h5',
#     '../input/hubmap-tensorflow-models-256x256/efficientnetb7_Unet_model_fold_3_ex.h5',
#     '../input/hubmap-tensorflow-models-256x256/seresnext101_Unet_model_fold_2_ex.h5',
#     '../input/hubmap-tensorflow-models-256x256/seresnext101_Unet_model_fold_3_ex.h5',
#     '../input/hubmap-tensorflow-models-256x256/efficientnetb7_Unet_model_fold_0_ex_sudo_test.h5',
#     '../input/hubmap-tensorflow-models-256x256/efficientnetb7_Unet_model_fold_2_ex_sudo_test.h5',
#     '../input/hubmap-tensorflow-models-256x256/efficientnetb7_Unet_model_fold_3_ex_sudo_test.h5',
#     '../input/hubmap-tensorflow-models-256x256/efficientnetb7_Unet_model_fold_1_ex_sudo_test.h5',
#     '../input/hubmap-tensorflow-models-256x256/efficientnetb7_Unet_model_fold_4_ex_sudo_test.h5',
#     '../input/hubmap-tensorflow-models-256x256/seresnext101_Unet_model_fold_0_ex_sudo_test.h5',
#     '../input/hubmap-tensorflow-models-256x256/seresnext101_Unet_model_fold_3_ex_sudo_test.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb0_Unet_model_fold_0_sm.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb0_Unet_model_fold_1_sm.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb0_Unet_model_fold_2_sm.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb0_Unet_model_fold_3_sm.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb0_Unet_model_fold_4_sm.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb0_Unet_model_fold_0_sm_ex_sl.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb0_Unet_model_fold_1_sm_ex_sl.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb0_Unet_model_fold_2_sm_ex_sl.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb0_Unet_model_fold_3_sm_ex_sl.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb0_Unet_model_fold_4_sm_ex_sl.h5',
#     '../input/hubmap-tensorflow-models/seresnet50_Unet_model_fold_0_sm_ex_sl.h5',
#     '../input/hubmap-tensorflow-models/seresnet50_Unet_model_fold_2_sm_ex_sl.h5',
#     '../input/hubmap-tensorflow-models/seresnet50_Unet_model_fold_3_sm_ex_sl.h5',
    
#     '../input/hubmap-tensorflow-models/efficientnetb1_Unet_model_fold_0_sl_ex_hub_tst.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb1_Unet_model_fold_1_sl_ex_hub_tst.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb1_Unet_model_fold_2_sl_ex_hub_tst.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb1_Unet_model_fold_3_sl_ex_hub_tst.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb1_Unet_model_fold_4_sl_ex_hub_tst.h5',
    
#     '../input/hubmap-tensorflow-models/efficientnetb0_Unet_model_fold_0_sl_ex_hub_tst.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb0_Unet_model_fold_1_sl_ex_hub_tst.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb0_Unet_model_fold_2_sl_ex_hub_tst.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb0_Unet_model_fold_3_sl_ex_hub_tst.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb0_Unet_model_fold_4_sl_ex_hub_tst.h5',
    
    '../input/hubmap-tensorflow-models/efficientnetb0_Unet_model_fold_0_sl_ex_no_hub_tst.h5',
    '../input/hubmap-tensorflow-models/efficientnetb0_Unet_model_fold_1_sl_ex_no_hub_tst.h5',
    '../input/hubmap-tensorflow-models/efficientnetb0_Unet_model_fold_2_sl_ex_no_hub_tst.h5',
    '../input/hubmap-tensorflow-models/efficientnetb0_Unet_model_fold_3_sl_ex_no_hub_tst.h5',
    '../input/hubmap-tensorflow-models/efficientnetb0_Unet_model_fold_4_sl_ex_no_hub_tst.h5',
    
#     '../input/hubmap-tensorflow-models/efficientnetb2_Unet_model_fold_0_sl_ex_no_hub_tst.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb2_Unet_model_fold_1_sl_ex_no_hub_tst.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb2_Unet_model_fold_2_sl_ex_no_hub_tst.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb2_Unet_model_fold_3_sl_ex_no_hub_tst.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb2_Unet_model_fold_4_sl_ex_no_hub_tst.h5',
    
#     '../input/hubmap-tensorflow-models/efficientnetb1_Unet_model_fold_0_sl_ex_no_hub_tst.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb1_Unet_model_fold_1_sl_ex_no_hub_tst.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb1_Unet_model_fold_2_sl_ex_no_hub_tst.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb1_Unet_model_fold_3_sl_ex_no_hub_tst.h5',
#     '../input/hubmap-tensorflow-models/efficientnetb1_Unet_model_fold_4_sl_ex_no_hub_tst.h5',
    
#     '../input/hubmap-tensorflow-models/mobilenetv2_FPN_model_fold_0_sl_ex_hub_tst.h5',
#     '../input/hubmap-tensorflow-models/mobilenetv2_FPN_model_fold_1_sl_ex_hub_tst.h5',
#     '../input/hubmap-tensorflow-models/mobilenetv2_FPN_model_fold_2_sl_ex_hub_tst.h5',
#     '../input/hubmap-tensorflow-models/mobilenetv2_FPN_model_fold_3_sl_ex_hub_tst.h5',
#     '../input/hubmap-tensorflow-models/mobilenetv2_FPN_model_fold_4_sl_ex_hub_tst.h5',
    
    '../input/hubmap-tensorflow-models/efficientnetb0_FPN_model_fold_0_sl_ex_no_hub_tst.h5',
    '../input/hubmap-tensorflow-models/efficientnetb0_FPN_model_fold_1_sl_ex_no_hub_tst.h5',
    '../input/hubmap-tensorflow-models/efficientnetb0_FPN_model_fold_2_sl_ex_no_hub_tst.h5',
    '../input/hubmap-tensorflow-models/efficientnetb0_FPN_model_fold_3_sl_ex_no_hub_tst.h5',
    '../input/hubmap-tensorflow-models/efficientnetb0_FPN_model_fold_4_sl_ex_no_hub_tst.h5',
    
    '../input/hubmap-tensorflow-models/efficientnetb1_FPN_model_fold_0_sl_ex_no_hub_tst.h5',
    '../input/hubmap-tensorflow-models/efficientnetb1_FPN_model_fold_1_sl_ex_no_hub_tst.h5',
    '../input/hubmap-tensorflow-models/efficientnetb1_FPN_model_fold_2_sl_ex_no_hub_tst.h5',
    '../input/hubmap-tensorflow-models/efficientnetb1_FPN_model_fold_3_sl_ex_no_hub_tst.h5',
    '../input/hubmap-tensorflow-models/efficientnetb1_FPN_model_fold_4_sl_ex_no_hub_tst.h5',
    
]

In [None]:
# Identity affine transform; passed to rasterio.open when test images are read.
identity = rasterio.Affine(1, 0, 0, 0, 1, 0)

# Load every checkpoint named in models_path_list, without compiling it.
# (Alternative source that was used before: glob.glob(mod_path+'*.h5').)
fold_models = [
    tf.keras.models.load_model(checkpoint_path, compile=False)
    for checkpoint_path in models_path_list
]
print(len(fold_models))

# Tfrecords functions

In [None]:
# Side of the square tiles stored in the tfrecords (see _parse_image).
DIM = NEW_SIZE

# Let tf.data pick the prefetch buffer size.
AUTO = tf.data.experimental.AUTOTUNE

# Schema of one serialized example: raw image bytes plus the tile's x1 / y1.
image_feature = dict(
    image=tf.io.FixedLenFeature([], tf.string),
    x1=tf.io.FixedLenFeature([], tf.int64),
    y1=tf.io.FixedLenFeature([], tf.int64),
)
def _parse_image(example_proto):
    """Decode one serialized example into (image, x1, y1).

    The raw bytes are read as uint8, reshaped to (DIM, DIM, 3), cast to
    float32 and divided by 255; x1 and y1 are returned as stored.
    """
    parsed = tf.io.parse_single_example(example_proto, image_feature)
    raw_pixels = tf.io.decode_raw(parsed['image'], out_type=np.dtype('uint8'))
    tile = tf.reshape(raw_pixels, (DIM, DIM, 3))
    scaled = tf.cast(tile, tf.float32) / 255.0
    return scaled, parsed['x1'], parsed['y1']

def load_dataset(filenames, ordered=True):
    """Build a dataset of parsed examples from tfrecord file(s).

    Args:
        filenames: tfrecord path(s) handed to tf.data.TFRecordDataset.
        ordered: when False, deterministic ordering is switched off in the
            dataset options.

    Returns:
        A tf.data dataset yielding the tuples produced by _parse_image.
    """
    options = tf.data.Options()
    if not ordered:
        options.experimental_deterministic = False
    return (
        tf.data.TFRecordDataset(filenames)
        .with_options(options)
        .map(_parse_image)
    )

def get_dataset(FILENAME):
    """Return the parsed dataset for FILENAME in batches of 64, with prefetching."""
    return load_dataset(FILENAME).batch(64).prefetch(AUTO)

# Results

In [None]:
# Predict a mask for every test image and collect its RLE in `subm`.
p = pathlib.Path('../input/hubmap-kidney-segmentation')
subm = {}

# List the test images once so the loop and the progress-bar total agree.
test_files = list(p.glob('test/*.tiff'))

for i, filename in tqdm(enumerate(test_files), total=len(test_files)):

    print(f'{i+1} Predicting {filename.stem}')

    dataset = rasterio.open(filename.as_posix(), transform=identity)
    layers = []  # per-channel subdatasets, opened only when the file needs them
    try:
        # Per-pixel count of tiles that voted "positive".
        preds = np.zeros(dataset.shape, dtype=np.uint8)

        if SUBMISSION_MODE == 'PUBLIC_TFREC' and MIN_OVERLAP == 300 and WINDOW == 1024 and NEW_SIZE == 512:
            print('SUBMISSION_MODE: PUBLIC_TFREC')
            fnames = glob.glob('/kaggle/input/hubmap-tfrecords-1024-512-test/test/'+filename.stem+'*.tfrec')

            # Files without tfrecords (private test set) are left with an
            # all-zero mask; tfrecords would have to be created for them.
            for FILENAME in fnames:
                # Average the predictions of all models over the whole file.
                pred = None
                for fold_model in fold_models:
                    tmp = fold_model.predict(get_dataset(FILENAME))/len(fold_models)
                    if pred is None:
                        pred = tmp
                    else:
                        pred += tmp
                    del tmp
                    gc.collect()

                pred = tf.cast((tf.image.resize(pred, (WINDOW,WINDOW)) > THRESHOLD),tf.bool).numpy()
                # Drop only the channel axis: a bare squeeze() would also drop
                # the batch axis when the file holds exactly one tile.
                # Assumes a single-channel model output, which the 2-D
                # accumulation below requires anyway.
                pred = pred.squeeze(axis=-1)

                # Second pass over the same (ordered) dataset to recover the
                # position of each predicted tile.
                idx = 0
                for _, X1, Y1 in get_dataset(FILENAME):
                    for x1, y1 in zip(X1.numpy(), Y1.numpy()):
                        preds[x1:(x1+WINDOW),y1:(y1+WINDOW)] += pred[idx]
                        idx += 1
        else:
            print('SUBMISSION_MODE: FULL')
            slices = make_grid(dataset.shape, window=WINDOW, min_overlap=MIN_OVERLAP)

            if dataset.count != 3:
                print('Image file with subdatasets as channels')
                layers = [rasterio.open(subd) for subd in dataset.subdatasets]

            print(f'Dataset Shape: {dataset.shape}')

            EMPTY = np.zeros((NEW_SIZE, NEW_SIZE))

            # Tile filter thresholds (loop-invariant): saturation level and
            # minimum number of pixels above it.
            s_th = 40
            p_th = 1000*(1024//256)**2

            for (x1,x2,y1,y2) in tqdm(slices):
                tile_window = Window.from_slices((x1,x2),(y1,y2))
                if dataset.count == 3:
                    image = dataset.read([1,2,3], window=tile_window)
                    image = np.moveaxis(image, 0, -1)
                else:
                    image = np.zeros((WINDOW, WINDOW, 3), dtype=np.uint8)
                    for fl in range(3):
                        image[:,:,fl] = layers[fl].read(window=tile_window)

                hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
                h,s,v = cv2.split(hsv)

                if (s>s_th).sum() <= p_th or image.sum() <= p_th:
                    # Too few saturated pixels, or a nearly black tile:
                    # skip the models and predict background.
                    pred = EMPTY
                else:
                    image = cv2.resize(image, (NEW_SIZE, NEW_SIZE),interpolation = cv2.INTER_AREA)
                    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                    image = np.expand_dims(image, 0)
                    image = tf.cast(image, tf.float32)/255.0

                    # Mean prediction over all loaded models.
                    pred = None
                    for fold_model in fold_models:
                        if pred is None:
                            pred = np.squeeze(fold_model.predict(image))
                        else:
                            pred += np.squeeze(fold_model.predict(image))

                    pred = pred/len(fold_models)

#                 if np.sum((pred > THRESHOLD).astype(np.uint8)) < SUM_PRED:
#                     pred = EMPTY

                pred = cv2.resize(pred, (WINDOW, WINDOW))
                preds[x1:x2,y1:y2] += (pred > THRESHOLD).astype(np.uint8)
    finally:
        # Release the file handles before moving on to the next image.
        for layer in layers:
            layer.close()
        dataset.close()

    # Keep pixels that received at least VOTERS positive tile votes.
    preds = (preds >= VOTERS).astype(np.uint8)

    subm[i] = {'id':filename.stem, 'predicted': rle_encode_less_memory(preds)}

    if CHECKSUM:
        print('Checksum: '+ str(np.sum(preds)))

    del preds
    gc.collect()

# Making submission

In [None]:
# One row per predicted image (columns 'id' and 'predicted'), written without
# the index column; the last line previews the first rows in the notebook.
submission = pd.DataFrame.from_dict(data=subm, orient='index')
submission.to_csv(path_or_buf='submission.csv', index=False)
submission.head()