# Inference and submission notebook for the HuBMAP kidney segmentation competition
## Reference: https://www.kaggle.com/wrrosa/hubmap-tf-with-tpu-efficientunet-512x512-subm

In [None]:
import pprint
import yaml

# Directory that holds params.yaml (read just below).
mod_path = '../input/hubmap-train-model/'

# Load the saved parameters. params.yaml is treated as trusted here;
# NOTE(review): prefer yaml.safe_load if that ever stops being true.
with open(mod_path + 'params.yaml') as params_file:
    P = yaml.load(params_file, Loader=yaml.FullLoader)
pprint.pprint(P)

THRESHOLD = 0.4    # a pixel counts as foreground when pred > THRESHOLD
WINDOW = 1024      # side of each tile read from the full-resolution image
MIN_OVERLAP = 300  # minimum overlap requested between neighbouring tiles
NEW_SIZE = P['DIM']  # 512; side each tile is resized to before prediction

# Submission mode:
#   'PUBLIC_TFREC' - use the pre-built tfrecords for the public test set
#                    (MIN_OVERLAP = 300 tiling, 1024 -> 512) and ignore the
#                    other (private test) data
#   'FULL'         - do not use tfrecords, run the full submission
# NOTE(review): SUBMISSION_MODE is not read anywhere in the visible cells.
SUBMISSION_MODE = 'FULL'

CHECKSUM = False  # when True, print the mask sum for each image

In [None]:
# ! pip install ../input/kerasapplications/keras-team-keras-applications-3b180cb -f ./ --no-index -q
# ! pip install segmentation_models -q
# ! pip install "../input/segmentation-models"

# Standard-library helpers. copyfile/copytree are only referenced by the
# commented-out copy commands in the visible cells.
import sys, os
from shutil import copyfile, copytree
# Make the bundled segmentation_models sources importable from the input dataset.
sys.path.append('../input/segmentation-models/segmentation_models')
# ! pip install ../input/segmentation-models

# Install the dependencies from local input directories; --no-index tells pip
# not to consult a package index and -q keeps the output quiet.
! pip install ../input/kerasapplications/keras-team-keras-applications-3b180cb -f ./ --no-index -q
# ! pip install ../input/efficientnet/efficientnet-1.1.0/ -f ./ --no-index -q
! pip install ../input/imageclassifiers -f ./ --no-index -q
! pip install ../input/efficientnet100 -f ./ --no-index -q
! pip install ../input/segmentation-models -f ./ --no-index -q

# copy our file into the working directory (make sure it has .py suffix)
# copyfile(src = "../input/segmentation-models/segmentation_models/losses.py", dst = "../working/losses.py")
# copytree(src = "../input/segmentation-models/segmentation_models", dst = "../working/segmentation_models")

In [None]:
import numpy as np
import pandas as pd
import glob
import gc

import rasterio
from rasterio.windows import Window

import pathlib
from tqdm.notebook import tqdm
import cv2

import tensorflow as tf
import efficientnet as efn
import efficientnet.tfkeras

from tensorflow.keras.utils import get_custom_objects

os.environ['SM_FRAMEWORK'] = 'tf.keras'

import segmentation_models as sm

In [None]:
def rle_encode_less_memory(img):
    """Run-length encode a binary mask as a "start length start length ..." string.

    The mask is walked column by column (its transpose is flattened) and the
    start positions are 1-based. The first and last pixels of the flattened
    copy are forced to 0, so every run has both an opening and a closing
    transition.

    Args:
        img: 2-D array-like mask (NumPy array) whose non-zero runs are encoded.

    Returns:
        Space-separated string of alternating run starts and run lengths;
        an empty string when the mask has no runs.
    """
    # flatten() returns a copy, so the caller's array is left untouched.
    flat = img.T.flatten()
    flat[0] = flat[-1] = 0

    # Index i marks a change between pixel i and i+1; +2 converts that to the
    # 1-based position of the pixel right after the change.
    boundaries = np.flatnonzero(flat[1:] != flat[:-1]) + 2

    # Even slots hold run starts, odd slots hold run ends -> turn ends into lengths.
    boundaries[1::2] = boundaries[1::2] - boundaries[::2]
    return ' '.join(map(str, boundaries))

def _tile_bounds(length, window, min_overlap):
    """Return (starts, stops) of the tiles covering one axis of size `length`."""
    count = length // (window - min_overlap) + 1
    starts = np.linspace(0, length, num=count, endpoint=False, dtype=np.int64)
    # Pin the last tile to the far edge of the axis. Clamp at 0 so an axis
    # shorter than the window does not produce a negative start index.
    starts[-1] = max(length - window, 0)
    stops = (starts + window).clip(0, length)
    return starts, stops


def make_grid(shape, window=256, min_overlap=32):
    """Tile a 2-D shape with overlapping windows.

    Args:
        shape: pair (x, y) giving the extent of the two axes.
        window: side length of each tile.
        min_overlap: requested minimum overlap between neighbouring tiles;
            it sets how many tiles are placed along each axis.

    Returns:
        int64 array of shape (N, 4), where N is the number of tiles and the
        second axis represents the slices x1, x2, y1, y2. Rows are ordered
        with the x tile index varying slowest. When an axis is shorter than
        `window`, its single tile spans the whole axis (0 .. length).
    """
    x, y = shape
    x1, x2 = _tile_bounds(x, window, min_overlap)
    y1, y2 = _tile_bounds(y, window, min_overlap)

    x_pairs = np.stack([x1, x2], axis=1)  # (nx, 2)
    y_pairs = np.stack([y1, y2], axis=1)  # (ny, 2)

    # Cartesian product of x tiles and y tiles: repeat each x pair for every
    # y pair, and cycle the y pairs once per x pair.
    return np.concatenate(
        [np.repeat(x_pairs, len(y_pairs), axis=0),
         np.tile(y_pairs, (len(x_pairs), 1))],
        axis=1,
    )

def dice_coe(output, target, axis=None, smooth=1e-10):
    """Dice coefficient computed on masks binarised at 0.5.

    Args:
        output: predicted tensor; values greater than 0.5 count as foreground.
        target: reference tensor, binarised the same way.
        axis: axis (or axes) passed to the sums; None sums over everything.
        smooth: small constant added to numerator and denominator.

    Returns:
        Mean of the per-reduction Dice values (a scalar tensor).
    """
    pred_mask = tf.cast(tf.greater(output, 0.5), tf.float32)
    true_mask = tf.cast(tf.greater(target, 0.5), tf.float32)

    overlap = tf.reduce_sum(pred_mask * true_mask, axis=axis)
    pred_total = tf.reduce_sum(pred_mask, axis=axis)
    true_total = tf.reduce_sum(true_mask, axis=axis)

    score = (2. * overlap + smooth) / (pred_total + true_total + smooth)
    return tf.reduce_mean(score, name='dice_coe')

# https://www.kaggle.com/bigironsphere/loss-function-library-keras-pytorch#BCE-Dice-Loss
def dice_loss(y_true, y_pred, axis=None, smooth=1e-10):
    """Soft Dice loss: 1 minus the Dice score of the raw (unthresholded) tensors.

    The inputs are deliberately NOT binarised here: thresholding is not
    differentiable, so it would break gradient-based training.

    Args:
        y_true: reference tensor.
        y_pred: predicted tensor.
        axis: axis (or axes) passed to the sums; None sums over everything.
        smooth: small constant added to numerator and denominator.

    Returns:
        Scalar tensor, 1 - mean Dice score.
    """
    overlap = tf.reduce_sum(y_pred * y_true, axis=axis)
    pred_total = tf.reduce_sum(y_pred, axis=axis)
    true_total = tf.reduce_sum(y_true, axis=axis)

    score = tf.reduce_mean((2. * overlap + smooth) / (pred_total + true_total + smooth))
    return 1 - score

# lovasz loss
def symmetric_lovasz(y_pred, y_true):
    """Average of the Lovasz hinge on the inputs and on their complement.

    The second term negates the predictions and uses 1 - y_true as the target.

    NOTE(review): lovasz_hinge is not defined in the visible part of this
    file; it must be provided elsewhere before this function can be called.
    """
    direct = lovasz_hinge(y_pred, y_true)
    flipped = lovasz_hinge(-y_pred, 1.0 - y_true)
    return 0.5 * (direct + flipped)


# https://www.kaggle.com/kool777/training-hubmap-eda-tf-keras-tpu
# tversky loss
def tversky(y_true, y_pred, alpha=0.7, beta=0.3, smooth=1):
    """Tversky index between two tensors.

    Uses tf ops directly (like the dice functions in this file) instead of
    the Keras backend alias `K`, which is never imported here.

    Args:
        y_true: reference tensor.
        y_pred: predicted tensor.
        alpha: weight applied to the false-negative term.
        beta: weight applied to the false-positive term.
        smooth: constant added to numerator and denominator.

    Returns:
        Scalar tensor with the Tversky index.
    """
    flat_true = tf.reshape(y_true, [-1])
    flat_pred = tf.reshape(y_pred, [-1])

    true_pos = tf.reduce_sum(flat_true * flat_pred)
    false_neg = tf.reduce_sum(flat_true * (1 - flat_pred))
    false_pos = tf.reduce_sum((1 - flat_true) * flat_pred)

    return (true_pos + smooth) / (true_pos + alpha * false_neg + beta * false_pos + smooth)

def tversky_loss(y_true, y_pred):
    """Tversky loss: 1 minus the Tversky index (default alpha/beta/smooth)."""
    index = tversky(y_true, y_pred)
    return 1 - index

def focal_tversky_loss(y_true, y_pred, gamma=0.75):
    """Focal Tversky loss: (1 - Tversky index) raised to the power `gamma`.

    Uses tf.pow rather than the Keras backend alias `K`, which is never
    imported in this file.

    Args:
        y_true: reference tensor.
        y_pred: predicted tensor.
        gamma: exponent applied to (1 - Tversky index); worth experimenting
            with different values.

    Returns:
        Scalar tensor with the loss value.
    """
    tv = tversky(y_true, y_pred)
    return tf.pow(1 - tv, gamma)

def FocalLoss(targets, inputs, alpha=0.8, gamma=2):
    """Focal loss built on top of element-wise binary cross-entropy.

    References tf / tf.keras.backend explicitly because the alias `K` is
    never imported in this file.

    Args:
        targets: reference tensor.
        inputs: predicted tensor (passed as the `output` of binary cross-entropy).
        alpha: constant scale applied to every element.
        gamma: exponent applied to (1 - exp(-BCE)).

    Returns:
        Scalar tensor: mean of alpha * (1 - exp(-BCE)) ** gamma * BCE.
    """
    flat_inputs = tf.reshape(inputs, [-1])
    flat_targets = tf.reshape(targets, [-1])

    bce = tf.keras.backend.binary_crossentropy(flat_targets, flat_inputs)
    bce_exp = tf.exp(-bce)
    focal_loss = tf.reduce_mean(alpha * tf.pow(1 - bce_exp, gamma) * bce)

    return focal_loss

# Try Dice + topK loss

# Try Dice + Focal loss
def dice_focal(ytrue, ypred):
    """Combined loss: focal loss plus soft Dice loss on the same tensors."""
    focal_term = FocalLoss(ytrue, ypred)
    dice_term = dice_loss(ytrue, ypred)
    return focal_term + dice_term

# Register the custom losses in Keras' custom-object registry under short
# names, so Keras can look them up by name.
_keras_custom_objects = get_custom_objects()
_keras_custom_objects["dice"] = dice_loss
# _keras_custom_objects["lovasz"] = symmetric_lovasz
_keras_custom_objects["tversky"] = tversky_loss
_keras_custom_objects["focal_tversky"] = focal_tversky_loss
_keras_custom_objects["binary_focal_loss_plus_dice_loss"] = sm.losses.binary_focal_dice_loss

# Load the trained models

In [None]:
# Identity affine transform; passed to rasterio.open by the prediction loop.
identity = rasterio.Affine(1, 0, 0, 0, 1, 0)

# Custom metric/loss functions handed to load_model for both saved models.
_LOAD_CUSTOM_OBJECTS = {
    'dice_coe': dice_coe,
    'focal_tversky_loss': focal_tversky_loss,
}

# Earlier experiments loaded other checkpoints from mod_path (every '*.h5'
# fold model with compile=False, or single focal-dice / dice-loss /
# focal-tversky files); the ensemble below replaces them.

# EfficientNet-based model.
model_file = "../input/focaltversky-bs16-efficientneth5/ek-model-focal-tversky-512.h5"
model_efficientnet = tf.keras.models.load_model(
    model_file,
    custom_objects=_LOAD_CUSTOM_OBJECTS,
)

# VGG16-based model.
model_file = "../input/focaltversky-bs16-vgg16h5/ah-vgg16-focal-tversky-bs16-512.h5"
model_vgg = tf.keras.models.load_model(
    model_file,
    custom_objects=_LOAD_CUSTOM_OBJECTS,
)

# Models used for prediction; use [model_vgg] alone to run without the ensemble.
models = [model_efficientnet, model_vgg]

In [None]:
p = pathlib.Path('../input/hubmap-kidney-segmentation')
subm = {}

# List the test images once; the same list feeds both the loop and tqdm's total.
test_files = list(p.glob('test/*.tiff'))

for i, filename in tqdm(enumerate(test_files), total=len(test_files)):
    print(f'{i+1} Predicting {filename.stem}')

    # Open inside a with-block so the dataset is closed once the image has
    # been processed instead of staying open for the rest of the run.
    with rasterio.open(filename.as_posix(), transform=identity) as dataset:
        # Per-pixel vote counter: +1 for every (tile, model) that predicts foreground.
        preds = np.zeros(dataset.shape, dtype=np.uint8)

        slices = make_grid(dataset.shape, window=WINDOW, min_overlap=MIN_OVERLAP)

        for (x1, x2, y1, y2) in slices:
            # NOTE(review): assumes the image exposes bands 1-3 - confirm for all test files.
            image = dataset.read([1, 2, 3],
                                 window=Window.from_slices((x1, x2), (y1, y2)))
            image = np.moveaxis(image, 0, -1)  # bands-first -> bands-last
            image = cv2.resize(image, (NEW_SIZE, NEW_SIZE), interpolation=cv2.INTER_AREA)
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            image = np.expand_dims(image, 0)  # add the batch axis

            # Ensembling rule "AT LEAST ONE": every model votes on its own.
            # (Alternative: average the model outputs, then threshold once.)
            for model in models:
                pred = np.squeeze(model.predict(image))
                # Resize back to the tile's real extent. cv2 takes (width, height),
                # i.e. (columns, rows); this equals (WINDOW, WINDOW) for full tiles.
                pred = cv2.resize(pred, (int(y2 - y1), int(x2 - x1)))
                preds[x1:x2, y1:y2] += (pred > THRESHOLD).astype(np.uint8)

    # A pixel is foreground as soon as it received at least one vote.
    preds = (preds > 0.5).astype(np.uint8)
    print(preds.shape)

    subm[i] = {'id': filename.stem, 'predicted': rle_encode_less_memory(preds)}

    if CHECKSUM:
        print('Checksum: ' + str(np.sum(preds)))

    # Drop the full-resolution mask before the next image is allocated.
    del preds
    gc.collect()

# Make the submission

In [None]:
# One row per entry of subm (keyed by image index); the dict keys become the
# frame's index, which is left out of the CSV.
submission = pd.DataFrame.from_dict(data=subm, orient='index')
submission.to_csv(path_or_buf='submission.csv', index=False)
# Last expression of the cell: shows the first rows in the notebook.
submission.head()