The objective of this exercise is to test TPU EfficientUNet 512x512 together with freeze-pretrained SUB EffUNet5. The aim is to improve on the existing algorithms used to detect functional tissue units (FTUs) across different tissue preparation pipelines.

Kudos to these kernels, which are used in this exercise:
1. ISA's Kernel : https://www.kaggle.com/isakev/hubmap-freeze-pretrained-sub-effunet5-valloss/
2. Wojtek's Kernel : https://www.kaggle.com/wrrosa/hubmap-tf-with-tpu-efficientunet-512x512-subm/data?scriptVersionId=51404430
3. Vasiliy's Kernel : https://www.kaggle.com/vgarshin/kidney-unet-model-keras-inference
4. https://www.kaggle.com/joshi98kishan/hubmap-keras-pipeline-training-inference
5. https://www.kaggle.com/leighplt/pytorch-fcn-resnet50

In [None]:
# Directory that holds the first model group: *.h5 fold models, params.yaml
# and metrics.json are all read from here.
mod_path = '/kaggle/input/hubmap-tf-with-tpu-efficientunet-512x512-train/'
import yaml
import pprint

# Load the parameters saved next to the models and echo them.
# NOTE(review): yaml.load with FullLoader is kept as-is; yaml.safe_load would
# presumably be enough if params.yaml only holds plain scalars/collections - confirm.
with open(mod_path+'params.yaml') as params_file:
    P = yaml.load(params_file, Loader=yaml.FullLoader)
pprint.pprint(P)

# NOTE: P, THRESHOLD, WINDOW, MIN_OVERLAP and NEW_SIZE are assigned again in
# the later "Parameters from ISA's Kernel" cell; the values there are the ones
# in effect when the prediction loop runs.
THRESHOLD = 0.4     # cut-off applied to the averaged prediction of each tile
WINDOW = 1024       # side length of the tiles read from the full-size image
MIN_OVERLAP = 300   # minimum overlap between neighbouring tiles (see make_grid)
NEW_SIZE = P['DIM']  # side length each tile is resized to before prediction

SUBMISSION_MODE = 'PUBLIC_TFREC'
# 'PUBLIC_TFREC' = use created tfrecords for public test set with MIN_OVERLAP = 300 tiling 1024-512, ignore other (private test) data
# 'FULL' do not use tfrecords, just full submission

CHECKSUM = True # compute mask sum for each image

In [None]:
# METRICS

import json

# Print the model run datetime and the out-of-fold Dice coefficient that are
# stored in metrics.json next to the first model group.
with open(mod_path + 'metrics.json') as metrics_file:
    M = json.load(metrics_file)
print('Model run datetime: ' + M['datetime'])
print(f"OOF val_dice_coe: {M['oof_dice_coe']}")

In [None]:
! pip install ../input/kerasapplications/keras-team-keras-applications-3b180cb -f ./ --no-index -q
! pip install ../input/efficientnet/efficientnet-1.1.0/ -f ./ --no-index -q
import numpy as np
import pandas as pd
import os
import glob
import gc

import rasterio
from rasterio.windows import Window

import pathlib
from tqdm.notebook import tqdm
import cv2

import tensorflow as tf
import efficientnet as efn
import efficientnet.tfkeras

import os, glob, gc
import json

# Short alias for os.path.join; used below to build params.yaml / metrics.json paths.
osj = os.path.join

Functions from Wojtek's Kernel

In [None]:
def rle_encode_less_memory(img):
    """Run-length encode a mask as a space-separated ``start length ...`` string.

    The mask is traversed in column-major order (the transpose is flattened)
    and run starts are 1-based.  The first and the last pixel of the flattened
    copy are forced to 0 before encoding, so a run never touches either end.

    Args:
        img: mask array; runs are delimited wherever neighbouring values differ.

    Returns:
        The encoded string; empty when the mask contains no runs.
    """
    # Column-major flatten; equivalent to flattening the transpose and always a copy.
    flat = img.flatten(order='F')
    flat[0] = 0
    flat[-1] = 0

    # Positions where the value changes, converted to 1-based pixel numbers.
    # Even entries are run starts, odd entries are the positions just past a run.
    edges = np.flatnonzero(flat[1:] != flat[:-1]) + 2
    # Turn every end position into a run length.
    edges[1::2] -= edges[::2]
    return ' '.join(map(str, edges))

def make_grid(shape, window=256, min_overlap=32):
    """Tile a 2-D extent with windows and return their bounds.

    Args:
        shape: pair ``(x, y)`` giving the extent along the first and second axis.
        window: side length of each tile.
        min_overlap: minimum overlap between neighbouring tiles; the number of
            tiles per axis is ``extent // (window - min_overlap) + 1``.

    Returns:
        int64 array of shape ``(N, 4)`` with one row ``x1, x2, y1, y2`` per
        tile, ordered with the x index varying slowest.
    """
    def axis_bounds(extent):
        # Evenly spaced starts; the last one is pinned to extent - window so the
        # final tile ends at the border.
        count = extent // (window - min_overlap) + 1
        starts = np.linspace(0, extent, num=count, endpoint=False, dtype=np.int64)
        starts[-1] = extent - window
        stops = (starts + window).clip(0, extent)
        return count, starts, stops

    x, y = shape
    nx, x1, x2 = axis_bounds(x)
    ny, y1, y2 = axis_bounds(y)

    # Broadcast the per-axis bounds over the (nx, ny) grid of tiles.
    grid = np.zeros((nx, ny, 4), dtype=np.int64)
    grid[:, :, 0] = x1[:, np.newaxis]
    grid[:, :, 1] = x2[:, np.newaxis]
    grid[:, :, 2] = y1[np.newaxis, :]
    grid[:, :, 3] = y2[np.newaxis, :]
    return grid.reshape(nx*ny, 4)

def global_shift_mask(maskpred1, y_shift, x_shift):
    """Shift a 2-D mask by padding one side with zeros and cropping the other.

    Args:
        maskpred1: 2-D mask array.
        y_shift: shift along the first axis; a non-negative value pads at the
            start of the axis, a negative value pads at the end.
        x_shift: shift along the second axis, same sign convention.

    Returns:
        Array with the same shape as ``maskpred1`` holding the shifted mask;
        vacated positions are 0.
    """
    dy, dx = abs(y_shift), abs(x_shift)
    rows, cols = maskpred1.shape[0], maskpred1.shape[1]

    # For each axis: pad on the side the content moves away from, then crop the
    # opposite side so the original shape is kept.
    if y_shift < 0:
        y_pad, y_crop = (0, dy), slice(dy, None)
    else:
        y_pad, y_crop = (dy, 0), slice(None, rows)

    if x_shift < 0:
        x_pad, x_crop = (0, dx), slice(dx, None)
    else:
        x_pad, x_crop = (dx, 0), slice(None, cols)

    padded = np.pad(maskpred1, [y_pad, x_pad],
                    mode='constant', constant_values=0)
    return padded[y_crop, x_crop]

In [None]:
##MODEL
# Identity affine transform; passed to rasterio.open() in the prediction loop.
identity = rasterio.Affine(1, 0, 0, 0, 1, 0)

# Load every *.h5 fold model found in mod_path.
# glob.glob() returns paths in arbitrary order, so sort them to make the load
# (and therefore the prediction summation) order reproducible between runs.
fold_model_paths_1 = sorted(glob.glob(mod_path+'*.h5'))
if not fold_model_paths_1:
    # Fail here with a clear message instead of much later, when the
    # prediction loop would divide a missing prediction by zero models.
    raise FileNotFoundError(f'No .h5 model files found in {mod_path}')

# compile=False: the models are only used through predict() in this notebook.
fold_models_1 = [
    tf.keras.models.load_model(fold_model_path, compile=False)
    for fold_model_path in fold_model_paths_1
]
print(len(fold_models_1))

TF Records added from Wojtek's Kernel

In [None]:
# Let tf.data choose the prefetch buffer size (used by get_dataset).
AUTO = tf.data.experimental.AUTOTUNE
# Feature schema used by _parse_image to parse each serialized example.
image_feature = {
    'image': tf.io.FixedLenFeature([], tf.string),  # raw bytes, decoded as uint8 in _parse_image
    # presumably the tile's start offsets in the source image - confirm against the TFRecord writer
    'x1': tf.io.FixedLenFeature([], tf.int64),
    'y1': tf.io.FixedLenFeature([], tf.int64)
}
def _parse_image(example_proto):
    """Decode one serialized example into an image tensor plus its x1/y1 values.

    Args:
        example_proto: serialized example, parsed with the ``image_feature`` schema.

    Returns:
        Tuple ``(image, x1, y1)`` where ``image`` is the ``'image'`` bytes
        decoded as uint8 and reshaped to ``(P['DIM'], P['DIM'], 3)``.
    """
    parsed = tf.io.parse_single_example(example_proto, image_feature)
    side = P['DIM']
    pixels = tf.io.decode_raw(parsed['image'], out_type=tf.uint8)
    return tf.reshape(pixels, (side, side, 3)), parsed['x1'], parsed['y1']

def load_dataset(filenames, ordered=True):
    """Build a dataset of parsed examples from TFRecord files.

    Args:
        filenames: TFRecord file name(s) handed to ``tf.data.TFRecordDataset``.
        ordered: when False, deterministic ordering is switched off in the
            dataset options.

    Returns:
        A ``tf.data`` dataset whose elements are produced by ``_parse_image``.
    """
    options = tf.data.Options()
    if not ordered:
        options.experimental_deterministic = False

    return (
        tf.data.TFRecordDataset(filenames)
        .with_options(options)
        .map(_parse_image)
    )

def get_dataset(FILENAME):
    """Return the parsed dataset for ``FILENAME`` in batches of 64, with prefetching.

    Args:
        FILENAME: TFRecord file name(s), forwarded to ``load_dataset``.

    Returns:
        The batched dataset, prefetched with the ``AUTO`` buffer size.
    """
    return load_dataset(FILENAME).batch(64).prefetch(AUTO)

Parameters from ISA's Kernel

In [None]:
debug = True # True False
# Caps for debug runs; effectively unlimited otherwise.
n_debug_images = 1000000000 if not debug else 1
n_debug_slices = 1000000000 if not debug else 20

# whether to run prediction when committing. WILL RUN predictions during submission in any case
# NOTE(review): as written, do_predict simply mirrors `debug`; nothing in this
# cell forces it on for a submission run - confirm the intended behaviour.
do_predict = bool(debug)

# Second model group: four fold models stored in one directory.
models_dir = '../input/hubmap-models-cv-08848-pl-0847'
model_filepaths = [osj(models_dir, 'model-fold-{}.h5'.format(i)) for i in range(4)]

# Every model path must be listed only once.
assert len(set(model_filepaths)) == len(model_filepaths)
model_dirnames = list(map(os.path.dirname, model_filepaths))

import yaml
import pprint
# Parameters stored with the second model group.  This replaces the P loaded
# from mod_path earlier in the notebook.
with open(osj(model_dirnames[0],'params.yaml')) as params_file:
    P = yaml.load(params_file, Loader=yaml.FullLoader)
pprint.pprint(P)

# These override the values set in the first configuration cell.
THRESHOLD = 0.3
WINDOW = 1024
MIN_OVERLAP = 32
NEW_SIZE = P['DIM']

# All model files must exist before any prediction is attempted.
assert all(os.path.isfile(path_) for path_ in model_filepaths)
print(f"\n Number of models:: {len(model_filepaths)}")

In [None]:
ave_score = 0

# Per-model report: validation metrics of the fold encoded in each file name
# ("...-<fold>.h5"), read from the metrics.json in the model's directory.
for i, m_path in enumerate(model_filepaths):
    model_dir = model_dirnames[i]
    fold_ = int(m_path.split('.')[-2].split('-')[-1])
    with open(osj(model_dir,'metrics.json')) as json_file:
        M = json.load(json_file)
    print(f"\n ----------- \nModel {model_dir.split('/')[-1]}"
          f"\nval_dice_coe: {round(M['val_dice_coe'][fold_], 5)}"
          f"\tval_loss: {round(M['val_loss'][fold_], 5)}"
          f"\tval_accuracy: {round(M['val_accuracy'][fold_], 5)}")


# Per-directory report: metrics averaged over all folds listed in metrics.json.
for model_group in np.unique(model_dirnames):
    with open(osj(model_group,'metrics.json')) as json_file:
        M = json.load(json_file)
    ave_dice = np.mean(M['val_dice_coe'])
    ave_loss = np.mean(M['val_loss'])
    ave_accuracy = np.mean(M['val_accuracy'])
    print(f"\n ============ MODEL GROUP {model_group} ==============")
    print(f" ------------ \nAVERAGE DICE SCORE = {round(ave_dice, 5)}")
    print(f" ------------ \nAVERAGE VALIDATION LOSS = {round(ave_loss, 5)}")
    print(f" ------------ \nAVERAGE VALIDATION ACCURACY = {round(ave_accuracy, 5)}")

In [None]:
%%time
# Load the second model group.  fold_models_2 only exists when do_predict is
# true, while the prediction loop below uses it unconditionally.
if do_predict:
    identity = rasterio.Affine(1, 0, 0, 0, 1, 0)
    # compile=False: the models are only used through predict().
    fold_models_2 = [
        tf.keras.models.load_model(fold_model_path, compile=False)
        for fold_model_path in model_filepaths
    ]
    print(len(fold_models_2))

Results

In [None]:
# File name the prediction loop checks for when deciding whether to apply
# global_shift_mask to an image's predicted mask.
TARGET_IMG = 'afa5e8098.tiff'
# Shifts handed to global_shift_mask(mask, Y_SHFT, X_SHFT): first axis, then second axis.
Y_SHFT = -40
X_SHFT = -24

In [None]:
import pathlib


def _mean_fold_prediction(models, batch):
    """Return the mean of ``np.squeeze(model.predict(batch))`` over ``models``."""
    total = None
    for model in models:
        fold_pred = np.squeeze(model.predict(batch))
        total = fold_pred if total is None else total + fold_pred
    return total / len(models)


p = pathlib.Path('../input/hubmap-kidney-segmentation')
subm = {}

# List the test images once, so the progress-bar total and the loop share the
# same file list.
test_files = list(p.glob('test/*.tiff'))

for i, filename in tqdm(enumerate(test_files), total=len(test_files)):

    print(f'{i+1} Predicting {filename.stem}')

    # Context manager: the dataset is closed even if a tile fails.
    with rasterio.open(filename.as_posix(), transform=identity) as dataset:
        # Per-pixel count of tiles that voted "foreground".
        preds = np.zeros(dataset.shape, dtype=np.uint8)
        slices = make_grid(dataset.shape, window=WINDOW, min_overlap=MIN_OVERLAP)

        # Some files do not expose 3 bands directly; their channels are then
        # read from the sub-datasets instead.
        layers = []
        if dataset.count != 3:
            print('Image file with subdatasets as channels')
            layers = [rasterio.open(subd) for subd in dataset.subdatasets]

        try:
            for (x1, x2, y1, y2) in slices:
                tile_window = Window.from_slices((x1, x2), (y1, y2))
                if dataset.count == 3:
                    image = dataset.read([1, 2, 3], window=tile_window)
                    image = np.moveaxis(image, 0, -1)  # bands-first -> bands-last
                else:
                    image = np.zeros((WINDOW, WINDOW, 3), dtype=np.uint8)
                    for fl in range(3):
                        image[:, :, fl] = layers[fl].read(window=tile_window)

                image = cv2.resize(image, (NEW_SIZE, NEW_SIZE), interpolation=cv2.INTER_AREA)
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                image = np.expand_dims(image, 0)  # add the batch axis

                # Average inside each model group, then blend the groups 50/50.
                pred_1 = _mean_fold_prediction(fold_models_1, image)
                pred_2 = _mean_fold_prediction(fold_models_2, image)
                pred = 0.5 * pred_1 + 0.5 * pred_2

                pred = cv2.resize(pred, (WINDOW, WINDOW))
                preds[x1:x2, y1:y2] += (pred > THRESHOLD).astype(np.uint8)
        finally:
            # Release the sub-dataset handles opened above.
            for layer in layers:
                layer.close()

    # A pixel is foreground when at least one overlapping tile marked it.
    preds = (preds > 0.5).astype(np.uint8)

    # `filename` is a pathlib.Path; comparing it directly with the TARGET_IMG
    # string is never equal, so compare the file name instead.
    if filename.name == TARGET_IMG:
        print('global shift')
        preds = global_shift_mask(preds, Y_SHFT, X_SHFT)

    subm[i] = {'id': filename.stem, 'predicted': rle_encode_less_memory(preds)}

    if CHECKSUM:
        print('Checksum: ' + str(np.sum(preds)))

    # Free the full-size mask before the next image is allocated.
    del preds
    gc.collect()

In [None]:
# Build the submission table: one row per entry of `subm`, with the inner
# dict keys ('id', 'predicted') as columns.
submission = pd.DataFrame.from_dict(subm, orient='index')
# Write the table without the index column.
submission.to_csv('submission.csv', index=False)
# Last expression of the cell: shows the first rows in the notebook output.
submission.head()