In [None]:
# Install `efficientnet` from the local package directory ../input/preinstall.
# `--no-index` stops pip from contacting PyPI; `--find-links` points it at the
# local files instead (presumably because the kernel runs offline -- confirm).
!pip install --no-index --find-links=../input/preinstall efficientnet

In [None]:
import numpy as np
import pandas as pd
import os
import glob
import gc
from functools import partial
import json
import rasterio
from rasterio.windows import Window
import yaml
import pprint

import pathlib
from tqdm.notebook import tqdm
import cv2

import tensorflow as tf
import efficientnet as efn
import efficientnet.tfkeras
import time

In [None]:
# Directories of the ensemble members; each one is expected to hold
# params.yaml, metrics.json and one or more *.h5 fold models.
mod_paths = [
    '../input/hubmap-ensamble-model1/',
    '../input/hubmap-ensamble-model2/',
]

# A pixel is marked positive when the averaged prediction is > THRESHOLD.
THRESHOLD = 0.5
# Number of tiles cropped and predicted together.
BATCH_SIZE = 256

# Flag for computing a per-image mask sum; not read by any visible cell.
CHECKSUM = False

In [None]:
# Identity affine transform (not referenced by the visible cells).
identity = rasterio.Affine(1, 0, 0, 0, 1, 0)

# Every fold model from every ensemble directory ends up in this flat list.
fold_models = []
for mod_path in mod_paths:
    # NOTE: P is rebound on every iteration, so the cells below see the
    # parameters of the LAST directory in mod_paths only. All ensemble members
    # are therefore assumed to share the same tile/input-size settings.
    with open(mod_path + 'params.yaml') as file:
        # NOTE(review): FullLoader can build more than plain scalars/lists/dicts.
        # If params.yaml only holds plain data, yaml.safe_load is the safer call.
        P = yaml.load(file, Loader=yaml.FullLoader)
    pprint.pprint(P)

    with open(mod_path + 'metrics.json') as json_file:
        M = json.load(json_file)
    print('Model run datetime: '+M['datetime'])
    print('OOF val_dice_coef: ' + str(M['oof_dice_coef']))

    # glob returns files in arbitrary order; sort so the models are loaded
    # (and later summed) in the same order on every run.
    for fold_model_path in sorted(glob.glob(mod_path + '*.h5')):
        fold_models.append(tf.keras.models.load_model(fold_model_path, compile=False))

# Fail here with a clear message instead of much later inside the prediction
# loop, where an empty ensemble shows up as an opaque `None / 0` TypeError.
if not fold_models:
    raise FileNotFoundError(f'No .h5 fold models found under any of: {mod_paths}')
print(len(fold_models))

In [None]:
# Side length (pixels) the network input is resized to.
INPUT_SIZE = P['INPUT_SIZE']

# Side length of a tile in full-resolution pixels: prefer the 'TILE' entry and
# fall back to 'DIM_FROM' when the parameter file has no 'TILE' key.
if 'TILE' in P:
    WINDOW = P['TILE']
else:
    WINDOW = P['DIM_FROM']

# Half of the tile side; the overlap and border settings are derived from it.
CROP_SIZE = WINDOW // 2

In [None]:
# Part of the tile side that lies outside the CROP_SIZE span.
_tile_margin = WINDOW - CROP_SIZE

# Overlap between neighbouring tiles handed to make_grid.
MIN_OVERLAP = _tile_margin
# Border width trimmed from each side of a tile's prediction.
BOARD_CUT = _tile_margin // 2

In [None]:
def rle_encode_less_memory(img):
    """Run-length encode a 2-D mask as a space-separated string.

    The mask is flattened column by column (via its transpose). The result
    alternates 1-based run start positions and run lengths: "start length ...".
    An all-zero mask gives an empty string.
    """
    column_major = img.T.flatten()
    # Pad with a leading and trailing "off" pixel so that runs touching either
    # end of the flattened mask still produce a start and an end transition.
    padded = np.concatenate([[False], column_major, [False]])
    transitions = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Odd entries are run ends; subtracting the matching start gives lengths.
    transitions[1::2] -= transitions[::2]
    return ' '.join(map(str, transitions))


def _tile_starts(length, window, step, board_cut):
    """Return the int64 start offsets of the tiles along one axis."""
    last_start = length - window + board_cut
    starts = np.arange(0, max(last_start, 1), step, dtype=np.int64)
    # Add one extra tile anchored at `last_start` when the regular stride
    # stops short of it, so the far end of the axis is still covered.
    if last_start > 0 and starts[-1] < last_start:
        starts = np.append(starts, last_start).astype(np.int64)
    return starts


def make_grid(shape, window, min_overlap=0, board_cut = 0):
    """
        Return an int64 array of size (N, 4), where N is the number of tiles and
        the 2nd axis holds the slice bounds x1, x2, y1, y2 of each tile.

        shape       -- (x, y) extent of the area to tile.
        window      -- tile side length.
        min_overlap -- overlap between neighbouring tiles; the stride is
                       window - min_overlap.
        board_cut   -- offset added to the last tile's start position
                       (x - window + board_cut, likewise for y).

        Tiles are ordered with x varying slowest. x2/y2 are clipped to the
        extent, so tiles at the far edge may be smaller than `window`.

        Raises ValueError when min_overlap >= window (non-positive stride).
    """
    step = window - min_overlap
    if step <= 0:
        raise ValueError(
            f'min_overlap ({min_overlap}) must be smaller than window ({window})'
        )
    x, y = shape
    start_x = _tile_starts(x, window, step, board_cut)
    start_y = _tile_starts(y, window, step, board_cut)

    # 'ij' indexing keeps x as the slow axis, matching a nested x-then-y loop.
    x1, y1 = np.meshgrid(start_x, start_y, indexing='ij')
    x2 = np.minimum(x1 + window, x)
    y2 = np.minimum(y1 + window, y)
    return np.stack([x1, x2, y1, y2], axis=-1).reshape(-1, 4).astype(np.int64)

In [None]:
# Let tf.data choose the prefetch buffer size.
AUTO = tf.data.experimental.AUTOTUNE

# Schema of one serialized example: the raw image bytes plus two int64
# scalars, x1 and y1, stored alongside the image.
image_feature = dict(
    image=tf.io.FixedLenFeature([], tf.string),
    x1=tf.io.FixedLenFeature([], tf.int64),
    y1=tf.io.FixedLenFeature([], tf.int64),
)
def _parse_image(example_proto):
    """Decode one serialized example into (image, x1, y1).

    The 'image' bytes are decoded as uint8 and reshaped to
    (P['DIM'], P['DIM'], 3); x1 and y1 are returned as parsed.
    """
    parsed = tf.io.parse_single_example(example_proto, image_feature)
    raw_pixels = tf.io.decode_raw(parsed['image'], out_type=np.dtype('uint8'))
    side = P['DIM']
    image = tf.reshape(raw_pixels, (side, side, 3))
    return image, parsed['x1'], parsed['y1']

def load_dataset(filenames, ordered=True):
    """Build a dataset of parsed (image, x1, y1) examples from TFRecord files.

    When `ordered` is False the dataset options switch off deterministic
    ordering; otherwise the default options are applied unchanged.
    """
    options = tf.data.Options()
    if not ordered:
        options.experimental_deterministic = False
    return (
        tf.data.TFRecordDataset(filenames)
        .with_options(options)
        .map(_parse_image)
    )

def get_dataset(FILENAME):
    """Return the ordered dataset for FILENAME in batches of 64, with prefetching."""
    return load_dataset(FILENAME).batch(64).prefetch(AUTO)

In [None]:
def crop_resize(data,windows,size):
    """Read tiles from an open raster, zero-pad them to their nominal extent and resize.

    Args:
        data: open rasterio dataset. When ``data.count != 3`` the three channels
            are read from the first three entries of ``data.subdatasets``;
            otherwise bands 1-3 are read directly.
        windows: iterable of ``(x1, x2, y1, y2)`` bounds, where x indexes rows and
            y indexes columns. Bounds may lie outside the raster; the
            out-of-range part is filled with zeros.
        size: ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        The resized tiles stacked along a new first axis, in the order of
        ``windows``.
    """
    ori_h,ori_w = data.shape
    use_subdatasets = data.count != 3

    # Open the per-channel subdatasets once for the whole batch and close them
    # when done. Opening them inside the window loop re-opened every file for
    # every tile and never released the handles.
    layers = []
    try:
        if use_subdatasets:
            for subd in data.subdatasets:
                layers.append(rasterio.open(subd))

        images = []
        for window in windows:
            x1, x2, y1, y2 = (int(v) for v in window)

            # Clamp the read region to the raster; the clamped-off amount is
            # put back as zero padding so the tile keeps its nominal size.
            read_x1, read_x2 = max(x1, 0), min(x2, ori_h)
            read_y1, read_y2 = max(y1, 0), min(y2, ori_w)
            pad_x1, pad_x2 = read_x1 - x1, x2 - read_x2
            pad_y1, pad_y2 = read_y1 - y1, y2 - read_y2

            raster_window = Window.from_slices((read_x1, read_x2), (read_y1, read_y2))
            if use_subdatasets:
                image = np.zeros((read_x2 - read_x1, read_y2 - read_y1, 3), dtype=np.uint8)
                for i in range(3):
                    # Read band 1 explicitly so a 2-D (rows, cols) array is assigned.
                    image[:,:,i] = layers[i].read(1, window=raster_window)
            else:
                image = data.read([1,2,3], window=raster_window)
                # rasterio returns (bands, rows, cols); move bands last.
                image = np.moveaxis(image, 0, -1)

            image = np.pad(image,((pad_x1,pad_x2),(pad_y1,pad_y2),(0,0)),'constant',constant_values = 0)
            # Swap channel order RGB -> BGR (presumably what the models were
            # trained on -- confirm against the training pipeline).
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            image = cv2.resize(image,size)
            images.append(image)
    finally:
        for layer in layers:
            layer.close()

    images = np.stack(images, axis=0)
    return images

In [None]:
p = pathlib.Path('../input/hubmap-kidney-segmentation')
# Maps the running image index to {'id': <file stem>, 'predicted': <RLE string>}.
subm = {}

cnt = 0  # not read anywhere in the visible cells

if not fold_models:
    raise RuntimeError('fold_models is empty; load at least one model before predicting')

# List the test images once so the progress-bar total and the iteration agree.
test_files = list(p.glob('test/*.tiff'))
for i, filename in tqdm(enumerate(test_files), total=len(test_files)):

    print(f'{i+1} Predicting {filename.stem}')

    # The context manager closes the raster handle even if prediction fails.
    with rasterio.open(filename) as data:
        slices = make_grid(data.shape, window=WINDOW, min_overlap=MIN_OVERLAP, board_cut=BOARD_CUT)
        print('window num:{}'.format(len(slices)))

        image_h, image_w = data.shape
        # Built-in bool: the `np.bool` alias was removed in NumPy 1.24.
        preds = np.zeros((image_h, image_w), dtype=bool)

        # Batch the tile bounds in NumPy so the coordinates stay ordinary
        # integers when used as slice bounds and cv2 sizes below.
        for batch_start in range(0, len(slices), BATCH_SIZE):
            windows = slices[batch_start:batch_start + BATCH_SIZE]
            images = crop_resize(data, windows, (INPUT_SIZE, INPUT_SIZE))

            # Average the predictions of all fold models for this batch.
            pred_batch = None
            for fold_model in fold_models:
                if pred_batch is None:
                    pred_batch = fold_model.predict(images)
                else:
                    pred_batch += fold_model.predict(images)
            pred_batch = pred_batch / len(fold_models)

            for window, pred in zip(windows, pred_batch):
                x1, x2, y1, y2 = (int(v) for v in window)
                window_h, window_w = x2 - x1, y2 - y1

                # Bring the prediction back to the tile's own resolution
                # (cv2 takes the size as (width, height)).
                pred = cv2.resize(pred, (window_w, window_h))

                # Keep only the tile interior: BOARD_CUT pixels are dropped on
                # every side, in both the full mask and the tile prediction.
                # OR-ing accumulates positives where kept regions overlap.
                preds[x1 + BOARD_CUT:x2 - BOARD_CUT, y1 + BOARD_CUT:y2 - BOARD_CUT] |= (
                    pred[BOARD_CUT:window_h - BOARD_CUT, BOARD_CUT:window_w - BOARD_CUT] > THRESHOLD
                )

            del pred, pred_batch, images
            gc.collect()

    subm[i] = {'id':filename.stem, 'predicted': rle_encode_less_memory(preds)}

    del preds
    gc.collect()

In [None]:
# One row per predicted image. The columns are pinned so that submission.csv
# still gets its `id,predicted` header when `subm` is empty.
submission = pd.DataFrame(list(subm.values()), columns=['id', 'predicted'])
submission.to_csv('submission.csv', index=False)
submission.head()