# Inference with Keras U-Net (512x512)
This is the final notebook in a series of three; the first two are:
  * [[data] HuBMAP TIF 2 JPG+TFRecords 128,256,512,1024](https://www.kaggle.com/mistag/data-hubmap-tif-2-jpg-tfrecords-128-256-512-1024), generating training data
  * [[train] Keras U-Net with TFRecords input](https://www.kaggle.com/mistag/train-keras-u-net-with-tfrecords-input), training a U-Net model

In [None]:
import os
import numpy as np
import cv2
import glob
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model, model_from_json
from tensorflow.keras.utils import CustomObjectScope
from skimage import io
import json
import gc
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Hyperparameters were written to JSON by the training notebook; read them back.
HPARAMS_FILE = '../input/train-keras-u-net-with-tfrecords-input/hparams.json'
with open(HPARAMS_FILE) as fh:
    hparams = json.loads(fh.read())

## Helper functions
Input images are downscaled by a scaling factor before prediction, and the predicted mask is upscaled back to the original resolution afterwards.

In [None]:
# Side length, in pixels, of the square patches handed to the model.
IMG_SIZE = hparams['IMG_SIZE']
# Each patch covers an (IMG_SIZE * SCALE_FACTOR)-pixel square of the full-resolution image.
SCALE_FACTOR = hparams['SCALE_FACTOR']
# Averaged predictions strictly above this value are labelled as foreground.
P_THRESHOLD = 0.5

def read_tif_file(fname):
    """Read a TIFF image and return it with the channel axis last.

    Singleton dimensions are squeezed away first. If the leading axis of
    the result has length 3 it is treated as the channel axis and moved to
    the end; otherwise the squeezed array is returned unchanged.
    """
    img = np.squeeze(io.imread(fname))
    if img.shape[0] != 3:
        return img
    # (3, rows, cols) -> (rows, 3, cols) -> (rows, cols, 3)
    return img.swapaxes(0, 1).swapaxes(1, 2)

def read_mask_file(fname, mshape):
    """Rasterise annotation polygons from a JSON file into a boolean mask.

    Only entries whose ``properties.classification.name`` is ``'Cortex'`` or
    ``'glomerulus'`` are drawn.

    Args:
        fname: Path to the JSON annotation file (a sequence of entries, each
            with ``properties`` and ``geometry.coordinates``).
        mshape: Shape of the mask to create.

    Returns:
        Boolean array of shape ``mshape`` that is True inside the polygons.
    """
    wanted = ('Cortex', 'glomerulus')

    # Only the parsing needs the file handle; close it before rasterising.
    with open(fname) as f:
        mdata = json.load(f)

    polys = []
    for entry in mdata:
        if entry['properties']['classification']['name'] not in wanted:
            continue
        coords = entry['geometry']['coordinates']
        # Work on the plain nested list: converting the whole coordinate
        # list with np.array() fails on current NumPy when a multi-part
        # geometry has parts of different lengths (ragged input).
        if len(coords) == 1:
            polys.append(np.array(coords).astype(int))
        else:
            # Several parts: draw the first ring of each part.
            for part in coords:
                polys.append(np.array([part[0]]).astype(int))

    mask = np.zeros(mshape, dtype=np.int8)
    cv2.fillPoly(mask, polys, 1)
    return mask.astype(bool, copy=False)

Using the RLE-encoder from [this notebook](https://www.kaggle.com/iafoss/hubmap-pytorch-fast-ai-starter-sub):

In [None]:
# Adapted from https://www.kaggle.com/bguberfain/memory-aware-rle-encoding
# (with bug fix)
def rle_encode_less_memory(img):
    """Run-length encode a mask as a 'start length start length ...' string.

    Pixels are visited in column-major order and run starts are 1-based.
    The very first and very last pixel are always treated as background,
    because the run detection below needs the sequence to begin and end
    with zero. The input array itself is not modified.
    """
    # flatten() always copies, so forcing the end pixels to zero is safe.
    flat = img.flatten(order='F')
    flat[0] = flat[-1] = 0

    # Positions where the value changes, as 1-based indices of the new value;
    # these alternate between run starts and run ends.
    edges = np.flatnonzero(flat[1:] != flat[:-1]) + 2
    # Turn every run end into a run length.
    edges[1::2] = edges[1::2] - edges[::2]

    return ' '.join(map(str, edges))

## Inference time
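Rebuild the model from the training notebook and load its weights. Since we used a custom loss function and custom metrics, calling `keras.models.load_model()` will not work out of the box here (the custom objects would have to be passed in), so the architecture is restored from JSON and the weights are loaded separately. The predicted masks are big, so we write them to disk immediately - no point making a DataFrame here.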

In [None]:
# Augmentations

# Number of test-time-augmentation variants predicted per patch; it sizes the
# TTA batch and is the divisor when the predictions are averaged.
AUGS = 3

def create_TTA_batch(img):
    """Build the test-time-augmentation batch for a single patch.

    Slot 0 holds the patch divided by 255, slot 1 the same data rotated
    90 degrees clockwise and slot 2 rotated 90 degrees counter-clockwise.

    Returns a float32 array of shape (AUGS, IMG_SIZE, IMG_SIZE, 3).
    """
    base = tf.keras.preprocessing.image.img_to_array(img) / 255.

    batch = np.zeros((AUGS, IMG_SIZE, IMG_SIZE, 3), dtype=np.float32)
    batch[0] = base  # un-augmented
    rotations = (cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE)
    for slot, turn in enumerate(rotations, start=1):
        batch[slot] = cv2.rotate(base, turn)
    # Further variants (horizontal/vertical flips, brightness scaling) are
    # currently disabled; enabling them requires raising AUGS accordingly.
    return batch

def create_TTA_mask(preds):
    """Merge the per-augmentation predictions into one boolean mask.

    The predictions in slots 1 and 2 are rotated back to the original
    orientation in place (so ``preds`` is modified), all predictions are
    summed and divided by AUGS, and the mean is compared to P_THRESHOLD.
    """
    # Inverse rotation for each rotated slot.
    undo = {
        1: cv2.ROTATE_90_COUNTERCLOCKWISE,
        2: cv2.ROTATE_90_CLOCKWISE,
    }
    for slot, turn in undo.items():
        restored = cv2.rotate(preds[slot, :, :], turn)
        # Re-add the trailing axis before writing the slot back.
        preds[slot, :, :] = np.expand_dims(restored, axis=2)

    mean_pred = np.sum(preds, axis=0) / AUGS
    return mean_pred > P_THRESHOLD

# Collect the test images that need a prediction.
PATH = '../input/hubmap-kidney-segmentation/test/'
filelist = glob.glob(os.path.join(PATH, '*.tiff'))
filelist

# Start a fresh submission file that contains only the CSV header.
SUB_FILE = './submission.csv'
with open(SUB_FILE, 'w') as f:
    print("id,predicted", file=f)

# Rebuild the network from its JSON description, then restore the trained weights.
with open('../input/train-keras-u-net-with-tfrecords-input/model.json', 'r') as m:
    lm = m.read()
model = model_from_json(lm)
model.load_weights('../input/train-keras-u-net-with-tfrecords-input/model.h5')
    
# A tile is only sent to the model when more than p_th of its (downscaled)
# pixels have an HSV saturation above s_th; all other tiles stay background.
s_th = 45 # saturation blanking threshold
p_th = IMG_SIZE*IMG_SIZE//32   # pixel count threshold

size = IMG_SIZE * SCALE_FACTOR  # tile side length in full-resolution pixels
for file in filelist:
    # Image id: the path component just before the file extension.
    fid = file.replace('\\','.').replace('/','.').split('.')[-2]
    print(fid)
    img = read_tif_file(file)
    dims = np.array(img.shape[:2])
    pmask = np.zeros(dims, dtype=np.uint8)
    # NOTE: only whole tiles are visited; a remainder strip at the bottom or
    # right edge (image size not a multiple of `size`) is left as background.
    for x in range(img.shape[0]//size):
        for y in range(img.shape[1]//size):
            patch = cv2.resize(img[x*size:(x+1)*size, y*size:(y+1)*size], dsize=(IMG_SIZE, IMG_SIZE),
                               interpolation = cv2.INTER_AREA)
            # Only the saturation channel is used to decide whether to predict.
            _, s, _ = cv2.split(cv2.cvtColor(patch, cv2.COLOR_BGR2HSV))
            if (s>s_th).sum() <= p_th:
                continue
            # TTA
            batch = create_TTA_batch(patch)
            predictions = model.predict(batch)
            mask = create_TTA_mask(predictions)
            # Upscale to the original resolution; uint8 matches pmask and
            # avoids building a much larger integer tile first.
            pint = cv2.resize(mask.astype(np.uint8), dsize=(size, size), interpolation = cv2.INTER_NEAREST)
            pmask[x*size:(x+1)*size, y*size:(y+1)*size] = pint
        # Clean up some memory after each row of tiles. Rebinding to None
        # drops the references even when a name was never assigned (no tile
        # in this row, or none passed the saturation test), so no exception
        # handling is needed.
        patch = s = batch = predictions = mask = pint = None
        gc.collect()
    # Save the mask: one "id,rle" line per image. pmask is already uint8 and
    # the encoder works on its own copy, so it is passed directly.
    rle = rle_encode_less_memory(pmask)
    with open(SUB_FILE, 'a') as f:
        f.write("{},{}\n".format(fid, rle))
    del pmask, img
    gc.collect()