In [None]:
# Root of the competition data; test images are read from its test/ subfolder.
INPUT_DIR = "/kaggle/input/hubmap-kidney-segmentation/"
# Scratch directory; the down-scaled tile PNGs are written under test_tiles/ here.
TMP_DIR = "/kaggle/temp/"
# Saved Keras model that is loaded for inference.
MODEL_FILE = "/kaggle/input/vgg-unet/vgg_unet.h5"

In [None]:
# Side length, in pixels, of the square tiles each padded image is cut into.
sz = 1024
# Side length each tile is resized to before it is saved as a PNG.
input_sz = 64
# NOTE(review): presumably the accelerator memory in GB -- confirm.
vram = 16
# Number of tiles predicted per chunk: scales with vram // 8 and with the
# square of the down-scaling factor (512 for the values above).
batch_size = 1 * (vram // 8) * (1024 // input_sz) ** 2
print("Batch size", batch_size)

In [None]:
import os
import gc
import numpy as np
import pandas as pd
import tifffile
import cv2

In [None]:
def mask2rle(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns the encoding as a space separated "start length start length ..."
    string; starts are 1-based positions in the transposed, flattened mask.
    '''
    column_major = img.T.flatten()
    # Zero sentinels on both sides make every run produce an opening and a
    # closing transition, including runs touching either end.
    padded = np.concatenate([[0], column_major, [0]])
    change_points = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Odd slots hold run ends; turn them into run lengths.
    change_points[1::2] -= change_points[::2]
    return ' '.join(map(str, change_points))

def rle_encode_less_memory(img):
    '''
    Run-length encode a binary mask without building a padded pixel copy.

    img: numpy array, non-zero - mask, 0 - background
    Returns the encoding as a space separated "start length start length ..."
    string; starts are 1-based positions in the transposed, flattened mask.
    An empty array or an all-background mask yields an empty string.

    Mask pixels at the very first or very last position are encoded
    correctly: the missing boundary transitions are added to the (small)
    run array instead of overwriting those pixels with 0.
    '''
    pixels = img.T.flatten()
    if pixels.size == 0:
        return ''

    # 1-based position of the first pixel after every value change.
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 2

    # A run touching either end has no transition inside the array, so add
    # the implicit opening / closing boundary explicitly.
    if pixels[0] != 0:
        runs = np.concatenate([[1], runs])
    if pixels[-1] != 0:
        runs = np.concatenate([runs, [pixels.size + 1]])

    # Odd slots hold run ends; turn them into run lengths.
    runs[1::2] -= runs[::2]

    return ' '.join(str(x) for x in runs)

def rle2mask(mask_rle, shape=(1600,256)):
    '''
    Decode a run-length string into a binary mask.

    mask_rle: run-length as string formated (start length), starts 1-based
    shape: dimensions the flat buffer is reshaped to before transposing,
           so the returned array has shape (shape[1], shape[0])
    Returns numpy uint8 array, 1 - mask, 0 - background
    '''
    tokens = mask_rle.split()
    begins = np.asarray(tokens[0::2], dtype=int)
    counts = np.asarray(tokens[1::2], dtype=int)
    begins -= 1  # convert 1-based starts to 0-based offsets
    stops = begins + counts
    flat = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for first, last in zip(begins, stops):
        flat[first:last] = 1
    return flat.reshape(shape).T

In [None]:
# Remove any existing tile directory and recreate it empty.
!rm -fR {TMP_DIR}/test_tiles/
!mkdir -p {TMP_DIR}/test_tiles/

In [None]:
# Ids of all test images: every *.tiff file name found under the test
# folder (searched recursively), with the extension stripped.
test_ids = [
    fname[:-len(".tiff")]
    for _dirpath, _dirnames, files in os.walk(f"{INPUT_DIR}/test/")
    for fname in files
    if fname.endswith(".tiff")
]

In [None]:
# Cut every test image into sz x sz tiles, shrink each non-blank tile to
# input_sz x input_sz and save it as a PNG under TMP_DIR/test_tiles/.
num_tiles = {}   # image id -> (tile rows, tile columns) of the padded image
pads = {}        # image id -> (total padding on axis 0, total padding on axis 1)
boxes = {}
# `image_id` instead of `id`, so the builtin id() is not shadowed notebook-wide.
for image_id in test_ids:
    gc.collect()
    image = tifffile.imread(f"{INPUT_DIR}/test/{image_id}.tiff")
    print(image_id, image.shape)
    image = np.squeeze(image)
    # Channel-first files are moved to channel-last before the colour conversion.
    if image.shape[0] == 3:
        image = image.transpose(1,2,0)
    image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    gc.collect()
    print("converted to gray")

    shape = image.shape
    # Padding that brings each axis to a multiple of sz. It is never 0 (an
    # already aligned axis gets a full extra tile); kept as-is because the
    # stitching code later strips exactly this amount again.
    pad0,pad1 = (sz - shape[0] % sz), (sz - shape[1] % sz)
    pads[image_id] = (pad0, pad1)
    print("before padding")
    # Split the padding as evenly as possible between both sides.
    image = np.pad(image, [[pad0 // 2, pad0 - pad0 // 2], [pad1 // 2, pad1 - pad1 // 2]], constant_values=0)
    gc.collect()
    print("image padded")
    num_tiles0 = image.shape[0] // sz
    num_tiles1 = image.shape[1] // sz
    num_tiles[image_id] = (num_tiles0, num_tiles1)
    # (rows, sz, cols, sz) -> (rows, cols, sz, sz) -> flat list of tiles in row-major order.
    image = image.reshape(num_tiles0, sz, num_tiles1, sz)
    image = image.transpose(0, 2, 1, 3).reshape(-1, sz, sz)
    gc.collect()
    print("tiles created")
    for i,im in enumerate(image):
        # Constant tiles are not written to disk at all.
        if im.std() == 0:
            continue
        im2 = cv2.resize(im, (input_sz, input_sz), interpolation=cv2.INTER_NEAREST)
        tile_path = f'{TMP_DIR}/test_tiles/{image_id}_{i}.png'
        # A tile that fails to save would later be indistinguishable from a
        # skipped blank tile, so fail loudly instead of continuing.
        if not cv2.imwrite(tile_path, im2):
            raise IOError(f"could not write tile {tile_path}")
    # One collection per image is enough; the per-tile arrays are tiny.
    del image
    gc.collect()

In [None]:
import tensorflow as tf

def dice(gt, pr):
    """Smoothed Dice score computed on index 1 of the last axis.

    gt: ground-truth tensor, indexed as gt[:, :, 1]
    pr: prediction tensor, indexed as pr[:, :, 1]
    Returns a scalar tensor: 2 * sum(gt * pr) / sum(gt + pr), with a small
    epsilon so the score is 1 when both sums are zero.
    """
    eps = 10 ** -6
    gt_fg = gt[:,:,1]
    pr_fg = pr[:,:,1]
    overlap = tf.reduce_sum(gt_fg * pr_fg)
    total = tf.reduce_sum(gt_fg + pr_fg)
    return 2.0 * (overlap + eps/2) / (total + eps)

In [None]:
# The two axis-layout names understood by the loading and shape-handling code.
IMAGE_ORDERING_CHANNELS_LAST = "channels_last"
IMAGE_ORDERING_CHANNELS_FIRST = "channels_first"

# Layout used throughout this notebook: channels_last.
IMAGE_ORDERING = IMAGE_ORDERING_CHANNELS_LAST

class DataLoaderError(Exception):
    """Raised when an input image cannot be loaded."""

def get_image_array(image_input,
                    width, height,
                    imgNorm="sub_mean", ordering='channels_first'):
    """Load an image file, resize it and normalise it.

    image_input: path of the image file to read (decoded as 3-channel colour).
    width, height: target size passed to cv2.resize.
    imgNorm: normalisation mode
        "sub_and_divide" - scale pixel values to [-1, 1]
        "sub_mean"       - subtract a fixed per-channel mean, then reverse
                           the channel order
        "divide"         - scale pixel values to [0, 1]
        any other value  - the decoded image is returned as read, not resized
    ordering: 'channels_first' moves the channel axis to the front; any
        other value keeps the channel axis last.

    Returns the image as a numpy array (float32 for the three modes above).
    Raises DataLoaderError if the path is not a file or cannot be decoded.
    """

    if not os.path.isfile(image_input):
        raise DataLoaderError("get_image_array: path {0} doesn't exist".format(image_input))
    img = cv2.imread(image_input, 1)
    # cv2.imread signals an unreadable / corrupt file by returning None
    # rather than raising; report it here instead of failing inside resize.
    if img is None:
        raise DataLoaderError("get_image_array: could not decode image {0}".format(image_input))

    if imgNorm == "sub_and_divide":
        img = np.float32(cv2.resize(img, (width, height))) / 127.5 - 1
    elif imgNorm == "sub_mean":
        img = cv2.resize(img, (width, height))
        img = img.astype(np.float32)
        # Per-channel offsets, applied in the channel order cv2 decoded.
        img[:, :, 0] -= 103.939
        img[:, :, 1] -= 116.779
        img[:, :, 2] -= 123.68
        img = img[:, :, ::-1]
    elif imgNorm == "divide":
        img = cv2.resize(img, (width, height))
        img = img.astype(np.float32)
        img = img/255.0

    if ordering == 'channels_first':
        img = np.rollaxis(img, 2, 0)
    return img

In [None]:
from keras.models import load_model
# Load the trained network; the custom `dice` metric has to be supplied so
# the saved model can be deserialised.
model = load_model(MODEL_FILE, custom_objects={"dice": dice})

i_shape = model.input_shape
o_shape = model.output_shape

# Derive the spatial sizes and the class count from the model's shapes.
# The output resolution is taken to be half the input resolution.
if IMAGE_ORDERING == 'channels_first':
    input_height, input_width = i_shape[2], i_shape[3]
    output_height, output_width = input_height // 2, input_width // 2
    n_classes = o_shape[1]
elif IMAGE_ORDERING == 'channels_last':
    input_height, input_width = i_shape[1], i_shape[2]
    output_height, output_width = input_height // 2, input_width // 2
    n_classes = o_shape[2]

In [None]:
# Count the tile PNGs that were written and report how many full batches
# that corresponds to.
total = sum(
    1
    for _dirpath, _dirnames, files in os.walk(f"{TMP_DIR}/test_tiles/")
    for fname in files
    if fname.endswith(".png")
)
steps_per_epoch = total // batch_size
print("Total: ", total)
print("Steps per epoch: ", steps_per_epoch)

In [None]:
# Predict a mask for every saved tile of every test image, stitch the tile
# masks back into one full-resolution mask, strip the padding, RLE-encode
# the result and write the submission file.
#
# Rows are collected in a list and turned into a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the frame each call.
submission_rows = []
# `image_id` instead of `id`, so the builtin id() is not shadowed.
for image_id in test_ids:
    gc.collect()
    print(image_id)
    tiles0, tiles1 = num_tiles[image_id]
    pad0, pad1 = pads[image_id]
    # Tiles without a PNG on disk (skipped as blank) keep an all-zero mask.
    predicted = np.zeros(shape = (tiles0 * tiles1, sz, sz), dtype=np.uint8)
    print("prediction placeholder")
    names = []   # paths of the tiles that exist on disk
    orig = []    # matching tile positions inside `predicted`
    for i in range(predicted.shape[0]):
        name = f'{TMP_DIR}/test_tiles/{image_id}_{i}.png'
        if not os.path.exists(name):
            continue
        names.append(name)
        orig.append(i)

    # Walk the tiles in chunks of at most batch_size. The last chunk is simply
    # shorter; padding it with range(add) indexed past the end of `names`
    # whenever fewer than half a batch of tiles existed.
    for start in range(0, len(names), batch_size):
        chunk_names = names[start:start + batch_size]
        chunk_orig = orig[start:start + batch_size]
        x = np.array([get_image_array(n, input_width, input_height, ordering=IMAGE_ORDERING) for n in chunk_names])
        pr = model.predict(x)
        pr = pr.reshape((len(chunk_names), output_height,  output_width, n_classes))
        if n_classes == 1:
            # Drop only the class axis so a one-tile chunk keeps its batch axis.
            pr = np.where(pr[..., 0] > 0.5, 1, 0)
        else:
            pr = pr.argmax(axis=3)
        # uint8 before upscaling keeps the resized tiles small.
        pr = pr.astype(np.uint8)
        gc.collect()
        for j, tile_mask in zip(chunk_orig, pr):
            predicted[j] = cv2.resize(tile_mask, (sz, sz), interpolation=cv2.INTER_NEAREST)

        del pr
        gc.collect()
    print("tiles predicted")
    # (tiles, sz, sz) -> (rows, cols, sz, sz) -> (rows, sz, cols, sz) -> full padded image.
    predicted = predicted.reshape(tiles0, tiles1, sz, sz)
    predicted = predicted.transpose(0,2,1,3)
    predicted = predicted.reshape(tiles0*sz, tiles1*sz)
    # Strip the padding added during tiling. Explicit end indices are used so
    # that a padding of 0 would not collapse the slice to an empty array.
    top, left = pad0 // 2, pad1 // 2
    bottom = predicted.shape[0] - (pad0 - top)
    right = predicted.shape[1] - (pad1 - left)
    predicted = predicted[top:bottom, left:right]
    print(image_id, predicted.shape)
    gc.collect()
    rle_string = rle_encode_less_memory(predicted)
    print("mask rle encoded")
    del predicted
    submission_rows.append({'id': image_id, 'predicted': rle_string})
    print("submission updated")
    del rle_string
submission_df = pd.DataFrame(submission_rows, columns = ['id','predicted'])
submission_df.to_csv('submission.csv',index=False)