In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from tqdm.notebook import tqdm

import os
import rasterio
from rasterio.windows import Window
import cv2
import glob
import gc

import json
from skimage import data, io
from matplotlib import pyplot as plt

In [None]:
! pip install ../input/segmentation-models/keras-applications-master/keras-applications-master/ -f ./ --no-index -q

! pip install ../input/segmentation-models/efficientnet-1.0.0/efficientnet-1.0.0/ -f ./ --no-index -q

import tensorflow as tf
import efficientnet as efn
import efficientnet.tfkeras

In [None]:
def rle_encode_less_memory(img):
    """Run-length encode a binary mask as a space-separated string.

    The mask is traversed in column-major order. The returned string is a
    sequence of ``start length`` pairs, where ``start`` is the 1-based
    position of the first pixel of a run of foreground pixels.

    The first and last pixel of the flattened copy are forced to 0, so every
    run has both an opening and a closing transition. ``img`` itself is not
    modified because the encoding works on a flattened copy.
    """
    # Column-major flattened copy (same element order as img.T.flatten()).
    flat = img.flatten(order="F")
    flat[0] = 0
    flat[-1] = 0

    # 1-based positions at which the value changes; they alternate between
    # "run starts here" and "run ended just before here".
    edges = np.flatnonzero(flat[1:] != flat[:-1]) + 2

    # Turn every end position into a run length.
    edges[1::2] -= edges[::2]
    return " ".join(map(str, edges))

    
def _tile_bounds(length, window, min_overlap):
    """Return ``(starts, ends)`` of the tiles covering one axis of size ``length``."""
    count = length // (window - min_overlap) + 1
    starts = np.linspace(0, length, num=count, endpoint=False, dtype=np.int64)
    # Pin the last tile to the far edge of the axis. Clamp at 0 so that an
    # axis shorter than `window` yields the slice [0, length) instead of a
    # negative start index.
    starts[-1] = max(length - window, 0)
    ends = (starts + window).clip(0, length)
    return starts, ends


def make_grid(shape, window=256, min_overlap=32):
    """
        Return an array of size (N, 4), where N is the number of tiles and
        the 2nd axis represents the slices: x1, x2, y1, y2.

        shape       -- (x, y) size of the area to tile.
        window      -- tile side length.
        min_overlap -- minimum overlap between neighbouring tiles; must be
                       smaller than ``window``.

        Tiles are ordered with x as the outer and y as the inner index.
        Raises ValueError if ``window <= min_overlap``.
    """
    if window <= min_overlap:
        raise ValueError("window must be larger than min_overlap")

    x, y = shape
    x1, x2 = _tile_bounds(x, window, min_overlap)
    y1, y2 = _tile_bounds(y, window, min_overlap)
    nx, ny = len(x1), len(y1)

    # Broadcast the per-axis bounds over the (nx, ny) grid instead of filling
    # the array element by element.
    slices = np.zeros((nx, ny, 4), dtype=np.int64)
    slices[..., 0] = x1[:, None]
    slices[..., 1] = x2[:, None]
    slices[..., 2] = y1[None, :]
    slices[..., 3] = y2[None, :]
    return slices.reshape(nx * ny, 4)

# Load every fold's saved model; predictions are averaged over this list
# during inference.
fold_models = []
# glob returns paths in arbitrary order; sorting keeps the load order and the
# printed log reproducible between runs.
for fold_model_path in sorted(glob.glob("/kaggle/input/hubmap-weights/*.h5")):
    print(fold_model_path)
    # compile=False: the models are loaded without being compiled.
    fold_models.append(tf.keras.models.load_model(fold_model_path,compile=False))

print(len(fold_models))

# Fail here with a clear message rather than later inside the tile loop,
# where an empty ensemble would surface as an obscure error on `pred`.
if not fold_models:
    raise FileNotFoundError("No model weights (*.h5) found in /kaggle/input/hubmap-weights/")

In [None]:
# ---- Inference configuration ----
identity = rasterio.Affine(1, 0, 0, 0, 1, 0)  # identity transform handed to rasterio.open below
NEW_SIZE = 256      # side length each tile is resized to before prediction
WINDOW = 1024       # side length of the tiles read from the full image
MIN_OVERLAP = 300   # minimum overlap between neighbouring tiles
subm = {}           # row number -> {'id': ..., 'predicted': ...}
THRESHOLD = 0.3     # averaged model output above this value counts as foreground
CHECKSUM = False    # when True, print the number of foreground pixels per image
p_th = 1000*(NEW_SIZE//256)**2  # a tile is skipped unless both its saturated-pixel count and its pixel sum exceed this
s_th = 40           # saturation above which a pixel counts as saturated

df_sample = pd.read_csv('../input/hubmap-kidney-segmentation/sample_submission.csv')
# total comes from the frame itself so the progress bar matches any test-set size.
for i, (idx,row) in tqdm(enumerate(df_sample.iterrows()), total=len(df_sample)):
    print(i,"Predicting ",row['id'])

    img_path = "../input/hubmap-kidney-segmentation/test/"+row['id']+".tiff"

    dataset = rasterio.open(img_path, transform = identity)
    layers = []
    try:
        # Per-pixel count of tiles that voted "foreground".
        preds = np.zeros(dataset.shape, dtype=np.uint8)

        slices = make_grid(dataset.shape, window=WINDOW, min_overlap=MIN_OVERLAP)

        if dataset.count != 3:
            print('Image file with subdatasets as channels')
            # Opened one at a time so that a failure part-way still lets the
            # finally-clause close the handles opened so far.
            for subd in dataset.subdatasets:
                layers.append(rasterio.open(subd))

        for (x1,x2,y1,y2) in slices:
            tile_window = Window.from_slices((x1,x2),(y1,y2))
            # Plain ints: used as array shape and as cv2.resize target size.
            tile_h, tile_w = int(x2 - x1), int(y2 - y1)

            if dataset.count == 3:
                image = dataset.read([1,2,3], window=tile_window)
                image = np.moveaxis(image, 0, -1)  # (bands, h, w) -> (h, w, bands)
            else:
                image = np.zeros((tile_h, tile_w, 3), dtype=np.uint8)
                for fl in range(3):
                    image[:,:,fl] = layers[fl].read(window=tile_window)
            image = cv2.resize(image, (NEW_SIZE, NEW_SIZE),interpolation = cv2.INTER_AREA)

            # Only the saturation channel is used for the filter below.
            # NOTE(review): the tile is presumably RGB here while BGR2HSV is
            # applied; saturation does not depend on the R/B order - confirm.
            hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
            _, s, _ = cv2.split(hsv)

            # Skip tiles with too few saturated pixels or almost no signal.
            if (s>s_th).sum() <= p_th or image.sum() <= p_th:
                continue

            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            image = np.expand_dims(image, 0)  # add the batch axis

            # Scaling is the same for every fold, so do it once per tile.
            image_aug = image /255.0

            pred = None
            for fold_model in fold_models:
                if pred is None:
                    pred = np.squeeze(fold_model.predict(image_aug))
                else:
                    pred += np.squeeze(fold_model.predict(image_aug))

            pred = pred/len(fold_models)

            # cv2.resize takes (width, height); resize back to the tile's own
            # shape so the result always fits the slice it is added to.
            pred = cv2.resize(pred, (tile_w, tile_h))
            preds[x1:x2,y1:y2] += (pred > THRESHOLD).astype(np.uint8)

        # A pixel is foreground if at least one overlapping tile voted for it.
        # Done in place to avoid allocating further full-image arrays.
        np.minimum(preds, 1, out=preds)

        subm[i] = {'id':row['id'], 'predicted': rle_encode_less_memory(preds)}

        if CHECKSUM:
            print('Checksum: '+ str(np.sum(preds)))
    finally:
        # Release the file handles even if prediction fails.
        for layer in layers:
            layer.close()
        dataset.close()

    del preds, dataset
    gc.collect()

In [None]:
# Build the submission table from the per-image records collected in `subm`
# (one row per key, with the inner dict keys as columns).
submission = pd.DataFrame.from_dict(subm, orient='index')
# index=False: write only the data columns, not the DataFrame index.
submission.to_csv('submission.csv', index=False)
# Show the first rows as the cell output.
submission.head()

### 