In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
#import tensorflow as tf
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
from tqdm.notebook import tqdm

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))
import rasterio
from rasterio.windows import Window
import cv2
import glob
import gc

import json

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
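# Install keras-applications and efficientnet from the attached "segmentation-models" dataset.
# --no-index stops pip from contacting a package index, -f ./ adds the current directory
# as an extra place to look for archives, and -q keeps the install output quiet.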

! pip install ../input/segmentation-models/keras-applications-master/keras-applications-master/ -f ./ --no-index -q

! pip install ../input/segmentation-models/efficientnet-1.0.0/efficientnet-1.0.0/ -f ./ --no-index -q

import tensorflow as tf
import efficientnet as efn
import efficientnet.tfkeras

In [3]:
def rle_encode_less_memory(img):
    """Run-length encode a mask into a space-separated "start length ..." string.

    The mask is scanned in column-major order (its transpose is flattened).
    Starts are 1-based positions in that flattened order.  The first and the
    last flattened pixel are forced to 0 so that every run has both an opening
    and a closing boundary; a foreground pixel at either of those two
    positions is therefore not encoded.

    img: array holding the mask (expected to be a 2-D binary mask); it is not
         modified, because flatten() works on a copy.
    Returns: the encoded string, or '' when the mask contains no run.
    """
    flat = img.T.flatten()
    flat[0] = 0
    flat[-1] = 0

    # Positions (1-based) where the value differs from the previous pixel.
    # They alternate between "run opens here" and "run closed just before here".
    boundaries = np.flatnonzero(flat[1:] != flat[:-1]) + 2
    starts = boundaries[0::2]
    lengths = boundaries[1::2] - starts

    tokens = []
    for start, length in zip(starts, lengths):
        tokens.append(str(start))
        tokens.append(str(length))
    return ' '.join(tokens)

    
def make_grid(shape, window=256, min_overlap=32):
    """
        Return an int64 array of shape (N, 4), where N is the number of tiles.
        Each row holds the slice bounds x1, x2, y1, y2 of one tile: x1:x2 indexes
        the first axis of `shape` and y1:y2 the second.

        Along each axis the tile starts are spread evenly and the last tile is
        aligned with the end of the axis, so neighbouring tiles overlap by at
        least `min_overlap` whenever the axis is at least `window` long.
        When an axis is shorter than `window`, the tiles are clamped to
        0..length instead of producing a negative start.

        shape:       (size along first axis, size along second axis)
        window:      tile side length
        min_overlap: minimum overlap between neighbouring tiles; must be
                     smaller than `window`
    """
    x, y = shape

    def _axis_bounds(length):
        # Number of tiles needed so that consecutive starts are at most
        # (window - min_overlap) apart.
        count = length // (window - min_overlap) + 1
        starts = np.linspace(0, length, num=count, endpoint=False, dtype=np.int64)
        # Align the last tile with the end of the axis; clamp at 0 so an axis
        # shorter than the window never yields a negative (from-the-end) index.
        starts[-1] = max(length - window, 0)
        stops = (starts + window).clip(0, length)
        return count, starts, stops

    nx, x1, x2 = _axis_bounds(x)
    ny, y1, y2 = _axis_bounds(y)

    # Cartesian product of the per-axis bounds, filled by broadcasting.
    slices = np.zeros((nx, ny, 4), dtype=np.int64)
    slices[:, :, 0] = x1[:, None]
    slices[:, :, 1] = x2[:, None]
    slices[:, :, 2] = y1[None, :]
    slices[:, :, 3] = y2[None, :]
    return slices.reshape(nx*ny, 4)

# Every .h5 checkpoint in the weights dataset is one member of the ensemble,
# except files whose path contains "_new", which are left out.
fold_model_paths = [
    candidate
    for candidate in glob.glob("/kaggle/input/hubmap-weights/*.h5")
    if "_new" not in candidate
]

fold_models = []
for fold_model_path in fold_model_paths:
    print(fold_model_path)
    # compile=False: the models are loaded without restoring a training configuration.
    fold_models.append(tf.keras.models.load_model(fold_model_path,compile=False))
#fold_models.append(tf.keras.models.load_model("/kaggle/input/hubmap-weights/fpn.h5",compile=False))
print(len(fold_models))

/kaggle/input/hubmap-weights/Fold3_unet_bce.h5
/kaggle/input/hubmap-weights/Fold2_unet_bce.h5
/kaggle/input/hubmap-weights/Fold1_unet_bce.h5
/kaggle/input/hubmap-weights/Fold4_unet_bce.h5
/kaggle/input/hubmap-weights/Fold5_unet_bce.h5
5


In [4]:
from skimage import data, io
from matplotlib import pyplot as plt




def augmentation(input_image):
    """Randomly jitter saturation and contrast, then divide the result by 255.

    Each jitter is applied independently when a fresh uniform draw exceeds
    0.45.  No geometric transform (flip / rotation) is applied.

    input_image: anything tf.convert_to_tensor accepts; presumably an image
                 (or batch of images) with values in 0..255 -- TODO confirm.
    Returns: a NumPy array with the values divided by 255.0.
    """
    tensor = tf.convert_to_tensor(input_image)

    # (operation, lower factor, upper factor), applied in this order so the
    # sequence of random draws stays saturation first, contrast second.
    colour_jitters = (
        (tf.image.random_saturation, 0.7, 1.3),
        (tf.image.random_contrast, 0.8, 1.2),
    )
    for jitter, lower, upper in colour_jitters:
        if tf.random.uniform(()) > 0.45:
            tensor = jitter(tensor, lower, upper)

    return tensor.numpy()/255.0


import contextlib  # stdlib; ExitStack closes every raster handle opened for an image

# Identity affine handed to rasterio.open for the test images.
identity = rasterio.Affine(1, 0, 0, 0, 1, 0)
NEW_SIZE = 256      # tiles are resized to NEW_SIZE x NEW_SIZE before prediction
WINDOW = 1024       # side length of the tiles read from the full-resolution image
MIN_OVERLAP = 300   # minimum overlap between neighbouring tiles (passed to make_grid)
subm = {}           # i -> {'id': ..., 'predicted': ...}, one entry per test image
THRESHOLD = 0.3     # cut-off applied to the fold-averaged prediction of a tile
CHECKSUM = False    # set to True to print the number of positive pixels per image
p_th = 1000*(NEW_SIZE//256)**2  # tiles at or below this pixel count / intensity sum are skipped
s_th = 40           # saturation value a pixel must exceed to count as saturated

df_sample = pd.read_csv('../input/hubmap-kidney-segmentation/sample_submission.csv')
# The progress-bar total is taken from the table so it stays correct for any
# number of test images.
for i, (idx,row) in tqdm(enumerate(df_sample.iterrows()), total=len(df_sample)):
    print(i,"Predicting ",row['id'])

    img_path = "../input/hubmap-kidney-segmentation/test/"+row['id']+".tiff"

    # Every handle entered on the stack is closed when the block exits, also
    # when a tile fails half-way through, so no file handles leak across images.
    with contextlib.ExitStack() as open_rasters:
        dataset = open_rasters.enter_context(rasterio.open(img_path, transform = identity))
        # Per-pixel count of tiles that voted "positive"; binarised after the tile loop.
        preds = np.zeros(dataset.shape, dtype=np.uint8)

        slices = make_grid(dataset.shape, window=WINDOW, min_overlap=MIN_OVERLAP)

        # Some files expose their channels as separate subdatasets instead of
        # three bands; open one reader per subdataset in that case.
        layers = []
        if dataset.count != 3:
            print('Image file with subdatasets as channels')
            layers = [open_rasters.enter_context(rasterio.open(subd))
                      for subd in dataset.subdatasets]

        inference_count = 0     # tiles sent through the models
        noinference_count = 0   # tiles skipped by the saturation / intensity filter
        for (x1,x2,y1,y2) in slices:
            tile_window = Window.from_slices((x1,x2),(y1,y2))
            if dataset.count == 3:
                image = dataset.read([1,2,3], window=tile_window)
                # (bands, rows, cols) -> (rows, cols, bands)
                image = np.moveaxis(image, 0, -1)
            else:
                image = np.zeros((WINDOW, WINDOW, 3), dtype=np.uint8)
                for fl in range(3):
                    # read() without a band index returns (bands, rows, cols);
                    # the assignment relies on each subdataset holding one band.
                    image[:,:,fl] = layers[fl].read(window=tile_window)
            image = cv2.resize(image, (NEW_SIZE, NEW_SIZE),interpolation = cv2.INTER_AREA)

            # Only the saturation channel is used below, and saturation does not
            # depend on the order of the colour channels.
            hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
            h,s,v = cv2.split(hsv)

            # Skip tiles with too few saturated pixels or almost no signal.
            if (s>s_th).sum() <= p_th or image.sum() <= p_th:
                noinference_count += 1
                continue

            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Add the batch axis and scale once; every fold model gets the same
            # input (augmentation() is not applied at inference time).
            batch = np.expand_dims(image, 0) / 255.0

            pred = None
            for fold_model in fold_models:
                fold_pred = np.squeeze(fold_model.predict(batch))
                pred = fold_pred if pred is None else pred + fold_pred
            pred = pred/len(fold_models)
            inference_count += 1

            # Back to tile resolution, then add this tile's binary vote.
            pred = cv2.resize(pred, (WINDOW, WINDOW))
            preds[x1:x2,y1:y2] += (pred > THRESHOLD).astype(np.uint8)

    # preds holds integer vote counts, so "> 0.4" marks every pixel that was
    # positive in at least one tile.
    preds = (preds > 0.4).astype(np.uint8)

    subm[i] = {'id':row['id'], 'predicted': rle_encode_less_memory(preds)}

    if CHECKSUM:
        print('Checksum: '+ str(np.sum(preds)))

    # The full-resolution mask is large; release it before the next image.
    del preds, dataset
    gc.collect()

  0%|          | 0/5 [00:00<?, ?it/s]

0 Predicting  2ec3f1bb9


  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


1 Predicting  3589adb90
2 Predicting  d488c759a
Image file with subdatasets as channels
3 Predicting  aa05346ff
Image file with subdatasets as channels
4 Predicting  57512b7f1
Image file with subdatasets as channels


In [5]:
# One row per entry of `subm`; the inner dict keys ('id', 'predicted') become the columns.
submission = pd.DataFrame.from_dict(data=subm, orient='index')
# index=False keeps the positional keys of `subm` out of the CSV file.
submission.to_csv(path_or_buf='submission.csv', index=False)
submission.head(n=5)

Unnamed: 0,id,predicted
0,2ec3f1bb9,60594381 23 60618360 38 60642340 52 60666322 6...
1,3589adb90,68305792 11 68335218 32 68364647 40 68394075 4...
2,d488c759a,330290392 34 330337052 34 330383713 33 3304303...
3,aa05346ff,52764528 35 52795239 50 52825953 61 52856664 7...
4,57512b7f1,231705412 5 231738648 14 231738704 2 231771884...


### 