# HuBMAP - Efficient Sampling Baseline (deepflash2, pytorch, fastai) [sub]

> Submission kernel for a model trained with efficient region-based sampling.

- Train Notebook: https://www.kaggle.com/matjes/hubmap-efficient-sampling-deepflash2-train
- Sampling Notebook: https://www.kaggle.com/matjes/hubmap-labels-pdf-0-5-0-25-0-01

Requires deepflash2 (git version), zarr, and segmentation-models-pytorch


## Overview

1. Installation and package loading
2. Helper functions and patches
3. Configuration
4. Prediction
5. Submission

### Versions
- V12: Minor changes in the deepflash2 API to support albumentations (slightly changes `apply` in `DeformationField`, see patch below)
- V13: Adding prediction threshold 0.3

### Installation and package loading

In [None]:
# Install deepflash2 and dependencies
! pip install ../input/kerasapplications/keras-team-keras-applications-3b180cb -f ./ --no-index -q
! pip install ../input/efficientnet/efficientnet-1.1.0/ -f ./ --no-index -q
import numpy as np
import pandas as pd
import os
import glob
import gc

import rasterio
from rasterio.windows import Window

import pathlib
from tqdm.notebook import tqdm
import cv2
from keras import backend as K

import tensorflow as tf
import efficientnet as efn
import efficientnet.tfkeras

import sys
sys.path.append("../input/zarrkaggleinstall")
sys.path.append("../input/segmentation-models-pytorch-install")
!pip install -q --no-deps ../input/deepflash2-lfs
import cv2, torch, zarr, tifffile, pandas as pd, gc
from fastai.vision.all import *
from deepflash2.all import *
import segmentation_models_pytorch as smp
import os
import pathlib
from tqdm.notebook import tqdm
import cv2
import numpy as np
import matplotlib.pyplot as plt
import cv2
import glob
import gc
import rasterio
# Identity affine transform, passed as `transform=` when the TIFF files are
# opened with rasterio further down in this notebook.
# NOTE(review): presumably this stands in for the georeferencing the TIFFs lack — confirm.
identity = rasterio.Affine(1, 0, 0, 0, 1, 0)

### Helper functions and patches

In [None]:
# Source: https://www.kaggle.com/bguberfain/memory-aware-rle-encoding
# (variant that transposes the mask itself before flattening)
def rle_encode_less_memory(img):
    """Run-length encode a binary mask into a 'start length start length ...' string.

    The mask is transposed and flattened, so positions run down the columns of
    `img`; start positions are 1-based. The input array is not modified
    (`flatten` returns a copy).
    """
    flat = img.T.flatten()

    # The pairing of boundaries below only works if the sequence begins and
    # ends with background, so the first and last pixel are forced to zero.
    flat[0] = 0
    flat[-1] = 0

    # 1-based positions right after every value change: entries alternate
    # between "run starts here" and "run ended just before here".
    boundaries = np.flatnonzero(flat[1:] != flat[:-1]) + 2
    # Turn each "end" entry into the length of its run.
    boundaries[1::2] -= boundaries[::2]

    return ' '.join(map(str, boundaries))

def load_model_weights(model, file, strict=True):
    """Load a checkpoint from `file` into `model` and return `(model, stats)`.

    The checkpoint is read onto the CPU and must be a mapping with the keys
    'stats' (returned unchanged) and 'model' (the state dict that is loaded
    into `model`). `strict` is forwarded to `load_state_dict`.
    """
    checkpoint = torch.load(file, map_location='cpu')
    stats, weights = checkpoint['stats'], checkpoint['model']
    model.load_state_dict(weights, strict=strict)
    return model, stats

Patches for deepflash2 classes, see https://fastcore.fast.ai/basics.html#patch

In [None]:
# https://matjesg.github.io/deepflash2/data.html#BaseDataset
# Handling of different input shapes
@patch
def read_img(self:BaseDataset, *args, **kwargs):
    """Read the image at `args[0]` with rasterio and return it channels-last.

    Two file layouts are handled:

    * files with exactly three bands: bands 1-3 are read and moved to the
      last axis, giving an (H, W, 3) array in the dataset's own dtype;
    * files that expose their channels as subdatasets: band 1 of each
      subdataset fills one channel of an (H, W, 3) uint8 array.

    Extra positional and keyword arguments are accepted for signature
    compatibility and ignored.

    Raises:
        ValueError: if the file has neither three bands nor subdatasets
            (previously this surfaced as an UnboundLocalError on `image`).
    """
    # Context managers make sure the file handles are released once the pixel
    # data has been copied into numpy arrays.
    with rasterio.open(args[0], transform=identity, num_threads='all_cpus') as data:
        if data.count == 3:
            return np.moveaxis(data.read([1, 2, 3]), 0, -1)

        subdatasets = data.subdatasets
        if not subdatasets:
            raise ValueError(
                f'Cannot read {args[0]}: expected 3 bands or subdatasets, '
                f'found {data.count} band(s) and no subdatasets'
            )

        image = np.zeros((data.shape[0], data.shape[1], 3), np.uint8)
        for i, subdataset in enumerate(subdatasets):
            with rasterio.open(subdataset) as layer:
                image[:, :, i] = layer.read(1)

    return image

# https://matjesg.github.io/deepflash2/data.html#DeformationField
# Adding normalization (divide by 255)
@patch
def apply(self:DeformationField, data, offset=(0, 0), pad=(0, 0), order=1):
    """Apply deformation field to image using interpolation.

    Args:
        data: array-like image indexed as data[rows, cols] or
            data[rows, cols, channel]; only the sub-region needed for this
            tile is sliced out of it.
        offset, pad: forwarded to `self.get` to obtain the sampling
            coordinates; `pad` is also subtracted from `self.shape` to get
            the output shape.
        order: passed unchanged as the `interpolation` flag of `cv2.remap`.

    Returns:
        The remapped tile. When `data` has one more dimension than
        `self.shape`, every channel is divided by 255 before remapping and
        the result has the channel axis last; otherwise `data` is remapped
        as is, without the division.
    """
    # Output size = field shape minus the padding, per axis.
    outshape = tuple(int(s - p) for (s, p) in zip(self.shape, pad))
    # One float32 coordinate map per axis, each reshaped to the output size.
    coords = [np.squeeze(d).astype('float32').reshape(*outshape) for d in self.get(offset, pad)]
    # Get slices to avoid loading all data (.zarr files)
    sl = []
    for i in range(len(coords)):
        # Integer extent of the coordinates along axis i and the size of `data` on that axis.
        cmin, cmax = int(coords[i].min()), int(coords[i].max())
        dmax = data.shape[i]
        if cmin<0: 
            # Coordinates reach below 0: start the window at 0 and extend it to
            # max(-cmin, cmax). Coordinates are left unshifted (the window starts at 0).
            # NOTE(review): presumably -cmin covers the mirrored samples produced by BORDER_REFLECT — confirm.
            cmax = max(-cmin, cmax)
            cmin = 0 
        elif cmax>dmax:
            # Coordinates reach past the end of the axis: stop the window at dmax,
            # lower its start to at most 2*dmax-cmax, and shift the coordinates so
            # they are relative to the window start.
            cmin = min(cmin, 2*dmax-cmax)
            cmax = dmax
            coords[i] -= cmin
        # Fully inside: just make the coordinates relative to the window start.
        else: coords[i] -= cmin
        sl.append(slice(cmin, cmax))    
    if len(data.shape) == len(self.shape) + 1:
        
        ## Channel order change in V12
        # Multi-channel input: remap channel by channel into a channels-last tile.
        tile = np.empty((*outshape, data.shape[-1]))
        for c in range(data.shape[-1]):
            # Adding divide
            # cv2.remap takes the column map first (coords[1]) and the row map second (coords[0]).
            tile[..., c] = cv2.remap(data[sl[0],sl[1], c]/255, coords[1],coords[0], interpolation=order, borderMode=cv2.BORDER_REFLECT)
    else:
        # Input without a channel axis: remapped without the /255 scaling.
        tile = cv2.remap(data[sl[0], sl[1]], coords[1], coords[0], interpolation=order, borderMode=cv2.BORDER_REFLECT)
    return tile

### Configuration

In [None]:
class CONFIG:
    """Settings for the deepflash2 / PyTorch part of the pipeline."""

    # --- Input locations ---
    data_path = Path("../input/hubmap-kidney-segmentation")
    model_file = "../input/hubmap-deepflash-efficientnetb4/"

    # --- deepflash2 tiling (https://matjesg.github.io/deepflash2/data.html#TileDataset) ---
    scale = 3  # zoom factor (zoom out)
    tile_shape = (512, 512)
    padding = (100, 100)  # border overlap for prediction
    pred_tta = True

    # --- PyTorch model (https://github.com/qubvel/segmentation_models.pytorch) ---
    encoder_name = "efficientnet-b4"
    encoder_weights = None
    in_channels = 3
    classes = 2

    # --- Dataloader ---
    batch_size = 1

    # --- Threshold applied to the blended prediction ---
    threshold = 0.30


cfg = CONFIG()

In [None]:
import yaml
import pprint
    
# Settings for the TensorFlow/Keras tile prediction in the main loop.

# Cut-off applied to the averaged Keras prediction of each tile.
THRESHOLD = 0.3 # preds > THRESHOLD
# A pixel is kept when its accumulated tile votes are >= VOTERS; the votes are
# integer counts, so 0.5 means "at least one overlapping tile voted foreground".
VOTERS = 0.5
# Side length (in pixels) of the square tiles read from the full-size image.
WINDOW = 1024
# Value handed to make_grid_tf as `min_overlap` when the tile grid is built.
MIN_OVERLAP = 300
# Tiles are resized to NEW_SIZE x NEW_SIZE before being fed to the Keras models.
NEW_SIZE = 256
# SUM_PRED and CHECK are not referenced anywhere in the visible notebook.
SUM_PRED = 128
CHECK=False

In [None]:
import json

# Report the run datetime and out-of-fold Dice score stored in the
# metrics.json file of the EfficientNet-B4 dataset.
metrics_file = '../input/hubmap-efficientnet-b4/' + 'metrics.json'
with open(metrics_file) as metrics_fh:
    M = json.load(metrics_fh)

print('Model run datetime: '+M['datetime'])
print('OOF val_dice_coe: ' + str(M['oof_dice_coe']))

In [None]:
def rle_encode_less_memory_tf(img):
    """Run-length encode a binary mask as 'start length ...' (1-based, column-major).

    Works on a flattened copy of the transposed mask, so `img` itself is left
    untouched.
    """
    pixels = img.T.flatten()
    # First and last pixel are forced to background so that value changes
    # always come in (start, end) pairs.
    pixels[[0, -1]] = 0
    changed = pixels[1:] != pixels[:-1]
    runs = np.where(changed)[0] + 2
    # Odd entries hold run ends; convert them to run lengths.
    runs[1::2] = runs[1::2] - runs[::2]
    return ' '.join([str(value) for value in runs])

def _tile_bounds_tf(length, window, min_overlap):
    """Return `(starts, stops)` of the tiles covering one axis of `length` pixels."""
    count = length // (window - min_overlap) + 1
    starts = np.linspace(0, length, num=count, endpoint=False, dtype=np.int64)
    # Pin the last tile to the far edge of the axis. Clamp at 0 so that an axis
    # shorter than `window` does not produce a negative start, which would be
    # interpreted as a wrap-around index when slicing.
    starts[-1] = max(length - window, 0)
    stops = (starts + window).clip(0, length)
    return starts, stops


def make_grid_tf(shape, window=1024, min_overlap=32):
    """Build a grid of overlapping square tiles covering a 2-D image.

    Args:
        shape: `(x, y)` extent of the image along its first and second axis.
        window: side length of a tile.
        min_overlap: minimum overlap between neighbouring tiles; the number
            of tiles per axis is `extent // (window - min_overlap) + 1`.

    Returns:
        int64 array of shape (N, 4), where N is the number of tiles and each
        row holds the slice bounds `x1, x2, y1, y2` of one tile. Rows are
        ordered with the second axis varying fastest.
    """
    x, y = shape
    x1, x2 = _tile_bounds_tf(x, window, min_overlap)
    y1, y2 = _tile_bounds_tf(y, window, min_overlap)
    nx, ny = len(x1), len(y1)

    # Broadcast the per-axis bounds over the (nx, ny) grid instead of filling
    # the array cell by cell in Python.
    slices = np.zeros((nx, ny, 4), dtype=np.int64)
    slices[:, :, 0] = x1[:, None]
    slices[:, :, 1] = x2[:, None]
    slices[:, :, 2] = y1[None, :]
    slices[:, :, 3] = y2[None, :]
    return slices.reshape(nx*ny, 4)

In [None]:
# Keras checkpoints (.h5) of the TensorFlow ensemble. Each entry is loaded with
# tf.keras.models.load_model, and the prediction loop averages the outputs of
# all loaded models with equal weight.
models_path_list = [
    '../input/hubmap-efficientnet-b4/model-fold-2.h5',
    '../input/hubmap-efficientnet-b6-pseudo/model-fold-0.h5',
    '../input/hubmap-efficientnetb7-pseudo-labelled/model-fold-1.h5'
]

In [None]:
# Load every Keras checkpoint for inference only (compile=False skips the
# training configuration), then report how many models were loaded.
fold_models = [
    tf.keras.models.load_model(checkpoint_path, compile=False)
    for checkpoint_path in models_path_list
]
print(len(fold_models))

In [None]:
def _load_deepflash_unet(encoder_name, weights_file):
    """Build an smp.Unet for `encoder_name`, load `weights_file`, return `(model, stats)`."""
    net = smp.Unet(encoder_name=encoder_name,
                   encoder_weights=cfg.encoder_weights,
                   in_channels=cfg.in_channels,
                   classes=cfg.classes)
    net, net_stats = load_model_weights(net, weights_file)
    if torch.cuda.is_available(): net.cuda()
    return net, net_stats


# (encoder, checkpoint) pairs of the PyTorch ensemble, in the order in which the
# prediction loop pairs them with its blend weights.
# Checkpoints that were tried earlier and are currently disabled:
#   efficientnet-b6: ../input/hubmap-deepflash-efficientnet-b6-local/unet_efficientnet-b6_1.pth
#   efficientnet-b4: ../input/hubmap-single-fold-models-b1b5/unet_efficientnet-b4_1.pth
#   efficientnet-b5: ../input/hubmap-single-fold-models-b1b5/unet_efficientnet-b5_1.pth
deepflash_model_specs = [
    ('efficientnet-b7', '../input/hubmap-deepflash-effiicentnetb7/' + 'unet_efficientnet-b7.pth'),
    ('efficientnet-b6', '../input/hubmap-deepflash-efficientnetb6/' + 'unet_efficientnet-b6.pth'),
]

deepflash_models = []

for spec_encoder, spec_weights in deepflash_model_specs:
    model, stats = _load_deepflash_unet(spec_encoder, spec_weights)
    # NOTE(review): `batch_tfms` is overwritten on every iteration, so the
    # normalisation statistics of the LAST checkpoint are the ones used for all
    # models during prediction — confirm that this is intended.
    batch_tfms = [Normalize.from_stats(*stats)]

    deepflash_models.append(model)
    del model
    gc.collect()

In [None]:
# Sanity check: number of PyTorch models loaded. The prediction loop zips this
# list with `weights`, so both should have the same length.
print(len(deepflash_models))

### Prediction

In [None]:
import pathlib
import glob
from tqdm.notebook import tqdm

# Collect the ids (file names without extension) of all test images.
p = pathlib.Path('../input/hubmap-kidney-segmentation')
test_tiffs = list(p.glob('test/*.tiff'))
ids = [tiff_path.stem for tiff_path in tqdm(test_tiffs, total=len(test_tiffs))]

# One row per image, indexed by id; 'predicted' is filled in after inference.
df_sample = pd.DataFrame({'id': ids})
df_sample['predicted'] = np.nan
df_sample = df_sample.set_index('id')

# When exactly five test images are present only the first two are processed.
# NOTE(review): presumably five is the size of the public test set and this
# shortens the interactive run — confirm.
if len(df_sample) == 5:
    df_sample = df_sample.iloc[:2, :]

In [None]:
from contextlib import ExitStack

from numba import cuda

names, preds = [], []

# Blend weights of the deepflash2 models, matched by position with
# `deepflash_models` and normalised by their sum when blending.
weights = [1, 2]

# Scratch file that parks the TensorFlow mask on disk while the PyTorch models
# run. Saving and loading use the same path (previously it was saved relative
# to the current directory but loaded from '../working/').
pred_tf_file = 'pred_tf.npy'

# A tile is treated as empty when it has at most `p_th` pixels with
# saturation above `s_th`, or when its pixel sum is at most `p_th`.
s_th = 40
p_th = 1000*(1024//256)**2

# Cap the GPU memory TensorFlow may claim at LIMIT GB so the PyTorch models have
# room on the same device. This does not depend on the file, so it is done once.
# NOTE(review): TensorFlow raises RuntimeError when virtual devices are
# configured after the GPU has been initialised; since the Keras models are
# loaded in an earlier cell, this call probably only prints that error and has
# no effect. Consider moving it before the models are loaded.
LIMIT = 6.0
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024*LIMIT)])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    except RuntimeError as e:
        print(e)
print('We will restrict TensorFlow to max %iGB GPU RAM'%LIMIT)
print('then RAPIDS can use %iGB GPU RAM'%(16-LIMIT))

for idx in df_sample.index:
    print(f'###### File {idx} ######')

    f = cfg.data_path/'test'/f'{idx}.tiff'

    # ------------------------------------------------------------------
    # Stage 1: Keras ensemble on WINDOW x WINDOW tiles of the full image
    # ------------------------------------------------------------------
    # ExitStack closes the main dataset and any per-channel subdatasets as
    # soon as tiling is finished, even if a tile fails.
    with ExitStack() as stack:
        dataset_tf = stack.enter_context(rasterio.open(f, transform=identity))
        # Per-pixel count of tiles that voted "foreground".
        pred_tf = np.zeros(dataset_tf.shape, dtype=np.uint8)

        slices_tf = make_grid_tf(dataset_tf.shape, window=WINDOW, min_overlap=MIN_OVERLAP)

        if dataset_tf.count != 3:
            print('Image file with subdatasets as channels')
            layers_tf = [stack.enter_context(rasterio.open(subd)) for subd in dataset_tf.subdatasets]

        print(f'Dataset Shape: {dataset_tf.shape}')

        # All-background prediction used for tiles that are skipped.
        EMPTY = np.zeros((NEW_SIZE, NEW_SIZE))

        for (x1, x2, y1, y2) in tqdm(slices_tf):
            tile_window = Window.from_slices((x1, x2), (y1, y2))
            if dataset_tf.count == 3:
                image = dataset_tf.read([1, 2, 3], window=tile_window)
                image = np.moveaxis(image, 0, -1)
            else:
                image = np.zeros((WINDOW, WINDOW, 3), dtype=np.uint8)
                for fl in range(3):
                    image[:, :, fl] = layers_tf[fl].read(window=tile_window)

            # Saturation channel of the tile, used to detect empty tiles.
            saturation = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)[..., 1]

            if (saturation > s_th).sum() <= p_th or image.sum() <= p_th:
                pred_temp = EMPTY
            else:
                image = cv2.resize(image, (NEW_SIZE, NEW_SIZE), interpolation=cv2.INTER_AREA)
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                image = np.expand_dims(image, 0)

                # Equal-weight average over the Keras models.
                pred_temp = None
                for fold_model in fold_models:
                    fold_pred = np.squeeze(fold_model.predict(image))
                    pred_temp = fold_pred if pred_temp is None else pred_temp + fold_pred
                pred_temp = pred_temp/len(fold_models)

            pred_temp = cv2.resize(pred_temp, (WINDOW, WINDOW))
            pred_tf[x1:x2, y1:y2] += (pred_temp > THRESHOLD).astype(np.uint8)

    pred_tf = (pred_tf >= VOTERS).astype(np.uint8)
    np.save(pred_tf_file, pred_tf)

    # Only names that are guaranteed to exist are deleted here. `fold_model`
    # is unset when every tile of the image was skipped as empty, which used
    # to raise NameError.
    del pred_tf, EMPTY, slices_tf, dataset_tf, pred_temp, image, saturation
    gc.collect()
    K.clear_session()

    # ------------------------------------------------------------------
    # Stage 2: deepflash2 / PyTorch ensemble
    # ------------------------------------------------------------------
    # Create deepflash2 dataset (including tiling and file conversion)
    ds = TileDataset([f], scale=cfg.scale, tile_shape=cfg.tile_shape, padding=cfg.padding)
    shape = ds.data[f.name].shape
    print('Shape:', shape)

    # Create fastai dataloader and learner
    dls = DataLoaders.from_dsets(ds, batch_size=cfg.batch_size, after_batch=batch_tfms, shuffle=False, drop_last=False)

    pred = None
    weight_total = np.sum(weights)
    for model, weight in zip(deepflash_models, weights):
        print(weight)
        if torch.cuda.is_available(): dls.cuda(), model.cuda()

        learn = Learner(dls, model, loss_func='')

        print('Prediction')
        res = learn.predict_tiles(dl=dls.train, path='/kaggle/temp/', use_tta=True, uncertainty_estimates=False)

        # Index 1 of the last axis of the result is blended with this model's
        # normalised weight.
        weighted = weight*res[0][f.name][..., 1]/weight_total
        if pred is None:
            pred = weighted
        else:
            pred += weighted

        del res, learn, model, weighted
        gc.collect()

    del ds, dls
    gc.collect()
    torch.cuda.empty_cache()

    # ------------------------------------------------------------------
    # Stage 3: merge both masks and encode
    # ------------------------------------------------------------------
    # Mask from the blended prediction > threshold
    msk = (pred>cfg.threshold).astype(np.uint8)
    del pred

    print('Rezising')
    msk = cv2.resize(msk, (shape[1], shape[0]))
    pred_tf = np.load(pred_tf_file)
    # Union of the two binary masks: a pixel is kept if either stage marked it.
    msk = (msk+pred_tf)
    del pred_tf
    gc.collect()
    msk = (msk>=1).astype(np.uint8)
    rle = rle_encode_less_memory(msk)

    names.append(idx)
    preds.append(rle)

    del msk, rle
    gc.collect()

    # Overwrite store (reduce disk usage)
    for tmp_entry in Path('/kaggle/temp/').iterdir():
        shutil.rmtree(tmp_entry, ignore_errors=True)
    for tmp_entry in Path('/tmp/').iterdir():
        if tmp_entry.name.startswith('zarr'):
            shutil.rmtree(tmp_entry, ignore_errors=True)
    

### Submission

In [None]:
# Collect the encoded masks, indexed by image id.
df = pd.DataFrame({'id':names,'predicted':preds}).set_index('id')
# Write the encodings into the matching rows of the sample frame.
df_sample.loc[df.index.values] = df.values  
# Save the submission file and show it in the notebook output.
df_sample.to_csv('submission.csv')
display(df_sample)