# HuBMAP - Efficient Sampling Baseline (deepflash2, pytorch, fastai) [sub]

> Submission kernel for a model trained with efficient region-based sampling.

***

## Overview

1. Installation and package loading
2. Functions and classes for prediction
3. Configuration
4. Prediction
5. Submission

#### Related Kernels

- Train Notebook: https://www.kaggle.com/matjes/hubmap-efficient-sampling-deepflash2-train
- Sampling Notebook: https://www.kaggle.com/matjes/hubmap-labels-pdf-0-5-0-25-0-01

#### Versions
- V12: Minor changes in deepflash2 API to support albumentations (changes `apply` in `DeformationField` slightly, see patch below)
- V13: Adding prediction threshold 0.4
- V14: Threshold 0.2 for d488c759a - see discussion https://www.kaggle.com/c/hubmap-kidney-segmentation/discussion/228993 
- V15: **NEW PREDICTION** 
    - Using overlapping tiles and gaussian weighting from [nnunet](https://www.nature.com/articles/s41592-020-01008-z)/[github](https://github.com/MIC-DKFZ/nnUNet), which will also be part of the upcoming `deepflash2` release
    - Supporting model ensembles
    - Fixing submission to private LB using `rasterio` (thanks to @leighplt ([kernel](https://www.kaggle.com/leighplt/pytorch-fcn-resnet50)) and @iafoss ([kernel](https://www.kaggle.com/iafoss/hubmap-pytorch-fast-ai-starter-sub)))

### Installation and package loading

In [None]:
# Install deepflash2 and dependencies
import sys
sys.path.append("../input/segmentation-models-pytorch-install")
!pip install -q --no-deps ../input/deepflash2-lfs
import cv2, torch, gc, rasterio
import torch.nn.functional as F
import deepflash2.tta as tta
import matplotlib.pyplot as plt
import pandas as pd, numpy as np
import segmentation_models_pytorch as smp
from pathlib import Path
from rasterio.windows import Window
from torch.utils.data import Dataset, DataLoader
from scipy .ndimage.filters import gaussian_filter
from tqdm.notebook import tqdm

import warnings
warnings.filterwarnings("ignore")

### Functions and classes for prediction

In [None]:
#https://www.kaggle.com/bguberfain/memory-aware-rle-encoding
#with transposed mask
def rle_encode_less_memory(img):
    """Run-length encode a 2-D mask in column-major order.

    The mask is transposed and flattened into a copy, so ``img`` itself is
    never modified. The first and last pixel of that copy are forced to 0,
    which guarantees that every run has both a start and an end.

    Args:
        img: 2-D array; presumably a binary mask (0 = background) - the
            pairing of starts and ends below relies on that.

    Returns:
        A string of space-separated ``start length`` pairs with 1-based
        starts, or an empty string when the mask contains no run.
    """
    flat = img.T.flatten()

    # The simplified scheme needs background at both ends of the sequence.
    flat[0] = flat[-1] = 0

    # Positions (1-based) at which the value changes: run starts sit at even
    # indices of ``changes`` and the matching run ends at odd indices.
    changes = np.flatnonzero(flat[1:] != flat[:-1]) + 2
    # Turn each end position into a run length.
    changes[1::2] -= changes[::2]

    return ' '.join(map(str, changes))

# def load_model_weights(model, file, strict=True):
#     state = torch.load(file, map_location='cpu')
#     stats = state['stats']
#     model_state = state['model']
#     model.load_state_dict(model_state, strict=strict)
#     return model, stats


def load_model_weights(model, file, strict=True):
    """Load weights from ``file`` into ``model`` and switch it to eval mode.

    Two checkpoint layouts are accepted: a mapping that stores the state dict
    under the key ``'model'``, or a bare state dict.

    Args:
        model: Module whose parameters are overwritten.
        file: Checkpoint path (or file-like object) passed to ``torch.load``.
        strict: Forwarded to ``model.load_state_dict``.

    Returns:
        The same ``model`` instance, in eval mode.
    """
    state = torch.load(file, map_location='cpu')
    try:
        model_state = state['model']
    except (KeyError, TypeError):
        # No 'model' entry: the checkpoint is the state dict itself.
        model_state = state
    model.load_state_dict(model_state, strict=strict)
    model.eval()
    return model


# from https://github.com/MIC-DKFZ/nnUNet/blob/2fade8f32607220f8598544f0d5b5e5fa73768e5/nnunet/network_architecture/neural_network.py#L250
def _get_gaussian(patch_size, sigma_scale=1. / 8) -> np.ndarray:
    """Build a Gaussian importance map for blending overlapping patches.

    A unit impulse at the patch centre is blurred with a Gaussian whose
    per-axis sigma is ``patch_size[i] * sigma_scale``; the result is scaled so
    that its maximum is 1 and returned as float32.

    Args:
        patch_size: Sequence with the patch extent along each axis.
        sigma_scale: Sigma expressed as a fraction of the patch extent.

    Returns:
        Strictly positive float32 array of shape ``patch_size``.
    """
    center = tuple(extent // 2 for extent in patch_size)
    sigmas = [extent * sigma_scale for extent in patch_size]

    impulse = np.zeros(patch_size)
    impulse[center] = 1

    weights = gaussian_filter(impulse, sigmas, 0, mode='constant', cval=0)
    weights = (weights / weights.max()).astype(np.float32)

    # Weights must never be 0, otherwise the later normalisation yields NaNs:
    # lift exact zeros to the smallest non-zero weight.
    is_zero = weights == 0
    weights[is_zero] = weights[~is_zero].min()

    return weights

In [None]:
# Some code adapted from https://www.kaggle.com/iafoss/hubmap-pytorch-fast-ai-starter-sub
class HubmapDataset(Dataset):
    """Tile-wise dataset over one TIFF that does not load the full file.

    The image is opened with rasterio and covered by a grid of tiles. Two
    slice tuples are stored per tile: ``slices`` addresses the tile in the
    file at full resolution, ``out_slices`` addresses the same tile in a
    downscaled array of shape ``out_data_shape``.

    Args:
        file: Path of the TIFF file to open.
        stats: ``(mean, std)`` pair applied to the pixel values after they
            have been divided by 255.
        scale: Downscaling factor between file resolution and tile resolution.
        shift: Tile density control; about ``size // (tile * shift) + 1`` tile
            centres are spread evenly along each axis.
        output_shape: Tile size ``(height, width)`` after downscaling.
        s_th: Saturation threshold used to detect empty tiles.
    """
    def __init__(self, file, stats, scale=3, shift=.8, output_shape=(512,512), s_th = 40):

        self.mean, self.std = stats
        self.scale = scale
        self.shift = shift
        self.output_shape = output_shape
        # Size of the region read from the file for one tile.
        self.input_shape = tuple(int(t*scale) for t in self.output_shape)
        self.s_th = s_th #saturation blanking threshold
        self.p_th = 1000*(self.output_shape[0]//256)**2 #threshold for the minimum number of pixels

        identity = rasterio.Affine(1, 0, 0, 0, 1, 0)
        self.data = rasterio.open(file, transform = identity, num_threads='all_cpus')
        if self.data.count != 3:
            # Channels are not stored as three bands of one dataset: open each
            # subdataset separately; __getitem__ reads one channel from each.
            self.layers = [rasterio.open(subdataset) for subdataset in self.data.subdatasets]

        # Tiling
        self.slices = []
        self.out_slices = []
        self.out_data_shape = tuple(int(x//self.scale) for x in self.data.shape)
        # Tile centres run from half a tile inside one border to half a tile
        # inside the opposite border (in downscaled coordinates).
        start_points = [o//2 for o in self.output_shape]
        end_points = [(s - st) for s, st in zip(self.out_data_shape, start_points)]
        n_points = [int(s//(o*self.shift))+1 for s, o in zip(self.out_data_shape, self.output_shape)]
        center_points = [np.linspace(st, e, num=n, endpoint=True, dtype=np.int64) for st, e, n in zip(start_points, end_points, n_points)]
        for cx in center_points[1]:
            for cy in center_points[0]:
                # Slices into the full-resolution file, clipped to the image.
                slices = tuple(slice(int((c*self.scale - o/2).clip(0, s)), int((c*self.scale + o/2).clip(max=s)))
                                 for (c, o, s) in zip((cy, cx), self.input_shape, self.data.shape))
                self.slices.append(slices)

                # Matching slices into the downscaled output array.
                out_slices = tuple(slice(int((c - o/2).clip(0, s)), int((c + o/2).clip(max=s)))
                                 for (c, o, s) in zip((cy, cx), self.output_shape, self.out_data_shape))
                self.out_slices.append(out_slices)


    def __len__(self):
        """Return the number of tiles."""
        return len(self.slices)

    def __getitem__(self, idx):
        """Read, downscale and normalise tile ``idx``.

        Returns:
            ``(image, idx)`` where ``image`` is a float32 tensor in
            channel-first layout and ``idx`` is replaced by ``-1`` when the
            tile is considered empty.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        slices = self.slices[idx]
        if self.data.count == 3: # normal
            img = self.data.read([1, 2, 3],
                window=Window.from_slices(*slices)
            )
            img = np.moveaxis(img, 0, -1)
        else: # with subdatasets/layers
            img = np.zeros((*self.input_shape, 3), dtype=np.uint8)
            for fl in range(3):
                img[:, :, fl] = self.layers[fl].read(
                    window=Window.from_slices(*slices)
                )

        if self.scale!=1:
            # cv2.resize takes dsize as (width, height) while output_shape is
            # (height, width), so the order has to be reversed.
            img = cv2.resize(img, tuple(self.output_shape[::-1]), interpolation = cv2.INTER_AREA)

        #check for empty images
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        h,s,v = cv2.split(hsv)
        if (s>self.s_th).sum() <= self.p_th or img.sum() <= self.p_th:
            # Too few saturated pixels or an almost black tile: flag it with
            # idx=-1 so that the caller can drop it.
            idx = -1

        img = (img/255.0 - self.mean)/self.std
        img = img.transpose(2, 0, 1).astype('float32')

        return torch.from_numpy(img), idx


### Configuration

In [None]:
# class CONFIG():
    
#     # data paths
#     data_path = Path('../input/hubmap-kidney-segmentation')
# #     model_file = '../input/deep-flash-resnest50d/unet_timm-resnest50d.pth'
#     model_file = "../input/hubmap-deepflash2/model_9545.pth"
    
#     # deepflash2 dataset (https://matjesg.github.io/deepflash2/data.html#TileDataset)
#     scale = 3 # zoom facor (zoom out)
    
#     tile_shape = (1024,1024)
#     padding = (100,100) # Border overlap for prediction

#     # pytorch model (https://github.com/qubvel/segmentation_models.pytorch)
#     encoder_name = "timm-regnetx_064"
#     encoder_weights = None
#     in_channels = 3
#     classes = 2
    
#     # dataloader 
#     batch_size = 7
    
#     # prediction threshold
#     threshold = 0.45
    
# cfg = CONFIG()

In [None]:
# Sample submissions for ids
# df_sample = pd.read_csv(cfg.data_path/'sample_submission.csv',  index_col='id')
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# # Models (see https://github.com/qubvel/segmentation_models.pytorch)
# MODELS = [f for f in cfg.model_path.iterdir() if f.suffix=='.pth']
# print(f'Found {len(MODELS)} models', *MODELS)

# models = []
# for i, m_path in enumerate(MODELS):
#     #state_dict = torch.load(path,map_location=torch.device('cpu'))
#     model = smp.Unet(encoder_name=cfg.encoder_name, 
#                      encoder_weights=cfg.encoder_weights, 
#                      in_channels=cfg.in_channels, 
#                      classes=cfg.classes)
#     model, stats = load_model_weights(model, m_path)
#     model.float()
#     model.eval()
#     model.to(device)
#     models.append(model)
#     models.append(model)

# mp = Model_pred(models, use_tta=cfg.tta, batch_size=cfg.batch_size)

In [None]:
class Model_pred:
    """Predict a whole image tile by tile with an ensemble of models.

    Args:
        models: Iterable of models; each is called on a batch of tiles and its
            output is passed through a softmax over dimension 1.
        use_tta: If true, predictions are additionally averaged over
            horizontal flip, vertical flip and 90/180/270 degree rotations.
        batch_size: Number of tiles per forward pass.
    """
    def __init__(self, models, use_tta=True, batch_size=32):
        self.models = models
        self.bs = batch_size
        self.tfms = [tta.HorizontalFlip(), tta.VerticalFlip(), tta.Rotate90(angles=[90,180,270])] if use_tta else []

    @staticmethod
    def _normalize(pred, merge_map):
        """Divide ``pred`` by ``merge_map`` in place; uncovered pixels become 0.

        Pixels that never received a tile have zero accumulated weight. A plain
        division would turn them into NaN, so they are set to 0 instead.
        """
        covered = merge_map > 0
        np.divide(pred, merge_map, out=pred, where=covered)
        pred[~covered] = 0
        return pred

    def predict(self, ds):
        """Return the merged positive-class probability map for dataset ``ds``.

        ``ds`` must provide ``out_data_shape``, ``output_shape`` and
        ``out_slices`` and yield ``(image, idx)`` pairs where a negative
        ``idx`` marks a tile to skip. The result is a float32 array of shape
        ``ds.out_data_shape``.
        """
        #rasterio cannot be used with multiple workers
        dl = DataLoader(ds, self.bs, num_workers=0, shuffle=False, pin_memory=True)

        # Accumulators for weighted predictions and for the weights themselves
        pred = np.zeros(ds.out_data_shape, dtype='float32')
        merge_map = np.zeros(ds.out_data_shape, dtype='float32')

        # Gaussian weights
        gw_numpy = _get_gaussian(ds.output_shape)
        gw = torch.from_numpy(gw_numpy).to(device)

        with torch.no_grad():
            for images, idxs in tqdm(dl, total=len(dl)):
                keep = idxs >= 0
                if not keep.any():
                    # Every tile of this batch was flagged as empty.
                    continue

                images = images[keep].to(device)
                idxs = idxs[keep]

                # Outer merger averages over augmentations, inner one over models.
                merger = tta.Merger()
                for t in tta.Compose(self.tfms):
                    aug_images = t.augment_image(images)
                    model_merger = tta.Merger()
                    for model in self.models:
                        out = model(aug_images)
                        out = F.softmax(out, dim=1)
                        model_merger.append(out)
                    out = t.deaugment_mask(model_merger.result())
                    merger.append(out)

                # Apply gaussian weighting
                batch_smx = merger.result()*gw.view(1,1,*gw.shape)
                batch_smx = batch_smx.permute(0,2,3,1).cpu().numpy()

                for smx, idx in zip(batch_smx, idxs.tolist()):
                    slcs = ds.out_slices[idx]
                    # Only using positive class here
                    pred[slcs] += smx[...,1]
                    merge_map[slcs] += gw_numpy

        return self._normalize(pred, merge_map)

In [None]:
#model.encoder

In [None]:
#model.encoder

In [None]:
'''

model = smp.DeepLabV3Plus(encoder_name=cfg.encoder_name, 
                 encoder_weights=cfg.encoder_weights, 
                 in_channels=cfg.in_channels, 
                 classes=cfg.classes,
                 decoder_atrous_rates = (6, 12, 18),
                  encoder_output_stride =8        ) #stage 4,5 wil have dil as 2 /4 else  for 16 stride its 2 for stage 5 last stage
#model.decoder
'''

In [None]:
#model.decoder
# Notes on depthwise-separable convolutions:
# - Depthwise convolution: groups == in_channels, so each group holds
#   in_channels/groups = 1 input channel and is filtered by its own
#   out_channels/groups kernel(s).
# - Pointwise convolution: a 1x1 kernel is used instead of e.g. 3x3, i.e. the
#   kernel covers a single spatial position at a time.
# - "Spatial" dimensions are the H x W axes the kernel slides over.

# Scratch check of the output size of a 5x5 conv followed by a 1x1 conv.
c1= torch.nn.Conv2d(3,3,(5,5),stride=1)
# Dilation has no effect on a 1x1 kernel, so c2 keeps the spatial size.
c2= torch.nn.Conv2d(3,3,(1,1),stride=1,dilation =(1,3 ))
i1=torch.randn(2,3,12,12)
# 12 - 5 + 1 = 8 along both spatial axes -> torch.Size([2, 3, 8, 8])
c2(c1(i1)).size()
  

In [None]:
#state=torch.load('../input/hubmap-deeplabv3-v14/v14_stage3/model_6.pth',device)
 

In [None]:
#state=torch.load('../input/hubmap-deeplabv3-v14/DeepLab_timm-regnetx_064_v14.pth',device)
#state
 

In [None]:
class CONFIG:
    """Static settings of the submission run."""

    # Competition data
    data_path = Path('../input/hubmap-kidney-segmentation')

    # Checkpoints of the ensemble members. The order matters: entry i is
    # loaded into a network built with ``encoder_name[i]``.
    model_path = [
        Path(checkpoint)
        for checkpoint in (
            '../input/hubmap-deeplabv3-v14/v14_stage3/model_6.pth',
            '../input/hubmap-deeplabv3-v14/v14_stage3/model_9.pth',
            '../input/hubmap-deepflash-1024-effnetb2-v06-pl2/model_9.pth',
            '../input/hubmap-deepflash-1024-effnetb2-v07-nonpld48/model_11.pth',
            '../input/hubmap-deepflash-1024-effnetb3-v08-nonpld48/model_11.pth',
            '../input/hubmap-deepflash-1024-effnetb3-v09-pl2/model_12.pth',
        )
    ]

    # Zoom factor (e.g., 3 means downscaling from 1536 to 512)
    scale = 3
    # Tile shift for prediction
    shift = 0.8
    # Tile size fed to the models
    tile_shape = (1024, 1024)

    # Encoders for segmentation_models_pytorch
    # (https://github.com/qubvel/segmentation_models.pytorch)
    encoder_name = [
        "timm-regnetx_064",
        "timm-regnetx_064",
        'efficientnet-b2',
        'efficientnet-b2',
        'timm-efficientnet-b3',
        'timm-efficientnet-b3',
    ]
    encoder_weights = None
    in_channels = 3
    classes = 2

    # Dataloader
    batch_size = 8

    # Test time augmentation
    tta = True
    # Probability above which a pixel counts as positive
    threshold = 0.45


cfg = CONFIG()

In [None]:
# Print the encoder configured for each ensemble member, one per line.
for  x in cfg.encoder_name:
    print(x)

In [None]:
# The sample submission provides the ids of the images to predict.
df_sample = pd.read_csv(cfg.data_path/'sample_submission.csv',  index_col='id')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build one network per (encoder, checkpoint) pair, see
# https://github.com/qubvel/segmentation_models.pytorch.
# Checkpoints whose path contains 'deeplab' are DeepLabV3+ models, all others
# are U-Nets.
models = []
for name, path in zip(cfg.encoder_name, cfg.model_path):
    print(path, name)
    is_deeplab = 'deeplab' in str(path)
    if is_deeplab:
        print(path)

    architecture = smp.DeepLabV3Plus if is_deeplab else smp.Unet
    model = architecture(
        encoder_name=name,
        encoder_weights=cfg.encoder_weights,
        in_channels=cfg.in_channels,
        classes=cfg.classes,
    )
    model = load_model_weights(model, path)
    model.float()
    model.eval()
    model.to(device)
    print('loaded' if is_deeplab else 'loaded 1', path)
    models.append(model)

# (mean, std) per channel, handed to HubmapDataset for normalisation.
stats = (
    np.array([0.61561477, 0.5179343 , 0.64067212]),
    np.array([0.2915353 , 0.31549066, 0.28647661]),
)

# Ensemble predictor over all loaded models.
mp = Model_pred(models, use_tta=cfg.tta, batch_size=cfg.batch_size)


# batch_tfms = [Normalize.from_stats(*stats)]
# print(stats)

In [None]:
# mp
# Show how many ensemble members were loaded.
len(models)

In [None]:
def area_threshold(img,min_area=200,top_threshold=0.45):
    """Zero out small connected regions of a probability map, in place.

    Pixels above ``top_threshold`` are grouped into external contours. Every
    contour whose area is below the cut-off is filled with 0 in ``img``.
    The cut-off is 500, unless even the smallest contour is larger than 500;
    then it is ``np.percentile(areas, 0.1)``.

    Args:
        img: 2-D float array of probabilities; modified in place.
        min_area: Accepted for backward compatibility.
            NOTE(review): the value passed in is always overridden by the
            cut-off rule above - confirm whether that is intended.
        top_threshold: Probability above which a pixel belongs to a region.

    Returns:
        ``img`` itself.
    """
    classification = img > top_threshold
    if not classification.any():
        # Nothing exceeds the threshold, so there are no contours to measure
        # (np.max/np.min of an empty list would raise).
        return img

    # [-2] selects the contour list for both the 2-tuple and the 3-tuple
    # return convention of cv2.findContours.
    contours = cv2.findContours(classification.astype('uint8'),cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE)[-2]
    areas = [cv2.contourArea(c) for c in contours]

    # NOTE(review): np.percentile takes q in [0, 100], so 0.8 and 0.1 are
    # fractions of a percent - confirm this is what was meant.
    print(np.max(areas), np.min(areas), 'max',np.percentile(areas, 0.8), 'min',np.percentile(areas, 0.1))

    if np.min(areas) > 500:
        min_area = np.percentile(areas, 0.1)
    else:
        min_area = 500

    small = [c for c, area in zip(contours, areas) if area < min_area]
    if small:
        # Collect all regions to remove in one mask instead of allocating a
        # full-size mask per contour.
        remove_mask = np.zeros(img.shape, np.uint8)
        for c in small:
            cv2.drawContours(remove_mask, [c], 0, 255, -1)
        img[remove_mask > 0] = 0

    return img

### Prediction

In [None]:
#!mkdir -p /tmp/pip/cache/
#!cp ../input/pydensecr/pydensecrf-1.0rc2-cp37-cp37m-linux_x86_64.whl /tmp/pip/cache/
#!pip install --no-index --find-links /tmp/pip/cache/ pydensecrf

In [None]:
#import pydensecrf.densecrf as dcrf
from skimage.io import imread, imsave
#from pydensecrf.utils import unary_from_labels, create_pairwise_bilateral
from skimage.color import gray2rgb
from skimage.color import rgb2gray

In [None]:
#Original_image = Image which has to labelled
#Mask image = Which has been labelled by some technique..
def crf(original_image, mask_img):
    """Refine a label mask with a dense CRF that uses positions only.

    NOTE(review): ``dcrf`` and ``unary_from_labels`` have to be in scope; the
    pydensecrf imports are commented out in this notebook.

    Args:
        original_image: Image that was labelled; only ``shape[1]`` and
            ``shape[2]`` are read (presumably a channel-first tile - confirm).
        mask_img: Labelling produced by some other technique, grayscale or RGB.

    Returns:
        Array of shape ``(original_image.shape[1], original_image.shape[2])``
        with the most probable label per pixel.
    """
    # A grayscale annotation is expanded to three channels first.
    if len(mask_img.shape) < 3:
        mask_img = gray2rgb(mask_img)

    # Pack the three colour channels into one integer per pixel ...
    red, green, blue = mask_img[:, :, 0], mask_img[:, :, 1], mask_img[:, :, 2]
    packed_colors = red + (green << 8) + (blue << 16)

    # ... and map the distinct integers to labels 0, 1, 2, ...
    _, labels = np.unique(packed_colors, return_inverse=True)

    n_labels = 2
    rows, cols = original_image.shape[1], original_image.shape[2]

    # Setting up the CRF model
    # NOTE(review): the two extents are passed in the order (shape[1],
    # shape[2]); check this against the (width, height) order of DenseCRF2D
    # before using non-square inputs.
    d = dcrf.DenseCRF2D(rows, cols, n_labels)

    # Unary potentials (negative log probability) derived from the labels
    unary = unary_from_labels(labels, n_labels, gt_prob=0.7, zero_unsure=False)
    d.setUnaryEnergy(unary)

    # Colour-independent pairwise term: features are the locations only.
    d.addPairwiseGaussian(
        sxy=(3, 3),
        compat=3,
        kernel=dcrf.DIAG_KERNEL,
        normalization=dcrf.NORMALIZE_SYMMETRIC,
    )

    # Run inference for 6 steps
    Q = d.inference(6)

    # Most probable class for each pixel, reshaped to the image grid
    return np.argmax(Q, axis=0).reshape((rows, cols))

In [None]:
# Display the fifth row of the sample submission.
# NOTE(review): raises IndexError when the sample has fewer than five rows.
df_sample.iloc[4]


In [None]:
# Predict every test image listed in the sample submission and collect the
# run-length encoded masks.
names,preds = [],[]

# Debug peek at the sixth row; guarded because the number of test images is
# not fixed and an out-of-range position would raise IndexError.
if len(df_sample) > 5:
    print(df_sample.iloc[5])

for idx in tqdm(df_sample.index, total=len(df_sample)):

    print(f'###### File {idx} ######')
    f = cfg.data_path/'test'/f'{idx}.tiff'
    ds = HubmapDataset(f, stats, scale=cfg.scale, shift=cfg.shift, output_shape=cfg.tile_shape)

    print('Predicting...')
    try:
        pred = mp.predict(ds)
        shape = ds.data.shape
    finally:
        # Release the rasterio handles of this file before opening the next.
        for handle in [ds.data, *getattr(ds, 'layers', [])]:
            handle.close()
    pred = area_threshold(pred)

    print('Rezising...')
    # Mean and std of the confident predictions, ignoring undefined pixels.
    valid = pred[~np.isnan(pred)]
    confident = valid[valid > 0.15]
    print(confident.mean(), confident.std())
    del valid, confident

    # Casting NaN to uint8 is undefined, so undefined pixels become 0 first.
    pred = np.nan_to_num(pred, copy=False)
    # cv2.resize takes the target size as (width, height).
    pred = cv2.resize((pred*255).astype('uint8'), (shape[1], shape[0]))

    # Image-specific threshold, see
    # https://www.kaggle.com/c/hubmap-kidney-segmentation/discussion/228993
    th = 0.2 if idx=='d488c759a' else cfg.threshold

    pred = (pred>th*255).astype(np.uint8)

    #convert to rle
    #https://www.kaggle.com/bguberfain/memory-aware-rle-encoding
    rle = rle_encode_less_memory(pred)
    names.append(idx)
    preds.append(rle)

    print('Plotting')
    fig, ax = plt.subplots(figsize=(15,15))
    ax.imshow(cv2.resize(pred, (1024, 1024*shape[0]//shape[1])))
    plt.show()

    # Free the full-resolution mask before the next file is processed.
    del pred
    gc.collect()
    

In [None]:
# List the working directory, then display the sample submission with the id
# as a regular column.
!ls  -l # 5534885 
df_sample.reset_index()

In [None]:
 

#np.nansum(pred).mean()

#nan_array =~ np. isnan(pred)
#pred[nan_array] [np.where(pred[nan_array] >0.2)].std()

# Write the unmodified sample submission to submission.csv.
# NOTE(review): the next cell writes submission.csv again from the predicted
# masks, so this file only survives if that cell does not run.
df_sample.reset_index().to_csv('submission.csv',index=False)

In [None]:
# Debug peek at the sixth sample row; guarded so that a shorter sample cannot
# raise IndexError before the submission file is written.
if len(df_sample) > 5:
    print(df_sample.iloc[5])

# Final submission: one row per predicted image with its run-length encoding.
df = pd.DataFrame({'id':names,'predicted':preds})
df.to_csv('submission.csv',index=False)
df.head()