# HuBMAP + HPA fastai inference
The trained U-Net model with the best accuracy from the previous step is used for inference on the competition test set. Each predicted mask is upsampled to the dimensions of the original image and converted to RLE format before submission.

This notebook achieved the following scores:
* Public Score: **0.44730**
* Private Score: **0.34321**

_Notes:_ 
* _The private score is computed on 50% of the test set and is not shown to participants until after the competition has concluded._
* _This notebook is using fastai v2._

In [1]:
from fastai.vision.all import *
from fastai.callback.all import *
from fastai.basics import *
import numpy as np
import pandas as pd
import PIL
import cv2

In [2]:
# Seed value handed to seed_everything() so that runs are repeatable
SEED = 88

In [3]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Sets the ``PYTHONHASHSEED`` environment variable, seeds Python's
    ``random`` module, NumPy and PyTorch (CPU and CUDA), and turns on
    cuDNN deterministic mode.

    Args:
        seed: integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Every seeding entry point takes the same single integer argument.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark is deliberately not enabled here: the
    # author notes it gives ~10% speedup but may lead to some stochasticity
    # in the results.
    
# Apply the notebook-wide seed before any model loading or inference happens
seed_everything(SEED)

In [27]:
# Load the best model which was exported previously during training
# NOTE(review): export.pkl is a pickle file; unpickling can execute arbitrary
# code, so only load exports that come from a trusted source.
MODEL_NAME = "/kaggle/input/simple-fastai-baseline-and-submission/export.pkl"
learn = load_learner(MODEL_NAME)
# Redirect the learner's model directory to the working directory
# (presumably because the input dataset folder is read-only — TODO confirm)
learn.model_dir='/kaggle/working/'

## Inference with the test set

In [30]:
def rle_encode_less_memory(img):
    ''' Convert a mask to rle encoded format

    The mask is read in column-major order (it is transposed before being
    flattened) and run starts are 1-indexed.

    Input: a mask of unsigned integers of 0s or 1s as a numpy array
    Output: A rle encoded string of space-separated "start length" pairs;
            an empty string when the mask has no foreground pixels
    '''
    # The image should be transposed; flatten() returns a copy, so the
    # caller's array is never modified.
    pixels = np.asarray(img).T.flatten()
    if pixels.size == 0:
        return ''

    # 1-indexed positions at which the pixel value changes
    runs = np.flatnonzero(pixels[1:] != pixels[:-1]) + 2

    # A run touching either end of the mask has no neighbouring background
    # pixel to produce a transition, so add the missing boundary explicitly
    # instead of overwriting (and losing) the first/last pixel.
    if pixels[0]:
        runs = np.concatenate(([1], runs))
    if pixels[-1]:
        runs = np.concatenate((runs, [pixels.size + 1]))

    # Turn (start, end) pairs into (start, length) pairs
    runs[1::2] -= runs[::2]

    return ' '.join(str(x) for x in runs)

In [29]:
# Where the competition data lives, and one TIFF path per id listed in test.csv
DATASET_FOLDER = '../input/hubmap-organ-segmentation'
test_img = Path(DATASET_FOLDER)/"test_images"
df_test = pd.read_csv(os.path.join(DATASET_FOLDER, "test.csv"))
test_files = L([
    test_img/f'{image_id}.tiff'
    for image_id in df_test["id"]
])
# Last expression of the cell: show the collected paths
test_files

(#1) [Path('../input/hubmap-organ-segmentation/test_images/10078.tiff')]

In [31]:
# Copy structure of submission.csv
submission = pd.read_csv(Path(DATASET_FOLDER)/'sample_submission.csv')
submission["id"] = df_test["id"]
# Load the test set
test_dl = learn.dls.test_dl(test_files)
# Inference on the test files; only the decoded predictions are needed
_, _, decoded = learn.get_preds(dl=test_dl, with_decoded=True)

rles = []
for idx, (img_height, img_width) in enumerate(zip(df_test["img_height"], df_test["img_width"])):
    # pandas yields NumPy integers; use plain ints for the target (height, width)
    size = (int(img_height), int(img_width))
    # Add two leading axes for nn.Upsample
    # (assumes decoded[idx] is a 2-D (H, W) mask — TODO confirm)
    mask = decoded[idx].unsqueeze(0).unsqueeze(0)
    # If mask is not empty
    if mask.any():
        # Upsample the mask to original image dimension (height, width)
        upS = nn.Upsample(size=size, mode='nearest')
        mask = upS(mask.float())
        mask = (mask > 1e-6).to(torch.uint8).squeeze(0).squeeze(0).numpy()
    else:
        # Fallback for an empty prediction: a random sized circular mask
        # centred on the image. uint8 keeps the dtype consistent with the
        # predicted-mask branch and with what the RLE encoder documents.
        fraction = np.random.rand()
        mask = np.zeros(size, dtype=np.uint8)
        mask = cv2.circle(mask, (int(np.round(size[1]/2)), int(np.round(size[0]/2))),
                          int(np.round((size[1]/2)*fraction)), 1, -1)
    # Encode mask to RLE string
    rle = rle_encode_less_memory(mask)
    rles.append(rle)

submission["rle"] = rles

## Submission of the results

In [32]:
# Preview the first rows of the submission table before writing it out
display(submission.head())

Unnamed: 0,id,rle
0,10078,9670 4 11693 4 13716 4 15739 4 17758 12 17782 8 19781 12 19805 8 21804 12 21828 8 23827 12 23851 8 24716 12 25842 4 25850 40 26739 12 27865 4 27873 40 28762 12 29888 4 29896 40 30785 12 31911 4 31919 40 32788 4 32804 32 33930 52 34811 4 34827 32 35953 52 36834 4 36850 32 37976 52 38857 4 38873 32 39999 52 40872 8 40896 51 42022 52 42895 8 42919 51 44045 52 44918 8 44942 51 46068 52 46941 8 46965 51 48091 52 48957 11 48988 48 50110 52 50980 11 51011 48 52133 52 53003 11 53034 48 54156 52 55026 11 55057 48 56179 52 57049 19 57072 48 58186 4 58202 52 59072 19 59095 48 60209 4 60225 52 61095 1...


In [34]:
# Write the submission file; index=False leaves the DataFrame index out of the CSV
submission.to_csv("submission.csv", index=False)