Credits:
* https://www.kaggle.com/rdizzl3/hpa-segmentation-masks-no-internet
* https://www.kaggle.com/frlemarchand/generate-masks-from-weak-image-level-labels/


# Installation

In [None]:
!pip install -q "../input/pycocotools/pycocotools-2.0-cp37-cp37m-linux_x86_64.whl"
!pip install -q "../input/hpapytorchzoozip/pytorch_zoo-master"
!pip install -q "../input/hpacellsegmentatormaster/HPA-Cell-Segmentation-master"

In [None]:
import os
# Pre-create torch hub's checkpoint cache directory so the pretrained weights
# copied in below sit under the filename that is looked up by default
# (presumably letting the backbone load without a download -- confirm).
# exist_ok=True makes this a no-op when the directory already exists.
os.makedirs('/root/.cache/torch/hub/checkpoints/', exist_ok=True)
# !cp '../input/resnet50/resnet50.pth' '/root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth'
!cp '../input/resnet34/resnet34.pth' '/root/.cache/torch/hub/checkpoints/resnet34-333f7ec4.pth'

In [None]:
from fastai.vision.all import *
import pandas as pd
import numpy as np
from tqdm.autonotebook import tqdm
import imageio

In [None]:
def build_image_names(
    image_id: str,
    image_dir: str = '/kaggle/input/hpa-single-cell-image-classification/test',
) -> list:
    """Return the per-channel image paths for one image, grouped by channel.

    Args:
        image_id: Image identifier, i.e. the filename prefix that precedes the
            ``_red`` / ``_yellow`` / ``_blue`` channel suffix.
        image_dir: Directory containing the channel PNGs. Defaults to the
            competition test-set directory.

    Returns:
        ``[[red_path], [yellow_path], [blue_path]]`` -- three single-element
        lists, one per channel, in the order the cell segmentator is fed in
        this notebook.
    """
    # Red channel: microtubules (the `mt` reference channel).
    mt = f'{image_dir}/{image_id}_red.png'
    # Yellow channel: endoplasmic reticulum.
    er = f'{image_dir}/{image_id}_yellow.png'
    # Blue channel: nuclei.
    nu = f'{image_dir}/{image_id}_blue.png'
    return [[mt], [er], [nu]]

In [None]:
import hpacellseg.cellsegmentator as cellsegmentator
from hpacellseg.utils import label_cell, label_nuclei

# Paths to the pretrained segmentation weights (nuclei model and cell model),
# loaded from the local ../input directory.
NUC_MODEL = '../input/hpacellsegmentatormodelweights/dpn_unet_nuclei_v1.pth'
CELL_MODEL = '../input/hpacellsegmentatormodelweights/dpn_unet_cell_3ch_v1.pth'

# Single segmentator instance reused by the inference loop for both
# pred_nuclei and pred_cells.
segmentator = cellsegmentator.CellSegmentator(
    NUC_MODEL,
    CELL_MODEL,
    # NOTE(review): presumably the factor images are rescaled by before being
    # fed to the networks -- confirm against the hpacellseg documentation.
    scale_factor=0.25,
    # Runs on the GPU; presumably fails on a CPU-only runtime -- confirm.
    device='cuda',
    # NOTE(review): presumably pads images before inference -- confirm.
    padding=True,
    # NOTE(review): presumably selects the 3-channel cell model, matching the
    # "3ch" weights file in CELL_MODEL -- confirm.
    multi_channel_model=True
)

In [None]:
import base64
import numpy as np
from pycocotools import _mask as coco_mask
import typing as t
import zlib


def encode_binary_mask(mask: np.ndarray) -> t.Text:
  """Converts a binary mask into OID challenge encoding ascii text.

  The mask is RLE-encoded with the COCO mask API, zlib-compressed and then
  base64-encoded.

  Args:
    mask: Boolean array that is 2-D once singleton dimensions are squeezed.

  Returns:
    The base64 text (ASCII) of the compressed RLE counts.

  Raises:
    ValueError: If `mask` is not of boolean dtype, or is not 2-D after
      squeezing.
  """

  # check input mask --
  # `np.bool` was removed in NumPy 1.24; `np.bool_` is the boolean scalar type
  # and compares equal to a bool dtype on every NumPy version.
  if mask.dtype != np.bool_:
    raise ValueError(
        "encode_binary_mask expects a binary mask, received dtype == %s" %
        mask.dtype)

  mask = np.squeeze(mask)
  if len(mask.shape) != 2:
    # Wrap the shape in a 1-tuple: formatting the shape tuple directly would
    # raise TypeError for any shape that does not have exactly one element.
    raise ValueError(
        "encode_binary_mask expects a 2d mask, received shape == %s" %
        (mask.shape,))

  # convert input mask to expected COCO API input --
  # (height, width, 1) uint8 array in Fortran (column-major) order.
  mask_to_encode = mask.reshape(mask.shape[0], mask.shape[1], 1)
  mask_to_encode = mask_to_encode.astype(np.uint8)
  mask_to_encode = np.asfortranarray(mask_to_encode)

  # RLE encode mask --
  encoded_mask = coco_mask.encode(mask_to_encode)[0]["counts"]

  # compress and base64 encoding --
  binary_str = zlib.compress(encoded_mask, zlib.Z_BEST_COMPRESSION)
  base64_str = base64.b64encode(binary_str)
  return base64_str.decode('ascii')

# Inference

In [None]:
# Root directory of the competition data; sample_submission.csv is read from here.
tpath = Path('../input/hpa-single-cell-image-classification')

In [None]:
# Submission template; the ID, ImageWidth and PredictionString columns are
# the ones used further down.
sub = pd.read_csv(tpath/'sample_submission.csv')

In [None]:
# Debug aid: uncomment to run the pipeline on a 3% random sample of the rows.
# sub = sub.sample(frac=0.03)
# Show how many images there are per image width; batches are later formed
# from images of the same width.
sub.ImageWidth.value_counts()

In [None]:
# One independent frame per distinct image width, each re-indexed from 0 so
# positional batching by label works within every frame.
sub_dfs = [
    sub[sub['ImageWidth'] == width].copy().reset_index(drop=True)
    for width in sub.ImageWidth.unique()
]

In [None]:
bs = 8  # number of images segmented per batch
for sub in sub_dfs:
    # NOTE: the loop variable keeps its original name and therefore rebinds the
    # module-level `sub`; each frame here has a fresh 0..n-1 index, so the
    # label-based `.loc[start:end - 1]` slices below select exactly one batch.
    print(f'Starting prediction for image size: {sub.ImageWidth.loc[0]}')
    for start in range(0, len(sub), bs):
        end = min(start + bs, len(sub))

        # Collect paths channel by channel: [[red...], [yellow...], [blue...]].
        # Building the nested lists directly (rather than stacking into a
        # NumPy array and squeezing) keeps this shape even when the final
        # batch contains a single image.
        images = [[], [], []]
        for image_id in sub.loc[start:end - 1, 'ID']:
            channel_paths = build_image_names(image_id=image_id)
            for channel, (path,) in zip(images, channel_paths):
                channel.append(path)

        try:
            nuc_segmentations = segmentator.pred_nuclei(images[2])
            cell_segmentations = segmentator.pred_cells(images)

            predstrings = []
            for i, cell_segmentation in enumerate(tqdm(cell_segmentations)):
                _, cell_mask = label_cell(nuc_segmentations[i], cell_segmentation)

                # Cell labels run from 1 to max(cell_mask); label 0 is
                # presumably the background (TODO confirm against hpacellseg)
                # and must not be submitted as a cell.
                parts = []
                for label in range(1, int(np.max(cell_mask)) + 1):
                    bmask = (cell_mask == label)
                    if not bmask.any():
                        # Label value not present in the mask: nothing to encode.
                        continue
                    # '0 1' is a placeholder "class 0, confidence 1" prefix.
                    parts.append('0 1 ' + encode_binary_mask(bmask) + ' ')
                predstrings.append(''.join(parts))

            # Single .loc call writes into `sub` itself; chained
            # `sub[col].loc[...] = ...` may assign to a temporary copy.
            # A length mismatch raises here and is reported below.
            sub.loc[start:end - 1, 'PredictionString'] = predstrings

        except Exception as err:
            # Best effort: a failed batch keeps its existing PredictionString
            # values, but the failure is reported instead of silently dropped.
            print(f'Segmentation failed for rows {start}-{end - 1}: {err!r}')

In [None]:
# Recombine the per-width frames into one table with a fresh index. Rows come
# out grouped by image width rather than in the order of the submission template.
all_subs = pd.concat(sub_dfs, ignore_index=True, sort=False)

In [None]:
# Write the submission file; index=False keeps the row index out of the CSV.
all_subs.to_csv('submission.csv', index=False)

In [None]:
all_subs.head()

In [None]:
all_subs.tail()