# Setup and Imports

In [None]:
%%capture
# Install the packages needed by the HPA cell segmentator from local files under ../input.
# The %%capture cell magic above hides pip's console output for this cell.
# pycocotools wheel (built for CPython 3.7, linux x86_64 according to its file name).
!pip install "../input/pycocotools/pycocotools-2.0-cp37-cp37m-linux_x86_64.whl"
# pytorch_zoo source tree.
!pip install "../input/hpapytorchzoozip/pytorch_zoo-master"
# HPA-Cell-Segmentation source tree (provides the `hpacellseg` package imported below).
!pip install "../input/hpacellsegmentatormaster/HPA-Cell-Segmentation-master"

In [None]:
import tensorflow as tf
print(tf.__version__)

from tensorflow.keras.layers import *
from tensorflow.keras.models import *
import tensorflow_addons as tfa

# Turn on "memory growth" for every GPU TensorFlow can see, then report how
# many physical and logical GPUs are available.
# Ref: https://www.tensorflow.org/guide/gpu
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized.
    # The error is only printed, so the notebook keeps running with the
    # default allocation behaviour if this configuration step fails.
    print(e)

In [None]:
import os
import re
import cv2
import glob
import imageio
import numpy as np
import pandas as pd
from PIL import Image
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from skimage.transform import resize

%matplotlib inline

# HPA Segmentation tool related imports
import hpacellseg.cellsegmentator as cellsegmentator
from hpacellseg.utils import label_cell, label_nuclei

# Imports for encoding binary masks
import base64
from pycocotools import _mask as coco_mask
import typing as t
import zlib

# Hyperparameters

In [None]:
# Root folder of the competition data. The trailing slash matters because
# file paths are built from this value by plain string concatenation.
WORKING_DIR_PATH = '../input/hpa-single-cell-image-classification/'

# Spatial size the stacked image is resized to before classification.
IMG_HEIGHT = IMG_WIDTH = 224
# Available image sizes.
IMG_SIZES = [1728, 2048, 3072]

# Lets tf.data pick parallelism / prefetch buffer sizes on its own.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Class id -> human readable class name. The id is the position of the
# name in this list, starting at 0.
LABELS = dict(enumerate([
    "Nucleoplasm",
    "Nuclear membrane",
    "Nucleoli",
    "Nucleoli fibrillar center",
    "Nuclear speckles",
    "Nuclear bodies",
    "Endoplasmic reticulum",
    "Golgi apparatus",
    "Intermediate filaments",
    "Actin filaments",
    "Microtubules",
    "Mitotic spindle",
    "Centrosome",
    "Plasma membrane",
    "Mitochondria",
    "Aggresome",
    "Cytosol",
    "Vesicles and punctate cytosolic patterns",
    "Negative",
]))

# Minimum classifier score for a class to be reported.
PRED_THRESHOLD = 0.1

# Prepare Dataloader

In [None]:
# The sample submission lists the test image IDs; it is also the frame whose
# PredictionString column gets filled in during inference.
submission_csv_path = f"{WORKING_DIR_PATH}sample_submission.csv"
df_submission = pd.read_csv(submission_csv_path)
df_submission.head()

In [None]:
def decode_image(img):
    """Decode PNG file contents into a single-channel float32 image tensor.

    Args:
        img: scalar string tensor holding the raw bytes of a PNG file.

    Returns:
        The decoded image, requested with one channel and converted to
        float32 via `tf.image.convert_image_dtype`.
    """
    # channels=1 asks the decoder for a grayscale image.
    decoded = tf.image.decode_png(img, channels=1)
    # The dtype conversion also normalizes the integer pixel values.
    return tf.image.convert_image_dtype(decoded, dtype=tf.float32)

def parse_data(df_dict):
    """Turn one submission row into the inputs needed for inference.

    Args:
        df_dict: mapping with at least an 'ID' entry naming the test image.

    Returns:
        dict with
          'id'    - the image ID as received,
          'mt'    - path of the red channel PNG,
          'er'    - path of the yellow channel PNG (path only, never read here),
          'nu'    - path of the blue channel PNG,
          'image' - red/green/blue channels stacked depth-wise and resized
                    to IMG_HEIGHT x IMG_WIDTH for the classifier.
    """
    # Every channel file shares the same "<root>test/<ID>" prefix.
    stem = WORKING_DIR_PATH+'test/'+df_dict['ID']
    mt = stem+'_red.png'
    er = stem+'_yellow.png'
    nu = stem+'_blue.png'
    protein = stem+'_green.png'

    # Read and decode the three channels that make up the classifier input.
    red = decode_image(tf.io.read_file(mt))
    green = decode_image(tf.io.read_file(protein))
    blue = decode_image(tf.io.read_file(nu))

    # Stack along the channel axis, then resize as per trained model requirement.
    stacked = tf.experimental.numpy.dstack((red, green, blue))
    image_for_classification = tf.image.resize(stacked, [IMG_HEIGHT, IMG_WIDTH])

    return dict(
        id=df_dict['ID'],
        mt=mt,
        er=er,
        nu=nu,
        image=image_for_classification,
    )

In [None]:
# Consume submission CSV file: one dataset element per row, keyed by column name.
submission_columns = dict(df_submission)
test_ds = tf.data.Dataset.from_tensor_slices(submission_columns)

# Test Dataset: parse rows in parallel, emit batches of one image, and
# prefetch so that loading overlaps with inference.
test_ds = test_ds.map(parse_data, num_parallel_calls=AUTOTUNE).batch(1).prefetch(AUTOTUNE)

### Visualize a sample

In [None]:
# Pull the first batch and drop its size-1 batch axis so it can be plotted.
test_data_dict = next(iter(test_ds))
sample_image = tf.squeeze(test_data_dict['image'])
plt.imshow(sample_image);

# Segmentation Model

In [None]:
# Weight files for the two segmentation networks (nuclei and whole cell).
NUC_MODEL = "../input/hpacellsegmentatormodelweights/dpn_unet_nuclei_v1.pth"
CELL_MODEL = "../input/hpacellsegmentatormodelweights/dpn_unet_cell_3ch_v1.pth"

# Build the HPA cell segmentator used later through pred_nuclei / pred_cells.
# NOTE(review): the exact meaning of scale_factor, padding and
# multi_channel_model is defined by the hpacellseg package, not by this
# notebook - confirm against the installed version before changing them.
segmentator = cellsegmentator.CellSegmentator(
    NUC_MODEL,
    CELL_MODEL,
    scale_factor=0.25,
    device="cuda",  # requires a GPU runtime
    padding=True,
    multi_channel_model=True,
)

# Classification Model

In [None]:
# Saved Keras model (HDF5 file) used for image-level multi-label classification.
MODEL_PATH = '../input/hpa-models/effnet_multi_label_1.h5'

# Reset Keras' global state before loading so re-running this cell starts clean.
tf.keras.backend.clear_session()
classifier = tf.keras.models.load_model(MODEL_PATH)
classifier.summary()

# Utilities

In [None]:
def encode_binary_mask(mask, mask_val):
  """Encode the region `mask == mask_val` as OID challenge ascii text.

  Args:
    mask: array-like label mask; after squeezing size-1 axes it must be 2-D.
    mask_val: label value whose pixels form the binary mask to encode.

  Returns:
    bytes: base64 encoding of the zlib-compressed COCO RLE counts.

  Raises:
    ValueError: if the squeezed mask is not two-dimensional.
  """
  # The element-wise comparison always yields a boolean array, so no separate
  # dtype check is needed (and the removed `np.bool` alias is avoided).
  binary_mask = np.squeeze(np.asarray(mask) == mask_val)
  if binary_mask.ndim != 2:
    # Wrap the shape in a 1-tuple: "%s" % some_tuple would otherwise try to
    # spread the tuple over several format specifiers and raise TypeError.
    raise ValueError(
        "encode_binary_mask expects a 2d mask, received shape == %s" %
        (binary_mask.shape,))

  # convert input mask to expected COCO API input --
  # an (H, W, 1) uint8 array in Fortran (column-major) order.
  mask_to_encode = np.asfortranarray(
      binary_mask[:, :, np.newaxis].astype(np.uint8))

  # RLE encode mask --
  encoded_mask = coco_mask.encode(mask_to_encode)[0]["counts"]

  # compress and base64 encoding --
  binary_str = zlib.compress(encoded_mask, zlib.Z_BEST_COMPRESSION)
  return base64.b64encode(binary_str)

# Run Inference

In [None]:
def is_border_nuclei(contour_points, image_shape=None):
    """Tell whether a contour touches the edge of the image.

    Args:
        contour_points: array-like of (x, y) contour coordinates, e.g. one
            OpenCV contour of shape (N, 1, 2).
        image_shape: optional (height, width[, ...]) of the image the contour
            was extracted from. Without it only the top/left edges can be
            detected (a coordinate equal to 0); with it, the bottom/right
            edges (y == height - 1 or x == width - 1) are detected as well.

    Returns:
        bool: True if any contour point lies on a detectable image edge.
    """
    points = np.asarray(contour_points)
    if points.size == 0:
        return False

    # Any coordinate equal to 0 means the contour touches the top or left edge.
    if bool((points == 0).any()):
        return True

    if image_shape is None:
        return False

    height, width = image_shape[:2]
    xy = points.reshape(-1, 2)
    touches_right = bool((xy[:, 0] == width - 1).any())
    touches_bottom = bool((xy[:, 1] == height - 1).any())
    return touches_right or touches_bottom

def clean_nuclei_mask_vals(nuclei_mask):
    """Return the nucleus labels that do not touch any border of the image.

    A label counts as "touching the border" when at least one of its pixels
    lies in the first/last row or the first/last column of the mask, so all
    four edges are covered and every fragment of a label is considered.
    Label 0 is treated as background and is never returned.

    Args:
        nuclei_mask: 2-D integer label mask (size-1 axes are squeezed away).

    Returns:
        list[int]: sorted labels of nuclei lying fully inside the image.

    Raises:
        ValueError: if the squeezed mask is not two-dimensional.
    """
    mask = np.squeeze(np.asarray(nuclei_mask))
    if mask.size == 0:
        return []
    if mask.ndim != 2:
        raise ValueError(
            "clean_nuclei_mask_vals expects a 2d mask, received shape == %s" %
            (mask.shape,))

    # Labels present on the outermost rows/columns are the bordering ones.
    border_pixels = np.concatenate(
        [mask[0, :], mask[-1, :], mask[:, 0], mask[:, -1]])
    border_labels = np.unique(border_pixels)

    labels = np.unique(mask)
    inside = labels[(labels != 0) & ~np.isin(labels, border_labels)]
    return inside.tolist()

In [None]:
def _select_labels(scores, threshold, max_labels=4):
    """Pick the best-scoring classes above `threshold`.

    Returns a list of (class_id, score) pairs, highest score first, holding at
    most `max_labels` entries. Each class keeps its own score.
    """
    scores = np.asarray(scores).ravel()
    candidates = np.flatnonzero(scores > threshold)
    # Stable sort: classes with equal scores stay in ascending id order.
    ranked = candidates[np.argsort(-scores[candidates], kind="stable")]
    return [(int(idx), scores[idx]) for idx in ranked[:max_labels]]


for data_dict in tqdm(test_ds):
    # The dataset yields batches of one image, so element 0 of each entry
    # holds this image's ID and channel file paths (as bytes).
    image_id = data_dict['id'].numpy()[0].decode('UTF8')
    mt_path = data_dict['mt'].numpy()[0].decode('UTF8')
    er_path = data_dict['er'].numpy()[0].decode('UTF8')
    nu_path = data_dict['nu'].numpy()[0].decode('UTF8')

    # Perform segmentation
    # For nuclei segmentation only blue channel is required.
    nuc_segmentation = segmentator.pred_nuclei([nu_path])
    # For full cells all the three reference(except green) channels are required.
    cell_segmentation = segmentator.pred_cells([[mt_path], [er_path], [nu_path]])
    # Get cell mask
    nuclei_mask, cell_mask = label_cell(nuc_segmentation[0], cell_segmentation[0])

    # Unique cell ids, skipping label 0 (assumed to be the background).
    cells = np.unique(cell_mask)
    cells = cells[cells != 0]
    # Get the unique nucleus ids not bordering the image.
    nuclei = clean_nuclei_mask_vals(nuclei_mask)

    # Perform classification - **Image level classification**
    preds = classifier.predict(data_dict['image'])[0]

    # Keep at most four classes scoring above PRED_THRESHOLD. When none
    # qualifies, fall back to label 0 with confidence 1.
    labels = _select_labels(preds, PRED_THRESHOLD) or [(0, 1)]

    prediction_id = ""
    for mask_val in cells:
        # The cell with its nucleus bordering the image is discarded.
        if mask_val not in nuclei:
            continue

        # Encode the mask only for cells that are actually reported.
        rle = encode_binary_mask(cell_mask, mask_val).decode('utf-8')

        # **Assign same image level prediction to each segmented cell**
        for label_id, confidence in labels:
            prediction_id += f"{label_id} {confidence} {rle} "

    # Replace PredictionString in-place (compare against the decoded str ID).
    df_submission.loc[df_submission['ID'] == image_id, 'PredictionString'] = prediction_id

In [None]:
# Persist the filled-in predictions and preview the first rows.
submission_path = "/kaggle/working/submission.csv"
df_submission.to_csv(submission_path, index=False)
df_submission.head()