Hello fellow Kagglers,

This notebook demonstrates the inference using the Mask-RCNN EfficientNetV2 implementation from [this](https://www.kaggle.com/markwijkhuizen/sartorius-mask-rcnn-efficientnetv2-train-2h) notebook.

The inference consists of predicting all instances for a test image, removing overlap and run-length encoding each instance.

**V2**

- Added per cell_type minimum confidence level
- Added per cell_type minimum size based on 1-percentile cell_type size
- Mask-RCNN model architecture improvements

**V3**
- Instance confidence level based on a semantic mask trained in [this](https://www.kaggle.com/markwijkhuizen/sartorius-training-upsampling-tf-public) notebook. The predicted semantic mask indicates which pixels belong to any cell; it does not segment instances, nor does it predict the cell type. It does, however, predict whether a pixel belongs to an instance. The predicted instance mask is multiplied with the semantic mask, and the sum is divided by the instance size to get a mean pixel confidence score. This score is thresholded to filter out instances which the semantic mask predictor does not recognise as cells.

In [None]:
# Library to silence Tensorflow Logs
# Installed from a local Kaggle dataset directory (a path is given to pip, not a package name)
!pip install -q /kaggle/input/maskrcnn-tf-2-efficientnetv2-caching/silence_tensorflow-1.1.1

# Imported only for its import-time side effect; nothing from the module is referenced afterwards
# NOTE(review): presumably this is what suppresses the TensorFlow log output - confirm in the package docs
import silence_tensorflow.auto

In [None]:
# Install LZ4 library from wheel file (local file; the wheel name targets CPython 3.7, manylinux1 x86_64)
# NOTE(review): no cell in this notebook imports lz4 directly; presumably it is a dependency
# of the bundled Mask-RCNN code - confirm
!pip install -q /kaggle/input/maskrcnn-tf-2-efficientnetv2-caching/lz4-3.1.3-cp37-cp37m-manylinux1_x86_64.whl

In [None]:
# Make the bundled EfficientNetV2 and Mask-RCNN source folders importable.
# Both folders are appended (in this order) to the end of the module search path.
import sys

sys.path.extend([
    '/kaggle/input/maskrcnn-tf-2-efficientnetv2-caching/Instance_Segmentation/efficientnetv2',
    '/kaggle/input/maskrcnn-tf-2-efficientnetv2-caching/Instance_Segmentation/Mask_RCNN',
])

In [None]:
import matplotlib.pyplot as plt
import mrcnn.utils as utils
import mrcnn.model as modellib
import numpy as np
import pandas as pd
import tensorflow as tf

import os
import sys
import json
import time
import skimage
import imageio
import glob
import effnetv2_model

from PIL import Image, ImageDraw
from tqdm.notebook import tqdm
from sklearn.model_selection import KFold
from PIL import Image, ImageEnhance
from mrcnn.config import Config
from mrcnn import visualize

# ignore warnings to make outputs clearer
# NOTE(review): the 'ignore' filter is installed without category or module restrictions, so it
# also hides deprecation and numerical runtime warnings raised by later cells
import warnings
warnings.filterwarnings('ignore')

# Log the interpreter and framework versions this run used
print(f'Python Version: {sys.version}')
print(f'Tensorflow Version: {tf.__version__}')
print(f'Tensorflow Keras Version: {tf.keras.__version__}')

In [None]:
# Report how many GPUs TensorFlow can see.
# Uses the stable `tf.config.list_physical_devices` entry point rather than its `experimental` alias.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

In [None]:
# Image Dimension: size that `unpad_image` crops back to and `pad_image` pads from
HEIGHT = 520
WIDTH = 704
SHAPE = (HEIGHT, WIDTH)

# Image target dimension, divisible by 64 (576 = 9 * 64, 704 = 11 * 64)
# This is the padded size fed to both models
HEIGHT_TARGET = 576
WIDTH_TARGET = 704
SHAPE_TARGET = (HEIGHT_TARGET, WIDTH_TARGET)
# Used as IMAGES_PER_GPU in the Mask-RCNN inference config
BATCH_SIZE = 1

# DEBUG / DEBUG_SIZE are only read by InferenceConfig.STEPS_PER_EPOCH in this notebook
DEBUG = True
DEBUG_SIZE = 50

# Forwarded to the Mask-RCNN config and checked by `fix_overlap` and the prediction loop
# NOTE(review): presumably makes Mask-RCNN return per-pixel confidence masks instead of
# binary masks - confirm against the Mask-RCNN fork
PIXEL_CONFIDENCE_MASKS = True

In [None]:
# Training annotations; this notebook only reads the `cell_type` column from them
train = pd.read_csv('../input/sartorius-cell-instance-segmentation/train.csv')

# Unique Cell Names, sorted so that the position of a name defines its class label
CELL_NAMES = np.unique(train['cell_type'].to_numpy())
print(f'CELL_NAMES: {CELL_NAMES}')

# Inference Config

- **THRESHOLD_DICT**: per cell_type instance threshold, instances need a minimum confidence to be included
- **MIN_SIZE_DICT**: per cell_type minimum instance pixel size, instances need a minimum pixel size to be included
- **MIN_SEM_MASK_CONF_DICT**: per cell_type minimum semantic mask confidence; an instance needs a minimum mean intensity in the predicted semantic mask to be included
- **MASK_THRESHOLDS_DICT**: per cell type minimum pixel confidence, pixels in a predicted mask need a minimum confidence to be included

In [None]:
# Every LABEL2* lookup below maps a class label to a per cell_type setting.
# Labels are the 1-based positions of the names in the sorted CELL_NAMES array
# (label 0 is the background class).

# Minimum Instance Score Dictionary
THRESHOLD_DICT = {
    'astro': 0.70,  # Large Bodies
    'cort': 0.95,   # Sparse Dots
    'shsy5y': 0.70, # Many Connected Dots
}
LABEL2THRESHOLD = {
    label: THRESHOLD_DICT[name] for label, name in enumerate(CELL_NAMES, start=1)
}
print(f'LABEL2THRESHOLD: {LABEL2THRESHOLD}')

# Minimum Instance Size Dictionary (pixels)
MIN_SIZE_DICT = {
    'astro': 110,  # Large Bodies
    'cort': 60,   # Sparse Dots
    'shsy5y': 50, # Many Connected Dots
}
LABEL2MIN_SIZE = {
    label: MIN_SIZE_DICT[name] for label, name in enumerate(CELL_NAMES, start=1)
}
print(f'LABEL2MIN_SIZE: {LABEL2MIN_SIZE}')

# Minimum Semantic Mask Confidence (mean semantic-mask value inside an instance)
MIN_SEM_MASK_CONF_DICT = {
    'astro': 0.30,  # Large Bodies
    'cort': 0.30,   # Sparse Dots
    'shsy5y': 0.30, # Many Connected Dots
}
LABEL2MIN_SEM_MASK_CONF = {
    label: MIN_SEM_MASK_CONF_DICT[name] for label, name in enumerate(CELL_NAMES, start=1)
}
print(f'LABEL2MIN_SEM_MASK_CONF: {LABEL2MIN_SEM_MASK_CONF}')

# Minimum Pixel Confidence for a pixel to be part of an instance mask
MASK_THRESHOLDS_DICT = {
    'astro': 0.30,  # Large Bodies
    'cort': 0.25,   # Sparse Dots
    'shsy5y': 0.40, # Many Connected Dots
}
LABEL2MASK_THRESHOLD = {
    label: MASK_THRESHOLDS_DICT[name] for label, name in enumerate(CELL_NAMES, start=1)
}
print(f'LABEL2MASK_THRESHOLD: {LABEL2MASK_THRESHOLD}')

# Mask-RCNN Inference Config

In [None]:
class InferenceConfig(Config):
    """
    Mask-RCNN configuration used to rebuild the trained network in inference mode.

    Several attributes (learning rate, warm-up steps, ROI sampling, steps per epoch)
    only matter for training. They are presumably kept so the configuration mirrors
    the one used in the training notebook - confirm before removing any of them.
    """
    NAME = "cell"

    # Set batch size to 1.
    GPU_COUNT = 1
    IMAGES_PER_GPU = BATCH_SIZE

    # Number of Classes: background + one class per cell type
    NUM_CLASSES = 1 + len(CELL_NAMES)

    # Image Dimensions: the padded test images already have the target size,
    # so no resizing is requested
    IMAGE_MIN_DIM = HEIGHT_TARGET
    IMAGE_MAX_DIM = WIDTH_TARGET
    IMAGE_SHAPE = [HEIGHT_TARGET, WIDTH_TARGET, 3]
    IMAGE_RESIZE_MODE = 'none'

    # Training-only setting. The original expression fell back to
    # int(N_SAMPLES / BATCH_SIZE) when DEBUG was False, but N_SAMPLES is never defined
    # in this notebook, so switching DEBUG off raised a NameError while building the class.
    STEPS_PER_EPOCH = DEBUG_SIZE

    BACKBONE = 'efficientnetv2-b0'

    # Network Structure (RPN_ANCHOR_SCALES was assigned twice with the same value)
    FPN_CLASSIF_FC_LAYERS_SIZE = 1024
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)

    # Regions of Interest
    PRE_NMS_LIMIT = 6000
    # Non Max Supression
    POST_NMS_ROIS_TRAINING = 2000
    POST_NMS_ROIS_INFERENCE = 2000
    # Instances
    MAX_GT_INSTANCES = 790
    TRAIN_ROIS_PER_IMAGE = 200
    DETECTION_MAX_INSTANCES = 400

    # Thresholds (meanings as documented for the upstream Mask_RCNN Config;
    # verify they still hold for this fork)
    RPN_NMS_THRESHOLD = 0.70        # NMS IoU threshold used to filter RPN proposals
    DETECTION_MIN_CONFIDENCE = 0.50 # Detections below this confidence are dropped
    DETECTION_NMS_THRESHOLD = 0.30  # NMS IoU threshold applied to final detections
    ROI_POSITIVE_RATIO = 0.33

    # Mini Mask
    USE_MINI_MASK = False
    MINI_MASK_SHAPE = (112, 112)
    MASK_SHAPE = (56, 56)

    TRAIN_BN = False

    # Learning Rate
    LEARNING_RATE = 0.004
    N_WARMUP_STEPS = 2

    # The right-hand side resolves to the notebook-level constant because the class
    # attribute does not exist yet when the expression is evaluated
    PIXEL_CONFIDENCE_MASKS = PIXEL_CONFIDENCE_MASKS

inference_config = InferenceConfig()
inference_config.display()

# Load Mask-RCNN Model

In [None]:
# Recreate the model in inference mode
# model_dir points at the dataset folder holding the trained checkpoints; the weights
# themselves are loaded in the next cell
model_dir = '/kaggle/input/sartorius-maskrcnn-efficientnetv2-dataset/model_checkpoints'
model = modellib.MaskRCNN(mode='inference', config=inference_config, model_dir=model_dir)

In [None]:
# Set EfficientNetV2-B0 Head Untrainable
# Marks the last sub-layer of the second top-level Keras layer as non-trainable
# NOTE(review): presumably layers[1] is the EfficientNetV2 backbone and its last sub-layer
# the head - this relies on positional indexing, confirm against the Mask-RCNN fork
if 'efficientnetv2-b' in  inference_config.BACKBONE:
    model.keras_model.layers[1].layers[-1].trainable = False

# Find Last Model
# NOTE(review): presumably returns the most recent checkpoint under model_dir - confirm
model_path = model.find_last()

# Load Model Weights, matching layers by name
print(f'Loading weights from: {model_path}')
model.load_weights(model_path, by_name=True)

# Semantic Segmentation Model

In [None]:
# Inspiration: https://www.tensorflow.org/tutorials/generative/pix2pix#build_an_input_pipeline_with_tfdata
def upsample(x, concat, filters, size, name, dropout=0.0):
    """
    Single decoder stage: stride-2 transposed convolution, batch normalisation,
    optional dropout, ReLU and finally concatenation with a skip tensor.

    Args:
        x: feature map to upsample
        concat: skip-connection tensor that is concatenated with the activated result
        filters: number of filters of the transposed convolution
        size: kernel size of the transposed convolution
        name: suffix appended to the name of every layer created here
        dropout: dropout rate; the dropout layer is only created when this is > 0
    Returns:
        The concatenation of the upsampled feature map and `concat`
    """
    weight_init = tf.random_normal_initializer(0., 0.02)

    # Stride 2 with 'SAME' padding, no bias term
    transposed_conv = tf.keras.layers.Conv2DTranspose(
        filters,
        size,
        strides=2,
        padding='SAME',
        kernel_initializer=weight_init,
        use_bias=False,
        name=f'Conv2DTranspose_{name}',
    )
    features = transposed_conv(x)
    features = tf.keras.layers.BatchNormalization(name=f'BatchNormalization_{name}')(features)

    use_dropout = dropout > 0.0
    if use_dropout:
        features = tf.keras.layers.Dropout(dropout, name=f'Dropout_{name}')(features)

    # The 'ReLy_' spelling of the layer name is kept exactly as in the original
    activated = tf.keras.layers.ReLU(name=f'ReLy_{name}')(features)

    return tf.keras.layers.Concatenate(name=f'Concatenate_{name}')([activated, concat])

In [None]:
def get_semantic_segmentation_model():
    """
    Build the semantic segmentation network and load its trained weights.

    The network consists of a 1x1 convolution that maps the single-channel input to
    three channels, an EfficientNetV2-S backbone called with `with_endpoints=True`,
    four `upsample` decoder stages fed with the backbone endpoints and a final stride-2
    transposed convolution with a sigmoid activation and a single output channel.

    Side effects: turns on XLA JIT through `tf.config.optimizer.set_jit` (a global
    TensorFlow setting), prints the endpoint shapes and reads the weights file from
    the Kaggle input directory.

    Returns:
        A `tf.keras.models.Model` with loaded weights and `trainable` set to False
    """
    # enable XLA optimizations (global TensorFlow setting, not limited to this model)
    tf.config.optimizer.set_jit(True)
    
    # EfficientNetV2 Backbone, built without the classification top and without pretrained weights
    cnn = effnetv2_model.get_model(f'efficientnetv2-s', include_top=False, weights=None)

    # Inputs, note the names are equal to the dictionary keys in the dataset
    grayscale_image = tf.keras.layers.Input([HEIGHT_TARGET, WIDTH_TARGET, 1], name='image', dtype=tf.float32)

    # 1x1 convolution turns the single-channel input into the 3-channel tensor passed to the backbone
    rgb_image = tf.keras.layers.Conv2D(3, kernel_size=1, strides=1)(grayscale_image)
    # The call returns six tensors; `embedding` is only printed, up1..up5 feed the decoder
    # NOTE(review): presumably up1 is the coarsest and up5 the finest endpoint - confirm with the printed shapes
    embedding, up5, up4, up3, up2, up1 = cnn(rgb_image, with_endpoints=True)
    print(f'embedding shape: {embedding.shape} up1 shape: {up1.shape}, up2 shape: {up2.shape}')
    print(f'up3 shape: {up3.shape}, up4 shape: {up4.shape}, up5 shape: {up5.shape}')


    # Decoder: every stage uses twice the channel count of the skip tensor it concatenates with
    # NOTE(review): the size suffixes in the layer names are plain labels, presumably carried over
    # from the training notebook - they are not checked against the actual tensor shapes
    x = upsample(up1, up2, up2.shape[-1] * 2, 3, 'upsample1_17x22', dropout=0.00)
    x = upsample(x, up3, up3.shape[-1] * 2, 3, 'upsample2_34x44', dropout=0.00)
    x = upsample(x, up4, up4.shape[-1] * 2, 3, 'upsample3_68x88', dropout=0.00)
    x = upsample(x, up5, up5.shape[-1] * 2, 3, 'upsample4_136x176', dropout=0.00)

    # Final stride-2 transposed convolution producing one sigmoid value per pixel
    output = tf.keras.layers.Conv2DTranspose(
            filters=1,
            kernel_size=3,
            strides=2,
            padding='same',
            activation='sigmoid'
        )(x)

    model = tf.keras.models.Model(inputs=grayscale_image, outputs=output)
    # Weights are loaded without by_name, so the layer structure built above has to match the saved model
    model.load_weights('/kaggle/input/sartorius-training-upsampling-tf-public-dataset/model.h5')
    model.trainable = False

    return model

In [None]:
# Build the semantic segmentation network and load its trained weights
semantic_segmentation_model = get_semantic_segmentation_model()

In [None]:
# Print the layer overview of the semantic segmentation network
semantic_segmentation_model.summary()

# Utility Functions

In [None]:
# Submission raises error when there is overlap between instances
# This function removes the overlaps
# Inspiration: https://www.kaggle.com/susnato/sartorius-segmentation-mask-rcnn-tf-inference
def fix_overlap(msk0):
    """
    Remove overlap between instances so that every pixel belongs to at most one instance.

    An all-zero background channel is put in front of the instance channels and an
    argmax over the channel axis selects one winner per pixel. Ties go to the channel
    with the lowest index, so a pixel without any instance is assigned to the
    background. The resulting one-hot map (background removed) is multiplied with
    the input, which zeroes every non-winning channel.

    Args:
        msk0: multi-channel mask of shape (height, width, n_instances), one channel per
            instance. With PIXEL_CONFIDENCE_MASKS enabled the channels are binarised
            at 0.5 before the winner is chosen, otherwise the raw values compete.
    Returns:
        Array with the shape of msk0 holding msk0 multiplied by the float32 winner map
    """
    if PIXEL_CONFIDENCE_MASKS:
        # Plain `bool` is used because the `np.bool` alias was removed from NumPy
        candidates = np.asarray(msk0) >= 0.5
    else:
        candidates = np.array(msk0)

    # Channel 0 becomes the background, instances move to channels 1..n_instances
    candidates = np.pad(candidates, [[0, 0], [0, 0], [1, 0]])
    n_instances = candidates.shape[-1] - 1
    winner = np.argmax(candidates, axis=-1)

    # One-hot encode the winners without the background channel. float32 matches what
    # the previously used Keras `to_categorical` helper returned, while plain NumPy
    # keeps this utility independent of TensorFlow.
    instance_labels = np.arange(1, n_instances + 1)
    one_hot = (winner[..., np.newaxis] == instance_labels).astype(np.float32)

    return one_hot * msk0

def has_overlap(msk):
    """
    Check whether any pixel is claimed by more than one instance.

    Args:
        msk: multi-channel mask with one instance per channel on the last axis;
            every non-zero value counts as "pixel belongs to the instance"
    Returns:
        True if at least one pixel is non-zero in more than one channel
    """
    # Plain `bool` is used because the `np.bool` alias was removed from NumPy
    binary = msk.astype(bool)
    # Number of instances claiming each pixel; at most one is allowed
    return np.any(binary.sum(axis=-1) > 1)

In [None]:
# Run Length Encode an instance
def rle_encode(img):
    '''
    img: padded numpy array, 1 - mask, 0 - background
    The padding is removed first, then the row-major flattened mask is encoded.
    Returns the run lengths as a string of space separated "start length" pairs,
    with 1-based start positions
    '''
    flat = unpad_image(img).flatten()
    # Zeros on both ends guarantee that every run has a start and an end transition
    bordered = np.concatenate([[0], flat, [0]])
    edges = np.flatnonzero(bordered[1:] != bordered[:-1]) + 1
    # Transitions alternate between run start and first position after the run
    starts = edges[::2]
    stops = edges[1::2]
    pairs = np.column_stack((starts, stops - starts))
    return ' '.join(map(str, pairs.ravel()))

# Padding Utility Functions

In [None]:
# Removes the padding from an image
def unpad_image(image):
    """
    Crop the central HEIGHT x WIDTH window out of a padded image.

    The window starts at half of the size difference between the target and the
    original dimensions (rounded down), mirroring `pad_image`.
    """
    top = (HEIGHT_TARGET - HEIGHT) // 2
    left = (WIDTH_TARGET - WIDTH) // 2
    rows = slice(top, top + HEIGHT)
    cols = slice(left, left + WIDTH)

    return image[rows, cols]

In [None]:
# Pads an image
def pad_image(image, constant_values):
    """
    Pad a 2D image on all four sides with `constant_values`.

    Each side receives half of the size difference between the target and the
    original dimensions (rounded down), the same amount at top/bottom and at left/right.
    """
    margin_rows = (HEIGHT_TARGET - HEIGHT) // 2
    margin_cols = (WIDTH_TARGET - WIDTH) // 2
    pad_width = ((margin_rows, margin_rows), (margin_cols, margin_cols))

    return np.pad(image, pad_width, constant_values=constant_values)

# Prepare Test Images

In [None]:
# Start from an empty `test` working directory: delete any previous one, then create it again
!rm -rf test ; mkdir test

In [None]:
# Create padded 3-channel copies of the test images in the `test` working directory
# NOTE(review): the original comment mentioned contrast enhancement, but this cell only pads and stacks
for file_path in tqdm(glob.glob('/kaggle/input/sartorius-cell-instance-segmentation/test/*.png')):
    # Read Image
    image = imageio.imread(file_path)
    # Pad Image to the target size with the constant value 128
    # (the prediction loop subtracts 128 during normalisation, so the padding maps to 0 there)
    image = pad_image(image, 128)
    
    # Create "RGB" image by stacking the image 3 times along a new last axis
    image_mask_data_fusion = np.stack((image, image, image), axis=2)
    
    # Write image to working directory, named after the file name up to its first dot
    image_id = file_path.split('/')[-1].split('.')[0]
    imageio.imwrite(f'test/{image_id}.png', image_mask_data_fusion)

# Prediction Loop

In [None]:
# Submission Dictionary: one row per kept instance
submission_dict = {
    'id': [],
    'predicted': [],
}

for idx, file_path in enumerate(tqdm(sorted(glob.glob('test/*.png')))):
    # Image ID
    image_id = file_path.split('/')[-1].split('.')[0]
    # Load the padded 3-channel image written by the preparation cell
    img = skimage.io.imread(file_path)
    # Get Mask-RCNN Predictions (verbose for the first 3 images only)
    results = model.detect([img], verbose=idx < 3)
    r = results[0]

    # Predicted Semantic Mask, computed from the first image channel
    # NOTE(review): 128 and 0.589 are presumably the normalisation constants of the
    # semantic segmentation training notebook - confirm
    img_norm = (((img[:,:,0].astype(np.float32) - 128) / 128) / 0.589)
    img_batch = np.expand_dims(img_norm, [0, 3])
    semantic_mask_pred = semantic_segmentation_model.predict_on_batch(img_batch).squeeze()

    # Plot Predictions for the first 3 Test Images only; drawing a figure for every
    # image would slow down and bloat a run on a large test set
    if idx < 3:
        plt.figure(figsize=(16, 16))
        plt.title(f'Predicted Semantic Mask {image_id}', size=24)
        plt.imshow(semantic_mask_pred)
        plt.axis(False)
        plt.show()

        print(f'image_id: {image_id}')
        visualize.display_instances(
            img,
            r['rois'],
            r['masks'],
            r['class_ids'],
            ['BG'] + CELL_NAMES.tolist(),
            r['scores'],
            figsize=(16,16)
        )

    # Optional Pixel Confidence to Binary Mask Conversion
    # The per-instance thresholds broadcast over the last (instance) axis of the masks
    if PIXEL_CONFIDENCE_MASKS:
        mask_thresholds = [LABEL2MASK_THRESHOLD.get(class_id) for class_id in r['class_ids']]
        r['masks'] = r['masks'] > mask_thresholds

    # Remove overlap, then move the instance axis to the front:
    # [H, W, N_INSTANCES] -> [N_INSTANCES, H, W]
    masks_fixed = np.moveaxis(fix_overlap(r['masks']), -1, 0)

    # NOTE(review): when Mask-RCNN returns no instance at all, no row is written for this
    # image; check whether the submission format needs a fallback row in that case

    # Loop Through Instances. A dedicated index name is used because the original
    # loop reused `idx` and thereby overwrote the image index of the outer loop.
    for instance_idx, (m, score, label) in enumerate(zip(masks_fixed, r['scores'], r['class_ids'])):
        # Cast to boolean mask with per cell_type mask threshold
        # (the values are already 0/1 here, this mainly converts the dtype)
        if PIXEL_CONFIDENCE_MASKS:
            m = m >= LABEL2MASK_THRESHOLD[label]

        # Instance Size, sum will count True values
        m_size = m.sum()

        # Mean semantic-mask value inside the instance. An instance that lost all of
        # its pixels gets 0.0 instead of the NaN a division by zero would produce;
        # it fails the minimum size requirement either way.
        if m_size > 0:
            semantic_mask_pred_confidence = (semantic_mask_pred * m).sum() / m_size
        else:
            semantic_mask_pred_confidence = 0.0

        # Requirements for instance to be included
        # Minimum Instance Confidence and Minimum Instance Size and Semantic Mask Confidence
        guard = (
            score > LABEL2THRESHOLD[label]
            and m_size > LABEL2MIN_SIZE[label]
            and semantic_mask_pred_confidence > LABEL2MIN_SEM_MASK_CONF[label]
        )

        # Always Include first instance to have at least 1 instance per test image, prevents submission error
        if instance_idx == 0 or guard:
            # Image Id
            submission_dict['id'].append(image_id)
            # Instance Mask
            submission_dict['predicted'].append(rle_encode(m))

# Submission

In [None]:
# Turn the collected predictions into a DataFrame with one column per dictionary key
submission = pd.DataFrame(submission_dict)

In [None]:
# Preview the first rows of the submission
display(submission.head())

In [None]:
# Column overview of the submission. `DataFrame.info()` prints its report itself and
# returns None, so wrapping it in `display()` only added a stray "None" to the output.
submission.info()

In [None]:
# Save submission as CSV in the working directory; the DataFrame index is left out
# so the file only contains the DataFrame's own columns
submission.to_csv('submission.csv', index=False)