# Cell instance segmentation

## Check the relevant native configuration of the Kaggle environment

In [None]:
# Print the versions of the toolchain available in this environment.
# Check nvcc version
!nvcc -V
# Check GCC version
!gcc --version
# Check python version
!python --version

## Install essential packages

### Install and check PyTorch

In [None]:
# Replace the current torch version with the following specified one
# Both packages are installed from local wheel files under ../input/cellis;
# --no-deps tells pip not to install the wheels' dependencies.

!pip install '../input/cellis/mmdetection_cuda110/torch-1.7.0+cu110-cp37-cp37m-linux_x86_64.whl' --no-deps
!pip install '../input/cellis/mmdetection_cuda110/torchvision-0.8.0-cp37-cp37m-linux_x86_64.whl' --no-deps

In [None]:
# Check Pytorch installation on Virtual Machine of Google cloud

torch_version = ''
try:
    import torch, torchvision
    torch_version = torch.__version__
    print(torch.__version__)
    if torch.cuda.is_available():
        cur_cuda_version = torch_version.split('+')[1]
        print(f'cuda is available and its version: {cur_cuda_version}')
    else:
        print('cuda is not available')
except ImportError as error:
    # Output expected ImportErrors.
    print(error.__class__.__name__ + ": " + error.message)
    # install pytorch again
    !pip install '../input/cellis/mmdetection_cuda110/torch-1.7.0+cu110-cp37-cp37m-linux_x86_64.whl' --no-deps
    !pip install '../input/cellis/mmdetection_cuda110/torchvision-0.8.0-cp37-cp37m-linux_x86_64.whl' --no-deps
    import torch, torchvision
    torch_version = torch.__version__
    print(torch.__version__)
    if torch.cuda.is_available():
        cur_cuda_version = torch_version.split('+')[1]
        print(f'cuda is available and its version: {cur_cuda_version}')
    else:
        print('cuda is not available')

In [None]:
# Report the GPU in use (if any) together with its current memory footprint

if not torch.cuda.is_available():
  print('No gpu device is available.')
else:
  active_device = torch.cuda.current_device()
  print('the device name is:')
  print(torch.cuda.get_device_name(active_device))
  print('Memory Usage:')
  # memory figures are queried for device 0 and converted from bytes to GB
  print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
  print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')

### Install other relevant packages for mmdetection

In [None]:
# Install mmcv-full and its dependencies
# Every package comes from a local wheel under /kaggle/input/cellis and is installed
# with --no-deps, so pip does not try to resolve or download dependencies.

!pip install '/kaggle/input/cellis/mmdetection_cuda110/addict-2.4.0-py3-none-any.whl' --no-deps
!pip install '/kaggle/input/cellis/mmdetection_cuda110/yapf-0.31.0-py2.py3-none-any.whl' --no-deps
!pip install '/kaggle/input/cellis/mmdetection_cuda110/terminal-0.4.0-py3-none-any.whl' --no-deps
!pip install '/kaggle/input/cellis/mmdetection_cuda110/terminaltables-3.1.0-py3-none-any.whl' --no-deps
# mmcv-full 1.3.17 is kept for reference only; version 1.4.0 below is the one installed
#!pip install '/kaggle/input/cellis/mmdetection_cuda110/mmcv_full-1.3.17-cp37-cp37m-manylinux1_x86_64.whl' --no-deps
!pip install '/kaggle/input/cellis/mmdetection_cuda110/mmcv_full-1.4.0-cp37-cp37m-manylinux1_x86_64.whl' --no-deps

# Install coco API
# The source trees are first copied to /kaggle/working and installed from that copy;
# stale copies from a previous run are removed first.
# NOTE(review): the relative `rm -rf` paths assume the current directory is /kaggle/working - confirm.
!rm -rf pycocotools-2.0.3
!rm -rf mmpycocotools-12.0.3
!cp -r /kaggle/input/cellis/mmdetection_cuda110/pycocotools-2.0.3 /kaggle/working/
!cp -r /kaggle/input/cellis/mmdetection_cuda110/mmpycocotools-12.0.3 /kaggle/working/
!pip install '/kaggle/working/pycocotools-2.0.3/pycocotools-2.0.3' --no-deps
!pip install '/kaggle/working/mmpycocotools-12.0.3/mmpycocotools-12.0.3' --no-deps

In [None]:
# Install the dependencies of mmdetection (CBNetV2) first, covering all of its requirement files:
## -r requirements/build.txt
## -r requirements/optional.txt
## -r requirements/runtime.txt
## -r requirements/tests.txt
# All wheels are local files and are installed with --no-deps.

!pip install '/kaggle/input/cellis/mmdetection_cuda110/mmdet-depends/codecov-2.1.12-py2.py3-none-any.whl' --no-deps
!pip install '/kaggle/input/cellis/mmdetection_cuda110/mmdet-depends/interrogate-1.5.0-py3-none-any.whl' --no-deps
!pip install '/kaggle/input/cellis/mmdetection_cuda110/mmdet-depends/isort-4.3.21-py2.py3-none-any.whl' --no-deps
!pip install '/kaggle/input/cellis/mmdetection_cuda110/mmdet-depends/ubelt-0.10.2-py3-none-any.whl' --no-deps
!pip install '/kaggle/input/cellis/mmdetection_cuda110/mmdet-depends/kwarray-0.5.21-py3-none-any.whl' --no-deps
!pip install '/kaggle/input/cellis/mmdetection_cuda110/mmdet-depends/onnx-1.7.0-cp37-cp37m-manylinux1_x86_64.whl' --no-deps
!pip install '/kaggle/input/cellis/mmdetection_cuda110/mmdet-depends/onnxruntime-1.5.1-cp37-cp37m-manylinux2014_x86_64.whl' --no-deps
!pip install '/kaggle/input/cellis/mmdetection_cuda110/mmdet-depends/xdoctest-0.12.0-py2.py3-none-any.whl' --no-deps
!pip install '/kaggle/input/cellis/mmdetection_cuda110/mmdet-depends/imagecorruptions-1.1.2-py3-none-any.whl' --no-deps
!pip install '/kaggle/input/cellis/mmdetection_cuda110/mmdet-depends/timm-0.4.12-py3-none-any.whl' --no-deps
# NOTE: pathlib2 lives one directory above mmdet-depends, unlike the other wheels in this cell
!pip install '/kaggle/input/cellis/mmdetection_cuda110/pathlib2-2.3.6-py2.py3-none-any.whl' --no-deps
!pip install '/kaggle/input/cellis/mmdetection_cuda110/mmdet-depends/cityscapesScripts-2.2.0-py3-none-any.whl' --no-deps
!pip install '/kaggle/input/cellis/mmdetection_cuda110/mmdet-depends/coloredlogs-15.0.1-py2.py3-none-any.whl' --no-deps
!pip install '/kaggle/input/cellis/mmdetection_cuda110/mmdet-depends/humanfriendly-10.0-py2.py3-none-any.whl' --no-deps
# `typing` is shipped as a source tree: drop any stale copy, copy it to /kaggle/working and install from there
!rm -rf typing-3.7.4.3
!cp -r /kaggle/input/cellis/mmdetection_cuda110/mmdet-depends/typing-3.7.4.3 /kaggle/working/
!pip install '/kaggle/working/typing-3.7.4.3/typing-3.7.4.3' --no-deps

In [None]:
# Install apex and its dependencies which are specified for cbnetv2

!pip install '/kaggle/input/cellis/mmdetection_cuda110/cxxfilt-0.3.0-py2.py3-none-any.whl' --no-deps
!pip install '/kaggle/input/cellis/mmdetection_cuda110/Sphinx-4.1.2-py3-none-any.whl' --no-deps
# apex is installed from a fresh copy of its source tree placed in /kaggle/working
!rm -rf apex
!cp -r /kaggle/input/cellis/mmdetection_cuda110/apex /kaggle/working/
%cd /kaggle/working/apex
# the two --global-option flags are passed through to apex's setup script (--cpp_ext and --cuda_ext)
!pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./

# to go back to /kaggle/working/
%cd .. 

### Install and configure CBNetV2 for cell instance segmentation

In [None]:
## Install cbnetv2, a version based on mmdetection 2.14.0
# Remove leftovers of a previous run under both the original and the renamed directory name
!rm -rf CBNetV2
!rm -rf cbnetv2

# Copy the source tree into /kaggle/working and rename it to the lower-case `cbnetv2`
!cp -r /kaggle/input/cellis/mmdetection_cuda110/CBNetV2 /kaggle/working/
!mv /kaggle/working/CBNetV2 /kaggle/working/cbnetv2
%cd /kaggle/working/cbnetv2
# editable install from the copied source tree
!pip install -e .
# or !python setup.py develop

# to go back to /kaggle/working/
%cd .. 


In [None]:
# Verify that the packages installed above can be imported, and import the
# libraries used in the rest of the notebook.
## check TensorFlow installation when it is needed and installed above
## import tensorflow as tf
## tf.test.gpu_device_name()
## Standard output is '/device:GPU:0'

# put the cbnetv2 source tree at the front of the module search path
import sys
sys.path.insert(0, "/kaggle/working/cbnetv2/")
# Check MMDetection installation
import mmdet
print(mmdet.__version__)

# Check mmcv installation: print the CUDA and compiler versions reported by mmcv.ops
from mmcv.ops import get_compiling_cuda_version, get_compiler_version
print(get_compiling_cuda_version())
print(get_compiler_version())

# import other useful python packages
import pathlib2
import os
print(f'the current work dir is: {os.getcwd()}')
# print(os.path.dirname(os.getcwd()))
import time
import os.path as osp
import PIL
import numpy as np
import pandas as pd
import mmcv
import matplotlib.pyplot as plt
import cv2

# set up checkpoints dir for storing the checkpoints
# NOTE(review): the path is relative, so this assumes the current directory is /kaggle/working - confirm
!mkdir -p cbnetv2/checkpoints

### Macros definition

In [None]:
from easydict import EasyDict as edict

# Notebook-wide configuration container; `local_cfg` is the public alias of `__C`.
__C = edict()
local_cfg = __C

__C.USE_GOOGLE_DRIVE = False
# directory the notebook is running in at the time this cell executes
__C.CURRENT_DIR = pathlib2.Path.cwd()
print("current dir: ", __C.CURRENT_DIR.as_posix())
# the highest level dir that holds all relevant files (parent of the current dir)
__C.PROJECT_DIR = __C.CURRENT_DIR.parent
print("project dir: ", __C.PROJECT_DIR.as_posix())
# competition dataset directory: <project dir>/input/sartorius-cell-instance-segmentation
__C.DATA_DIR = __C.PROJECT_DIR / 'input' / 'sartorius-cell-instance-segmentation'
print("dataset dir: ", __C.DATA_DIR.as_posix())

# seed value for randomized steps (not referenced elsewhere in the visible cells)
__C.RANDOM_STATE = 42

## Define relevant path variables

In [None]:
# Locate the annotation and image directories of the LIVECell dataset and
# derive the paths of its COCO annotation files and image folders.
livecell_ds_path = local_cfg.DATA_DIR / 'LIVECell_dataset_2021'
print("livecell dataset dir: ", livecell_ds_path.as_posix())

# iterdir() yields entries in arbitrary order, so the sub-directories are sorted
# to make the selection below deterministic.
livecell_ds_imgs_path = None
livecell_ds_annots_path = None
livecell_ds_path_list = sorted(x for x in livecell_ds_path.iterdir() if x.is_dir())
for path_item in livecell_ds_path_list:
  # Match on the directory's own name: matching on the full path would classify
  # every sub-directory as annotations if a parent directory contained that word.
  if 'annotations' in path_item.name:
    livecell_ds_annots_path = path_item
  else:
    livecell_ds_imgs_path = path_item

# Fail with a clear message instead of an AttributeError on None further down.
if livecell_ds_annots_path is None or livecell_ds_imgs_path is None:
  raise FileNotFoundError(
      f"expected an annotations directory and an images directory in {livecell_ds_path.as_posix()}")

print("livecell dataset annot dir: ", livecell_ds_annots_path.as_posix())
print("livecell dataset image dir: ", livecell_ds_imgs_path.as_posix())

# COCO-style annotation files for the train/val/test splits
livecell_train_meta_path = livecell_ds_annots_path / 'LIVECell' / 'livecell_coco_train.json'
livecell_val_meta_path = livecell_ds_annots_path / 'LIVECell' / 'livecell_coco_val.json'
livecell_test_meta_path = livecell_ds_annots_path / 'LIVECell' / 'livecell_coco_test.json'

# image folders: train and val images share one folder, test images have their own
livecell_train_val_img_path = livecell_ds_imgs_path / 'livecell_train_val_images'
livecell_test_img_path = livecell_ds_imgs_path / 'livecell_test_images'

In [None]:
# Let's take a look at the dataset image
# (one fixed sample from the A172 folder of the LIVECell test images)

img = mmcv.imread((livecell_test_img_path / 'A172' / 'A172_Phase_C7_1_00d00h00m_1.tif').as_posix())
plt.figure(figsize=(15, 10))
# convert the channel order from BGR to RGB before handing the image to matplotlib
plt.imshow(mmcv.bgr2rgb(img))
plt.show()

## Perform inference

### Configure the cbnetv2 for cell instance segmentation

**Please note: the following code block, which sets up the configuration for cell instance segmentation inference, is only temporary.**

In [None]:
# THIS IS A TEMPORARY CONFIG FILE FOR CELL INSTANCE SEGMENTATION INFERENCE
# copy it into the mask_rcnn config folder of the cbnetv2 working copy
!cp /kaggle/input/competckp/mask_rcnn_r50_fpn_compet.py /kaggle/working/cbnetv2/configs/mask_rcnn
#check the copy results
!ls /kaggle/working/cbnetv2/configs/mask_rcnn
# THIS IS A TEMPORARY CHECKPOINT FOR CELL INSTANCE SEGMENTATION INFERENCE
# copy it into the checkpoints folder of the cbnetv2 working copy
!cp /kaggle/input/competckp/epoch_10.pth /kaggle/working/cbnetv2/checkpoints/ 
#check the copy results
!ls /kaggle/working/cbnetv2/checkpoints/ 

### Initialize the detector based on given config file and checkpoint

In [None]:
from mmdet.apis import inference_detector, init_detector, show_result_pyplot


# Choose to use a config and initialize the detector
# NOTE(review): both paths below are relative, so they assume the current directory is /kaggle/working - confirm
config = 'cbnetv2/configs/mask_rcnn/mask_rcnn_r50_fpn_compet.py'

# Setup a checkpoint file to load
checkpoint = 'cbnetv2/checkpoints/epoch_10.pth'

# initialize the detector on the first CUDA device
model = init_detector(config, checkpoint, device='cuda:0')

In [None]:
# msk_lst: list of numpy arrays of shape (520,704)

def check_overlap(msks):
    """
    Check whether any two instance masks of one image cover the same pixel.
    :param msks: list of equally shaped numpy ndarrays, each a binary mask (non-zero = object)
    :return: boolean True if at least one pixel is covered by more than one mask,
             False otherwise (an empty list has no overlap)
    """
    # Nothing to stack for an image without masks; np.stack would raise on an empty list.
    if len(msks) == 0:
        return False
    # Stack along a new last axis and binarise with the builtin `bool`
    # (the `np.bool` alias is no longer available in recent NumPy releases).
    stacked = np.stack(msks, axis=-1).astype(bool)  # (H, W, n_masks)
    # Count how many masks claim each pixel; more than one means an overlap.
    return np.any(np.count_nonzero(stacked, axis=-1) > 1)

def sort_msks(msks, c_order):
    """
    Sort masks by the position of their first non-zero pixel in the flattened mask.
    :param msks: list a list of numpy ndarrays standing for binary masks
    :param c_order: boolean True to scan row by row (row-major), False to scan
                    column by column (the mask is transposed before flattening)
    :return: list with the same masks (original orientation) in ascending order of
             their first non-zero position; all-zero masks are placed last
    """
    def _first_pixel_pos(msk):
        # Flatten in the requested scan order.
        flat = msk.flatten() if c_order else msk.T.flatten()
        hits = np.flatnonzero(flat)
        # A mask is eligible as soon as ANY pixel is set (the earlier `np.all` test
        # rejected virtually every mask, so nothing was sorted).  Empty masks get a
        # key one past the last valid position so that they sort to the end.
        return hits[0] if hits.size else flat.size

    # sorted() is stable, so masks starting at the same position keep their input order.
    return sorted(msks, key=_first_pixel_pos)

def to_categorical(y, num_classes):
    """One-hot encode integer labels (functionally the same as tf.keras.utils.to_categorical).

    Each entry of `y` selects one row of a `num_classes` x `num_classes` identity
    matrix, so the result has the shape of `y` plus a trailing axis of length
    `num_classes`, with dtype uint8.
    """
    one_hot_rows = np.identity(num_classes, dtype='uint8')
    return one_hot_rows[y]

def fix_overlap(msk_lst, random=True, c_order=False):
    """
    Make a set of instance masks mutually exclusive.

    Args:
        msk_lst: list of 2-D binary masks of identical shape, one per cell instance
        random: when True the masks are used in the given order; when False they
            are reordered with sort_msks first
        c_order: passed on to sort_msks; has no effect when random is True
    Returns:
        multi-channel mask with non-overlapping values, shape (H, W, K); channels
        that contain no pixel are dropped
    """
    ordered = msk_lst if random else sort_msks(msk_lst, c_order)

    stacked = np.stack(ordered, axis=-1)  # (H, W, n_masks)
    # Prepend an all-zero channel so pixels covered by no mask resolve to index 0.
    with_background = np.pad(stacked, [[0,0],[0,0],[1,0]])
    n_channels = with_background.shape[-1]

    # argmax returns the first index holding the maximum, so each pixel is
    # assigned to the earliest mask in `ordered` that covers it.
    owner = np.argmax(with_background, axis=-1)  # (H, W)
    one_hot = to_categorical(owner, n_channels)
    # msk = tf.keras.utils.to_categorical(msk, num_classes=ins_len)

    instances = one_hot[...,1:]  # drop the background channel again
    non_empty = np.any(instances, axis=(0,1))  # remove only-zero-valued mask
    return instances[...,non_empty]

def rle_encode(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns the run lengths formatted as a string of space-separated
    "start length" pairs; starts are 1-based positions in the row-major
    flattened mask.
    '''
    #img = np.transpose(img)  # since the right numbered order should be first from left to right, then top to bottom
    #print(img.flags['F_CONTIGUOUS'])
    flat = img.flatten()
    # Pad with background on both sides so runs touching either end are closed.
    padded = np.concatenate([[0], flat, [0]])
    # 1-based positions at which the value changes: run start, run end, run start, ...
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn every run end into a run length.
    boundaries[1::2] = boundaries[1::2] - boundaries[::2]
    return ' '.join(map(str, boundaries))
    
def create_pred_result(model, test_imgs):
    """Lazily run the detector on each image path.

    Yields one (name_id, result) pair per entry of `test_imgs`, where `name_id`
    is the file name up to its first dot and `result` is whatever
    inference_detector returns for that image.
    """
    for img_path in test_imgs:
        prediction = inference_detector(model, img_path)
        file_name = os.path.basename(img_path)
        image_id = file_name.split('.')[0]
        yield image_id, prediction

In [None]:
## TEMP for testing the consumption of memory 
'''
import shutil
!rm -rf /kaggle/working/target_ram
!mkdir -p /kaggle/working/target_ram
train_dir_ram = '/kaggle/input/sartorius-cell-instance-segmentation/train_semi_supervised'
target_dir_ram = '/kaggle/working/target_ram'
sample_cnts = 270
test_imgs_ram = [os.path.join(str(train_dir_ram), name) for name in os.listdir(str(train_dir_ram))]
cnt = 0
for test_img_ram in test_imgs_ram:
    if cnt < sample_cnts:
        target = os.path.join(target_dir_ram, test_img_ram.split('/')[-1])
        shutil.copyfile(test_img_ram, target)
    else:
        break
    cnt += 1
print(len([name for name in os.listdir(target_dir_ram)]))
'''

In [None]:
# TEMP for testing the consumption of memory 
'''
test_imgs_dir = '/kaggle/working/target_ram'
test_imgs = [os.path.join(str(test_imgs_dir), name) for name in os.listdir(str(test_imgs_dir))]
random = False # when true, the value of c_order does not matter
c_order = False # when true, read column first, otherwise, read row first
sort_rle = False # when true, the Kaggle rles should be sorted according to their start pos
sub_lst = []
pred_result_gen = create_pred_result(model, test_imgs)
'''

In [None]:
# TEMP for testing the consumption of memory 
'''
tst_result = next(pred_result_gen)
'''

In [None]:
# TEMP for testing the consumption of memory 
'''
test_id, value = tst_result
value[1][0][0].shape
'''

In [None]:
# TEMP for testing the consumption of memory 
'''
for test_id, value in pred_result_gen:
    msk_lst = value[1][0]
    result_df = pd.DataFrame(columns=['id', 'predicted'])
    overlap = check_overlap(msk_lst)
    if overlap:
        msk = fix_overlap(msk_lst, random, c_order)  # (520,704,None)
        msks_lst = list(np.transpose(msk, (2,0,1)))
        print(f"masks list has length: {len(msks_lst)}")
    else:
        msks_lst = msk_lst
        print(f"masks list has length: {len(msks_lst)}")
    rle = list(map(rle_encode, msks_lst))
    print(f"rles list has length: {len(rle)}")
    if sort_rle:
        rle_1st_ind_lst = [int(rle_str.split(' ')[0]) for rle_str in rle]
        sorted_indices = sorted(range(len(rle_1st_ind_lst)), key=lambda k: rle_1st_ind_lst[k])
        rle = [rle[ind] for ind in sorted_indices]

    id_lst = [str(test_id)] * len(rle)
    print(f"the first item of id list is: {id_lst[0]}")
    result_df.loc[:, 'id'] = id_lst
    result_df.loc[:, 'predicted'] = rle
    sub_lst.append(result_df)
    print(f"submission list has length: {len(sub_lst)}")
    print("====================")

subdf = pd.concat(sub_lst)
print(f"subdf has length: {subdf.shape[0]}")
print(f"subdf has type of id: {subdf.loc[:, 'id'].dtype} and type of rle: {subdf.loc[:, 'predicted'].dtype}")
subdf.head()
'''

### Do inference and submit results

**Requirements for converting the binary masks to the format required by Kaggle**:
- The RLE must be output in Kaggle format, column first - row second
- The pairs are sorted and positive, and the decoded pixel values are not duplicated
- No two predicted masks for the same image overlap

In [None]:
# Use the detector to do inference
test_imgs_dir = '/kaggle/input/sartorius-cell-instance-segmentation/test'
# os.listdir returns names in arbitrary order; sorting makes the order of the
# processed images (and therefore of the submission rows) reproducible.
test_imgs = [os.path.join(str(test_imgs_dir), name) for name in sorted(os.listdir(str(test_imgs_dir)))]
random = True # when true, the value of c_order does not matter
c_order = False # when true, read row first, otherwise, read column first
sort_rle = False # when true, the Kaggle rles should be sorted according to their start pos
sub_lst = []  # collects one result DataFrame per test image
# lazy generator: inference for an image only runs when its result is consumed
pred_result_gen = create_pred_result(model, test_imgs)

In [None]:
# This code snippet is the one way to post process the inferred results, which requires the classes included in the result
# For every image: keep confident masks per class, remove overlaps, RLE-encode the
# remaining masks and collect them as rows of the submission table.
THRESHOLDS = [.15, .35, .55]  # minimum score per class for a prediction to be kept
MIN_PIXELS = [75, 150, 75]    # minimum remaining mask area (pixels) per class
#MIN_PIXELS = [1, 1, 1]

for test_id, value in pred_result_gen:
    # value[0]: per-class box arrays (column 4 is read as the score),
    # value[1]: per-class lists of masks
    mask_lst = []
    for class_id, msk_lst in enumerate(value[1]):
        # class_id: 0->1st class (shsy5y), 1->2nd class (astro), 2->3rd class (cort)
        scores = value[0][class_id][:, 4]
        take = scores >= THRESHOLDS[class_id]
        pred_msks = np.array(msk_lst)[take]
        if len(pred_msks) == 0:
            continue
        # Pixels already claimed by an earlier mask of this class.  The shape is taken
        # from the masks themselves instead of a hard-coded (520, 704).
        used = np.zeros(pred_msks.shape[1:], dtype=int)
        for mask in pred_msks:
            mask = mask * (1-used)  # drop the pixels that are already taken
            if mask.sum() >= MIN_PIXELS[class_id]: # skip predictions with small area
                used += mask
                mask_lst.append(mask)

    # Overlaps between masks of different classes are still possible at this point.
    # The emptiness guard avoids stacking an empty list for images without any kept mask.
    if len(mask_lst) > 0 and check_overlap(mask_lst):
        print('Overlap found!')
        msks = fix_overlap(mask_lst, random, c_order)  # return msk:(H,W,None)
        msks_lst = list(np.transpose(msks, (2,0,1)))  # return msks_lst: list(ndarray) ndarray:(H,W)
    else:
        msks_lst = mask_lst

    res = [rle_encode(msk) for msk in msks_lst]
    # Build the frame in one go; filling an empty DataFrame through .loc is fragile.
    # NOTE(review): an image without any kept mask contributes no row - confirm
    # whether the submission format needs a row for such images.
    result_df = pd.DataFrame({'id': [str(test_id)] * len(res), 'predicted': res},
                             columns=['id', 'predicted'])
    sub_lst.append(result_df)

# pd.concat raises on an empty list, so fall back to an empty table with the right columns
if sub_lst:
    subdf = pd.concat(sub_lst, ignore_index=True)
else:
    subdf = pd.DataFrame(columns=['id', 'predicted'])
subdf.head()

In [None]:
# Sanity check: `msks_lst` is the value left over from the last iteration of the
# loop above, so only the masks of the last processed image are verified here.
if check_overlap(msks_lst):
    print('Overlap found!')
else:
    print("No overlap, pass")

In [None]:
# list the contents of the working directory
!ls /kaggle/working/

In [None]:
'''
for test_id, value in pred_result_gen:
    msk_lst = value[1][0]
    result_df = pd.DataFrame(columns=['id', 'predicted'])
    overlap = check_overlap(msk_lst)
    if overlap:
        msk = fix_overlap(msk_lst, random, c_order)  # (520,704,None)
        msks_lst = list(np.transpose(msk, (2,0,1)))
    else:
        msks_lst = msk_lst
    rle = list(map(rle_encode, msks_lst))
    if sort_rle:
        rle_1st_ind_lst = [int(rle_str.split(' ')[0]) for rle_str in rle]
        sorted_indices = sorted(range(len(rle_1st_ind_lst)), key=lambda k: rle_1st_ind_lst[k])
        rle = [rle[ind] for ind in sorted_indices]

    id_lst = [str(test_id)] * len(rle)
    result_df.loc[:, 'id'] = id_lst
    result_df.loc[:, 'predicted'] = rle
    sub_lst.append(result_df)

subdf = pd.concat(sub_lst)
subdf.head()
'''

In [None]:
# Let's plot the result
# import random
# img_format = "png"
# sampled_id = random.choice(list(results.keys()))
# img_path = os.path.join(str(test_imgs_dir), '.'.join((sampled_id, img_format)))
# show_result_pyplot(model, img_path, results[sampled_id], score_thr=0.3)

In [None]:
# Write the submission file; the DataFrame index is not written as a column
subdf.to_csv('/kaggle/working/submission.csv', index=False)

In [None]:
# switch to the working directory and list its contents
%cd /kaggle/working
!ls

In [None]:
# Remove the source trees that were copied into the working directory for installation.
# The paths are relative; the preceding cell changes into /kaggle/working.
!rm -rf mmpycocotools-12.0.3
!rm -rf typing-3.7.4.3
!rm -rf apex
!rm -rf pycocotools-2.0.3
!rm -rf cbnetv2
#!rm -f __notebook_source__.ipynb
!ls
#!rm -rf target_ram