# **KDDM 2 SARTORIUS CELL INSTANCE SEGMENTATION CASCADE MASK-RCNN (MMDETECTION) TRAIN AND INFERENCE NOTEBOOK**

# **INSTALL ALL DEPENDENCIES**
* **Install PyTorch version required by MMDetection**
* **Install MMDetection and its required dependencies**

In [None]:
# Install torch, torchvision and torchaudio from local wheel files under /kaggle/input.
# --no-deps stops pip from trying to resolve (and download) further dependencies.
!pip install '/kaggle/input/pytorch-170-cuda-toolkit-110221/torch-1.7.0+cu110-cp37-cp37m-linux_x86_64.whl' --no-deps
!pip install '/kaggle/input/pytorch-170-cuda-toolkit-110221/torchvision-0.8.1+cu110-cp37-cp37m-linux_x86_64.whl' --no-deps
!pip install '/kaggle/input/pytorch-170-cuda-toolkit-110221/torchaudio-0.7.0-cp37-cp37m-linux_x86_64.whl' --no-deps

In [None]:
# Install MMDetection's helper packages and the mmcv-full wheel from local files (no dependency resolution).
!pip install '/kaggle/input/mmdetectionv2140/addict-2.4.0-py3-none-any.whl' --no-deps
!pip install '/kaggle/input/mmdetectionv2140/yapf-0.31.0-py2.py3-none-any.whl' --no-deps
!pip install '/kaggle/input/mmdetectionv2140/terminal-0.4.0-py3-none-any.whl' --no-deps
!pip install '/kaggle/input/mmdetectionv2140/terminaltables-3.1.0-py3-none-any.whl' --no-deps
!pip install '/kaggle/input/mmdetectionv2140/mmcv_full-1.3.8-cu110-torch1_7_0/mmcv_full-1.3.8-cp37-cp37m-manylinux1_x86_64.whl' --no-deps

# Copy the pycocotools / mmpycocotools source trees into /kaggle/working before installing them.
# NOTE(review): presumably needed because the input directory is read-only and the build needs a writable location - confirm.
!cp -r /kaggle/input/mmdetectionv2140/pycocotools-2.0.2/* /kaggle/working/
!cp -r /kaggle/input/mmdetectionv2140/mmpycocotools-12.0.3/* /kaggle/working/

!pip install '/kaggle/working/mmpycocotools-12.0.3/' --no-deps
!pip install '/kaggle/working/pycocotools-2.0.2/' --no-deps

# The copied source trees are no longer needed once the packages are installed.
!rm -rf /kaggle/working/pycocotools-2.0.2/
!rm -rf /kaggle/working/mmpycocotools-12.0.3/

# Remove any previous copy of the MMDetection sources (path is relative to the current working directory).
!rm -rf mmdetection

# Copy the MMDetection 2.14.0 sources to the working directory and install them in editable mode.
# %cd changes the kernel's working directory, so relative paths used afterwards resolve
# against /kaggle/working/mmdetection until another %cd is executed.
!cp -r /kaggle/input/mmdetectionv2140/mmdetection-2.14.0/ /kaggle/working/mmdetection/
%cd /kaggle/working/mmdetection
!pip install -e .

* **Check installation**

In [None]:
import torch
print(torch.__version__, torch.cuda.is_available(),  torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'NO CUDA DEVICE')
!nvcc --version

# Check MMDetection installation
import mmdet
print(mmdet.__version__)

# Check mmcv installation
import mmcv
print(mmcv.__version__)

from mmcv.ops import get_compiling_cuda_version, get_compiler_version
print("Cuda:", get_compiling_cuda_version(), "Compiler:", get_compiler_version())

# **TRAINING**
* **Define model and settings for training**
* **Execute training**

* **Set MMDetection model config**

In [None]:
import mmcv

# Select the Cascade Mask R-CNN variant to use (keep exactly one line uncommented).
model_name = "cascade_mask_rcnn_r50_fpn_1x_coco"
#model_name = "cascade_mask_rcnn_r101_fpn_20e_coco"
#model_name = "cascade_mask_rcnn_x101_64x4d_fpn_1x_coco"

# Load the config file that belongs to the selected model.
cfg_name = model_name + ".py"
cfg_file = "/kaggle/input/kddm2/mmdet/configs/" + cfg_name
cfg = mmcv.Config.fromfile(cfg_file)

###############################################################################################################

# Optional overrides: annotation file per data split (the test split reuses the validation annotations).
annotation_files = {
    "train": "/kaggle/input/kddm2/mmdet/annotation/sartorius_coco_train.json",
    "val": "/kaggle/input/kddm2/mmdet/annotation/sartorius_coco_val.json",
    "test": "/kaggle/input/kddm2/mmdet/annotation/sartorius_coco_val.json",
}
for split_name, ann_path in annotation_files.items():
    cfg["data"][split_name]["ann_file"] = ann_path

# Write a checkpoint after every epoch.
cfg["checkpoint_config"]["interval"] = 1

# The string below is never assigned or evaluated: it holds disabled overrides
# (learning rate, epoch count, LR schedule) that can be re-enabled by removing the quotes.
"""
cfg["optimizer"]["lr"] = 0.02 / 8
cfg["runner"]["max_epochs"] = 15
cfg["lr_config"] = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[10, 14]
)
"""

###############################################################################################################

# Persist the effective config (relative path, i.e. in the current working directory) and show it.
with open(cfg_name, "w") as cfg_out:
    cfg_out.write(cfg.pretty_text)
print(cfg.pretty_text)

* **Copy livecell dataset if using auxiliary data**

In [None]:
lc_folder = "/kaggle/input/sartorius-cell-instance-segmentation/LIVECell_dataset_2021/images/livecell_train_val_images/SHSY5Y"
sar_folder = "/kaggle/input/sartorius-cell-instance-segmentation/train"
target_folder = "/kaggle/working/data"

!rm -rf {target_folder}
!mkdir {target_folder}

import cv2, os, shutil
from tqdm import tqdm
if "_.py" in cfg_name:
    files = os.listdir(lc_folder)
    for i in tqdm(range(len(files))):
        fname = files[i]
        img = cv2.imread(os.path.join(lc_folder, fname))
        out_fname = fname.split('.')[0] + '.png'
        cv2.imwrite(os.path.join(target_folder, out_fname), img)
    files = os.listdir(sar_folder)
    for i in tqdm(range(len(files))):
        fname = files[i]
        shutil.copy(os.path.join(sar_folder, fname), os.path.join(target_folder, fname))
print("files:", len(os.listdir(target_folder)))

* **Train model**

In [None]:
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector
import os
import albumentations

# Build the training dataset described by the config; train_detector expects a list of datasets.
train_dataset = build_dataset(cfg.data.train)
datasets = [train_dataset]

# Build the detector and attach the dataset's class names to it.
model = build_detector(
    cfg.model,
    train_cfg=cfg.get('train_cfg'),
    test_cfg=cfg.get('test_cfg'),
)
model.CLASSES = train_dataset.CLASSES

# Make sure the output directory exists, then train on a single process with validation enabled.
work_dir_abs = os.path.abspath(cfg.work_dir)
mmcv.mkdir_or_exist(work_dir_abs)
train_detector(
    model,
    datasets,
    cfg,
    distributed=False,
    validate=True,
    meta=dict(config=cfg.pretty_text),
)

# **INFERENCE**

In [None]:
#https://www.kaggle.com/vexxingbanana/mmdetection-neuron-inference
#https://www.kaggle.com/qq1623620766/sartorius-mmdetection-infer#Submission
from mmdet.apis import inference_detector, init_detector, show_result_pyplot, set_random_seed
import cupy as cp
import numpy as np
import os
import pandas as pd
from pycocotools import mask as cocomask
from tqdm import tqdm


#convert masks to run-length-encoded masks
def mask2rle(msk):
    '''
    Run-length encode a binary mask.

    msk: array-like, non-zero / True - mask, 0 / False - background.
    Returns the run lengths as a space-separated string of
    "start length" pairs, where start is the 1-based index into the
    row-major flattened mask. An all-background mask yields ''.

    The encoding is computed with numpy on the CPU: the masks arrive as
    host arrays, so copying them to the GPU and reading the result back
    element by element only adds transfer overhead.
    '''
    pixels = np.asarray(msk).flatten()
    # Pad with background on both sides so that runs touching the first or
    # last pixel still produce both a start and an end transition.
    pixels = np.concatenate([[0], pixels, [0]])
    # 1-based positions where the value changes: run starts (even slots)
    # alternate with the positions just past each run's end (odd slots).
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    # Turn every end position into a run length.
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs.tolist())


#remove mask overlaps (not allowed in the competition)
def remove_overlapping_pixels(new_mask, old_masks):
    """Clear every pixel of new_mask that is already set in one of old_masks.

    new_mask is modified in place and also returned; the masks in
    old_masks are left untouched.
    """
    for accepted_mask in old_masks:
        shared = np.logical_and(new_mask, accepted_mask)
        if shared.any():
            new_mask[shared] = 0
    return new_mask


output = {"rles": [], "ids": []}
confidence_thresholds = {0: 0.05, 1: 0.5, 2: 0.2} #confidence threshold per cell class
chkpt_file = cfg["work_dir"] + "/latest.pth" #select model checkpoint
model = init_detector(cfg, chkpt_file)
test_files = sorted(os.listdir("/kaggle/input/sartorius-cell-instance-segmentation/test"))

#iterate over test images and perform inference
for file in test_files:
    print("\nTEST FILE:", file)
    img = mmcv.imread('/kaggle/input/sartorius-cell-instance-segmentation/test/' + file)
    # result[0] holds one (n, 5) array per class (4 box coordinates + confidence),
    # result[1] holds the per-class segmentation results.
    result = inference_detector(model, img)
    masks_for_image = []

    ################################################################################

    #get class with highest confidence score in this image and use only masks from this class
    detected_idx = -1
    highest_cnf = 0
    for cls_idx, cls_dets in enumerate(result[0]):
        print("class {:d} has {:d} anns".format(cls_idx, cls_dets.shape[0]))
        if cls_dets.shape[0] == 0:
            continue
        # take the maximum explicitly instead of relying on the rows being sorted by confidence
        highest_cnf_for_class = cls_dets[:, 4].max()
        if highest_cnf_for_class > highest_cnf:
            detected_idx = cls_idx
            highest_cnf = highest_cnf_for_class

    print("DETECTED INDEX:", detected_idx)
    if detected_idx == -1:
        print("NO MASK DETECTED IN THIS IMAGE?!")
        continue
    ################################################################################

    #get all predictions of the detected class for the current image
    res_cnfs = result[0][detected_idx][:,4]
    # NOTE(review): the predicted segmentation masks are fetched but not used below;
    # the submitted "mask" is the filled bounding box. Confirm whether that is intended.
    res_segms = result[1][detected_idx]
    res_bbox = result[0][detected_idx][:,:4]
    print("cnfs", res_cnfs.shape, "res_bbox", len(res_bbox))

    # The threshold must belong to the detected class. Indexing with the loop variable of
    # the class scan would always pick the threshold of the last class.
    class_threshold = confidence_thresholds[detected_idx]
    img_height, img_width = img.shape[0], img.shape[1]

    for res_idx in range(len(res_bbox)):
        if res_cnfs[res_idx] < class_threshold:
            continue
        x1, y1, x2, y2 = (int(coord) for coord in res_bbox[res_idx])
        new_mask = np.full((img_height, img_width), False)
        # fill the box with one slice assignment; clamp at 0 so a negative coordinate
        # cannot wrap around to the opposite image border
        new_mask[max(y1, 0):max(y2, 0), max(x1, 0):max(x2, 0)] = True
        # earlier detections keep their pixels, later ones lose the overlap
        new_mask = remove_overlapping_pixels(new_mask, masks_for_image)
        masks_for_image.append(new_mask)

    ################################################################################################

    #convert predictions to rle masks
    image_id = str(file.split('.')[0])
    for mask in masks_for_image:
        output["rles"].append(mask2rle(mask))
        output["ids"].append(image_id)

    #store image with overlayed mask predictions
    # size the overlay from the image itself rather than assuming 520x704
    img_mask_overlay = np.full((img_height, img_width), False, bool)
    for mask in masks_for_image:
        img_mask_overlay = np.logical_or(img_mask_overlay, mask)
    # paint all mask pixels at once: channel 0 -> 255, channels 1 and 2 -> 0
    img[img_mask_overlay] = (255, 0, 0)
    mmcv.imwrite(img, "/kaggle/working/" + file)


#store predictions in competition format
# drop predictions whose mask became empty (e.g. fully removed by the overlap handling)
kept_rows = [(img_id, rle) for img_id, rle in zip(output["ids"], output["rles"]) if rle != '']
output["ids"] = [img_id for img_id, _ in kept_rows]
output["rles"] = [rle for _, rle in kept_rows]

files = pd.Series(output["ids"], name='id')
preds = pd.Series(output["rles"], name='predicted')
submission_df = pd.concat([files, preds], axis=1)
print(submission_df.head())
submission_df.to_csv('/kaggle/working/submission.csv', index=False)

# summary: number of masks that ended up in the submission per test image
for file in test_files:
    print("final masks in file '{:s}': {:d}".format(file, output["ids"].count(str(file.split('.')[0]))))

In [None]:
# Show the working directory before the cleanup.
!ls /kaggle/working
!echo ""
# Move the training outputs (everything inside the config's work_dir) to /kaggle/working,
# then delete the temporary directories created by this notebook.
# NOTE(review): the %cd runs before the mv, so a relative cfg["work_dir"] would be resolved
# against /kaggle/working here - confirm that work_dir is an absolute path (or relative to
# /kaggle/working), otherwise the mv finds nothing and the rm of mmdetection may delete the outputs.
%cd /kaggle/working
!mv {cfg["work_dir"]}/* /kaggle/working
!rm -rf {cfg["work_dir"]}
!rm -rf /kaggle/working/mmdetection
!rm -rf /kaggle/working/data
!echo ""
# Show the working directory after the cleanup.
!ls /kaggle/working