# **Train and Inference Notebooks**

https://www.kaggle.com/vexxingbanana/sartorius-mmdetection-training

https://www.kaggle.com/vexxingbanana/mmdetection-neuron-inference

# **References**

https://www.kaggle.com/dschettler8845/sartorius-segmentation-eda-and-baseline

https://www.kaggle.com/ihelon/cell-segmentation-run-length-decoding

https://www.kaggle.com/stainsby/fast-tested-rle

https://www.kaggle.com/paulorzp/run-length-encode-and-decode

https://www.kaggle.com/awsaf49/sartorius-mmdetection-infer

https://www.kaggle.com/awsaf49/sartorius-mmdetection-train

https://www.kaggle.com/evancofsky/sartorius-torch-lightning-mask-r-cnn/notebook

# **Install PyCocoTools**

In [None]:
# Install pycocotools and mmpycocotools from the source trees shipped in the
# attached `mmdetectionv2140` Kaggle input; `--no-deps` tells pip not to
# install or resolve any of their dependencies.
# NOTE(review): presumably done this way because the kernel has no internet
# access -- confirm.
!pip install '/kaggle/input/mmdetectionv2140/pycocotools-2.0.2/pycocotools-2.0.2' --no-deps
!pip install '/kaggle/input/mmdetectionv2140/mmpycocotools-12.0.3/mmpycocotools-12.0.3' --no-deps

In [None]:
import sklearn
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import json
import glob
import pycocotools
from pycocotools import mask
import random
import cv2
import re

# **Helper Functions**

In [None]:
# Fixed image dimensions, in pixels.
# NOTE(review): presumably every training image has this size -- confirm
# against the width/height columns of train.csv.
IMG_WIDTH = 704
IMG_HEIGHT = 520

In [None]:
def rle_decode(mask_rle, shape):
    """Decode a run-length encoded mask into a binary array.

    Args:
        mask_rle: Space-separated string of ``start length`` pairs, where
            ``start`` is a 1-based index into the row-major flattened mask.
        shape: ``(height, width)`` of the array to return.

    Returns:
        ``np.uint8`` array of the given shape: 1 inside the mask, 0 elsewhere.
    """
    tokens = np.asarray(mask_rle.split(), dtype=int)
    # Even positions hold the 1-based run starts, odd positions the lengths.
    run_starts = tokens[0::2] - 1
    run_ends = run_starts + tokens[1::2]

    flat_mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, end in zip(run_starts, run_ends):
        flat_mask[begin:end] = 1
    return flat_mask.reshape(shape)

def rle_encode(img):
    """Run-length encode a binary mask.

    Args:
        img: numpy array, 1 - mask, 0 - background.

    Returns:
        Space-separated string of ``start length`` pairs, with 1-based starts
        into the row-major flattened mask (empty string for an empty mask).
    """
    # Zero sentinels on both ends make runs touching the borders detectable.
    padded = np.concatenate(([0], img.flatten(), [0]))
    # 1-based positions where the value flips: run start, run end, run start...
    change_points = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Convert every "end" position into a run length.
    change_points[1::2] -= change_points[::2]
    return ' '.join(map(str, change_points))


def flatten_l_o_l(nested_list):
    """Return one flat list holding the items of every sublist, in order."""
    flat = []
    for sublist in nested_list:
        flat.extend(sublist)
    return flat


def load_json_to_dict(json_path):
    """Read a JSON file and return its parsed content.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The deserialised JSON document (a dict for a top-level JSON object).
    """
    # JSON text is UTF-8; do not rely on the platform's locale encoding.
    with open(json_path, encoding="utf-8") as json_file:
        return json.load(json_file)

In [None]:
def get_img_and_mask(img_path, annotation, width, height):
    """Load an image and build its instance-label mask.

    Args:
        img_path: Path of the image file to read with OpenCV.
        annotation: Iterable of run-length encoded strings, one per instance.
        width: Mask width in pixels.
        height: Mask height in pixels.

    Returns:
        Tuple ``(img, img_mask)``: ``img`` is the first channel of the image
        after the BGR->RGB channel flip; ``img_mask`` is a ``(height, width)``
        integer array where 0 is background and the k-th annotation (1-based)
        is labelled ``k``. Where instances overlap, the later one wins.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_path``.
    """
    # int32 rather than uint8 so images with more than 255 instances keep
    # distinct labels instead of overflowing.
    img_mask = np.zeros((height, width), dtype=np.int32)
    # Labels start at 1: 0 is reserved for background, so starting at 0 would
    # silently erase the first instance.
    for label, annot in enumerate(annotation, start=1):
        img_mask[rle_decode(annot, (height, width)) != 0] = label

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    img = img[..., ::-1]
    return img[..., 0], img_mask

def plot_img_and_mask(img, mask, invert_img=True, boost_contrast=True):
    """ Function to take an image and the corresponding mask and plot

    Args:
        img (np.arr): 1 channel np arr representing the image of cellular structures
        mask (np.arr): 1 channel np arr representing the instance masks
            (0 is background, every other value is an instance label)
        invert_img (bool, optional): Whether or not to invert the base image
        boost_contrast (bool, optional): Whether or not to boost contrast of the base image

    Returns:
        None; Plots the two arrays and overlays them to create a merged image
    """
    # Imported here because the notebook's import cell does not bring in PIL;
    # without this the contrast branch fails with a NameError.
    from PIL import Image, ImageEnhance

    plt.figure(figsize=(20, 10))

    # Panel 1: the base image, replicated to three channels.
    plt.subplot(1, 3, 1)
    _img = np.tile(np.expand_dims(img, axis=-1), 3)

    # Flip black-->white ... white-->black
    if invert_img:
        _img = _img.max() - _img

    if boost_contrast:
        _img = np.asarray(ImageEnhance.Contrast(Image.fromarray(_img)).enhance(16))

    plt.imshow(_img)
    plt.axis("off")
    plt.title("Cell Image", fontweight="bold")

    # Panel 2: the instance labels, colour-mapped.
    plt.subplot(1, 3, 2)
    plt.imshow(mask, cmap='rainbow')
    plt.axis("off")
    plt.title("Instance Segmentation Mask", fontweight="bold")

    # Panel 3: base image blended with a first-channel highlight of every
    # labelled pixel. Binarise before writing into the image-dtype buffer so
    # labels that are multiples of 256 are not cast to 0 and dropped.
    _mask = np.zeros_like(_img)
    _mask[..., 0] = np.asarray(mask) != 0
    merged = cv2.addWeighted(_img, 0.75, _mask * 255, 0.25, 0.0)
    plt.subplot(1, 3, 3)
    plt.imshow(merged)
    plt.axis("off")
    plt.title("Cell Image w/ Instance Segmentation Mask Overlay", fontweight="bold")

    plt.tight_layout()
    plt.show()

In [None]:
def polygonFromMask(maskedArr, idx):
    """Convert a binary mask into flattened polygon coordinate lists.

    Args:
        maskedArr: Mask array handed to ``cv2.findContours``.
        idx: Identifier raised inside the ``ValueError`` when no valid
            polygon is found.

    Returns:
        A one-element list wrapping the list of polygons; each polygon is a
        flat list of float coordinates.

    Raises:
        ValueError: If no contour has at least 3 points.
    """
    # adapted from https://github.com/hazirbas/coco-json-converter/blob/master/generate_coco_json.py
    contours, _ = cv2.findContours(maskedArr, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    # Valid polygons have >= 6 coordinates (3 points)
    segmentation = [
        contour.astype(float).flatten().tolist()
        for contour in contours
        if contour.size >= 6
    ]
    if not segmentation:
        raise ValueError(idx)
    return [segmentation]

# **Create Coco Json File**

In [None]:
# Load the competition's training table; later cells read its "id" and
# "annotation" columns.
train_df = pd.read_csv('../input/sartorius-cell-instance-segmentation/train.csv')

In [None]:
# Build the PNG path of every row from its first column and attach it to the
# table as a new "img_path" column.
lines = [
    '../input/sartorius-cell-instance-segmentation/train/' + row[1] + '.png'
    for row in train_df.itertuples()
]
lins = pd.Series(lines, name='img_path')
train_df = pd.concat([train_df, lins], axis=1)

In [None]:
# Collapse the table to one row per image, with "annotation" holding the list
# of every annotation string belonging to that image.
tmp_df = train_df.drop_duplicates(subset=["id", "img_path"]).reset_index(drop=True)
# Attach the lists by id rather than by position: groupby sorts its groups by
# id while drop_duplicates keeps first-appearance order, so a positional
# assignment would pair images with the wrong annotations whenever the CSV is
# not already sorted by id.
annotations_by_id = train_df.groupby("id")["annotation"].agg(list)
tmp_df["annotation"] = tmp_df["id"].map(annotations_by_id)
train_df = tmp_df.copy()

In [None]:
# Hold out 5% of the images for validation (fixed seed for reproducibility)
# and give both parts a fresh 0..n-1 index.
train_df, val_df = (
    part.reset_index(drop=True)
    for part in train_test_split(train_df, train_size=0.95, random_state=0)
)

In [None]:
# Cell-type name -> COCO category id; the ids match the category entries
# appended to output_json_dict["categories"].
categories = {"cort": 2, "shsy5y": 1, "astro": 3}

In [None]:
# Empty COCO skeleton: one independent list per top-level section.
output_json_dict = {
    section: [] for section in ("images", "annotations", "categories")
}

In [None]:
# Register the three cell types as COCO categories, in id order.
for category_dict in (
    {"id": 1, "name": "shsy5y", "supercategory": "none"},
    {"id": 2, "name": "cort", "supercategory": "none"},
    {"id": 3, "name": "astro", "supercategory": "none"},
):
    output_json_dict["categories"].append(category_dict)

In [None]:
def get_img_and_annot_info(df, annot_id_start=1):
    """Append COCO image and annotation records for every row of ``df``.

    Records are appended in place to the module-level ``output_json_dict``
    (its "images" and "annotations" lists); nothing is returned.

    Args:
        df: DataFrame iterated with ``itertuples``. Fields are read by
            position: 0 = index (used as the COCO image id), 2 = list of
            run-length encoded annotation strings, 3 = width, 4 = height,
            5 = cell-type name (a key of ``categories``), last = image path.
        annot_id_start: Id given to the first annotation written; it is
            incremented by one for every following annotation.
    """
    for row in df.itertuples():
        image_id = row[0]
        file_path = row[-1]
        width = row[3]
        height = row[4]
        category = categories[row[5]]

        output_json_dict['images'].append({
            "id": image_id,
            "width": width,
            "height": height,
            "file_name": file_path,
        })

        # np.unique drops duplicated annotation strings (and sorts them).
        for annot in np.unique(row[2]):
            # Decode with this row's own dimensions so the mask always agrees
            # with the width/height recorded for the image above.
            binary_mask = rle_decode(annot, (height, width))
            # The `np.bool` alias was removed in NumPy 1.24; the builtin
            # `bool` yields the same dtype. pycocotools expects Fortran order.
            annot_mask = np.asfortranarray(binary_mask.astype(bool))
            Rs = mask.encode(annot_mask)
            # `counts` comes back as bytes; JSON can only carry str.
            Rs['counts'] = Rs['counts'].decode('utf-8')
            bbox_list = [int(element) for element in mask.toBbox(Rs)]

            output_json_dict["annotations"].append({
                "category_id": category,
                "segmentation": Rs,
                "area": int(mask.area(Rs)),
                "bbox": bbox_list,
                "id": annot_id_start,
                "image_id": image_id,
                "iscrowd": 0,
            })
            annot_id_start += 1

In [None]:
# def get_img_and_annot_info(df, annot_id_start=1):
#     for f in df.itertuples():
#         image_id = f[0]
#         file_path = f[-1]
#         width = f[3]
#         height = f[4]
#         category = categories[f[5]]
#         img, mk = get_img_and_mask(file_path, f[2], width, height)
#         image_info = {
#             "id": image_id,
#             "width": width,
#             "height": height,
#             "file_name": file_path,
#         }
#         output_json_dict['images'].append(image_info)
#         for annot in np.unique(mk):
#             annotation = []
#             if annot != 0:
#                 annot_mask = mk == annot
#                 _, count = np.unique(annot_mask, return_counts=True)
#                 if count[1] >= 6 and (image_id != 270 and annot != 220) and (image_id!= 300 and annot != 16): #Doesn't give valid annotation otherwise
#                     annot_mask = np.expand_dims(annot_mask, axis=2)
#                     annot_mask = np.asfortranarray(annot_mask)
#                     Rs = mask.encode(annot_mask)
#                     assert len(Rs) == 1
#                     coco_seg = Rs[0]
#                     bbox = mask.toBbox(coco_seg)
#                     bbox_list = []
#                     for element in bbox:
#                         bbox_list.append(int(element))
#                     polygon_seg = mask.decode(coco_seg)
#                     polygon_segm = polygonFromMask(polygon_seg, image_id)
#                     annot_dict = {
#                         "category_id": category,
#                         "segmentation": polygon_segm[0],
#                         "area": int(mask.area(coco_seg)),
#                         "bbox": bbox_list,
#                         "id": annot_id_start,
#                         "image_id": image_id,
#                         "iscrowd": 0}
#                     output_json_dict["annotations"].append(annot_dict)
#                     annot_id_start += 1

In [None]:
# Fill output_json_dict from the training split and write it out as JSON.
get_img_and_annot_info(train_df)
output_json = json.dumps(output_json_dict)
with open('train_dataset.json', 'w') as f:
    f.write(output_json)

In [None]:
# Start a fresh COCO skeleton for the validation split and register the same
# three categories again.
output_json_dict = {
    section: [] for section in ("images", "annotations", "categories")
}

for category_dict in (
    {"id": 1, "name": "shsy5y", "supercategory": "none"},
    {"id": 2, "name": "cort", "supercategory": "none"},
    {"id": 3, "name": "astro", "supercategory": "none"},
):
    output_json_dict["categories"].append(category_dict)

In [None]:
# Fill output_json_dict from the validation split and write it out as JSON.
get_img_and_annot_info(val_df)
output_json = json.dumps(output_json_dict)
with open('val_dataset.json', 'w') as f:
    f.write(output_json)