<h2 style="font-family: Verdana; font-size: 40px; font-style: normal; font-weight: bold; text-transform: none; letter-spacing: 2px; color: #2874A6; background-color: #ffffff;"> LIVE Cell Dataset Loading </h2>

<h5 style="font-family: Verdana; line-height: 160%"> In this notebook, I intend to cover all the steps needed to retrieve the data contained in the LIVECell dataset in the same format as the train and test folders. </h5>
<hr>
    
<h2 style="font-family: Verdana; font-size: 25px; font-style: normal; font-weight: bold; text-transform: none; letter-spacing: 2px; color: #2874A6; background-color: #ffffff;"> 1. Libraries </h2>

In [None]:
import numpy as np
from collections import namedtuple
import pandas as pd
import copy
import matplotlib.pyplot as plt
import cv2
import math
import os
import rasterio
from matplotlib.path import Path

<h2 style="font-family: Verdana; font-size: 25px; font-style: normal; font-weight: bold; text-transform: none; letter-spacing: 2px; color: #2874A6; background-color: #ffffff;">2. Helper Functions</h2>

In [None]:
def mask_decode(mask, shape=(520, 704)):
    """Decode run-length-encoded annotations into a single binary mask.

    Args:
        mask (iterable of str): RLE strings, one per annotation, each of the
            form "start length start length ...". Starts are 1-based offsets
            into the row-major flattened image.
        shape (tuple of int): (height, width) of the decoded mask. Defaults
            to (520, 704), the size that used to be hard-coded here.

    Returns:
        np.ndarray: float64 array of the requested shape holding 1.0 on every
        pixel covered by at least one annotation and 0.0 everywhere else.
    """
    height, width = shape
    # A single flat buffer is enough: the result is the union of all runs, so
    # every run can be painted directly instead of summing per-label images
    # and clipping afterwards.
    flat = np.zeros(height * width, dtype=np.float64)
    for label in mask:
        tokens = label.split()
        starts = [int(token) - 1 for token in tokens[0::2]]  # to 0-based
        lengths = [int(token) for token in tokens[1::2]]
        for start, length in zip(starts, lengths):
            flat[start:start + length] = 1
    return flat.reshape(shape)

def rle_encode(img):
    """Run-length encode a binary mask.

    Args:
        img (np.array): mask in which
            - 1 marks a mask pixel
            - 0 marks background

    Returns:
        str: space-separated "start length" pairs, where each start is the
        1-based position of a run in the flattened mask. An all-background
        mask yields an empty string.
    """
    flat = img.flatten()
    # Pad with a background pixel on both sides so that runs touching either
    # end of the image still produce an opening and a closing transition.
    padded = np.concatenate([[0], flat, [0]])
    transitions = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Transitions alternate run-start / run-end; turn every end into a length.
    transitions[1::2] -= transitions[::2]
    return ' '.join(map(str, transitions))

# Record for one training image: its id, its cell type and the list of
# annotation strings that belong to it.
PatientInfoTuple = namedtuple(
    'PatientInfoTuple',
    ['id', 'cell_type', 'annotations'],
)

def getPatientsInfo(csv_path='../input/sartorius-cell-instance-segmentation/train.csv'):
    """Load the training CSV and collect one record per image.

    Args:
        csv_path (str): location of the CSV to read. It must provide the
            columns ``id``, ``cell_type`` and ``annotation``. Defaults to the
            competition's ``train.csv``.

    Returns:
        list of PatientInfoTuple: one entry per distinct ``id``, in order of
        first appearance in the file. Each entry carries the id, the
        ``cell_type`` of the first row with that id and every ``annotation``
        value of that id as a list.
    """
    df = pd.read_csv(csv_path)

    # A single grouped pass replaces two full-frame boolean filters per id.
    # sort=False keeps the ids in file order, as df.id.unique() did.
    return [
        PatientInfoTuple(
            image_id,
            rows["cell_type"].iloc[0],
            rows["annotation"].tolist(),
        )
        for image_id, rows in df.groupby("id", sort=False)
    ]

<h2 style="font-family: Verdana; font-size: 25px; font-style: normal; font-weight: bold; text-transform: none; letter-spacing: 2px; color: #2874A6; background-color: #ffffff;"> 3. From Json File to Dictionary </h2>
    
<h5 style="font-family: Verdana; line-height: 160%"> In this notebook, I'll work with SHSY5Y cells, but the same procedure can be applied to any of the cell types covered by the LIVECell dataset. </h5>
    
<h5 style="font-family: Verdana; line-height: 160%"> In the following cell, I'll first load the json file that contains all the information related to the shsy5y cells. Then, I'll convert it into a dictionary keyed by image id that stores, for each image, the segmentation of every cell, the bounding box of every cell and the path to the image. </h5>

In [None]:
import json

SHSY5Y_PATH = "../input/sartorius-cell-instance-segmentation/LIVECell_dataset_2021/annotations/LIVECell_single_cells/shsy5y/livecell_shsy5y_train.json"

# Read the annotation file through the constant above so the location is
# spelled out exactly once.
with open(SHSY5Y_PATH) as f:
    data = json.load(f)

# Image ids in the order they appear in the file.
ids = [img_dict["id"] for img_dict in data["images"]]

# d maps image id -> {"segmentation": [...], "bbox": [...], "path": [...]}.
d = {k: {"segmentation": [], "bbox": [], "path": []} for k in ids}

# Iterate over the images themselves rather than range(len(d)): the dict can
# be shorter than the image list if an id repeats, which would skip entries.
for img_dict in data["images"]:
    d[img_dict["id"]]["path"].append(img_dict["original_filename"])

# The annotations were read here as a mapping (key -> annotation); a plain
# list of annotations is accepted as well.
annotations = data["annotations"]
if isinstance(annotations, dict):
    annotations = annotations.values()

for annotation in annotations:
    record = d[annotation["image_id"]]
    # Only the first entry of each annotation's "segmentation" list is kept.
    record["segmentation"].append(annotation["segmentation"][0])
    record["bbox"].append(annotation["bbox"])

<h2 style="font-family: Verdana; font-size: 25px; font-style: normal; font-weight: bold; text-transform: none; letter-spacing: 2px; color: #2874A6; background-color: #ffffff;">4. LiveCell Segmentation vs Sartorius Annotation</h2>

In [None]:
# Sartorius Annotation
# Overlay the decoded annotations of the first training record on its image.
patientsInfo_list = getPatientsInfo()
example = patientsInfo_list[0]

# Build the file name from the record's id so that the image always belongs
# to the same record as the mask drawn on top of it.
img_example_file = os.path.join(
    '../input/sartorius-cell-instance-segmentation/train',
    f'{example.id}.png',
)
img_example = cv2.imread(img_example_file)
if img_example is None:
    # cv2.imread reports an unreadable or missing file by returning None.
    raise FileNotFoundError(img_example_file)

mask_example = mask_decode(example.annotations)

plt.figure(figsize=(15,15))
plt.imshow(img_example)
plt.imshow(mask_example, alpha=0.35)

In [None]:
# LiveCell Segmentation
# Mark every polygon vertex of one LIVECell image on a blank canvas and show
# it on top of the image itself.
array = np.zeros((520,704))

for img_mask in d[1564017]["segmentation"]:
    # The polygon is a flat list of coordinate pairs: even positions index
    # the second (704-long) axis, odd positions the first (520-long) axis.
    n_vertices = len(img_mask) // 2
    cols = np.floor(img_mask[0::2][:n_vertices]).astype(int) - 1
    rows = np.floor(img_mask[1::2][:n_vertices]).astype(int) - 1

    # Clamp to the canvas: a coordinate below 1 would otherwise turn into
    # index -1 and silently mark a pixel on the opposite edge.
    rows = np.clip(rows, 0, array.shape[0] - 1)
    cols = np.clip(cols, 0, array.shape[1] - 1)
    array[rows, cols] = 0.5

# The images are stored as .tif; swap whatever extension the annotation file
# lists for the image.
image_file = os.path.join(
    "../input/sartorius-cell-instance-segmentation/LIVECell_dataset_2021/images/livecell_train_val_images",
    "SHSY5Y",
    os.path.splitext(d[1564017]["path"][0])[0] + ".tif",
)

# The context manager closes the dataset once the first band has been read.
with rasterio.open(image_file) as src:
    imew = src.read(1)

plt.figure(figsize=(15,15))
plt.imshow(imew, cmap="gray")
plt.imshow(array, alpha=0.35)

<h2 style="font-family: Verdana; font-size: 25px; font-style: normal; font-weight: bold; text-transform: none; letter-spacing: 2px; color: #2874A6; background-color: #ffffff;">5. LiveCell Segmentation Transformation</h2>

In [None]:
# Rasterise every polygon of one LIVECell image into a single binary mask.
array = np.zeros((520, 704))

# The pixel grid does not depend on the polygon, so build it once instead of
# once per polygon. Each row of `points` is (index on axis 0, index on axis 1).
grid_rows, grid_cols = np.mgrid[:520, :704]
points = np.vstack((grid_rows.ravel(), grid_cols.ravel())).T

for segmentation in d[1564017]["segmentation"]:
    # Swap each coordinate pair so the vertices use the same axis order as
    # `points`: odd positions first, even positions second.
    vertices = np.asarray(list(zip(segmentation[1::2], segmentation[0::2])))
    path = Path(vertices)

    # True for every grid point that lies inside the polygon.
    mask = path.contains_points(points)

    # np.int was removed from NumPy; the builtin int is the same dtype.
    array += mask.reshape(grid_rows.shape).astype(int)

plt.figure(figsize=(15,15))
# Overlapping polygons add up to more than 1; clip back to a binary mask.
plt.imshow(array.clip(0, 1))

<h5 style="font-family: Verdana; line-height: 160%">Now that we have our mask in an appropriate format, it's time to encode it in the same run-length format used by the Sartorius challenge.</h5>

In [None]:
# Run-length encode each polygon of one LIVECell image separately.
seg_list = list()

# The pixel grid does not depend on the polygon, so build it once instead of
# once per polygon. Each row of `points` is (index on axis 0, index on axis 1).
grid_rows, grid_cols = np.mgrid[:520, :704]
points = np.vstack((grid_rows.ravel(), grid_cols.ravel())).T

for segmentation in d[1564017]["segmentation"]:
    # Swap each coordinate pair so the vertices use the same axis order as
    # `points`: odd positions first, even positions second.
    vertices = np.asarray(list(zip(segmentation[1::2], segmentation[0::2])))
    path = Path(vertices)

    # True for every grid point that lies inside the polygon.
    mask = path.contains_points(points)

    # np.int was removed from NumPy; the builtin int is the same dtype.
    instance_mask = mask.reshape(grid_rows.shape).astype(int)

    # ENCODED MASK
    seg_list.append(rle_encode(instance_mask))


seg_list[0]