# Sartorius - Cell Instance Segmentation
![](https://storage.googleapis.com/kaggle-competitions/kaggle/30201/logos/header.png?t=2021-09-03-15-27-46)

# Import packages

In [None]:
# import libraries 
import numpy as np 
import pandas as pd
import os
from pathlib import Path
import cv2
import copy

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers.experimental import preprocessing

from IPython.display import clear_output
import matplotlib.pyplot as plt

import imageio

from skimage.io import imread, imshow, imread_collection, concatenate_images
from skimage.transform import resize
from tqdm import tqdm
from keras.models import Model, load_model
from keras.layers import Input
from keras.layers.core import Dropout, Lambda
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import concatenate
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import backend as K

from skimage.morphology import label
import random

# Load input files

In [None]:
# --- Inputs ---
# Root directory of the competition dataset.
DIR = '../input/sartorius-cell-instance-segmentation'
TEST_PATH = f'{DIR}/test/'

# Size the test images are resized to before being passed to the model.
IMG_HEIGHT = IMG_WIDTH = 512
IMG_CHANNELS = 1
# Size the predicted markers are resized back to before encoding
# (presumably the native image resolution -- confirm against the data).
HEIGHT, WIDTH = 520, 704

sample_submission = pd.read_csv(f'{DIR}/sample_submission.csv')

# --- Outputs ---
csv_output = os.path.join('./', 'submission.csv')

In [None]:
# Code adapted from the UnetFromScratch notebook
# Reference: https://www.kaggle.com/ihelon/cell-segmentation-run-length-decoding

def rle_decode(mask_rle, shape, color=1):
    '''
    Decode a run-length-encoded mask into a dense array.

    mask_rle: run-length as string formatted "start length start length ...";
              starts are 1-based positions in the row-major flattened image
    shape: (height, width) or (height, width, channels) of array to return
    color: value written to every mask pixel; a scalar, or a per-channel
           sequence when shape has a channel axis
    Returns float32 numpy array of the requested shape (0 outside the mask)
    '''
    tokens = mask_rle.split()
    starts = [int(tok) - 1 for tok in tokens[0::2]]
    lengths = [int(tok) for tok in tokens[1::2]]

    # One row per pixel; any trailing axes (e.g. channels) are kept as-is so
    # both 2-D and 3-D shapes work and `color` can broadcast per channel.
    shape = tuple(shape)
    img = np.zeros((shape[0] * shape[1],) + shape[2:], dtype=np.float32)

    for start, length in zip(starts, lengths):
        img[start:start + length] = color

    return img.reshape(shape)


def rle_encode(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns the encoding as a space-separated "start length ..." string with
    1-based starts over the row-major flattened image ('' for an empty mask)
    ref: https://www.kaggle.com/dragonzhang/positive-score-with-detectron-3-3-inference
    '''
    # Zero-pad both ends so runs touching the first or last pixel still
    # produce a start and an end transition.
    padded = np.concatenate([[0], img.flatten(), [0]])
    # 1-based positions where the value changes: start, end, start, end, ...
    runs = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Convert every end position into the length of its run.
    runs[1::2] -= runs[::2]
    return ' '.join(map(str, runs))


def plot_masks(image_id, colors=True):
    '''
    Plot a training image, the image with its mask overlaid, and the mask alone.

    image_id: id used both to select rows of the global `train_data` frame
              (columns "id" and "annotation") and to build the PNG file name
    colors: if True every annotation is drawn in its own random RGB colour,
            otherwise all annotations are merged into one single-channel mask
    NOTE(review): `train_data` is not defined in the visible part of this
    notebook -- it must exist before this function is called.
    '''
    annotations = train_data[train_data["id"] == image_id]["annotation"].tolist()

    mask = np.zeros((520, 704, 3 if colors else 1))
    for rle in annotations:
        if colors:
            mask += rle_decode(rle, shape=(520, 704, 3), color=np.random.rand(3))
        else:
            mask += rle_decode(rle, shape=(520, 704, 1))
    # Overlapping annotations would push values above 1; keep them displayable.
    mask = mask.clip(0, 1)

    image = cv2.imread(f"../input/sartorius-cell-instance-segmentation/train/{image_id}.png")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Each panel: title plus the layers drawn on it, bottom layer first.
    panels = (
        ('Input image', [(image, {})]),
        ('Input image with mask', [(image, {}), (mask, {'alpha': 0.5})]),
        ('Only mask', [(mask, {})]),
    )

    plt.figure(figsize=(16, 32))
    for row, (title, layers) in enumerate(panels, start=1):
        plt.subplot(3, 1, row)
        for data, kwargs in layers:
            plt.imshow(data, **kwargs)
        plt.title(title)
        plt.axis("off")

    plt.show()
    
def get_mask(image_id, df):
    '''
    Build the combined binary mask of one image from its RLE annotations.

    image_id: value matched against the "id" column of df
    df: DataFrame with columns "id" and "annotation" (RLE strings)
    Returns (HEIGHT, WIDTH) array with 1 on every annotated pixel, 0 elsewhere
    ref: https://www.kaggle.com/barteksadlej123/sartors-tf-starter
    '''
    rles = df[df["id"] == image_id]["annotation"].tolist()

    mask = np.zeros((HEIGHT, WIDTH))
    for rle in rles:
        # Request an explicit single channel and drop it afterwards, so this
        # works whether or not rle_decode accepts 2-element shapes.
        mask += rle_decode(rle, (HEIGHT, WIDTH, 1))[:, :, 0]

    # Overlapping annotations sum to more than 1; collapse back to binary.
    return mask.clip(0, 1)

# Model

In [None]:
# reference: https://www.kaggle.com/karan23258/cell-instance-segmentation-unetfromscratch

# Restore a previously saved Keras model from its HDF5 file; it is only used
# for inference (model.predict) further down.
# NOTE(review): presumably the U-Net trained in the referenced notebook -- confirm.
model = load_model('../input/unet-new-model/model_011.h5')

# Predictions & Post-processing

In [None]:
def get_sample(id_):
    '''
    Load one test image and shape it for the model.

    id_: image id, i.e. the file name under TEST_PATH without ".png"
    Returns the image resized to (IMG_HEIGHT, IMG_WIDTH) with a new axis
    inserted at position 2; preserve_range=True keeps the original intensity
    scale instead of rescaling it.
    '''
    raw = imread(TEST_PATH + id_ + '.png')[:, :]
    resized = resize(raw, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
    return resized[:, :, np.newaxis]

In [None]:
# Most of this code is adapted from https://github.com/RohanTrix/Osteosarcoma-cell-Segmentation-using-Watershed/blob/master/Cell_segment.py
import cv2
import numpy as np
from matplotlib import pyplot as plt
from scipy import ndimage
from skimage import measure, color, io
from skimage.segmentation import clear_border


def watersheding(pred, id_):
    '''
    Split a predicted foreground map into individual cells with
    marker-based watershed.

    pred: model output for one image, shape (H, W) or (H, W, 1); values above
          0.5 are treated as cell pixels
    id_: image id (currently unused; kept so existing callers keep working)
    Returns the marker image produced by cv2.watershed: -1 on boundaries
    between regions, 10 for background, values above 10 for individual cells.

    Side effects: overwrites the diagnostic images 'opening.jpg' and
    'Markers.jpg' in the working directory.
    '''
    # Binarise the prediction. 125 is just a non-zero grey level; the Otsu
    # threshold below maps it to 255.
    mask = np.where(pred > 0.5, 125, 0).astype(np.uint8)

    # cv2.watershed needs an 8-bit 3-channel image. Build it in memory rather
    # than writing a JPEG and reading it back, which adds compression
    # artefacts and depends on a fixed file name.
    if mask.ndim == 3 and mask.shape[2] == 3:
        img = np.ascontiguousarray(mask)
    else:
        gray = np.ascontiguousarray(mask.reshape(mask.shape[:2]))
        img = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
    cells = img[:, :, 0]  # Blue channel. Image equivalent to grey image.

    _, thresh = cv2.threshold(cells, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Morphological opening removes small speckles from the binary mask.
    kernel = np.ones((3, 3), np.uint8)
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
    cv2.imwrite('opening.jpg', opening)

    # Drop objects that touch the image border.
    opening = clear_border(opening)

    # Dilating the foreground leaves only "sure background" outside it.
    sure_bg = cv2.dilate(opening, kernel, iterations=10)

    # Pixels far enough from any background pixel are "sure foreground".
    dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
    _, sure_fg = cv2.threshold(dist_transform, 0.2 * dist_transform.max(), 255, 0)
    sure_fg = np.uint8(sure_fg)

    # The band that is neither sure background nor sure foreground.
    unknown = cv2.subtract(sure_bg, sure_fg)

    _, markers = cv2.connectedComponents(sure_fg)

    # connectedComponents labels the background 0, but watershed treats 0 as
    # "unknown"; shift every label by 10 so background becomes 10 ...
    markers = markers + 10
    # ... and mark the genuinely unknown band with 0.
    markers[unknown == 255] = 0
    plt.imsave('Markers.jpg', markers)

    # Flood the unknown band from the seeds; boundaries are set to -1.
    markers = cv2.watershed(img, markers)

    return markers

In [None]:
def get_cell_instances(pred, id_):
    '''
    Turn one predicted foreground map into per-cell RLE predictions.

    pred: model output for one image, forwarded to watersheding()
    id_: image id stored with every encoded cell
    Returns None; appends one (id_, rle) tuple per detected cell to the
    global `predictions` list.
    '''
    markers = watersheding(pred, id_)

    # Resize the label image to (HEIGHT, WIDTH). Labels are categorical, so use
    # nearest-neighbour (order=0) without anti-aliasing: the default linear
    # interpolation blends neighbouring ids into values that belong to no cell
    # and would be emitted as bogus instances.
    markers_2 = resize(
        markers,
        (HEIGHT, WIDTH),
        order=0,
        mode='constant',
        preserve_range=True,
        anti_aliasing=False,
    )
    markers_2 = np.rint(markers_2).astype(np.int32)

    for idx in np.unique(markers_2):
        # -1 marks watershed boundaries and 10 is background; only labels
        # above 10 are cells.
        if idx > 10:
            cell_mask = (markers_2 == idx).astype(np.uint8)
            predictions.append((id_, rle_encode(cell_mask)))


In [None]:
# ids of samples to test
test_ids = sample_submission['id'].unique().tolist()

# one (image id, RLE string) tuple per detected cell; filled by get_cell_instances()
predictions = []

for id_ in test_ids:
    # The model expects a batch axis, so wrap the single image in a batch of one.
    batch = get_sample(id_)[np.newaxis, ...]
    sample = model.predict(batch, verbose=1)
    # Strip the batch axis again before post-processing.
    get_cell_instances(sample[0, :, :, :], id_)
    print('Done)')

# Generate submission

In [None]:
# Split the (image id, RLE) pairs into two parallel columns.
# image id of every predicted cell
test_ids = [image_id for image_id, _ in predictions]
# run length encoding of every predicted cell
predicted = [rle for _, rle in predictions]

In [None]:
# Build the submission table (one row per predicted cell), order it by image
# id and write it to csv_output without the index column.
submission = (
    pd.DataFrame({'id': test_ids, 'predicted': predicted})
    .sort_values(['id'], ascending=True)
)
submission.to_csv(csv_output, index=False)

In [None]:
#pd.read_csv(csv_output)

In [None]:
def rle_decode(mask_rle, shape, color=1):
    '''
    Decode a run-length-encoded mask into a dense array.

    mask_rle: run-length as string formatted "start length start length ...";
              starts are 1-based positions in the row-major flattened image
    shape: (height, width) of array to return; an optional trailing channel
           axis, (height, width, channels), is supported as well
    color: value written to every mask pixel; a scalar, or a per-channel
           sequence when shape has a channel axis
    Returns float32 numpy array, color - mask, 0 - background.
    ref: https://www.kaggle.com/inversion/run-length-decoding-quick-start
    '''
    tokens = np.asarray(mask_rle.split(), dtype=int)
    starts = tokens[0::2] - 1          # RLE starts are 1-based
    ends = starts + tokens[1::2]

    # One row per pixel; trailing axes (e.g. channels) are preserved so that
    # redefining rle_decode here does not drop multi-channel / colour support.
    shape = tuple(shape)
    img = np.zeros((shape[0] * shape[1],) + shape[2:], dtype=np.float32)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = color
    return img.reshape(shape)

def get_mask(image_id, df):
    '''
    Merge all RLE annotations of one image into a single binary mask.

    image_id: value matched against the "id" column of df
    df: DataFrame with columns "id" and "annotation" (RLE strings)
    Returns (HEIGHT, WIDTH) array with 1 on every annotated pixel, 0 elsewhere
    ref: https://www.kaggle.com/barteksadlej123/sartors-tf-starter
    '''
    rows = df["id"] == image_id

    mask = np.zeros((HEIGHT, WIDTH))
    for rle in df.loc[rows, "annotation"].tolist():
        mask += rle_decode(rle, (HEIGHT, WIDTH))

    # Overlapping annotations sum to more than 1; collapse back to binary.
    return np.clip(mask, 0, 1)

In [None]:
# Sanity check: decode the RLEs that were just written and overlay them on the
# corresponding test images.
HEIGHT, WIDTH = 520, 704
# ids of samples to test
image_ids = sample_submission['id'].unique().tolist()

# Present the predictions under the column names get_mask() reads.
sanity = pd.DataFrame({'id': test_ids, 'annotation': predicted})

for image_id in image_ids:
    img = cv2.cvtColor(
        cv2.imread(f"../input/sartorius-cell-instance-segmentation/test/{image_id}.png"),
        cv2.COLOR_BGR2RGB,
    )
    mask = get_mask(image_id, sanity)

    print(img.shape)
    plt.figure(figsize=(16, 32))
    # Show the first channel of the image with the decoded mask on top.
    plt.imshow(img[:, :, 0])
    plt.imshow(mask, alpha=0.3)
