In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import cv2
import zipfile
import seaborn as sns
from tqdm.notebook import tqdm


*References:*

* https://www.kaggle.com/ihelon/cell-segmentation-run-length-decoding
* https://www.kaggle.com/inversion/run-length-decoding-quick-start
* https://www.kaggle.com/robertlangdonvinci/sartorius-cell-segmentation-data-gen/notebook


In [None]:
# Location of the competition data and of its training annotation table.
ROOT_DIR = '/kaggle/input/sartorius-cell-instance-segmentation/'
TRAIN_CSV = os.path.join(ROOT_DIR, 'train.csv')

# read_csv opens and closes the file itself when handed a path, so no explicit
# open() wrapper is needed (and decoding no longer depends on the locale).
data_df = pd.read_csv(TRAIN_CSV)
data_df.head(10)


In [None]:
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
IMAGE_DIR = os.path.join(ROOT_DIR, 'train')
N_PREVIEW = 3  # how many sample images to display

# Flatten the directory walk into a single stream of file paths so that one
# `break` ends the whole preview. Breaking out of a nested filename loop would
# only leave the current directory; the walk would then continue into the
# next directory with the counter already past the limit.
image_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk(IMAGE_DIR)
    for filename in filenames
)

count = 0
for count, image_path in enumerate(image_paths, start=1):
    img = mpimg.imread(image_path)
    plt.imshow(img)
    print(img.shape)
    plt.show()
    if count == N_PREVIEW:
        break

In [None]:
# ref: https://www.kaggle.com/inversion/run-length-decoding-quick-start
def rle_decode(mask_rle, shape, color=1):
    '''
    Decode a run-length encoded mask into a float32 array.

    mask_rle: whitespace-separated string of "start length" pairs; starts are
        1-based positions into the flattened (height * width) pixel grid
    shape: (height, width, channels) of the array to return
    color: value written to every pixel covered by a run; a scalar, or one
        value per channel
    Returns a numpy array of the requested shape, zero outside the runs.
    '''
    tokens = [int(tok) for tok in mask_rle.split()]

    height, width, channels = shape[0], shape[1], shape[2]
    flat = np.zeros((height * width, channels), dtype=np.float32)

    # Even positions hold the 1-based starts, odd positions the run lengths.
    for one_based_start, run_length in zip(tokens[0::2], tokens[1::2]):
        begin = one_based_start - 1
        flat[begin:begin + run_length] = color

    return flat.reshape(shape)



In [None]:
# ref: www.kaggle.com/ihelon/cell-segmentation-run-length-decoding
# ref: www.kaggle.com/robertlangdonvinci/sartorius-cell-segmentation-data-gen/notebook
def plot_masks(dataframe, image_id, colors=True):
    '''
    Show a training image next to its decoded annotation masks.

    Three panels are drawn: the raw image, the image with the mask overlaid,
    and the mask on its own.

    dataframe: DataFrame with an "id" column and an "annotation" column of
        run-length strings accepted by rle_decode
    image_id: id of the image, read from ROOT_DIR/train/<image_id>.png
    colors: if True every annotation is drawn in its own random RGB colour,
        otherwise all annotations are merged into one single-channel mask
    Raises FileNotFoundError if the image cannot be read.
    '''
    image_path = os.path.join(ROOT_DIR, f"train/{image_id}.png")
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Size the mask from the image itself rather than assuming 520x704.
    height, width = image.shape[:2]
    channels = 3 if colors else 1
    mask_shape = (height, width, channels)

    labels = dataframe.loc[dataframe["id"] == image_id, "annotation"].tolist()

    mask = np.zeros(mask_shape)
    for label in labels:
        color = np.random.rand(3) if colors else 1
        mask += rle_decode(label, shape=mask_shape, color=color)
    # Overlapping annotations add up; clip back into the displayable range.
    mask = mask.clip(0, 1)

    plt.figure(figsize=(15, 8))

    plt.subplot(131)
    plt.imshow(image)
    plt.title('raw image')
    plt.axis("off")

    plt.subplot(132)
    plt.imshow(image)
    plt.imshow(mask, alpha=0.6)
    plt.title('image + mask')
    plt.axis("off")

    plt.subplot(133)
    plt.imshow(mask)
    plt.title('mask only')
    plt.axis("off")

    plt.tight_layout()
    plt.show()

In [None]:
# Visual sanity check of the decoded masks on one training image.
plot_masks(data_df, "0030fd0e6378", colors=True)

In [None]:
# Number of rows in the annotation table for each cell type.
sns.countplot(x=data_df['cell_type'])

In [None]:
# Distinct cell-type labels present in the table.
cell_type = data_df['cell_type'].unique()
cell_type

In [None]:
# Peek at the first rows of the annotation table.
data_df.head()

In [None]:
# Integer code for each cell-type label.
CELL_TYPE_CODES = {'shsy5y': 1, 'astro': 2, 'cort': 3}

# Assign the result back instead of calling replace(..., inplace=True) on the
# selected column: the selection may be a temporary copy, in which case the
# in-place edit never reaches data_df. Re-running this cell is harmless because
# already-encoded integers are not keys of the mapping.
data_df['cell_type'] = pd.to_numeric(data_df['cell_type'].replace(CELL_TYPE_CODES))
data_df.head()


In [None]:
# Group the annotation rows by image id so that all annotations of one image
# can be fetched with get_group().
data_group = data_df.groupby(by='id')
# head() on a GroupBy returns the leading rows of every group, not just of the
# first one.
data_group.head(n=5)


In [None]:
def build_mask(img_id, dataframe, color=1):
    '''
    Merge every annotation of one image into a single-channel mask.

    img_id: key of the group to fetch
    dataframe: grouped object exposing get_group(); each group must have an
        "annotation" column of run-length strings accepted by rle_decode
    color: value given to every annotated pixel
    Returns a (520, 704, 1) numpy array: `color` where any annotation covers
    the pixel, 0 elsewhere.
    '''
    mask_shape = (520, 704, 1)
    annotations = dataframe.get_group(img_id)['annotation'].tolist()

    # Sum the per-annotation masks; overlapping pixels exceed 1 and are
    # clipped back so the result is binary before recolouring.
    summed = sum(
        (rle_decode(rle, shape=mask_shape) for rle in annotations),
        np.zeros(mask_shape),
    )
    mask = np.clip(summed, 0, 1)
    mask[mask == 1] = color
    return mask



In [None]:
from scipy import stats
# One row per image id with its most frequent cell-type value. pandas' own
# Series.mode() is used because the return shape of scipy.stats.mode (array vs
# scalar) differs between SciPy releases; mode() is sorted, so ties resolve to
# the smallest value.
ctype_df = (
    data_df.groupby('id')['cell_type']
    .agg(lambda codes: codes.mode().iat[0])
    .reset_index()
)
ctype_df.head(10)

In [None]:
# Archive that the mask-writing cell fills.
OUT_TRAIN = 'TrainMask.zip'

# (image id, cell type) pairs packed into one array with a row per image.
id_type_pairs = list(zip(ctype_df['id'], ctype_df['cell_type']))
files = np.array(id_type_pairs)

In [None]:
# Write every image's mask into the archive as four PNG tiles (2 x 2 grid).
with zipfile.ZipFile(OUT_TRAIN, 'w') as img_out:
    for idx in tqdm(range(len(files))):
        img_id = files[idx][0]
        full_mask = build_mask(img_id, data_group, color=int(files[idx][1]))

        half_h = full_mask.shape[0] // 2
        half_w = full_mask.shape[1] // 2
        # Fixed offsets give exactly four tiles in row-major order, even when
        # a dimension is odd (a stepped range would then yield a third
        # row/column and shift which tiles come first).
        tiles = [
            full_mask[top:top + half_h, left:left + half_w]
            for top in (0, half_h)
            for left in (0, half_w)
        ]

        for j, tile in enumerate(tiles):
            # build_mask returns a float array; cast explicitly to 8-bit
            # rather than relying on the encoder to convert it.
            ok, encoded = cv2.imencode('.png', tile.astype(np.uint8))
            if not ok:
                raise RuntimeError(f'PNG encoding failed for {img_id} tile {j}')
            img_out.writestr(img_id + f'_{j}_mask.png', encoded.tobytes())

In [None]:
# NOTE: reuses the OUT_TRAIN name that previously pointed at the mask archive;
# from here on it refers to the archive of tiled training images.
OUT_TRAIN = 'TrainImage.zip'

In [None]:
# Write every training image into the archive as four PNG tiles (2 x 2 grid),
# numbered in the same row-major order as the mask tiles.
with zipfile.ZipFile(OUT_TRAIN, 'w') as img_out:
    for idx in tqdm(range(len(files))):
        img_id = files[idx][0]
        image_path = os.path.join(ROOT_DIR, 'train', f'{img_id}.png')

        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')
        # The image is kept in OpenCV's BGR order on purpose: imencode expects
        # BGR input, so converting to RGB first would swap the red and blue
        # channels in the written file.

        half_h = image.shape[0] // 2
        half_w = image.shape[1] // 2
        # Fixed offsets give exactly four tiles even when a dimension is odd.
        tiles = [
            image[top:top + half_h, left:left + half_w]
            for top in (0, half_h)
            for left in (0, half_w)
        ]

        for j, tile in enumerate(tiles):
            ok, encoded = cv2.imencode('.png', tile)
            if not ok:
                raise RuntimeError(f'PNG encoding failed for {img_id} tile {j}')
            img_out.writestr(img_id + f'_{j}.png', encoded.tobytes())


