References: https://www.kaggle.com/ihelon/cell-segmentation-run-length-decoding

In [None]:
import os
import seaborn as sns
import numpy as np
import pandas as pd
import cv2
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from PIL import Image
import PIL
import gc
import zipfile
import matplotlib.image as immg
from joblib import Parallel, delayed

In [None]:
# Load the training annotation table (train.csv) into df_trn. Later cells read
# its "id", "annotation" and "cell_type" columns.
df_trn = pd.read_csv("../input/sartorius-cell-instance-segmentation/train.csv")

In [None]:
# Quick look at 10 randomly sampled rows. `display` is not imported in this
# file; presumably it is the helper injected by the notebook environment.
display(df_trn.sample(10))

In [None]:
# Summary statistics of df_trn as produced by pandas' describe().
df_trn.describe()

In [None]:
# Scratch cell: draws three random floats. plot_masks uses the same call to
# pick a random colour for each annotation.
np.random.rand(3)

In [None]:
# ref: https://www.kaggle.com/inversion/run-length-decoding-quick-start
def rle_decode(mask_rle, shape, color=1):
    """Decode a run-length encoded annotation into a dense mask.

    mask_rle: whitespace-separated string of "start length" pairs; starts are
              1-based positions in the flattened (row-major) image
    shape: (height, width, channels) of the array to return
    color: value written into every pixel covered by a run; a scalar or a
           sequence with one entry per channel
    Returns a float32 numpy array of the requested shape in which covered
    pixels hold `color` and every other pixel is 0.
    """
    tokens = mask_rle.split()

    # Even positions carry the 1-based starts, odd positions the run lengths.
    starts = [int(tok) - 1 for tok in tokens[0::2]]
    lengths = [int(tok) for tok in tokens[1::2]]

    # Work on a flat (pixels, channels) buffer so each run is a single slice.
    flat = np.zeros((shape[0] * shape[1], shape[2]), dtype=np.float32)
    for begin, run in zip(starts, lengths):
        flat[begin:begin + run] = color

    return flat.reshape(shape)

In [None]:
# www.kaggle.com/ihelon/cell-segmentation-run-length-decoding
def plot_masks(image_id, colors=True):
    """Plot a training image, the image with its mask overlaid, and the mask.

    image_id: value of the "id" column of df_trn; selects the annotations to
              decode and the PNG file to load
    colors: when True each annotation is painted with its own random RGB
            colour, otherwise all annotations share one single-channel mask
    Returns nothing; the three panels are drawn with matplotlib.
    """
    annotations = df_trn[df_trn["id"] == image_id]["annotation"].tolist()

    # Masks are decoded at a fixed 520 x 704 resolution; only the channel
    # count depends on the colouring mode.
    canvas_shape = (520, 704, 3) if colors else (520, 704, 1)
    mask = np.zeros(canvas_shape)
    for rle in annotations:
        if colors:
            mask += rle_decode(rle, shape=canvas_shape, color=np.random.rand(3))
        else:
            mask += rle_decode(rle, shape=canvas_shape)
    # Annotations are summed, so overlapping pixels are capped at 1.
    mask = mask.clip(0, 1)

    image = cv2.imread(f"../input/sartorius-cell-instance-segmentation/train/{image_id}.png")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # (subplot slot, title, layers drawn bottom to top as (data, imshow kwargs))
    panels = (
        (131, 'raw image', [(image, {})]),
        (132, 'image + mask', [(image, {}), (mask, {"alpha": 0.6})]),
        (133, 'mask only', [(mask, {})]),
    )

    plt.figure(figsize=(20, 8))
    for slot, heading, layers in panels:
        plt.subplot(slot)
        for pixels, options in layers:
            plt.imshow(pixels, **options)
        plt.title(heading)
        plt.axis("off")
    plt.tight_layout()
    plt.show()

In [None]:
# Visual check: show image "ffdb3cc02eef" with one random colour per annotation.
plot_masks("ffdb3cc02eef", colors=True)

In [None]:
# Visual check: show image "73df2962444f" with one random colour per annotation.
plot_masks("73df2962444f", colors=True)

In [None]:
# Visual check: show image "13325f865bb0" with one random colour per annotation.
plot_masks("13325f865bb0", colors=True)

In [None]:
# Bar chart of how many rows of df_trn fall into each cell_type. The trailing
# ';' is the usual notebook idiom for hiding the returned object's repr.
sns.countplot(x=df_trn.cell_type);

In [None]:
# Distinct cell_type labels in the data; the trailing `;cell_type` makes the
# array the last expression of the cell so the notebook echoes it.
cell_type = df_trn['cell_type'].unique();cell_type

In [None]:
# Peek at the first rows of the annotation table.
df_trn.head()

In [None]:
# Encode the cell-type names as integer class ids (shsy5y=1, astro=2, cort=3).
# The result is assigned back to the column rather than calling
# replace(..., inplace=True) on `df_trn['cell_type']`: that chained in-place
# call operates on a temporary Series and does not update df_trn when pandas
# Copy-on-Write is active, which would leave the string labels in place.
# Labels missing from the mapping are kept as-is, so pd.to_numeric still raises
# on them exactly as before.
df_trn['cell_type'] = pd.to_numeric(
    df_trn['cell_type'].replace({'shsy5y': 1, 'astro': 2, 'cort': 3})
)

In [None]:
# Group the annotation rows by image id; build_mask fetches all rows of one
# image from this object with get_group().
df_grp = df_trn.groupby('id')

In [None]:
def build_mask(img_id, color=1):
    """Build one single-channel mask covering every annotation of an image.

    img_id: group key in df_grp (the image id) whose annotations are decoded
    color: value stored in every annotated pixel of the returned mask
    Returns a (520, 704, 1) numpy array: `color` where at least one annotation
    covers the pixel, 0 elsewhere.
    """
    canvas_shape = (520, 704, 1)
    annotations = df_grp.get_group(img_id).loc[:, 'annotation'].tolist()

    mask = np.zeros(canvas_shape)
    for rle in annotations:
        mask += rle_decode(rle, shape=canvas_shape)

    # Overlapping annotations sum above 1; collapse to a 0/1 mask first, then
    # stamp the requested value onto the covered pixels.
    mask = np.clip(mask, 0, 1)
    mask[mask == 1] = color
    return mask

In [None]:
from scipy import stats

In [None]:
# One row per image id with its cell type, taken as the mode of cell_type over
# that image's rows (element [0] of scipy's mode result holds the modal value).
# NOTE(review): whether stats.mode(...)[0] is a scalar or a length-1 array
# depends on the SciPy version — confirm the resulting column holds plain values.
ctype_df = df_trn[['id','cell_type']].groupby('id').agg(lambda x:stats.mode(np.array(x))[0]).reset_index()

In [None]:
# (image id, cell type) pairs, one row per image, consumed by the export loops.
# NOTE(review): packing ids and numeric codes into one np.array presumably
# coerces both to strings, which would be why the mask export calls
# int(files[idx][1]) — confirm.
files = np.array(list(zip(ctype_df['id'],ctype_df['cell_type'])))

In [None]:
# Archive that receives the 2x2 mask tiles written by the next cell.
OUT_TRAIN = 'TrainMask2x2.zip'

In [None]:
# Export the masks: for every image build a mask whose annotated pixels hold
# the image's cell-type code, cut it into a 2x2 grid and store each quadrant in
# the OUT_TRAIN archive as "<image id>_<j>_mask.png".
with zipfile.ZipFile(OUT_TRAIN, 'w') as img_out:
    for idx in tqdm(range(len(files))):
        # files[idx] is (image id, cell-type code); the code becomes the pixel value.
        temp_mask = build_mask(files[idx][0], color=int(files[idx][1]))
        # Tile size is half the mask height (M) and half its width (N).
        M, N = temp_mask.shape[0] // 2, temp_mask.shape[1] // 2
        # Row offsets vary slowest, so tiles come out as top-left, top-right,
        # bottom-left, bottom-right.
        tiles = [
            temp_mask[x:x + M, y:y + N]
            for x in range(0, temp_mask.shape[0], M)
            for y in range(0, temp_mask.shape[1], N)
        ]
        # Only the first four tiles are written.
        for j in range(4):
            mask1 = cv2.imencode('.png', tiles[j])[1]
            img_out.writestr(files[idx][0] + f'_{j}_mask.png', mask1)

In [None]:
# Re-point OUT_TRAIN at the archive for the 2x2 image tiles (the same variable
# previously named the mask archive).
OUT_TRAIN = 'TrainImage2x2.zip'

In [None]:
# Export the images: load every training PNG, cut it into a 2x2 grid and store
# each quadrant in the OUT_TRAIN archive as "<image id>_<j>.png".
with zipfile.ZipFile(OUT_TRAIN, 'w') as img_out:
    for idx in tqdm(range(len(files))):
        image = cv2.imread(f"../input/sartorius-cell-instance-segmentation/train/{files[idx][0]}.png")
        # NOTE(review): the tiles are encoded after this BGR->RGB conversion;
        # confirm that channel order is what consumers of the PNGs expect.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Variable names mirror the mask export; here they hold image data.
        temp_mask = image
        # Tile size is half the image height (M) and half its width (N).
        M, N = temp_mask.shape[0] // 2, temp_mask.shape[1] // 2
        # Row offsets vary slowest, so tiles come out as top-left, top-right,
        # bottom-left, bottom-right.
        tiles = [
            temp_mask[x:x + M, y:y + N]
            for x in range(0, temp_mask.shape[0], M)
            for y in range(0, temp_mask.shape[1], N)
        ]
        # Only the first four tiles are written.
        for j in range(4):
            mask1 = cv2.imencode('.png', tiles[j])[1]
            img_out.writestr(files[idx][0] + f'_{j}.png', mask1)

In [None]:
# Scratch arithmetic; presumably a rough estimate of the number of exported
# tiles (images x 4 tiles each) — confirm against len(files).
600*4