In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from pathlib import Path
from tqdm.notebook import tqdm
import collections
import glob
import gc
from IPython.display import display
import cv2

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingLR

import albumentations as al
from albumentations.pytorch.transforms import ToTensorV2

# Config

In [None]:
# Root data directory; every other location below is derived from it.
ORIGINAL_PATH = Path('../input/sartorius-cell-instance-segmentation/')

# Image folders.
TRAIN_IMAGE_PATH = ORIGINAL_PATH.joinpath('train')
TEST_IMAGE_PATH = ORIGINAL_PATH.joinpath('test')
TRAIN_SEMI_PATH = ORIGINAL_PATH.joinpath('train_semi_supervised')

# LIVECell annotation and image folders.
LIVE_CELL_ANT_PATH = ORIGINAL_PATH.joinpath('LIVECell_dataset_2021', 'annotations')
LIVE_CELL_IMG_PATH = ORIGINAL_PATH.joinpath('LIVECell_dataset_2021', 'images')

# CSV files.
TRAIN_PATH = ORIGINAL_PATH.joinpath('train.csv')
SUB_PATH = ORIGINAL_PATH.joinpath('sample_submission.csv')

# Function

In [None]:
def rle2mask(rle, shape):
    """Decode a run-length-encoded string into a binary mask.

    Parameters
    ----------
    rle : str
        Whitespace-separated integers read as alternating
        ``start length`` pairs. ``start`` is 1-based and indexes the
        flattened (row-major) mask.
    shape : sequence of int
        Shape of the returned mask.

    Returns
    -------
    numpy.ndarray
        Integer array of the requested shape holding 1 inside every run
        and 0 everywhere else.
    """
    flat = np.zeros(shape, dtype=int).ravel()
    tokens = np.array(rle.split(), dtype=int)
    # Even positions are starts (shifted to 0-based), odd positions are lengths.
    for begin, run in zip(tokens[0::2] - 1, tokens[1::2]):
        flat[begin:begin + run] = 1
    return flat.reshape(shape)

# Read data

In [None]:
# Load the sample submission file and preview its first five rows.
sub_df = pd.read_csv(SUB_PATH)
sub_df.head(n=5)

In [None]:
# Load the training table and report how many distinct image ids it contains.
train_df = pd.read_csv(TRAIN_PATH)
print(f"Number of image: {train_df['id'].nunique()}")

# Preview the last five rows.
train_df.tail(n=5)

In [None]:
# Number of distinct images per cell type. Image ids can repeat across rows of
# ``train_df``, so distinct ids are counted; a plain ``count()`` would tally
# rows instead of images and contradict the ``number_image`` column name.
cell_df = (
    train_df.groupby('cell_type')['id']
    .nunique()
    .reset_index(name='number_image')
)

# Donut chart: one slice per cell type, labelled with name, value and percent.
fig = go.Figure()
fig.add_trace(go.Pie(values=cell_df.number_image, labels=cell_df.cell_type, hole=0.2, textinfo='label+value+percent'))
fig.update_layout(
    title={
        'text': 'Distribution of cell type', 'font': {'color': 'blue', 'size': 16},
        'y': 0.9, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'
    },
    showlegend=False
)

fig.show()

# The aggregate is only needed for this figure.
del cell_df
gc.collect()

In [None]:
cell_types = ['astro', 'cort', 'shsy5y']
colors = ['#A56CC1', '#A6ACEC', '#63F5EF']

# One row per image id, one column per cell type, holding the annotation count.
annot_df = train_df.pivot_table(
    index='id',
    columns='cell_type',
    values='annotation',
    aggfunc='count',
    fill_value=0,
).reset_index()
# Total annotations per image, and how many cell types have a non-zero count.
annot_df['number_annot'] = annot_df[cell_types].sum(axis=1)
annot_df['number_cell_type'] = annot_df[cell_types].ne(0).sum(axis=1)

display(annot_df.head())

# For each cell type keep only the images where it occurs at least once.
astro = annot_df.loc[annot_df['astro'] != 0, 'astro'].values
cort = annot_df.loc[annot_df['cort'] != 0, 'cort'].values
shsy5y = annot_df.loc[annot_df['shsy5y'] != 0, 'shsy5y'].values

title_layout = {
    'text': 'Distribution of cell type in each image', 'font': {'color': 'blue', 'size': 16},
    'y': 0.9, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'
}
fig = ff.create_distplot([astro, cort, shsy5y], cell_types, colors=colors, bin_size=5)
fig.update_layout(title=title_layout)
fig.show()

In [None]:
# Collect the PNG file paths found directly inside each image folder.
train_image = glob.glob(str(TRAIN_IMAGE_PATH / '*.png'))
train_semi_image = glob.glob(str(TRAIN_SEMI_PATH / '*.png'))
test_image = glob.glob(str(TEST_IMAGE_PATH / '*.png'))
print(
    f'Train image: {len(train_image)}, '
    f'train semi: {len(train_semi_image)}, '
    f'test image: {len(test_image)}'
)

# Display image

In [None]:
def display_image(df, image_folder, cell_type, number_sample=3):
    """Show randomly chosen images of one cell type with their masks overlaid.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``id``, ``cell_type`` and ``annotation``;
        each ``annotation`` value is decoded with ``rle2mask``.
    image_folder : list of str
        Image file paths. The first path containing the sampled id is used.
    cell_type : str
        One of ``'astro'``, ``'cort'`` or ``'shsy5y'``; selects both the rows
        of ``df`` and the overlay colour.
    number_sample : int, default 3
        Number of figures to draw. Ids are drawn with ``np.random.choice``
        (with replacement), so the same image may be shown more than once.

    Raises
    ------
    FileNotFoundError
        If no path in ``image_folder`` contains the sampled id, or if the
        image file cannot be read.
    """
    colors = {'astro': (249,38,114), 'cort': (166,226,46), 'shsy5y': (102,217,239)}
    cell_df = df[df.cell_type==cell_type].reset_index(drop=True)
    list_image = cell_df.id.unique()
    for _ in range(number_sample):
        image_id = np.random.choice(list_image)
        sample_df = cell_df[cell_df.id==image_id]

        matches = [path for path in image_folder if image_id in path]
        if not matches:
            raise FileNotFoundError(f'No image file found for id {image_id}')
        image_path = matches[0]

        # cv2.imread reports failure by returning None instead of raising.
        img = cv2.imread(image_path)
        if img is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')
        plane_shape = img.shape[:-1]

        # Union of all instance masks. Bitwise OR keeps the result binary and,
        # unlike a uint8 sum, cannot wrap around where many instances overlap.
        mask = np.zeros(plane_shape, dtype=np.uint8)
        for rle in sample_df.annotation:
            mask |= rle2mask(rle, plane_shape).astype(np.uint8)

        # Paint the mask in the cell-type colour and blend it over the image.
        mask_img = np.zeros_like(img)
        mask_img[mask==1, :] = colors[cell_type]
        final = cv2.addWeighted(mask_img, 0.5, img, 1, 0)

        fig, ax = plt.subplots(1, figsize=(30, 30))
        ax.imshow(final)
        ax.set_title(f'{cell_type}: {image_id}', fontsize=30, color='blue')
        plt.show()
    
# Draw three random samples for every cell type.
for cell_type in cell_types:
    display_image(
        df=train_df,
        image_folder=train_image,
        cell_type=cell_type,
        number_sample=3,
    )

# Dataset

In [None]:
def aug(sub='train', size=(512, 512)):
    """Build the albumentations pipeline for one data split.

    Parameters
    ----------
    sub : {'train', 'validation', 'test'}, default 'train'
        ``'train'`` prepends random flips, transpose, shift/scale/rotate,
        blur and sharpen/emboss/brightness-contrast steps. All three splits
        end with resize, normalisation and tensor conversion.
    size : sequence of two ints, default (512, 512)
        ``size[0]`` and ``size[1]`` are passed to ``al.Resize`` in that order.

    Returns
    -------
    albumentations.Compose
        The composed pipeline.

    Raises
    ------
    ValueError
        If ``sub`` is not one of the supported split names. Previously an
        unknown name silently produced ``None``.
    """
    if sub not in ('train', 'validation', 'test'):
        raise ValueError(
            f"Unknown sub {sub!r}; expected 'train', 'validation' or 'test'"
        )

    steps = []
    if sub == 'train':
        # Random augmentations, applied before the deterministic tail.
        steps += [
            al.HorizontalFlip(p=0.5),
            al.VerticalFlip(p=0.5),
            al.Transpose(p=0.5),
            al.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=45, p=0.5),
            al.OneOf([
                al.MotionBlur(p=0.3),
                al.MedianBlur(p=0.3),
                al.Blur(p=0.4)
            ], p=0.3),
            al.OneOf([
                al.Sharpen(p=0.3),
                al.Emboss(p=0.3),
                al.RandomBrightnessContrast(p=0.4)
            ], p=0.3),
        ]

    # Deterministic tail shared by every split.
    # NOTE(review): the mean/std values look like the usual ImageNet channel
    # statistics - confirm against the backbone that consumes these tensors.
    steps += [
        al.Resize(size[0], size[1]),
        al.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        # Only the splits that carry a mask request transpose_mask.
        ToTensorV2() if sub == 'test' else ToTensorV2(transpose_mask=True),
    ]
    return al.Compose(steps, p=1)

In [None]:
class sartoriousDataset(Dataset):
    """Dataset yielding one image (plus merged mask and label) per file path.

    Parameters
    ----------
    df : pandas.DataFrame
        Annotation table with the columns ``id``, ``annotation`` and
        ``cell_type``. Only read when ``sub != 'test'``.
    image_folder : list of str
        Image file paths; the dataset length is the length of this list.
    size : sequence of two ints
        Forwarded to ``aug`` when ``has_transforms`` is true.
    has_transforms : bool
        Whether to build and apply the pipeline returned by ``aug(sub, size)``.
    sub : str, default 'train'
        Split name. ``'test'`` items carry no mask or label.
    """

    def __init__(self, df, image_folder, size, has_transforms, sub='train'):
        super().__init__()

        self.df = df
        self.image_folder = image_folder
        self.has_transforms = has_transforms
        self.sub = sub
        self.list_label = ['astro', 'cort', 'shsy5y']
        # Always define the attribute so it exists even without a pipeline.
        self.transforms = aug(sub, size) if self.has_transforms else None

    def __len__(self):
        """Return the number of image paths."""
        return len(self.image_folder)

    def __getitem__(self, idx):
        """Load one sample.

        Returns
        -------
        tuple
            ``(image, mask, label)`` when ``sub != 'test'``, where ``label``
            is the index of the image's cell type in ``list_label``;
            ``(image_id, image)`` otherwise. Without transforms the non-test
            image is the raw array returned by ``cv2.imread`` and the mask is
            a float tensor; the test image is a float tensor.

        Raises
        ------
        FileNotFoundError
            If the image file cannot be read.
        ValueError
            If ``sub != 'test'`` and ``df`` has no rows for the image id.
        """
        image_path = self.image_folder[idx]
        # File name without extension, independent of the path separator.
        image_id = Path(image_path).stem

        # cv2.imread reports failure by returning None instead of raising.
        image = cv2.imread(str(image_path))
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')
        h, w = image.shape[:-1]

        if self.sub != 'test':
            image_df = self.df[self.df.id==image_id]
            if image_df.empty:
                raise ValueError(f'No annotation rows found for image id {image_id!r}')

            # Union of all instance masks. Bitwise OR keeps the result binary
            # and cannot wrap around the way a uint8 sum can.
            mask = np.zeros([h, w], dtype=np.uint8)
            for rle in image_df.annotation:
                mask |= rle2mask(rle, [h, w]).astype(np.uint8)

            # The cell type of the last row is used as the image label.
            label = self.list_label.index(image_df.cell_type.iloc[-1])

            if self.has_transforms:
                augmented = self.transforms(image=image, mask=mask)
                image, mask = augmented['image'], augmented['mask']
            else:
                mask = torch.from_numpy(mask).float()
            return image, mask, label

        if self.has_transforms:
            image = self.transforms(image=image)['image']
        else:
            image = torch.from_numpy(image).float()
        return image_id, image
    
# Smoke test: pull one augmented training batch and plot it with masks painted in.
bs = 4
colors = {'astro': (0.1, 0.1, 0.1), 'cort': (0.5, 0.5, 0.5), 'shsy5y': (0.8, 0.8, 0.8)}
ds = sartoriousDataset(
    df=train_df,
    image_folder=train_image,
    size=(512, 512),
    has_transforms=True,
    sub='train',
)
dl = DataLoader(ds, batch_size=bs, shuffle=True)

image, mask, label = next(iter(dl))
image = image.detach().cpu().numpy()
mask = mask.detach().cpu().numpy()

fig, ax = plt.subplots(1, bs, figsize=(30, 30))
for i in range(bs):
    cell_name = ds.list_label[label[i]]
    # Move the channel axis last for imshow.
    img = image[i].transpose(1, 2, 0)
    # Paint masked pixels with the per-cell-type value.
    img[mask[i] == 1, :] = colors[cell_name]
    ax[i].imshow(img)
    ax[i].set_title(f'{cell_name}')

# Release the preview dataset and loader.
del ds, dl
gc.collect()