## The goal of this notebook is to experiment with a different "label_cell" function.

### The code for segmentation is from the notebook by RDizzl3: 
https://www.kaggle.com/rdizzl3/hpa-segmentation-masks-no-internet

### While the original post-processing implementation gives superior results, it can be slow in practice. It would be nice to eventually find a faster version with similar quality. 

### The version here still uses hpacellsegmentator. It has a faster post-processing step, at the expense of lower quality. In my experience it gives a 5-6 times speed-up.


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
import imageio
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames[:10]:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
%config Completer.use_jedi = False
import cv2

In [None]:
# Install pycocotools from a local wheel file; the filename tags it as a CPython 3.7 (cp37) Linux x86_64 build.
!pip install "../input/pycocotools/pycocotools-2.0-cp37-cp37m-linux_x86_64.whl"

In [None]:
# Install pytorch_zoo from a local source directory under ../input.
# NOTE(review): presumably a dependency of the segmentation package installed in the next cell - confirm.
!pip install "../input/hpapytorchzoozip/pytorch_zoo-master"

In [None]:
# Install the HPA cell segmentation package (imported below as `hpacellseg`) from a local source directory.
!pip install "../input/hpacellsegmentatormaster/HPA-Cell-Segmentation-master"

In [None]:
def build_image_names(
    image_id: str,
    image_dir: str = '/kaggle/input/hpa-single-cell-image-classification/train',
) -> list:
    """Build the per-channel file paths for one image.

    Args:
        image_id: Image identifier; the channel files are named
            ``{image_id}_red.png``, ``{image_id}_yellow.png`` and
            ``{image_id}_blue.png``.
        image_dir: Directory holding the channel PNGs. Defaults to the
            competition's training folder; a trailing ``/`` is ignored.

    Returns:
        ``[[red_path], [yellow_path], [blue_path]]`` - three single-element
        lists, in that order.
    """
    base = image_dir.rstrip('/')

    # mt is the microtubule channel (red)
    mt = f'{base}/{image_id}_red.png'

    # er is the endoplasmic reticulum channel (yellow)
    er = f'{base}/{image_id}_yellow.png'

    # nu is the nuclei channel (blue)
    nu = f'{base}/{image_id}_blue.png'

    return [[mt], [er], [nu]]

In [None]:
import hpacellseg.cellsegmentator as cellsegmentator
from hpacellseg.utils import label_cell, label_nuclei

# Local paths to the pretrained weight files for the nuclei model and the cell model.
NUC_MODEL = '../input/hpacellsegmentatormodelweights/dpn_unet_nuclei_v1.pth'
CELL_MODEL = '../input/hpacellsegmentatormodelweights/dpn_unet_cell_3ch_v1.pth'

# Single shared segmentator instance; Factory_Segmentation and Coarse_Segmentation
# below both call its pred_cells / pred_nuclei methods.
# NOTE(review): device='cuda' presumably requires a GPU runtime; the exact meaning of
# scale_factor, padding and multi_channel_model is defined by hpacellseg - confirm
# against its documentation before changing them.
segmentator = cellsegmentator.CellSegmentator(
    NUC_MODEL,
    CELL_MODEL,
    scale_factor=0.25,
    device='cuda',
    padding=False,
    multi_channel_model=True
)

# A rudimentary algorithm to get large connected components

### You might want to experiment with shade, cutoff and distance

In [None]:
def _find_root(parent, node):
    """Follow parent links from ``node`` up to the entry that is its own parent."""
    while parent[node] < node:
        node = parent[node]
    return node


def _link_rows(parent, starts, ends, upper, lower, distance):
    """Union each run in ``upper`` with every sufficiently long, overlapping run in ``lower``."""
    for p in upper:
        a, b = starts[p], ends[p]
        if b - a <= distance:
            continue
        for q in lower:
            c, d = starts[q], ends[q]
            if d - c <= distance:
                continue
            # Column spans must overlap; touching at a corner also counts.
            if not (a <= c <= b or c <= a <= d):
                continue
            if parent[p] == parent[q]:
                continue
            root_p = _find_root(parent, p)
            root_q = _find_root(parent, q)
            # The smaller index always becomes the root, so parent[x] <= x holds
            # throughout and _find_root is guaranteed to terminate.
            root = min(root_p, root_q)
            parent[root_p] = root
            parent[root_q] = root
            parent[p] = root
            parent[q] = root


def Components(Image, shade = 230, cutoff = 5000, distance = 20):
    """Label the largest bright connected regions of ``Image[:, :, 2]``.

    The channel is thresholded at ``shade`` and split into horizontal runs of
    bright pixels. Runs on consecutive rows are merged into one component when
    their column spans overlap and both runs are longer than ``distance``.

    Args:
        Image: Array indexed as ``Image[row, col, channel]``; only channel 2
            is read.
        shade: Pixels strictly greater than this value count as foreground.
        cutoff: Components whose weight is not greater than this are dropped.
        distance: Only runs longer than this many pixels link rows together.

    Returns:
        A ``uint8`` array of shape ``Image.shape[:2]``. Background is 0. At
        most the 100 heaviest components are kept; with ``t`` kept components
        the heaviest is labelled ``t`` and the lightest 1.
    """
    mask = np.array(Image[:, :, 2] > shade, dtype=int)
    # Clearing the border columns guarantees every run has both a rising and a
    # falling edge, so the two edge lists below pair up index by index.
    mask[:, 0] = 0
    mask[:, -1] = 0
    edges = mask[:, 1:] - mask[:, :-1]
    rise_rows, rise_cols = np.where(edges == 1)
    fall_cols = np.where(edges == -1)[1]

    # Run i lies on rows[i] and covers columns starts[i] + 1 .. ends[i].
    rows = rise_rows.tolist()
    starts = rise_cols.tolist()
    ends = fall_cols.tolist()
    n = len(rows)

    parent = list(range(n))
    upper, lower = [], []  # runs on `row` and on `row + 1`
    row = 0
    for i, new_row in enumerate(rows):
        if new_row == row:
            upper.append(i)
        elif new_row == row + 1:
            lower.append(i)
        else:
            _link_rows(parent, starts, ends, upper, lower, distance)
            if new_row == row + 2:
                upper, lower = lower, [i]
                row += 1
            else:
                upper, lower = [i], []
                row = new_row
    # Merge the row pair still pending when the scan ends; without this the
    # last row containing runs would never be linked to the row above it.
    _link_rows(parent, starts, ends, upper, lower, distance)

    # Flatten so that every run points directly at its component root.
    for i in range(n):
        parent[i] = _find_root(parent, i)

    # Each run contributes (length + 1) to its component's weight; this
    # weighting is kept as-is so existing `cutoff` values select the same
    # components.
    weight = {}
    for root, a, b in zip(parent, starts, ends):
        weight[root] = weight.get(root, 0) + (b - a + 1)

    # Heaviest first, ties broken by root index; keep at most 100 so that the
    # labels fit comfortably in uint8.
    ranked = sorted((-w, root) for root, w in weight.items() if w > cutoff)[:100]
    rank = {root: position for position, (_, root) in enumerate(ranked)}
    kept = len(rank)

    Segmented_Image = np.zeros((Image.shape[0], Image.shape[1]), dtype=np.uint8)
    for i, root in enumerate(parent):
        position = rank.get(root)
        if position is not None:
            # The rising edge sits one column before the first bright pixel,
            # hence the +1 on both slice bounds.
            Segmented_Image[rows[i], starts[i] + 1:ends[i] + 1] = kept - position

    return Segmented_Image

# The two versions:

In [None]:
def Factory_Segmentation(file_id):
    """Segment one image using hpacellseg's own ``label_cell`` post-processing.

    Runs the cell and nuclei predictors on the channel files of ``file_id``
    and returns element 1 of the ``label_cell`` result.
    """
    channel_paths = build_image_names(file_id)
    cell_prediction = segmentator.pred_cells(channel_paths)[0]
    # Nuclei are predicted from the third path group only.
    nuclei_prediction = segmentator.pred_nuclei(channel_paths[2])[0]
    labelled = label_cell(nuclei_prediction, cell_prediction)
    return labelled[1]

In [None]:
def Coarse_Segmentation(file_id):
    """Segment one image using the faster ``Components`` post-processing.

    Only the cell predictor is run; its first output is passed to
    ``Components`` with that function's default parameters.
    """
    cell_prediction = segmentator.pred_cells(build_image_names(file_id))[0]
    return Components(cell_prediction)


In [None]:
# Load the training metadata; its 'ID' column is used below to pick example images.
Train = pd.read_csv('../input/hpa-single-cell-image-classification/train.csv')
# Last expression in the cell, so the notebook displays the first rows.
Train.head()

# Comparing speed (with GPU on)

### Without GPU, I think the speed is way too slow for large scale experiments

In [None]:
# IDs of the first two training images, used for the timing and visual comparisons below.
file_0 = Train['ID'][0]
file_1 = Train['ID'][1]

In [None]:
%%timeit
# Time the coarse pipeline (cell prediction + Components) on the first image.
# NOTE(review): names assigned inside a %%timeit cell are presumably not kept afterwards;
# the plotting cells further down recompute the segmentation rather than reuse this one.
coarse_segmentation_0 = Coarse_Segmentation(file_0)

In [None]:
%%timeit
# Time the factory pipeline (cell + nuclei prediction + label_cell) on the first image.
precise_segmentation_0 = Factory_Segmentation(file_0)

In [None]:
%%timeit
# Time the coarse pipeline on the second image.
coarse_segmentation_1 = Coarse_Segmentation(file_1)

In [None]:
%%timeit
# Time the factory pipeline on the second image.
precise_segmentation_1 = Factory_Segmentation(file_1)

In [None]:
def color_image(file_id):
    """Compose a 3-channel picture from the three channel files of ``file_id``.

    Each file is read with ``cv2.imread`` and its channel 0 is copied into the
    output: the red file into channel 0, the yellow file into channel 1 and
    the blue file into channel 2. The output has the shape and dtype of the
    first image read.
    """
    red_paths, yellow_paths, blue_paths = build_image_names(file_id)
    planes = (
        cv2.imread(red_paths[0]),
        cv2.imread(yellow_paths[0]),
        cv2.imread(blue_paths[0]),
    )
    composite = np.zeros_like(planes[0])
    for channel, plane in enumerate(planes):
        composite[:, :, channel] = plane[:, :, 0]
    return composite

# Comparing Images

In [None]:
# Side-by-side comparison for the first image: composite picture, factory masks, coarse masks.
figs, axs = plt.subplots(1,3, figsize= (20,5))
axs[0].imshow(color_image(file_0))
axs[0].set_title('Picture')
# Both segmentations are recomputed here, so this cell runs the models again.
axs[1].imshow(Factory_Segmentation(file_0))
axs[1].set_title('Factory Segmentation')
axs[2].imshow(Coarse_Segmentation(file_0))
axs[2].set_title('Coarse Segmentation')
plt.show()

In [None]:
# Same three-panel comparison for the second image.
figs, axs = plt.subplots(1,3, figsize= (20,5))
axs[0].imshow(color_image(file_1))
axs[0].set_title('Picture')
axs[1].imshow(Factory_Segmentation(file_1))
axs[1].set_title('Factory Segmentation')
axs[2].imshow(Coarse_Segmentation(file_1))
axs[2].set_title('Coarse Segmentation')
plt.show()

# More examples

In [None]:
# Repeat the three-panel comparison for the training rows at positions 2 through 11 (ten images).
for i in range(2,12):
    file = Train["ID"][i]
    # One new figure per image: picture, factory segmentation, coarse segmentation.
    figs, axs = plt.subplots(1,3, figsize= (20,5))
    axs[0].imshow(color_image(file))
    axs[0].set_title('Picture')
    axs[1].imshow(Factory_Segmentation(file))
    axs[1].set_title('Factory Segmentation')
    axs[2].imshow(Coarse_Segmentation(file))
    axs[2].set_title('Coarse Segmentation')
    plt.show()