# Segmentation of cell nuclei

## Setup

### Libraries

In [None]:
import cv2 
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Optional: use the Qt backend so plots open in zoomable windows (my preferred alternative to %matplotlib widget)
%matplotlib qt

### Load data

Data comes from the git repo through [git lfs](https://git-lfs.github.com/)

In [None]:
# Read every PNG under ./data. The paths are sorted so that the image
# numbering used in later plots is the same on every run and platform
# (the order produced by glob itself is unspecified).
images = []

for imagePath in sorted(Path("./data").glob("*.png")):
    image = cv2.imread(str(imagePath))
    # cv2.imread does not raise on failure; it returns None (e.g. for a
    # missing/corrupt file or an un-fetched git-lfs pointer file).
    if image is None or image.size == 0:
        print(f"Failed reading image {imagePath}")
        continue
    images.append(image)

if not images:
    # Fail with a clear message instead of the opaque error np.stack gives
    # for an empty list.
    raise FileNotFoundError(
        "No readable PNG images found in ./data (was `git lfs pull` run?)"
    )

# Combine into one array with a leading image axis; np.stack requires all
# images to have the same shape.
images = np.stack(images)
print(images.shape)

In [None]:
def colPlot(images, **kwargs):
    """Show a sequence of images side by side in one row of subplots.

    Images with more than two dimensions are treated as OpenCV colour images:
    their last axis is reversed (BGR -> RGB) before display. ``kwargs`` are
    forwarded to ``imshow`` only for 2-D (single-channel) images.
    """
    # squeeze=False keeps `axs` a 2-D array even for a single image; without
    # it plt.subplots returns a bare Axes object that cannot be zipped over.
    fig, axs = plt.subplots(1, len(images), squeeze=False)

    for im, ax in zip(images, axs[0]):
        if im.ndim > 2:  # if color
            ax.imshow(im[..., ::-1])  # opencv is BGR
        else:
            ax.imshow(im, **kwargs)

    fig.tight_layout()

## Part 1

First we analyse the channels of the image and pick the best way to "grayscale" it.

The red channel highlights the cell nuclei, while the G and B channels (which are equivalent) contain the grayscale, monochromatic image from the microscope.

### Plot all channels (RGB)

In [None]:
# Show every loaded image in colour, side by side
# (colPlot reverses the channel order of colour images for display).
colPlot(images)

### Plot red channel and R-B for comparison

* $I_R-I_B$ clearly shows the nuclei with high contrast and no unwanted features.
* $I_R$ shows more detail for other parts of the cell, but that detail introduces unwanted features that do not have a very high contrast with the nuclei.

**Use $I_f=I_R-I_B$ for segmentation**

In [None]:
# Row 0 shows the red channel, row 1 the R-B difference, one column per image.
# squeeze=False keeps `axs` two-dimensional even if only one image was loaded.
fig, axs = plt.subplots(2, len(images), sharey="col", sharex="col",
                        squeeze=False)

# the R channel (the images are in OpenCV's BGR order, so index 2 is red)
R = images[..., 2]
B = images[..., 0]

# the R-B difference image, saturating at 0.
# NOTE: the images are presumably unsigned 8-bit (cv2.imread default), where a
# plain `R - B` wraps around: a pixel with B slightly above R would become
# ~255 instead of 0. max(R, B) - B equals R - B where R >= B and 0 elsewhere,
# and keeps the original dtype.
diffRB = np.maximum(R, B) - B

for i, (im, ax) in enumerate(zip(R, axs[0])):
    ax.set_title(f"$I_{{ {i+1}R }}$")
    ax.imshow(im, cmap="gray")
    ax.axis("off")

for i, (im, ax) in enumerate(zip(diffRB, axs[1])):
    ax.set_title(f"$I_{{ {i+1}R }} - I_{{ {i+1}B }}$")
    ax.imshow(im, cmap="gray")
    ax.axis("off")

fig.tight_layout()

### Histograms

We can also see in a log-histogram that $I_R-I_B$ has a more distinct peak in its histogram, meaning higher contrast between background and features.


In [None]:

def histColPlot(images: np.ndarray, hist_args: dict):
    """Plot each image above the histogram of its pixel values.

    Args:
        images: Sequence of single-channel images (iterated along the first
            axis); each one is shown with a gray colormap.
        hist_args: Keyword arguments forwarded to ``Axes.hist`` for every
            histogram (e.g. ``bins``, ``log``).
    """
    # squeeze=False keeps `axs` two-dimensional even for a single image, so
    # axs[0] / axs[1] are always iterable rows of Axes.
    fig, axs = plt.subplots(2, len(images), squeeze=False)

    # top row: the images themselves
    for i, (im, ax) in enumerate(zip(images, axs[0])):
        ax.set_title(f"Image {i+1}")
        ax.imshow(im, cmap="gray")
        ax.axis("off")

    # bottom row: histogram over all pixels of the image above
    for i, (im, ax) in enumerate(zip(images, axs[1])):
        ax.set_title(f"Image {i+1} hist")
        # ravel() avoids the copy flatten() always makes; hist only reads it
        ax.hist(im.ravel(), **hist_args)

    fig.tight_layout()

In [None]:
# Use explicit integer bin edges (0, 1, ..., 256) so every 8-bit intensity
# level gets its own bin. An integer bin count such as 255 spreads equal-width
# bins over [min, max], which merges or skips integer levels.
# NOTE: assumes R holds 8-bit values (0-255) - confirm for new data.
histColPlot(R,
            hist_args=dict(bins=np.arange(257), log=True))

In [None]:
# Use explicit integer bin edges (0, 1, ..., 256) so every 8-bit intensity
# level gets its own bin. An integer bin count such as 255 spreads equal-width
# bins over [min, max], which produces empty "comb" bins when the data range
# is narrower than 0-255.
# NOTE: assumes diffRB holds 8-bit values (0-255) - confirm for new data.
histColPlot(diffRB,
            hist_args=dict(bins=np.arange(257), log=True))

### Determine $I_f$

$I_f$ is the grayscale image we'll use for segmentation. It is obtained by linearly stretching the $I_R-I_B$ image so that $\max(I_f)=255$ and $\min(I_f)=0$.

In [None]:
# Linear contrast stretch of the difference image to the full 0-255 range.
# The extrema are calculated over ALL images, so every image is scaled by
# the same factor (e.g. we don't stretch image 1 more than image 2).
lo = float(np.min(diffRB))
hi = float(np.max(diffRB))

if hi == lo:
    # A constant image has no contrast to stretch (and would divide by zero)
    raise ValueError("diffRB is constant; cannot stretch its contrast")

# Subtracting the minimum makes the stretch valid even when it is not 0.
# For the initial images the minimum was 0, in which case this is exactly
# diffRB * 255 / max(diffRB).
If = (diffRB.astype(np.float64) - lo) * 255.0 / (hi - lo)

# back to 8-bit (fractional parts are truncated)
If = If.astype(np.uint8)

# integer bin edges 0..256: one histogram bin per 8-bit intensity level
histColPlot(If, hist_args=dict(bins=np.arange(257), log=True))
plt.suptitle("$I_f$ and Histograms")

## Part 2