In [None]:
# Change directory so 'Polygeist' is in the path.
import os

os.chdir("../..")

import pathlib
import random
import shutil
import time

# OS & Utilities
from glob import glob

# Maths, Image, Science Libraries
import imageio.v3 as io
import numpy as np
from pqdm.processes import pqdm
from sklearn import metrics

# Polygeist libraries
import polygeist.colour as pc
from polygeist.slidecore.slide import (
    AperioSlide,
    SpectralSlideGenerator,
    SyntheticSlide,
)
from polygeist.training import train_model
from polygeist.utils import (
    calc_median_score_list,
    collect_cases,
    load_filenames_and_generate_conditions,
    plot_roc,
    region_count_score_list,
)

In [None]:
%matplotlib widget
from matplotlib import pyplot as plt

# Overview

This workbook explains the core principles of the colourimetric-based segmentation of microscope images.  It loads a number of basis functions for the Aperio L2 Microscope, brain and staining functions, and processes them to produce the RGB values likely to be found within images.  The RGB values in the real images are then decomposed into their basis components by fitting them to those likely RGB values.

This workbook should be treated as an executable record of the development process. For a full workflow using the library functions, see WP2 onwards.

# Photometric Calibration of the microscope

Before we can segment our images for our stained protein, we must determine the sensor response from our microscope; given the integration of our stain transmission spectra, our lightsource, and our sensor sensitivity functions.

Here we:
* Load the sensitivities of the sensor, the illumination power of the light source (by wavelength), and the transmission of the stains / matter under examination.
* Calculate the sensor response array, to be used to decompose a sensor response

In [None]:
from polygeist.microscope import Microscope

In [None]:
# Build the microscope model. The cells below use its wavelength grid (`ms.wl`),
# its light source (`ms.ls`) and its `ms.response` method.
ms = Microscope()

In [None]:
# Load each spectrum from CSV, passing the microscope's wavelength grid (`ms.wl`).
# NOTE(review): presumably the loader resamples each file onto that grid — confirm
# against polygeist.colour.Illuminant.from_file_with_range.

# DAB stain
DAB = pc.Illuminant.from_file_with_range("spectral/histology/DABL20Vector.csv", ms.wl)
# Haematoxylin (the data file itself is spelled "Hemotoxylin")
H = pc.Illuminant.from_file_with_range("spectral/histology/HemotoxylinC19.csv", ms.wl)
# Eosin
E = pc.Illuminant.from_file_with_range("spectral/histology//Eosin6um.csv", ms.wl)
# Healthy brain absorption (turned into a transmission function in the next cell)
_B = pc.Illuminant.from_file_with_range(
    "spectral/histology/HealthyBrainAbsorption9um.csv", ms.wl
)

In [None]:
# Calculate the transmission function for the brain.
# NaN absorption samples are zeroed in place first; otherwise np.max below
# would return NaN and the whole transmission curve would become NaN.
_B[np.isnan(_B)] = 0
# Scale the absorption by its peak and invert it: 1.0 where absorption is zero,
# 0.0 at the absorption peak.
B = 1.0 - (_B / np.max(_B))

In [None]:
# Sensor RGB response for every basis spectrum, stacked in this fixed order:
# DAB, Hematoxylin, light source, brain transmission, Eosin.
basis_spectra = [DAB, H, ms.ls, B, E]
responses = np.vstack([ms.response(spectrum) for spectrum in basis_spectra])
print(responses)

# The values are specific to the AT2 sensor and light source (brain transmission
# * filter responses). See the reports for how to generate them for other
# sensing systems. Expected output:

# 7.10357511 12.61506218 13.59695489  # DAB
# 8.81961536 10.18302502  5.08567669  # Hematoxylin
# 11.02074647 15.41804066 12.77042165 # Light Source
# 17.05035857 17.64819458  9.17788779 # Brain Transmission
# 0.45574971  4.32897163  1.68161384  # Eosin

# Example Fit

Here we fit our loaded image to the computed response functions and display the results.  This uses a least-squares fit of each pixel's RGB value against the sensor responses calculated above.

In [None]:
# Load the data or use simulated data.
# True  -> generate synthetic slides (no slide files required).
# False -> read real Aperio slides from the paths configured below.
simulated_data = True

In [None]:
# Path of the real slide to load; only read when simulated_data is False.
# Put your path here if you are not using simulated data!
example_slide_path = "localnas/A-syn cases/PD/PD788/PD788-17_A-syn.svs"

if not simulated_data:
    # Real slide, requested at 2.0 microns per pixel.
    example_slide = AperioSlide(example_slide_path)
    image = example_slide.get_slide_with_pixel_resolution_in_microns(2.0)
else:
    # Small synthetic slide, 100 x 100.
    image = SpectralSlideGenerator(width=100, height=100).image

In [None]:
# Preview the loaded (or simulated) slide.
plt.figure()
# Cast to int before display.
# NOTE(review): assumes the image holds 0-255 RGB values, which imshow expects
# for integer input — confirm.
plt.imshow(image.astype(int))
plt.show()

In [None]:
# define the normalisation function we will use to scale within our planes
def normalise(F):
    """Shift an array to be non-negative and scale it so its maximum is 1.

    The array is shifted by ``max(|min(F)|, |max(F)|)`` and then divided by
    the maximum of the shifted data.

    Unlike the previous implementation, this works on a float copy, so the
    caller's array (or a view into another array) is never modified and
    integer input is accepted.

    Parameters
    ----------
    F : array_like
        Values to normalise.

    Returns
    -------
    numpy.ndarray
        A new float array with the same shape as ``F``. An empty input is
        returned as an empty array, and an all-zero input is returned as
        zeros instead of dividing by zero.
    """
    scaled = np.array(F, dtype=float)  # always a copy
    if scaled.size == 0:
        return scaled

    # Shift by the largest magnitude so every value becomes non-negative.
    scaled += max(np.abs(scaled.min()), np.abs(scaled.max()))

    # After the shift the peak is zero only when the input was all zeros.
    peak = scaled.max()
    if peak > 0:
        scaled /= peak
    return scaled

In [None]:
# Convert our image to Nx3 tristimulus values (one row per pixel)
T_I = image.reshape((image.shape[0] * image.shape[1], 3))

# Least-squares fit to our response functions. T_x[0] holds one row of
# per-pixel weights for each basis, in the same order as `responses`.
T_x = np.linalg.lstsq(responses.T, T_I.T, rcond=None)

# Normalise our responses per map for DAB (row 0) and Brain Transmission (row 3).
# These get their own names so the `DAB` stain spectrum loaded earlier is not
# overwritten; otherwise re-running the `responses` cell after this one would
# silently use a pixel map in place of the spectrum.
DAB_map = normalise(T_x[0][0].copy())
BT_map = normalise(T_x[0][3].copy())

# Remove the background (brain) from the DAB response, leaving the stain signal
raw_DAB = DAB_map - BT_map
raw_DAB = raw_DAB.reshape(image.shape[0], image.shape[1])

# Un-normalised weight maps, reshaped back to image layout for plotting
_DAB = T_x[0][0].reshape(image.shape[0], image.shape[1])
_BT = T_x[0][3].reshape(image.shape[0], image.shape[1])

# Values below our raw threshold will be considered A-syn (typically BT has significantly more power)
raw_threshold = -0.3

In [None]:
# Show our decomposition
fig, axs = plt.subplots(2, 2)

fig.suptitle("Comparisons between Original, Decomposed & Isolated Signals")
axs[0, 0].set_title("RGB Image")
axs[0, 0].imshow(image.astype(int))
axs[0, 1].set_title("DAB Estimate")
axs[0, 1].imshow(normalise(_DAB))
axs[1, 0].set_title("Brain Transmission Estimate")
axs[1, 0].imshow(normalise(_BT))
axs[1, 1].set_title("Asyn Isolation")
# Here we threshold the raw DAb signal to get A-syn
axs[1, 1].imshow(raw_DAB < raw_threshold)

# Example Segmentation of Images Using the Microscope Calibration

Below you can see how the output matrix of the above calibration can be used to segment a signal we are interested in.  We do this by calculating the linear combination of weightings that produces each observed pixel, given the possible sensor responses.  Then, to isolate the signal of interest (DAB), we subtract a uniform signal (Brain Tissue) to leave just the stain response.  We will segment the brain slides and produce densities and JPEGs of the areas in question.

In [None]:
# Notebook-wide settings, read by the processing and analysis cells below.
# NOTE(review): "dump_path" and "search_directories" are absolute paths from the
# original author's machine — change them before running elsewhere.
config = {
    # The threshold level in RGB to use for our pre-existing RGB thresholding routines from Phase 1
    "B_channel_threshold": 20,
    # This is where we will be storing our segmented data
    "dump_path": "/run/media/brad/ScratchM2/maps_dump_512_jpeg/",  # maps_dump_512_jpeg
    # This is our window size, over which we will iterate.
    "map_stride": 512,
    # Our negative cases, our controls, are stored in the 'Controls' folder
    "negative_case_folder": "Controls",
    # Our positive, PD cases, are stored in the 'PD' folder
    "positive_case_folder": "PD",
    # Other search paths to look for cases
    "search_directories": [
        "/run/media/brad/TBSSD1/A-syn cases/",
        "/run/media/brad/TBSSD2/A-syn cases/",
    ],
    # These are our case filenames
    "case_files": "Data/filenames/asyn_files.txt",
    # For use with real data, this filters slides with the wrong index
    "slide_index_filter": "17_A",
    # Toggle this if you are using simulated data
    "simulated": simulated_data,
    # Number of slides to simulate
    "simulation_n": 4,
}

In [None]:
def spectral_decompose_and_dump_jpeg(
    slice,
    stride=1000,
    process_raw=True,
    threshold=0.175,
    pc=0.0375,
    raw_threshold=-0.3,
    raw_pc=0.00125,
    jpeg_dump_path_and_name="start_name",
):
    """Decompose a slide into stain components and dump the tiles that contain signal.

    The slide image is fitted by least squares to fixed sensor responses,
    giving per-pixel weights for each basis. The normalised brain-transmission
    weights are subtracted from the normalised DAB weights, and the result is
    scanned in ``stride`` x ``stride`` tiles. A tile whose fraction of
    above-criterion pixels exceeds the limit is written out as
    ``{jpeg_dump_path_and_name}{n}.jpg``, where ``n`` counts the tiles written.

    Parameters
    ----------
    slice : object
        Slide exposing ``get_slide_with_pixel_resolution_in_microns``; it is
        called with 2.0 and must return a (height, width, 3) array.
        (The name shadows the ``slice`` builtin but is kept for callers.)
    stride : int
        Tile edge length in pixels.
    process_raw : bool
        True: threshold the DAB-minus-brain difference directly with
        ``raw_threshold`` / ``raw_pc``. False: colour-map both signals first
        and use ``threshold`` / ``pc``.
    threshold : float
        Colour-mapped mode: minimum absolute difference between channel 0 and
        channel 2 for a pixel to count.
    pc : float
        Colour-mapped mode: fraction of ``stride**2`` pixels that must count
        for the tile to be written. (Inside this function the name shadows
        the notebook's ``pc`` module alias.)
    raw_threshold : float
        Raw mode: pixels with a difference below this value count.
    raw_pc : float
        Raw mode: fraction of the tile's pixels that must count for the tile
        to be written.
    jpeg_dump_path_and_name : str
        Path prefix for the written JPEG tiles.

    Returns
    -------
    numpy.ndarray
        Array of shape (ceil(height / stride), ceil(width / stride)) holding,
        for each tile, the fraction that was compared against ``raw_pc`` or
        ``pc``. Previously this array was allocated but never filled, so the
        function always returned zeros.
    """
    # Sensor-specific responses (brain transmission * filter responses) for the
    # AT2 sensor and light source. See reports for how to generate them for
    # other sensing systems. Written as a plain array literal rather than
    # parsing a string through the discouraged np.matrix class.
    responses = np.array(
        [
            [7.10357511, 12.61506218, 13.59695489],  # DAB
            [8.81961536, 10.18302502, 5.08567669],  # Hematoxylin
            [11.02074647, 15.41804066, 12.77042165],  # Light Source
            [17.05035857, 17.64819458, 9.17788779],  # Brain Transmission
            [0.45574971, 4.32897163, 1.68161384],  # Eosin
        ]
    )

    # Get the slide at 2.0 microns per pixel
    image = slice.get_slide_with_pixel_resolution_in_microns(2.0)

    # Height and width of the slide image
    yy, xx, _ = image.shape

    # One density entry per tile, including partial tiles at the edges
    x_pass = int(np.ceil(xx / stride))
    y_pass = int(np.ceil(yy / stride))
    densities = np.zeros((y_pass, x_pass))

    # Convert our image to Nx3 tristimulus values
    T_I = image.reshape((yy * xx, 3))

    # Least-squares fit to our response functions
    T_x = np.linalg.lstsq(responses.T, T_I.T, rcond=None)

    # Normalise our responses per map for DAB (row 0) and Brain Transmission (row 3)
    DAB = normalise(T_x[0][0].copy())
    BT = normalise(T_x[0][3].copy())

    # Remove the background (brain) from the DAB response, leaving just the DAB mask
    if process_raw:
        raw_DAB = (DAB - BT).reshape(yy, xx)
    else:
        # NOTE(review): colourmap_1d is neither defined nor imported in the
        # visible notebook, so this branch raises NameError unless it is
        # provided elsewhere — confirm where it lives.
        mDAB = colourmap_1d(np.array([1.0, 1.0, 0.0]), np.array([0.0, 0.0, 1.0]), DAB)
        mBT = colourmap_1d(np.array([1.0, 1.0, 0.0]), np.array([0.0, 0.0, 1.0]), BT)
        raw_DAB = (mDAB - mBT).reshape(yy, xx, 3)

    # Ticker for the JPEG index
    dump_number = 0

    # Tumble over the slide using a fixed window size
    for xi, x in enumerate(range(0, xx, stride)):
        for yi, y in enumerate(range(0, yy, stride)):
            # Grab this section x -> x + stride, y -> y + stride
            section = raw_DAB[y : y + stride, x : x + stride]

            if process_raw:
                # Fraction of this tile's pixels below the raw threshold
                density = np.mean(section < raw_threshold)
                is_region_of_interest = density > raw_pc
            else:
                diff = np.abs(
                    section[:, :, 0].astype(float) - section[:, :, 2].astype(float)
                )
                # Always divided by a full tile's area, so partial edge tiles
                # are judged against a full tile (unchanged from the original).
                density = np.sum(diff > threshold) / (stride**2)
                is_region_of_interest = density > pc

            densities[yi, xi] = density

            if is_region_of_interest:
                io.imwrite(
                    f"{jpeg_dump_path_and_name}{dump_number}.jpg",
                    image[y : y + stride, x : x + stride, :],
                )
                dump_number += 1

    return densities

In [None]:
# This process function will be called in parallel on each slide.
def process(argv):
    """Segment one slide and save its JPEG tiles and density map.

    Called once per slide (via pqdm). Reads ``dump_path`` and ``map_stride``
    from the notebook-level ``config``. Tiles are written with the prefix
    ``{dump_path}/{condition}/{file_name}`` and the density map is saved as
    ``{dump_path}/{condition}/{file_name}.npy``.

    Parameters
    ----------
    argv : dict
        Must contain ``"file"`` (slide path), ``"condition"`` (output
        sub-folder) and ``"sim"`` (True to open the file as a SyntheticSlide
        rather than an AperioSlide).

    Returns
    -------
    str
        The input file path, whether or not the slide could be opened.
    """
    file = argv["file"]
    cnd = argv["condition"]
    simulated = argv["sim"]

    file_name = os.path.basename(os.path.normpath(file))
    try:
        slide = SyntheticSlide(file) if simulated else AperioSlide(file)
    except Exception as exc:
        # Skipping unreadable slides is deliberate, but report it rather than
        # failing silently. Catching Exception instead of using a bare except
        # lets KeyboardInterrupt / SystemExit propagate.
        print(f"Skipping {file}: could not open slide ({exc!r})")
        return file

    densities = spectral_decompose_and_dump_jpeg(
        slide,
        stride=config["map_stride"],
        jpeg_dump_path_and_name=f'{config["dump_path"]}/{cnd}/{file_name}',
    )
    with open(f'{config["dump_path"]}/{cnd}/{file_name}.npy', "wb") as f:
        np.save(f, densities)

    return file

In [None]:
# Ensure an output folder exists for each condition
case_folders = [config["positive_case_folder"], config["negative_case_folder"]]
for folder in case_folders:
    pathlib.Path(config["dump_path"] + folder).mkdir(parents=True, exist_ok=True)

# One work item per slide: its path, its condition and whether it is synthetic
files_list_and_configuration = []

if config["simulated"]:
    for i in np.arange(0, config["simulation_n"]):
        # Pick the condition to generate, PD or Control, at random
        is_positive = np.random.random() > 0.5
        if is_positive:
            condition = config["positive_case_folder"]
        else:
            condition = config["negative_case_folder"]

        # Follow the dataset naming convention for slides
        name_for_slide = "PD" if "PD" in condition else "PDC"
        slide_file = f"{config['dump_path']}/{name_for_slide}{i}-17_A.png"

        # Generate a new synthetic slide and save it to disk
        SpectralSlideGenerator(
            512 * 4,
            512 * 4,
            filename=slide_file,
            control="C" in name_for_slide,
        )

        files_list_and_configuration.append(
            {"file": slide_file, "condition": condition, "sim": True}
        )
else:
    # Real data: look inside every case folder under each search directory
    for cases in case_folders:
        for searched_path in config["search_directories"]:
            for case_dir in glob(searched_path + cases + "/*/", recursive=True):
                slide_files = [
                    slide_file
                    for slide_file in glob(case_dir + "/*.svs", recursive=True)
                    if config["slide_index_filter"] in slide_file
                ]
                files_list_and_configuration.extend(
                    {"file": slide_file, "condition": cases, "sim": False}
                    for slide_file in slide_files
                )

# 1 job for AJAX optimisation (original author's note)
result = pqdm(files_list_and_configuration, process, n_jobs=1)

In [None]:
# Map each case to its condition, then split the cases into two lists.
case_conditions = load_filenames_and_generate_conditions(config["case_files"])

positive_cases = []
negative_cases = []
for case, condition in case_conditions.items():
    # A condition containing "C" goes to the negative list
    if "C" in condition:
        negative_cases.append(case)
    else:
        positive_cases.append(case)

# Raw Analysis of Density Maps

Let's first just take a look at the density maps that have been dumped.  We are going to use the basic thresholding routines to determine whether we have successfully segmented A-syn, before we
look into more advanced classification methods in the other WPs.

Process:

* Create histograms of the density for each case, and band pass those densities to highlight the maximal differences between the groups.
* Calculate the mean bandpass density for each 'case', and then produce an ROC on the basis of those densities.

In [None]:
# A simple median classifier with a low RGB threshold as a basic mask.
# This is illustrative only; the following workbooks explore a better
# classification method.

# Glob patterns for the JPEG tiles dumped for each condition
positive_jpeg_pattern = (
    f'{config["dump_path"]}/{config["positive_case_folder"]}/*17_*.jpg'
)
negative_jpeg_pattern = (
    f'{config["dump_path"]}/{config["negative_case_folder"]}/*17_*.jpg'
)
rgb_threshold = config["B_channel_threshold"]

# Score each group with the simple median classifier
positive_score_list = calc_median_score_list(
    positive_cases, positive_jpeg_pattern, rgb_threshold=rgb_threshold
)
negative_score_list = calc_median_score_list(
    negative_cases, negative_jpeg_pattern, rgb_threshold=rgb_threshold
)

In [None]:
# ROC curve of the per-case median scores, positive vs negative
plt.figure()
positive_scores = np.array(positive_score_list)
negative_scores = np.array(negative_score_list)
ax = plot_roc(
    positive_scores,
    negative_scores,
    verbose=False,
    title="Threshold of Median Scores Per Case ROC Curve",
)
plt.show()

In [None]:
# Box plot of the median counts per condition (outliers hidden)
fig, ax = plt.subplots()
score_groups = [negative_score_list, positive_score_list]
ax.boxplot(score_groups, showfliers=False)
ax.set_xticks([1, 2], ["Control", "PD"])
ax.set_xlabel("Condition")
ax.set_ylabel("Quartiles of ROIs Identified")

In [None]:
# Ground-truth labels (1 = positive, 0 = negative), in the same order as the scores
n_positive = len(positive_score_list)
n_negative = len(negative_score_list)
labels = np.concatenate([np.ones(n_positive), np.zeros(n_negative)])
outputs = np.hstack([positive_score_list, negative_score_list])

In [None]:
# ROC curve over the case scores: false-positive rate, true-positive rate and
# the score thresholds at which they were evaluated.
fpr, tpr, thresholds = metrics.roc_curve(labels, outputs)

In [None]:
# Plot the ROC curve computed above (false-alarm rate vs hit rate).
plt.figure()
plt.plot(fpr, tpr, label="PD vs Control")
plt.legend()
plt.xlabel("False Alarm Rate", fontsize=18)
plt.ylabel("Hit Rate", fontsize=18)
# Called with no tick positions, these only restyle the existing tick labels.
plt.yticks(fontsize=18)
plt.xticks(fontsize=18)
# Disabled title, left over from an earlier analysis:
# plt.title("512um Patch Level Discrmination between Taupathology and Control Tau Segmentation")
plt.show()

In [None]:
def descriptives(scores, patch_pixels=512**2):
    """Print the median and 95th-percentile score as a percentage of a patch's area.

    Parameters
    ----------
    scores : array_like
        Per-case scores.
    patch_pixels : int, optional
        Number of pixels in one patch, used as the denominator. Defaults to
        ``512**2``, the value that was previously hard-coded; pass a different
        value when the patch size differs.

    Returns
    -------
    None
        The result is printed. The label ``mu`` in the output is the median.
    """
    median_score = np.median(scores)
    p95_score = np.percentile(scores, 95)
    print(
        f"mu : {(median_score / patch_pixels)*100}, 95pc: {(p95_score / patch_pixels)*100}"
    )

In [None]:
# Median and 95th-percentile score for the negative (control) cases
descriptives(negative_score_list)

In [None]:
# Median and 95th-percentile score for the positive (PD) cases
descriptives(positive_score_list)