In [None]:
import numpy as np
import torch
import tifffile as tiff
from pathlib import Path
from skimage.filters import gaussian
from skimage.morphology import remove_small_objects, dilation, disk
from skimage.measure import label, regionprops_table
from skimage.filters import threshold_otsu
from skimage.segmentation import watershed, relabel_sequential
from functools import reduce
from scipy import ndimage as ndi
import pandas as pd
from instanseg import InstanSeg
import matplotlib.pyplot as plt

In [None]:
from pathlib import Path

# Absolute, symlink-resolved form of the current working directory; every
# other path in the notebook is built relative to it.
dir_path = Path(".").resolve()
dir_path

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Load the pretrained fluorescence model on the selected device.
instanseg_fluorescence = InstanSeg(
    "fluorescence_nuclei_and_cells",
    verbosity=1,
    device=device,
)

In [None]:
# Whole-slide image to analyse, located in the `image` folder next to the
# notebook's working directory.
WSI_PATH = dir_path.parent / 'image/pHGG_1991_3149_Scan1-001.qptiff'
# NOTE(review): USE_GPU is not referenced by any visible cell (the `device`
# string is what gets passed to InstanSeg) — confirm whether it is still needed.
USE_GPU = torch.cuda.is_available()
# Passed to InstanSeg as `tile_size` in the segmentation cell.
TILE = 512
# NOTE(review): OVERLAP is not referenced by any visible cell — confirm
# whether it was meant to be passed to the tiled inference call.
OVERLAP = 128
# Passed to InstanSeg as `batch_size` in the segmentation cell.
BATCH_SIZE = 40

In [None]:
# Open the slide only long enough to obtain series 0 as a memory-mapped array;
# the context manager closes the file handle instead of leaking it for the
# lifetime of the kernel. The memory map stays usable after the handle closes.
with tiff.TiffFile(WSI_PATH) as tf:
    series = tf.series[0]
    arr = series.asarray(out="memmap")

# Normalise the layout to channel-first (C, H, W).
if arr.ndim == 2:
    # Single-channel image: add an explicit channel axis rather than letting
    # moveaxis silently transpose it.
    arr = arr[np.newaxis, ...]
elif arr.ndim != 3:
    raise ValueError(
        f"Expected a 2-D or 3-D image in series 0, got shape {arr.shape}"
    )
elif arr.shape[0] >= arr.shape[-1]:
    # Heuristic: the channel axis is the short one. If the first axis is not
    # shorter than the last, treat the data as (H, W, C) and move channels first.
    arr = np.moveaxis(arr, -1, 0)
C, H, W = arr.shape

In [None]:
# Sanity check: shape of the loaded image after the channel-first (C, H, W) normalisation.
arr.shape

### Cell segmentation based on DAPI only (channel 0)

In [None]:
# Peek at channel 0, the channel this section segments on.
arr[0]

In [None]:
# Run InstanSeg on channel 0 only, requesting the "cells" target, using the
# TILE and BATCH_SIZE values from the config cell.
# NOTE(review): pixel_size=None — presumably InstanSeg then reads or assumes the
# pixel size itself; confirm against the InstanSeg documentation.
# NOTE(review): OVERLAP from the config cell is not passed here — confirm
# whether that is intentional.
labeled_output, image_tensor = instanseg_fluorescence.eval_medium_image(
    arr[0],
    pixel_size=None,
    target="cells",
    tile_size=TILE, 
    batch_size=BATCH_SIZE
)

In [None]:
# Report type and shape of each InstanSeg output, then both dtypes on one line.
# getattr(..., None) keeps this working even if an output lacks the attribute.
_outputs = (labeled_output, image_tensor)
for _out in _outputs:
    print(type(_out), getattr(_out, 'shape', None))
print(*(getattr(_out, 'dtype', None) for _out in _outputs))

In [None]:
# Drop singleton dimensions, detach from autograd, move to the CPU and expose
# the image tensor as a NumPy array (used by the preview plot below).
img = image_tensor.squeeze().detach().cpu().numpy()

In [None]:
# Quick-look preview: keep every 50th pixel along both axes (50x downsample)
# so the whole slide renders quickly.
fig, ax = plt.subplots()
ax.imshow(img[::50, ::50])
plt.show()

In [None]:
# Convert the InstanSeg label output to an int32 NumPy array for scikit-image.
# The tensor-specific steps are guarded so that re-running this cell (when
# `labeled_output` is already an ndarray) is a no-op instead of raising
# AttributeError on `.detach()`.
if torch.is_tensor(labeled_output):
    labeled_output = labeled_output.squeeze().detach().cpu().numpy()
labeled_output = np.asarray(labeled_output).astype("int32")

In [None]:
# Per-cell measurements on channel 0 (the channel used for segmentation):
#   label           -> cell id
#   centroid        -> centre of the cell
#   area, perimeter -> shape of the cell
#   mean_intensity  -> mean channel-0 signal inside the cell
# The same call can be repeated with another channel as `intensity_image`.
dapi_properties = ("label", "centroid", "area", "perimeter", "mean_intensity")
props = regionprops_table(
    labeled_output,
    intensity_image=arr[0],
    properties=dapi_properties,
)

cells_df = pd.DataFrame(props)
# Number of segmented cells.
print(len(cells_df))

In [None]:
# Preview the first 20 rows of the per-cell channel-0 table.
cells_df.head(20)

### Per-cell feature summary for all channels

In [None]:
def extract_cell_features(labeled_output, arr, channel_names=None):
    """
    Build a per-cell feature table: morphology plus per-channel intensity.

    Parameters
    ----------
    labeled_output : ndarray (H, W)
        Segmentation mask, each cell has unique integer ID.
    arr : ndarray (C, H, W)
        Multi-channel image (channels, height, width).
    channel_names : list of str
        Names of channels in arr, e.g. ["DAPI","CD68","TMEM119","SPP1"].
        If None, channels will be named ch0, ch1, ...
        If fewer names than channels are given, only the first
        ``len(channel_names)`` channels are measured.

    Returns
    -------
    DataFrame with one row per cell (QuPath-export-like column names):
    ``label``, ``Centroid Y µm``, ``Centroid X µm``, ``Cell: Area (px)``,
    ``Cell: Perimeter (px)`` and, for every channel name ``ch``,
    ``Cell: {ch}: Mean`` and ``Cell: {ch}: Median``.

    Raises
    ------
    IndexError
        If more channel names are supplied than ``arr`` has channels.

    Notes
    -----
    The centroid columns keep their "µm" names but hold pixel coordinates;
    no pixel-size scaling is applied here.
    TODO: scale by the pixel size if physical units are required.
    """
    n_channels = arr.shape[0]
    if channel_names is None:
        channel_names = [f"ch{i}" for i in range(n_channels)]
    elif len(channel_names) > n_channels:
        # Fail fast with a clear message instead of an opaque IndexError after
        # several expensive regionprops passes.
        raise IndexError(
            f"channel_names has {len(channel_names)} entries but arr has only "
            f"{n_channels} channels"
        )

    def median_intensity(regionmask, intensity_image):
        # scikit-image has no built-in median property, so supply one through
        # `extra_properties`. It receives the region's boolean mask and the
        # matching intensity crop; the column is named after this function.
        return np.median(intensity_image[regionmask])

    # --- morphology (independent of intensity) ---
    props_morph = regionprops_table(
        labeled_output,
        properties=("label", "centroid", "area", "perimeter")
    )
    morph_df = pd.DataFrame(props_morph).rename(
        columns={
            "centroid-0": "Centroid Y µm",
            "centroid-1": "Centroid X µm",
            "area": "Cell: Area (px)",
            "perimeter": "Cell: Perimeter (px)",
        }
    )
    results = [morph_df]

    # --- intensity per channel ---
    for i, ch in enumerate(channel_names):
        props = regionprops_table(
            labeled_output,
            intensity_image=arr[i],
            properties=("label", "mean_intensity"),
            extra_properties=(median_intensity,),
        )
        df_ch = pd.DataFrame(props).rename(
            columns={
                "mean_intensity": f"Cell: {ch}: Mean",
                "median_intensity": f"Cell: {ch}: Median"
            }
        )
        results.append(df_ch)

    # --- merge all on "label" ---
    features_df = reduce(lambda left, right: pd.merge(left, right, on="label"), results)
    return features_df

# -------------------------
# Example usage:
# labeled_output: (H,W)
# arr: (C,H,W)
# `channel_names` must be defined before the call; previously it existed only
# inside a comment, so this cell raised NameError on a fresh kernel.
# None falls back to generic names (ch0, ch1, ...). To get marker-named
# columns, replace it with a list of names in channel order for `arr`, e.g.
# ["DAPI", "GFAP", "CD45", "TMEM119", "HLA-DR", "CD68", "SPP1"]
# (verify the names and their order against the slide's actual channels).
channel_names = None

features_df = extract_cell_features(labeled_output, arr, channel_names)
print(features_df.head())


## Analysis of the AKOYA measurement table (patient 1991)

In [None]:
# Measurement workbook for patient 1991, expected in the AKOYA folder under
# the notebook's working directory.
patient_1991 = dir_path / "AKOYA/measurements_1991.xlsx"

In [None]:
# The workbook holds a semicolon-delimited table packed into its first column:
# row 0 is the header line, every following row is one record.
raw = pd.read_excel(patient_1991, header=None)
packed = raw.iloc[:, 0]

header = packed.iloc[0].split(";")
data = packed.iloc[1:].str.split(";", expand=True)
data.columns = header

# Re-number the rows from 0 now that the header row has been removed.
df_1991 = data.reset_index(drop=True)
df_1991

In [None]:
# List the column names parsed from the semicolon-delimited header row.
df_1991.columns

In [None]:
# The table was built by splitting text on ";", so coerce every Mean/Median
# column to numeric. Entries that cannot be parsed become NaN.
for col in df_1991.columns:
    if "Mean" in col or "Median" in col:
        df_1991[col] = pd.to_numeric(df_1991[col], errors="coerce")

markers = ["SPP1", "TMEM119", "CD68", "CD45", "LGALS3", "H3K27M", "GLUT1", "CD31"]

# Per-marker positivity threshold (Otsu on the per-cell mean intensity).
# H3K27M is read from the nucleus measurement, every other marker from the cell.
TH = {}
for m in markers:
    col = f"Cell: {m}: Mean" if m != "H3K27M" else f"Nucleus: {m}: Mean"
    vals = df_1991[col].dropna()
    if len(vals) == 0:
        # No threshold can be derived; say so instead of skipping silently,
        # because later lookups of TH[m] will raise KeyError.
        print(f"Warning: no numeric values in '{col}'; no threshold set for {m}")
        continue
    try:
        # Pass a plain ndarray: threshold_otsu expects an array, and handing it
        # a pandas Series can fail inside scikit-image (depending on version),
        # which the old bare `except:` silently turned into the median fallback.
        TH[m] = threshold_otsu(vals.to_numpy())
    except Exception as exc:
        # Deliberate best-effort fallback, but now visible to the reader.
        print(f"Warning: Otsu failed for {m} ({exc!r}); using the median instead")
        TH[m] = vals.median()
print("Thresholds:", TH)


def pos(marker, nucleus=False):
    """Return a boolean Series flagging cells positive for ``marker``.

    A cell is positive when its mean intensity (the nucleus measurement if
    ``nucleus`` is true, otherwise the whole-cell measurement) is strictly
    above the marker's threshold in ``TH``. Reads the module-level
    ``df_1991`` and ``TH``.
    """
    if nucleus:
        column = f"Nucleus: {marker}: Mean"
    else:
        column = f"Cell: {marker}: Mean"
    is_positive = df_1991[column] > TH[marker]
    return is_positive

# Single-marker gates, evaluated once and reused below.
tmem119_pos = pos("TMEM119")
cd68_pos = pos("CD68")
cd45_pos = pos("CD45")
lgals3_pos = pos("LGALS3")

# Composite populations (boolean masks aligned with df_1991 rows).
microglia_like = tmem119_pos & cd68_pos & cd45_pos
macrophage_like = lgals3_pos & cd68_pos & cd45_pos
h3k27m = pos("H3K27M", nucleus=True)
# "Hypoxic" here means GLUT1-positive and CD31-negative.
hypoxic = pos("GLUT1") & ~pos("CD31")

# ---- 6. Compute % SPP1+ ----
# SPP1 positivity is evaluated once for all cells and then averaged within each
# population mask, replacing six copy-pasted expressions with one table.
spp1_positive = df_1991["Cell: SPP1: Mean"] > TH["SPP1"]

populations = {
    "SPP1 in TMEM119+ CD68+ CD45+": microglia_like,
    "SPP1 in LGALS3+ CD68+ CD45+": macrophage_like,
    "SPP1 in H3K27M+": h3k27m,
    "SPP1 in TMEM119+ CD68+ CD45+ GLUT1+ CD31-": microglia_like & hypoxic,
    "SPP1 in LGALS3+ CD68+ CD45+ GLUT1+ CD31-": macrophage_like & hypoxic,
    "SPP1 in H3K27M+ GLUT1+ CD31-": h3k27m & hypoxic,
}

# The mean of a boolean Series is the positive fraction; an empty population
# yields NaN, which is relabelled "No cells" below.
results = {
    name: spp1_positive[mask].mean() * 100
    for name, mask in populations.items()
}

summary = pd.DataFrame.from_dict(results, orient="index", columns=["% SPP1+"])
summary["% SPP1+"] = summary["% SPP1+"].fillna("No cells")
summary

# Save summary
# summary.to_csv("SPP1_percentages_summary.csv")


In [None]:
# Ensure the Mean/Median columns are numeric (a no-op if already converted).
# The right-hand side must read from df_1991: `df` is not defined at notebook
# level, so the previous `df[col]` raised NameError on a fresh kernel.
for col in df_1991.columns:
    if "Mean" in col or "Median" in col:
        df_1991[col] = pd.to_numeric(df_1991[col], errors="coerce")

def population_stats(pop_mask, name, data=None, threshold=None, column="Cell: SPP1: Mean"):
    """Count a population and the share of it that is SPP1-positive.

    Parameters
    ----------
    pop_mask : boolean Series
        Row mask selecting the population; must align with ``data``.
    name : str
        Label stored under "Population" in the result.
    data : DataFrame, optional
        Measurement table. Defaults to the notebook-level ``df_1991``.
    threshold : float, optional
        Positivity cut-off for ``column``. Defaults to ``TH["SPP1"]``.
    column : str, optional
        Intensity column compared against ``threshold``.

    Returns
    -------
    dict
        Keys "Population", "N_total" and "% SPP1+". "% SPP1+" is None when
        the population is empty. Cells whose value is NaN count towards
        N_total but never as positive.
    """
    # Fall back to the notebook globals only when the caller gives no
    # explicit inputs, so existing two-argument calls behave as before.
    if data is None:
        data = df_1991
    if threshold is None:
        threshold = TH["SPP1"]

    n_total = int(pop_mask.sum())
    if n_total == 0:
        return {"Population": name, "N_total": 0, "% SPP1+": None}
    n_spp1 = int((data.loc[pop_mask, column] > threshold).sum())
    pct = n_spp1 / n_total * 100
    return {"Population": name, "N_total": n_total, "% SPP1+": pct}

# Example usage: one (mask, label) pair per population of interest, in the
# order the rows should appear in the summary table.
population_specs = [
    (microglia_like, "TMEM119+ CD68+ CD45+"),
    (macrophage_like, "LGALS3+ CD68+ CD45+"),
    (h3k27m, "H3K27M+"),
    (microglia_like & hypoxic, "TMEM119+ CD68+ CD45+ GLUT1+ CD31-"),
    (macrophage_like & hypoxic, "LGALS3+ CD68+ CD45+ GLUT1+ CD31-"),
    (h3k27m & hypoxic, "H3K27M+ GLUT1+ CD31-"),
]
stats = [population_stats(pop_mask, pop_label) for pop_mask, pop_label in population_specs]

summary = pd.DataFrame(stats)
print(summary)
