In [2]:
import pandas as pd
import openslide
import tifffile as tiff
import matplotlib.pyplot as plt
import pyarrow.parquet as pq
import torch

from pathlib import Path
import json

In [3]:
# Absolute, symlink-resolved form of the directory the kernel was started in.
# Every data path built in this notebook is expressed relative to it.
base_dir = Path.cwd().resolve()
base_dir

PosixPath('/home/sadegh/projects/nencki_institute')

## Patient 1991

In [None]:
# Path (not the data itself) of the measurement workbook for case 1991, under base_dir.
patient_1991 = base_dir.joinpath("data/measurements_1991.xlsx")


In [None]:
# The workbook is read without a header row; only its first column is used, and each
# cell in that column holds one whole record with the fields joined by ";".
raw = pd.read_excel(patient_1991, header=None)

# The first record carries the column names.
header = raw.iat[0, 0].split(";")

# Every remaining record is split into one column per field and labelled with the
# names from the first record.
data = raw.iloc[1:, 0].str.split(";", expand=True).set_axis(header, axis=1)

# Renumber the rows from 0 (the slice above starts at index 1).
df_1991 = data.reset_index(drop=True)
df_1991

In [None]:
# df_1991.to_csv("patient_1991.csv", index=False)

In [None]:
# Load summary.json from the data/1 folder of the 1991 project directory.
summary_path = base_dir / "qupath/1991_Diffuse midline glioma H3K27M/data/1/summary.json"

# JSON text is UTF-8 by specification; naming the encoding explicitly keeps the read
# independent of the machine's locale default.
with open(summary_path, "r", encoding="utf-8") as f:
    summary = json.load(f)
summary

In [None]:
# Load server.json from the data/1 folder of the 1991 project directory.
server_path = base_dir / "qupath/1991_Diffuse midline glioma H3K27M/data/1/server.json"

# JSON text is UTF-8 by specification; naming the encoding explicitly keeps the read
# independent of the machine's locale default.
with open(server_path, "r", encoding="utf-8") as f:
    server = json.load(f)
server

In [None]:
# Load classes.json from the classifiers folder of the 1991 project directory.
classes_path = base_dir / "qupath/1991_Diffuse midline glioma H3K27M/classifiers/classes.json"

# JSON text is UTF-8 by specification; naming the encoding explicitly keeps the read
# independent of the machine's locale default.
with open(classes_path, "r", encoding="utf-8") as f:
    classes = json.load(f)
classes

In [None]:
# Path of the measurement table for case 3149, under base_dir.
# NOTE(review): the variable name is misspelled ("patinet"); it is kept unchanged
# because the cell that reads the file refers to it by this exact name.
patinet_3149 = base_dir.joinpath("AKOYA/measurements_3149.csv")

In [None]:
# The file is semicolon-delimited, so the parser is told to split on ";" directly.
# Reading it with the default "," separator and re-splitting the first column by hand
# breaks as soon as any line contains a comma: the parser then either raises on the
# inconsistent field count or spills the remainder of the line into extra columns
# that would be silently discarded.
#
# header=None keeps the first line as an ordinary row (so duplicate column names are
# not renamed), dtype=str keeps every field as text, and keep_default_na=False keeps
# empty fields as "" instead of NaN — the same values a manual split would produce.
raw = pd.read_csv(
    patinet_3149,
    sep=";",
    header=None,
    dtype=str,
    keep_default_na=False,
)

# The first row carries the column names; the remaining rows are the records.
header = raw.iloc[0].tolist()
data = raw.iloc[1:].copy()
data.columns = header

# Renumber the rows from 0 (the slice above starts at index 1).
df_3149 = data.reset_index(drop=True)
df_3149


In [None]:
# NOTE(review): `df` is not assigned in any cell shown above this one, so on a fresh
# kernel this cell stops with NameError. It presumably should start from one of the
# measurement tables loaded earlier (df_1991 / df_3149) — confirm which one and bind
# it explicitly before this point.

# Shorten the column names by removing the substrings "Cell: ", "Nucleus: " and
# ": Mean", then trimming surrounding whitespace.
df.columns = [
    name.replace("Cell: ", "").replace("Nucleus: ", "").replace(": Mean", "").strip()
    for name in df.columns
]

# Discard every column whose name ends in "Median". The explicit .copy() makes the
# result an independent frame, so the per-column assignments below do not trigger
# SettingWithCopyWarning.
df = df[[name for name in df.columns if not name.endswith("Median")]].copy()

# Columns from position 7 onward are parsed as numbers; values that cannot be parsed
# become NaN (errors="coerce").
# NOTE(review): assumes the first seven columns are non-numeric metadata — confirm.
for col in df.columns[7:]:
    df[col] = pd.to_numeric(df[col], errors="coerce")

def percentage_in_population(df, base_markers, positive_marker, threshold=0.1):
    """
    Percentage of a gated population that is positive for one marker.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding one numeric column per marker referenced below.
    base_markers : dict
        Maps marker column name -> cutoff, e.g.
        {"TMEM119": 0.1, "CD68": 0.1, "CD45": 0.1}. A row belongs to the base
        population when every listed marker is strictly greater than its cutoff.
        An empty dict applies no gate, so the base population is all of `df`.
    positive_marker : str
        Column tested for positivity inside the base population, e.g. "SPP1".
    threshold : float
        Cutoff for `positive_marker`; a row is positive when its value is strictly
        greater than this.

    Returns
    -------
    float
        100 * (positive rows in the base population) / (rows in the base population),
        or 0.0 when the base population is empty.

    Raises
    ------
    KeyError
        If a referenced marker is not a column of `df`.
    """
    # Apply the gates one marker at a time instead of assembling a query string:
    # this works for any column name and for an empty set of gates. NaN values
    # compare as False and are therefore excluded from the population.
    base = df
    for marker, cutoff in base_markers.items():
        base = base[base[marker] > cutoff]

    if len(base) == 0:
        # No cells pass the gate; report 0.0 so the result is always a float.
        return 0.0

    n_positive = int((base[positive_marker] > threshold).sum())
    return n_positive / len(base) * 100

# CD31-negative cells, using the same 0.1 cutoff as every other marker in this cell.
# "CD31-" has to be a gate on the population: subtracting the percentage measured in
# the CD31+ population from the one measured in the GLUT1+ population mixes two
# different denominators, is not the SPP1+ fraction among CD31- cells, and can even
# come out negative. Rows whose CD31 value is NaN fail the comparison and are left out.
df_cd31_neg_first_pass = df[df["CD31"] <= 0.1]

results = {
    # SPP1+ percentage inside each base population, all cells considered.
    "SPP1 in TMEM119+ CD68+ CD45+": percentage_in_population(
        df, {"TMEM119": 0.1, "CD68": 0.1, "CD45": 0.1}, "SPP1"
    ),
    "SPP1 in LGALS3+ CD68+ CD45+": percentage_in_population(
        df, {"LGALS3": 0.1, "CD68": 0.1, "CD45": 0.1}, "SPP1"
    ),
    "SPP1 in H3K27M+": percentage_in_population(df, {"H3K27M": 0.1}, "SPP1"),
    # Same populations, additionally GLUT1+ and restricted to CD31-negative cells.
    "SPP1 in TMEM119+ CD68+ CD45+ GLUT1+ CD31-": percentage_in_population(
        df_cd31_neg_first_pass,
        {"TMEM119": 0.1, "CD68": 0.1, "CD45": 0.1, "GLUT1": 0.1},
        "SPP1",
    ),
    "SPP1 in LGALS3+ CD68+ CD45+ GLUT1+ CD31-": percentage_in_population(
        df_cd31_neg_first_pass,
        {"LGALS3": 0.1, "CD68": 0.1, "CD45": 0.1, "GLUT1": 0.1},
        "SPP1",
    ),
    "SPP1 in H3K27M+ GLUT1+ CD31-": percentage_in_population(
        df_cd31_neg_first_pass, {"H3K27M": 0.1, "GLUT1": 0.1}, "SPP1"
    ),
}

pd.Series(results)


In [None]:
markers = ["SPP1", "TMEM119", "CD68", "CD45", "LGALS3", "H3K27M", "GLUT1", "CD31"]

# Draw a 100-bin intensity histogram for each marker column and show it before
# moving on to the next one.
for marker in markers:
    ax = df[marker].astype(float).hist(bins=100)
    ax.set_title(marker)
    ax.set_xlabel("Intensity")
    ax.set_ylabel("Count")
    plt.show()

In [None]:
# Per-marker positivity cutoffs: a value strictly above the cutoff counts as positive.
# NOTE(review): presumably read off the intensity histograms — record how each value
# was chosen.
thresholds = dict(
    SPP1=0.175,
    TMEM119=6.25,
    CD68=8,
    CD45=10,
    LGALS3=4.65,
    H3K27M=12,
    GLUT1=15,
    CD31=2,
)

In [None]:
def percentage_in_population(df, base_markers, positive_marker, thresholds):
    """
    Percentage of a gated population that is positive for one marker, with every
    cutoff taken from a shared `thresholds` mapping.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding one numeric column per marker referenced below.
    base_markers : iterable of str
        Marker column names that define the base population; a row belongs to it
        when every listed marker is strictly greater than `thresholds[marker]`.
        Only the names are used: if a dict is passed, its values are ignored.
        An empty iterable applies no gate, so the base population is all of `df`.
    positive_marker : str
        Column tested for positivity inside the base population.
    thresholds : mapping of str -> number
        Cutoff for every marker named in `base_markers` and for `positive_marker`.

    Returns
    -------
    float
        100 * (positive rows in the base population) / (rows in the base population),
        or 0.0 when the base population is empty.

    Raises
    ------
    KeyError
        If a marker has no entry in `thresholds` or is not a column of `df`.
    """
    # Apply the gates one marker at a time instead of assembling a query string:
    # this works for any column name and for an empty set of gates. NaN values
    # compare as False and are therefore excluded from the population.
    base = df
    for marker in base_markers:
        base = base[base[marker] > thresholds[marker]]

    if len(base) == 0:
        # No cells pass the gate; report 0.0 so the result is always a float.
        return 0.0

    n_positive = int((base[positive_marker] > thresholds[positive_marker]).sum())
    return n_positive / len(base) * 100


In [None]:
# CD31-negative gate: keep the rows whose CD31 value is at or below its cutoff.
# Rows with a NaN CD31 value fail the comparison and are dropped as well.
df_cd31_neg = df.loc[df["CD31"].le(thresholds["CD31"])]

In [None]:
# SPP1+ percentage inside each GLUT1+ base population, computed on the CD31-negative
# cells only. The 0 attached to every gate marker is a placeholder: the cutoffs that
# are actually applied come from `thresholds`.
result_cd31_neg = {
    label: percentage_in_population(
        df_cd31_neg, dict.fromkeys(gate_markers, 0), "SPP1", thresholds
    )
    for label, gate_markers in [
        ("SPP1 in TMEM119+ CD68+ CD45+ GLUT1+ CD31-", ["TMEM119", "CD68", "CD45", "GLUT1"]),
        ("SPP1 in LGALS3+ CD68+ CD45+ GLUT1+ CD31-", ["LGALS3", "CD68", "CD45", "GLUT1"]),
        ("SPP1 in H3K27M+ GLUT1+ CD31-", ["H3K27M", "GLUT1"]),
    ]
}

pd.Series(result_cd31_neg)