In [1]:
import os
import re

import pandas as pd
import scanpy as sc

from cellseg_benchmark._constants import (
    factor_to_celltype,
    true_cluster,
)
from cellseg_benchmark.metrics import compute_f1

## Method 1: use area overlap
Issue: different coordinate systems between ficture and boundaries, results in very low F1 scores, because small overlap between images

In [2]:
def rename(colnames):
    """Rename column names."""
    mapper = {}
    for col in colnames:
        mapper[col] = factor_to_celltype[re.split("_", col)[1]]
    return mapper

In [3]:
# constants
base_path = "/dss/dssfs03/pn52re/pn52re-dss-0001/cellseg-benchmark/"

cohort = "aging"
method = "Cellpose_1_nuclei_model"
celltype_name = "cell_type_revised"
obsm_key = "ficture_area"

# read data
data_path = os.path.join(base_path, "analysis", cohort, method)
adata = sc.read_h5ad(os.path.join(data_path, "adatas", "adata_integrated.h5ad.gz"))

In [4]:
correct_celltypes = {}
for i in factor_to_celltype.keys():
    correct_celltypes[factor_to_celltype[i]] = true_cluster[factor_to_celltype[i]]

In [5]:
general_stats_dic = {}
for sample in os.listdir(os.path.join(base_path, "samples")):
    if sample.startswith(cohort) and os.path.exists(
        os.path.join(
            base_path, "samples", sample, "results", "Ficture", "general_stats.csv"
        )
    ):
        tmp = pd.read_csv(
            os.path.join(
                base_path, "samples", sample, "results/Ficture/general_stats.csv"
            ),
            index_col=0,
        )
        tmp.rename(columns=factor_to_celltype, inplace=True)
        general_stats_dic[sample] = tmp

In [6]:
data = {}
for key in general_stats_dic.keys():
    data[key] = adata[adata.obs["sample"] == key].obsm[obsm_key].copy()
    data[key].rename(columns=rename(data[key].columns), inplace=True)
    data[key]["celltype"] = adata[adata.obs["sample"] == key].obs[celltype_name].values

In [7]:
results = {}
for key in data.keys():
    results[key] = compute_f1(
        data[key],
        general_stats=general_stats_dic[key],
        flavor="all",
        correct_celltypes=correct_celltypes,
        weighted=False,
    )

results = pd.concat(results, names=["sample", "metric"])
results.index = results.index.droplevel(1)

results

Unnamed: 0_level_0,ABCs,Astrocytes,BAMs,Bergmann,ECs,Ependymal,Immune-Other,Microglia,Neurons-Dopa,Neurons-Dopa-Gaba,...,Neurons-Immature,Neurons-Other,OECs,OPCs,Oligodendrocytes,Pericytes,SMCs,VLMCs,macro F1_score,micro F1_score
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
aging_s10_r0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
aging_s10_r1,0.020894,0.0,0.001701,0.008378,0.01481,0.0,0.0,0.004074,0.001792,0.144362,...,0.08153,0.169472,0.0,0.004903,0.036246,0.0,0.000549,0.00116,0.030884,0.068168
aging_s11_r0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
aging_s8_r1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
aging_s5_r1,0.008864,0.0,0.000669,0.002889,0.009346,0.0,0.0,3.9e-05,0.006666,0.00951,...,0.383845,0.011702,0.0,0.009876,0.014557,0.0,0.000375,0.001011,0.031871,0.055931
aging_s6_r0,0.00807,0.003639,0.001443,0.001534,0.004746,0.0,0.003323,0.001684,0.023151,0.138002,...,0.373334,0.049069,0.0,0.000348,0.006627,4e-05,0.002068,9.4e-05,0.034126,0.054901
aging_s7_r2,0.013762,0.002076,0.00196,0.002595,0.009532,0.0,0.0,0.000421,0.014714,0.022806,...,0.095398,0.100047,0.0,0.000946,0.115511,0.0,0.00163,0.000992,0.030186,0.061943
aging_s10_r2,0.00547,0.041534,0.000507,0.004248,0.0234,6.4e-05,0.000103,0.001118,0.011957,0.049016,...,0.313616,0.094609,0.0,0.002952,0.01989,0.005154,0.000181,0.00166,0.041684,0.072028
aging_s11_r1,0.01295,0.016033,0.002863,0.007802,0.008555,0.000886,0.000979,0.004798,0.005586,0.039137,...,0.341631,0.061565,0.0,0.004265,0.071761,3.8e-05,0.000592,0.000643,0.035047,0.051195
aging_s12_r0,0.010808,0.002299,0.0,0.00524,0.0,0.0,0.002036,0.001068,0.014919,0.04126,...,0.302944,0.005055,0.0,0.006407,0.083562,0.0,8.4e-05,0.000693,0.033695,0.050119


In [9]:
f1 = pd.DataFrame({"F1_statistics": results.mean(axis=0)}).T
f1

Unnamed: 0,ABCs,Astrocytes,BAMs,Bergmann,ECs,Ependymal,Immune-Other,Microglia,Neurons-Dopa,Neurons-Dopa-Gaba,...,Neurons-Immature,Neurons-Other,OECs,OPCs,Oligodendrocytes,Pericytes,SMCs,VLMCs,macro F1_score,micro F1_score
F1_statistics,0.011271,0.006393,0.001173,0.00383,0.010689,8.3e-05,0.000605,0.001516,0.008414,0.042941,...,0.218282,0.051524,0.0,0.003468,0.033315,0.003086,0.000459,0.000652,0.026207,0.044387
