In [5]:
from pathlib import Path
import pandas as pd
import numpy as np
from IPython.display import display, Markdown
import plotly.express as px
import os

In [6]:
# Load the per-scan method-vs-gold metrics for JH_Head_CT and preview them.
CSV_PATH = "/Users/rushil/brain_extraction/results/quantitative/gold_std_comparison/JH_Head_CT/method_vs_gold_per_scan_JH_Head_CT.csv"
# icv_ratio = icv_ml_pred / icv_ml_ref, appended as the last column
# (1.0 means the predicted and reference volumes are equal).
df = pd.read_csv(CSV_PATH).assign(
    icv_ratio=lambda frame: frame['icv_ml_pred'] / frame['icv_ml_ref']
)
display(df.head(5))

Unnamed: 0,patient_id,stem,method_A,method_B,dice,iou,sensitivity,specificity,msd_mm,hd95_mm,icv_ml_ref,icv_ml_pred,delta_icv_ml,gold_path,pred_path,above_thresh,within_5ml,icv_ratio
0,1,01_BRAIN_1_Anonymized,Brainchop,GoldManual,0.96,0.922,0.986,0.989,1.399,3.207,1217.472,1284.2,66.727,/dcs05/ciprian/smart/mistie_3/jhu_manual_segme...,/dcs05/ciprian/smart/mistie_3/jhu_manual_segme...,0,0,1.054809
1,1,01_BRAIN_1_Anonymized,HD-CTBET,GoldManual,0.939,0.886,0.985,0.981,2.486,5.0,1217.472,1335.502,118.03,/dcs05/ciprian/smart/mistie_3/jhu_manual_segme...,/dcs05/ciprian/smart/mistie_3/jhu_manual_segme...,0,0,1.096947
2,1,01_BRAIN_1_Anonymized,SynthStrip,GoldManual,0.942,0.89,0.991,0.981,2.211,3.784,1217.472,1346.028,128.556,/dcs05/ciprian/smart/mistie_3/jhu_manual_segme...,/dcs05/ciprian/smart/mistie_3/jhu_manual_segme...,0,0,1.105593
3,1,01_BRAIN_1_Anonymized,CTBET,GoldManual,0.991,0.983,0.992,0.998,0.223,0.635,1217.472,1220.469,2.997,/dcs05/ciprian/smart/mistie_3/jhu_manual_segme...,/dcs05/ciprian/smart/mistie_3/jhu_manual_segme...,1,1,1.002462
4,1,01_BRAIN_1_Anonymized,Robust-CTBET,GoldManual,0.989,0.978,0.987,0.998,0.427,1.004,1217.472,1213.58,-3.893,/dcs05/ciprian/smart/mistie_3/jhu_manual_segme...,/dcs05/ciprian/smart/mistie_3/jhu_manual_segme...,1,1,0.996803


In [7]:
# Dice pass-rate summary.
# For every dataset CSV found under `data_root`, and for every method in it,
# compute the percentage of scans whose Dice score is >= each threshold, then
# write a method x threshold matrix to
# derived_metrics/<dataset>/dice_matrix_by_method_thresholds.csv and display it.
THRESHOLDS = [0.95, 0.97, 0.99]
outdir = Path("/Users/rushil/brain_extraction/results/quantitative/gold_std_comparison/derived_metrics")
outdir.mkdir(parents=True, exist_ok=True)

# For gold-standard comparisons: method_A is the algorithm, method_B is the gold reference.
# We don't canonicalize pairs here because order matters (A=method, B=gold).
data_root = Path("/Users/rushil/brain_extraction/results/quantitative/gold_std_comparison")
csv_paths = sorted(data_root.rglob('method_vs_gold_per_scan*.csv'))
if not csv_paths:
    print(f'No input CSVs found under {data_root}')

for csv_path in csv_paths:
    # The dataset name is taken from the folder that contains the CSV.
    dataset = csv_path.parent.name
    print(f'Processing dataset: {dataset} -> {csv_path}')
    df_ds = pd.read_csv(csv_path)

    # Skip (rather than crash the whole loop) when the required columns are absent.
    missing_cols = [col for col in ('method_A', 'dice') if col not in df_ds.columns]
    if missing_cols:
        print(f'Skipping {csv_path} - missing ' + '/'.join(missing_cols))
        continue

    # compute icv_ratio if columns present
    # (not used by the Dice summary below; kept so df_ds still carries the column)
    if 'icv_ml_pred' in df_ds.columns and 'icv_ml_ref' in df_ds.columns:
        df_ds['icv_ratio'] = df_ds['icv_ml_pred'] / df_ds['icv_ml_ref']

    # Drop missing method labels first: sorted() cannot order NaN against strings.
    methods = sorted(df_ds['method_A'].dropna().unique())
    if not methods:
        # An empty frame has no 'method' column, so pivot() would raise KeyError.
        print(f'Skipping {csv_path} - no rows to summarise')
        continue

    # Create the dataset output folder once, independent of the loops below.
    dataset_dir = outdir / dataset
    dataset_dir.mkdir(parents=True, exist_ok=True)

    rows_all = []
    # For each method (method_A compared to gold in method_B), count rows and threshold passes
    for m in methods:
        method_df = df_ds[df_ds['method_A'] == m]
        n_comparisons = int(len(method_df))
        for t in THRESHOLDS:
            # NaN Dice values compare False, so they stay in the denominator as failures.
            n_ge = int((method_df['dice'] >= t).sum())
            pct = (n_ge / n_comparisons * 100.0) if n_comparisons else np.nan
            rows_all.append({'method': m, 'threshold': float(t), 'n_comparisons': n_comparisons, 'n_ge': n_ge, 'pct_ge': float(pct)})

    # Build the method x threshold matrix of pass percentages and save it per dataset
    combined_df = pd.DataFrame(rows_all)
    pivot = combined_df.pivot(index='method', columns='threshold', values='pct_ge').reindex(index=methods)
    pivot.to_csv(dataset_dir / 'dice_matrix_by_method_thresholds.csv')
    display(Markdown(f'### Results for {dataset}'))
    display(pivot)


Processing dataset: CERMEP-IDB-MRXFDG -> /Users/rushil/brain_extraction/results/quantitative/gold_std_comparison/CERMEP-IDB-MRXFDG/method_vs_gold_per_scan_CERMEP-IDB-MRXFDG.csv


### Results for CERMEP-IDB-MRXFDG

threshold,0.95,0.97,0.99
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Brainchop,75.0,5.0,0.0
CTBET,0.0,0.0,0.0
CT_BET,70.0,0.0,0.0
CTbet_Docker,35.0,0.0,0.0
HD-CTBET,95.0,25.0,0.0
Robust-CTBET,80.0,10.0,0.0
SynthStrip,25.0,0.0,0.0


Processing dataset: JH_Head_CT -> /Users/rushil/brain_extraction/results/quantitative/gold_std_comparison/JH_Head_CT/method_vs_gold_per_scan_JH_Head_CT.csv


### Results for JH_Head_CT

threshold,0.95,0.97,0.99
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Brainchop,88.571429,20.0,0.0
CTBET,97.142857,80.0,34.285714
CT_BET,100.0,71.428571,42.857143
CTbet_Docker,100.0,91.428571,48.571429
HD-CTBET,60.0,0.0,0.0
Robust-CTBET,97.142857,80.0,22.857143
SynthStrip,48.571429,0.0,0.0


Processing dataset: MISTIE_III -> /Users/rushil/brain_extraction/results/quantitative/gold_std_comparison/MISTIE_III/method_vs_gold_per_scan_mistie3.csv


### Results for MISTIE_III

threshold,0.95,0.97,0.99
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Brainchop,20.0,10.0,0.0
CTBET,60.0,50.0,0.0
CT_BET,71.428571,14.285714,0.0
CTbet_Docker,100.0,80.0,0.0
HD-CTBET,30.0,0.0,0.0
Robust-CTBET,80.0,70.0,0.0
SynthStrip,20.0,0.0,0.0


In [8]:
# Absolute ICV-difference summary.
# For every dataset CSV found under `data_root`, and for every method in it,
# compute the percentage of scans whose |delta_icv_ml| is <= each threshold,
# then write a method x threshold matrix to
# derived_metrics/<dataset>/icv_matrix_by_method_thresholds.csv and display it.
# NOTE: THRESHOLDS is rebound here to tolerances on delta_icv_ml (presumably mL,
# going by the column name); other cells bind the same name to different values.
THRESHOLDS = [2.5, 5.0, 10.0]
outdir = Path("/Users/rushil/brain_extraction/results/quantitative/gold_std_comparison/derived_metrics")
outdir.mkdir(parents=True, exist_ok=True)

data_root = Path("/Users/rushil/brain_extraction/results/quantitative/gold_std_comparison")
csv_paths = sorted(data_root.rglob('method_vs_gold_per_scan*.csv'))
if not csv_paths:
    print(f'No input CSVs found under {data_root}')

for csv_path in csv_paths:
    # The dataset name is taken from the folder that contains the CSV.
    dataset = csv_path.parent.name
    print(f'Processing dataset: {dataset} -> {csv_path}')
    df_ds = pd.read_csv(csv_path)

    # Skip (rather than crash the whole loop) when the required columns are absent.
    missing_cols = [col for col in ('method_A', 'delta_icv_ml') if col not in df_ds.columns]
    if missing_cols:
        print(f'Skipping {csv_path} - missing ' + '/'.join(missing_cols))
        continue

    # Drop missing method labels first: sorted() cannot order NaN against strings.
    methods = sorted(df_ds['method_A'].dropna().unique())
    if not methods:
        # An empty frame has no 'method' column, so pivot() would raise KeyError.
        print(f'Skipping {csv_path} - no rows to summarise')
        continue

    # Create the dataset output folder once, independent of the loops below.
    dataset_dir = outdir / dataset
    dataset_dir.mkdir(parents=True, exist_ok=True)

    rows_all = []
    # For each method (method_A compared to gold in method_B), count rows and threshold passes
    for m in methods:
        method_df = df_ds[df_ds['method_A'] == m]
        n_comparisons = int(len(method_df))
        abs_delta = method_df['delta_icv_ml'].abs()
        for t in THRESHOLDS:
            # NaN deltas compare False, so they stay in the denominator as failures.
            n_within = int((abs_delta <= t).sum())
            pct_within = (n_within / n_comparisons * 100.0) if n_comparisons else np.nan
            # The 'n_ge'/'pct_ge' keys are kept unchanged for compatibility, but
            # here they hold "<= threshold" counts and percentages.
            rows_all.append({'method': m, 'threshold': float(t), 'n_comparisons': n_comparisons, 'n_ge': n_within, 'pct_ge': float(pct_within)})

    # Build the method x threshold matrix of within-tolerance percentages and save it per dataset
    combined_df = pd.DataFrame(rows_all)
    pivot = combined_df.pivot(index='method', columns='threshold', values='pct_ge').reindex(index=methods)
    pivot.to_csv(dataset_dir / 'icv_matrix_by_method_thresholds.csv')
    display(Markdown(f'### Results for {dataset}'))
    display(pivot)


Processing dataset: CERMEP-IDB-MRXFDG -> /Users/rushil/brain_extraction/results/quantitative/gold_std_comparison/CERMEP-IDB-MRXFDG/method_vs_gold_per_scan_CERMEP-IDB-MRXFDG.csv


### Results for CERMEP-IDB-MRXFDG

threshold,2.5,5.0,10.0
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Brainchop,0.0,5.0,5.0
CTBET,0.0,5.0,5.0
CT_BET,5.0,5.0,15.0
CTbet_Docker,0.0,0.0,10.0
HD-CTBET,0.0,0.0,0.0
Robust-CTBET,5.0,5.0,5.0
SynthStrip,0.0,0.0,0.0


Processing dataset: JH_Head_CT -> /Users/rushil/brain_extraction/results/quantitative/gold_std_comparison/JH_Head_CT/method_vs_gold_per_scan_JH_Head_CT.csv


### Results for JH_Head_CT

threshold,2.5,5.0,10.0
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Brainchop,2.857143,8.571429,20.0
CTBET,11.428571,22.857143,48.571429
CT_BET,7.142857,35.714286,42.857143
CTbet_Docker,11.428571,22.857143,42.857143
HD-CTBET,0.0,2.857143,8.571429
Robust-CTBET,8.571429,25.714286,37.142857
SynthStrip,0.0,5.714286,5.714286


Processing dataset: MISTIE_III -> /Users/rushil/brain_extraction/results/quantitative/gold_std_comparison/MISTIE_III/method_vs_gold_per_scan_mistie3.csv


### Results for MISTIE_III

threshold,2.5,5.0,10.0
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Brainchop,0.0,0.0,10.0
CTBET,0.0,0.0,0.0
CT_BET,0.0,0.0,14.285714
CTbet_Docker,10.0,20.0,20.0
HD-CTBET,0.0,0.0,10.0
Robust-CTBET,0.0,0.0,10.0
SynthStrip,0.0,0.0,10.0


In [9]:
# ICV-ratio agreement summary.
# For every dataset CSV found under `data_root`, and for every method in it,
# compute the percentage of scans whose icv_ratio (icv_ml_pred / icv_ml_ref)
# lies within (1 - threshold) of 1.0. Outputs per dataset:
#   derived_metrics/<dataset>/<method>/icv_ratio_within_thresholds_by_method.csv
#   derived_metrics/<dataset>/icv_ratio_within_thresholds_all_methods.csv
#   derived_metrics/<dataset>/icv_ratio_matrix_by_method_thresholds.csv
THRESHOLDS = [0.95, 0.97, 0.99]
outdir = Path("/Users/rushil/brain_extraction/results/quantitative/gold_std_comparison/derived_metrics")
outdir.mkdir(parents=True, exist_ok=True)

data_root = Path("/Users/rushil/brain_extraction/results/quantitative/gold_std_comparison")
csv_paths = sorted(data_root.rglob('method_vs_gold_per_scan*.csv'))
if not csv_paths:
    print(f'No input CSVs found under {data_root}')

for csv_path in csv_paths:
    # The dataset name is taken from the folder that contains the CSV.
    dataset = csv_path.parent.name
    print(f'Processing dataset: {dataset} -> {csv_path}')
    df_ds = pd.read_csv(csv_path)

    # require icv columns to compute ratio
    if 'icv_ml_pred' not in df_ds.columns or 'icv_ml_ref' not in df_ds.columns:
        print(f"Skipping {csv_path} - missing icv_ml_pred/icv_ml_ref")
        continue
    # The method label column is needed for grouping; skip instead of raising KeyError.
    if 'method_A' not in df_ds.columns:
        print(f"Skipping {csv_path} - missing method_A")
        continue
    df_ds['icv_ratio'] = df_ds['icv_ml_pred'] / df_ds['icv_ml_ref']

    # Drop missing method labels first: sorted() cannot order NaN against strings.
    methods = sorted(df_ds['method_A'].dropna().unique())
    if not methods:
        # An empty frame has no 'method' column, so pivot() would raise KeyError.
        print(f"Skipping {csv_path} - no rows to summarise")
        continue

    # Create the dataset folder explicitly instead of relying on the per-method
    # mkdir(parents=True) calls below to create it as a side effect.
    dataset_dir = outdir / dataset
    dataset_dir.mkdir(parents=True, exist_ok=True)

    combined_rows = []

    # For each method (method_A compared to gold in method_B), count rows within icv_ratio thresholds
    for m in methods:
        method_df = df_ds[df_ds['method_A'] == m]
        n_comparisons = int(len(method_df))
        # Distance of each ratio from perfect agreement (1.0); NaN/inf ratios
        # never satisfy the <= test and therefore count as outside tolerance.
        ratio_error = method_df['icv_ratio'].sub(1.0).abs()
        rows_m = []
        for t in THRESHOLDS:
            # interpret threshold t as closeness to 1.0 (e.g. t=0.95 => within 5% => |ratio-1| <= 0.05)
            tol = 1.0 - float(t)
            n_within = int((ratio_error <= tol).sum())
            pct_within = (n_within / n_comparisons * 100.0) if n_comparisons else np.nan
            row = {'method': m, 'threshold': float(t), 'n_comparisons': n_comparisons, 'n_within': n_within, 'pct_within': float(pct_within)}
            rows_m.append(row)
            combined_rows.append(row)

        # save per-method CSV under derived_metrics/<dataset>/<method>/
        # ('/' in a method name would otherwise create a nested directory)
        safe_method = str(m).replace('/', '_')
        method_dir = dataset_dir / safe_method
        method_dir.mkdir(parents=True, exist_ok=True)
        pd.DataFrame(rows_m).to_csv(method_dir / 'icv_ratio_within_thresholds_by_method.csv', index=False)

    # Save combined CSV and pivot matrix per dataset
    combined_df = pd.DataFrame(combined_rows)
    combined_df.to_csv(dataset_dir / 'icv_ratio_within_thresholds_all_methods.csv', index=False)
    pivot = combined_df.pivot(index='method', columns='threshold', values='pct_within').reindex(index=methods)
    pivot.to_csv(dataset_dir / 'icv_ratio_matrix_by_method_thresholds.csv')
    display(Markdown(f'### ICV ratio results for {dataset}'))
    display(pivot)

Processing dataset: CERMEP-IDB-MRXFDG -> /Users/rushil/brain_extraction/results/quantitative/gold_std_comparison/CERMEP-IDB-MRXFDG/method_vs_gold_per_scan_CERMEP-IDB-MRXFDG.csv


### ICV ratio results for CERMEP-IDB-MRXFDG

threshold,0.95,0.97,0.99
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Brainchop,45.0,35.0,15.0
CTBET,10.0,5.0,5.0
CT_BET,85.0,75.0,30.0
CTbet_Docker,45.0,35.0,10.0
HD-CTBET,50.0,0.0,0.0
Robust-CTBET,75.0,70.0,15.0
SynthStrip,0.0,0.0,0.0


Processing dataset: JH_Head_CT -> /Users/rushil/brain_extraction/results/quantitative/gold_std_comparison/JH_Head_CT/method_vs_gold_per_scan_JH_Head_CT.csv


### ICV ratio results for JH_Head_CT

threshold,0.95,0.97,0.99
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Brainchop,85.714286,60.0,25.714286
CTBET,94.285714,77.142857,48.571429
CT_BET,100.0,71.428571,57.142857
CTbet_Docker,94.285714,74.285714,48.571429
HD-CTBET,51.428571,28.571429,8.571429
Robust-CTBET,91.428571,71.428571,45.714286
SynthStrip,17.142857,11.428571,5.714286


Processing dataset: MISTIE_III -> /Users/rushil/brain_extraction/results/quantitative/gold_std_comparison/MISTIE_III/method_vs_gold_per_scan_mistie3.csv


### ICV ratio results for MISTIE_III

threshold,0.95,0.97,0.99
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Brainchop,60.0,40.0,10.0
CTBET,70.0,20.0,0.0
CT_BET,57.142857,42.857143,14.285714
CTbet_Docker,100.0,60.0,20.0
HD-CTBET,30.0,10.0,10.0
Robust-CTBET,70.0,50.0,20.0
SynthStrip,20.0,10.0,10.0
