In [5]:
# Cell 1 — load pairwise CSV and show a short summary
from pathlib import Path
import pandas as pd
import numpy as np
from IPython.display import display, Markdown

In [18]:
# Absolute path to the consolidated pairwise-metrics CSV.
# NOTE: this location is machine-specific; adjust it when running elsewhere.
CSV_PATH = Path("/Users/rushil/brain_extraction/results/quantitative/pairwise/pairwise_2x2_metrics_all_scans.csv")
df = pd.read_csv(CSV_PATH)
display(df.head())  # preview of the table *before* any exclusions are applied

# Exact stems to drop from every downstream analysis.
exclude = ['6109-317_20150302_0647_ct', '6142-308_20150610_0707_ct', '6193-324_20150924_1431_ct', '6257-335_20160118_1150_ct',
                     '6418-193_20161228_1248_ct', '6470-296_20170602_0607_ct', '6480-154_20170622_0937_ct']

# Any stem starting with one of these prefixes is dropped as well.
exclude_prefixes = ("6046", "6084", "6096", "6246", "6315", "6342", "6499")

# Build one boolean mask and filter once.
# na=False makes a missing stem count as "no prefix match", so the result is a
# plain bool Series; without it a NaN stem yields NaN in the mask and the `~`
# negation below fails (or silently misbehaves on an object-dtype result).
is_excluded_stem = df["stem"].isin(exclude)
has_excluded_prefix = df["stem"].str.startswith(exclude_prefixes, na=False)
df = df[~(is_excluded_stem | has_excluded_prefix)]

Unnamed: 0,patient_id,stem,method_A,method_B,tp,fp,fn,tn,dice,iou,sensitivity_sym,specificity_sym,msd_mm,hd95_mm,icv_A_ml,icv_B_ml,delta_icv_ml,delta_icv_pct,n_vox
0,6001-161,6001-161_20131229_1702_ct,Brainchop,CTBET,1285727,68597,21734,8761542,0.966064,0.934355,0.966363,0.994879,0.646912,3.936647,1614.304686,1558.445704,55.858982,3.584275,10137600
1,6001-161,6001-161_20131229_1702_ct,Brainchop,CTbet_Docker,1288829,65495,18445,8764831,0.968463,0.938854,0.968765,0.995241,0.665632,3.813597,1614.304686,1558.222807,56.08188,3.599092,10137600
2,6001-161,6001-161_20131229_1702_ct,Brainchop,HD-CTBET,1299270,55054,44613,8738663,0.963062,0.928755,0.963076,0.99433,0.999888,4.999462,1614.304686,1601.859396,12.44529,0.776928,10137600
3,6001-161,6001-161_20131229_1702_ct,Brainchop,Robust-CTBET,1284087,70237,15829,8767447,0.967574,0.937185,0.967981,0.995125,0.603386,3.452668,1614.304686,1549.45234,64.852347,4.185501,10137600
4,6001-161,6001-161_20131229_1702_ct,Brainchop,SynthStrip,1340260,14064,64049,8719227,0.971684,0.944928,0.972003,0.995549,0.652497,3.417967,1614.304686,1673.884978,-59.580292,-3.559402,10137600


In [None]:
# Cell 2 — Dice thresholded percentages aggregated per-method across other methods
from pathlib import Path
import pandas as pd
import numpy as np
from IPython.display import display, Markdown

# thresholds to report
THRESHOLDS = [0.90, 0.95, 0.97]
outdir = Path("/Users/rushil/brain_extraction/results/quantitative/pairwise/derived_metrics")
outdir.mkdir(parents=True, exist_ok=True)

methods = sorted(set(df['method_A']).union(df['method_B']))

# For each method, take every pairwise row in which it appears (as A or B) and
# keep the best (max) Dice per stem. A stem then counts as ">= threshold" when
# the method has ANY comparison with another method whose dice >= threshold.
# This does not depend on the threshold, so it is computed once per method.
stem_max_by_method = {}
for m in methods:
    method = df[(df['method_A'] == m) | (df['method_B'] == m)]
    stem_max_by_method[m] = method.groupby('stem', as_index=False)['dice'].max()

# One output row per (threshold, method), thresholds in the outer loop so the
# row order of the CSV stays threshold-major.
rows = []
for t in THRESHOLDS:
    for m in methods:
        stem_max = stem_max_by_method[m]
        n_stems = int(len(stem_max))
        n_ge = int((stem_max['dice'] >= t).sum())
        # Guard against a method with no stems to avoid ZeroDivisionError.
        pct = (n_ge / n_stems * 100.0) if n_stems else np.nan
        rows.append({
            'method': m,
            'threshold': float(t),
            'n_stems': n_stems,
            'n_ge': n_ge,
            'pct_ge': float(pct)
        })

# DataFrame and outputs
dice_method_df = pd.DataFrame(rows)
dice_method_df.to_csv(outdir / 'dice_thresholds_by_method_across_others.csv', index=False)

display(Markdown('### Dice threshold summary aggregated per-method (first rows)'))
display(dice_method_df.head())

# pivot table: methods x thresholds for easy reporting/heatmap
pivot = dice_method_df.pivot(index='method', columns='threshold', values='pct_ge').reindex(index=methods)
pivot.to_csv(outdir / 'dice_matrix_by_method_thresholds.csv')
display(Markdown('### Matrix (rows=method, cols=threshold)'))
display(pivot)


### Dice threshold summary aggregated per-method (first rows)

Unnamed: 0,method,threshold,n_stems,n_ge,pct_ge
0,Brainchop,0.9,4704,4682,99.532313
1,CTBET,0.9,4704,4635,98.533163
2,CT_BET,0.9,2399,2394,99.79158
3,CTbet_Docker,0.9,4704,4703,99.978741
4,HD-CTBET,0.9,4704,4703,99.978741


### Matrix (rows=method, cols=threshold)

threshold,0.90,0.95,0.97
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Brainchop,99.532313,95.812075,45.535714
CTBET,98.533163,97.661565,96.938776
CT_BET,99.79158,99.041267,96.957065
CTbet_Docker,99.978741,99.872449,99.234694
HD-CTBET,99.978741,98.022959,17.47449
Robust-CTBET,99.936224,99.872449,99.744898
SynthStrip,97.491497,90.114796,24.00085


In [None]:
# Share of rows per (method_A, method_B) pair whose absolute ICV difference
# (|delta_icv_ml|) is at most ICV_THRESH_ML.
ICV_THRESH_ML = 5.0
outdir = Path("/Users/rushil/brain_extraction/results/quantitative/pairwise/derived_metrics")
outdir.mkdir(parents=True, exist_ok=True)

rows = []
for pair_key, pair_df in df.groupby(["method_A","method_B"]):
    first_method, second_method = pair_key
    # Missing deltas are ignored: they count neither as total nor as within.
    abs_delta = pair_df['delta_icv_ml'].abs().dropna()
    total = int(len(abs_delta))
    within = int(abs_delta.le(ICV_THRESH_ML).sum())
    if total:
        share = within / total * 100.0
    else:
        # No usable rows for this pair.
        share = np.nan
    record = {
        'method_A': first_method,
        'method_B': second_method,
        'n_scans': total,
        'n_within_5ml': within,
        'pct_within_5ml': float(share),
    }
    rows.append(record)

# Long-form table: one row per pair.
icv_within_df = pd.DataFrame(rows)
icv_within_df.to_csv(outdir / 'icv_within_5ml_by_pair.csv', index=False)
display(Markdown(f'### ICV within {ICV_THRESH_ML} mL by pair (first rows)'))
display(icv_within_df.head())

# Square method-by-method matrix of the same percentages for reporting;
# pairs that never occur in the data stay NaN after the reindex.
methods = sorted(set(df['method_A']).union(df['method_B']))
pct_wide = icv_within_df.pivot(index='method_A', columns='method_B', values='pct_within_5ml')
pivot_icv = pct_wide.reindex(index=methods, columns=methods)
pivot_icv.to_csv(outdir / 'icv_within_5ml_matrix.csv')
display(pivot_icv)

In [21]:
# Sanity check: for every method, print how many pairwise rows involve it
# (either as method_A or as method_B).
methods = sorted(set(df['method_A']) | set(df['method_B']))
for m in methods:
    involves_m = df[['method_A', 'method_B']].eq(m).any(axis=1)
    method = df.loc[involves_m].copy()
    unique_stems = method['stem'].nunique()  # computed but not printed here
    print(m, len(method))

Brainchop 25919
CTBET 25919
CT_BET 14394
CTbet_Docker 25919
HD-CTBET 25919
Robust-CTBET 25919
SynthStrip 25919


In [25]:
# Number of distinct stems with at least one pairwise row involving CT_BET.
ct_bet_mask = df['method_A'].eq('CT_BET') | df['method_B'].eq('CT_BET')
method = df.loc[ct_bet_mask].copy()
method['stem'].nunique()

2399

In [26]:
import pandas as pd

# Distinct-stem count per method (method may appear as A or B), largest first.
methods = sorted(set(df['method_A']).union(df['method_B']))
counts = {
    m: df.loc[(df['method_A'] == m) | (df['method_B'] == m), 'stem'].nunique()
    for m in methods
}
pd.Series(counts).sort_values(ascending=False)

Brainchop       4704
CTBET           4704
CTbet_Docker    4704
HD-CTBET        4704
Robust-CTBET    4704
SynthStrip      4704
CT_BET          2399
dtype: int64