In [2]:
import re
import glob
import numpy as np
import pandas as pd

# Glob pattern selecting the .log files to aggregate (relative to the working directory)
LOG_DIR = "*.log"

# A number as the logs print it: "12", "12.", "0.7254" or ".5".  Unlike the looser
# "[0-9.]+", this never captures a sentence-ending period ("ROC AUC: 0.7254."),
# so float() cannot raise ValueError on the captured text.
NUMBER = r"([0-9]+(?:\.[0-9]*)?|\.[0-9]+)"

# regex patterns to capture the values; in the sens_spec* patterns group 1 is the
# sensitivity and group 2 is the specificity
patterns = {
    "auc": r"ROC AUC:\s*" + NUMBER,
    "bacc": r"Balanced Accuracy:\s*" + NUMBER,
    "sens_spec20": r"Spec 20% → Sens:\s*" + NUMBER + r", Spec:\s*" + NUMBER + r", Thr",
    "sens_spec40": r"Spec 40% → Sens:\s*" + NUMBER + r", Spec:\s*" + NUMBER + r", Thr",
    "sens_spec60": r"Spec 60% → Sens:\s*" + NUMBER + r", Spec:\s*" + NUMBER + r", Thr",
}

# result key -> key in `patterns` whose first capture group holds that value
METRIC_PATTERNS = {
    "auc": "auc",
    "bacc": "bacc",
    "sens20": "sens_spec20",
    "sens40": "sens_spec40",
    "sens60": "sens_spec60",
}


def parse_log_file(path):
    """
    Read one log file and return the metrics found in it.

    Parameters
    ----------
    path : str
        Path of the log file to read.

    Returns
    -------
    dict
        Maps each key of METRIC_PATTERNS whose pattern matched to the float
        captured by the first match in the file. Metrics that do not occur
        in the file are simply absent from the dict.
    """
    # Decode explicitly as UTF-8: the sensitivity patterns contain the non-ASCII
    # arrow "→", which only matches if the file is decoded the way it was written;
    # the platform default encoding is not UTF-8 everywhere.
    with open(path, "r", encoding="utf-8") as f:
        text = f.read()

    found = {}
    for key, pat in METRIC_PATTERNS.items():
        match = re.search(patterns[pat], text)
        if match:
            found[key] = float(match.group(1))  # first group = the metric itself
    return found


def mean_and_std(values):
    """
    Return (mean, sample std) of `values` as floats.

    With no values both results are NaN; with a single value the sample
    standard deviation (ddof=1) is undefined and is returned as NaN. Handling
    these cases explicitly avoids NumPy's RuntimeWarnings.
    """
    arr = np.asarray(values, dtype=float)
    if arr.size == 0:
        return np.nan, np.nan
    if arr.size == 1:
        return float(arr[0]), np.nan
    return float(np.mean(arr)), float(np.std(arr, ddof=1))


# store results: one list of values per metric, filled across all log files
results = {k: [] for k in METRIC_PATTERNS}

# sorted() makes the processing order reproducible; glob order is arbitrary
for log_file in sorted(glob.glob(LOG_DIR)):
    for key, value in parse_log_file(log_file).items():
        results[key].append(value)

# summarize into DataFrame
summary = {}
for metric, values in results.items():
    mean, std = mean_and_std(values)
    summary[metric] = {"mean": mean, "std": std}

df = pd.DataFrame(summary).T
print(df.round(4))

          mean     std
auc     0.7254  0.0044
bacc    0.6628  0.0049
sens20  0.9597  0.0065
sens40  0.8980  0.0049
sens60  0.7208  0.0143


In [1]:
import re
import glob
import numpy as np
import pandas as pd

# Explicit list of the log files to aggregate, one per random seed
log_files = [
    f"tabpfn_default_seed{seed}.log" for seed in (0, 2, 12, 15, 21)
]

# Plain-string regex patterns for the values of interest.  In the sens_spec*
# entries group 1 is the sensitivity and group 2 the specificity.
# NOTE(review): this dict is not referenced again in the visible part of the
# notebook; it is kept in case another cell relies on it.
patterns = {
    "seed": r"Running with seed:\s*(\d+)",
    "auc": r"ROC AUC:\s*([0-9.]+)",
    "bacc": r"Balanced Accuracy:\s*([0-9.]+)",
}
patterns.update(
    (f"sens_spec{level}", rf"Spec {level}% → Sens:\s*([0-9.]+), Spec:\s*([0-9.]+), Thr")
    for level in (20, 40, 60)
)

# Container for per-seed results
# NOTE(review): also not referenced again in the visible part of the notebook.
all_results = []

# Echo the inputs so the printed output documents which files were used
print("Using log files:")
for log_name in log_files:
    print(f" - {log_name}")

# A number as the logs print it: "12", "12.", "0.7254" or ".5".  Unlike the looser
# "[0-9.]+", this never captures a sentence-ending period ("ROC AUC: 0.7254."),
# so float() on the captured text cannot raise ValueError.
_NUM = r"([0-9]+(?:\.[0-9]*)?|\.[0-9]+)"


def _spec_pattern(level, arrow):
    """Compile the 'Spec <level>% <arrow> Sens: x, Spec: y, Thr' pattern.

    Group 1 of the compiled pattern is the sensitivity, group 2 the specificity.
    """
    return re.compile(
        r"Spec\s*" + str(level) + r"%\s*" + arrow
        + r"\s*Sens:\s*" + _NUM + r",\s*Spec:\s*" + _NUM + r",\s*Thr"
    )


# Regex patterns (handles the unicode arrow "→")
pat_seed = re.compile(r"Running with seed:\s*(\d+)")
pat_auc = re.compile(r"ROC AUC:\s*" + _NUM)
pat_bacc = re.compile(r"Balanced Accuracy:\s*" + _NUM)
pat_spec20 = _spec_pattern(20, "→")
pat_spec40 = _spec_pattern(40, "→")
pat_spec60 = _spec_pattern(60, "→")

# (Optional) fallback patterns if your logs sometimes use "->" instead of "→"
pat_spec20_fallback = _spec_pattern(20, "->")
pat_spec40_fallback = _spec_pattern(40, "->")
pat_spec60_fallback = _spec_pattern(60, "->")


def extract_from_last_test_block(text: str):
    """
    Parse the metrics reported after the last '[TEST]' marker of a log.

    Parameters
    ----------
    text : str
        Full contents of one log file.

    Returns
    -------
    dict
        Keys 'seed', 'auc', 'bacc', 'sens20', 'sens40', 'sens60'.
        'seed' is the int from the first 'Running with seed:' line anywhere
        in `text`. The other values are floats taken from the first match
        inside the text that follows the last '[TEST]' marker (the whole
        text when there is no marker). Anything not found is None.
    """
    # re.split always returns at least one element, so [-1] is the whole text
    # when the marker never occurs.
    last_block = re.split(r"\[TEST\]", text)[-1]

    # get seed from anywhere in file (first occurrence)
    seed_m = pat_seed.search(text)

    def _first_float(*candidates):
        """Return group 1 of the first candidate pattern that matches, as float."""
        for pat in candidates:
            m = pat.search(last_block)
            if m:
                return float(m.group(1))
        return None

    return {
        "seed": int(seed_m.group(1)) if seed_m else None,
        "auc": _first_float(pat_auc),
        "bacc": _first_float(pat_bacc),
        # group 1 of the spec patterns is the sensitivity; try "→" first, then "->"
        "sens20": _first_float(pat_spec20, pat_spec20_fallback),
        "sens40": _first_float(pat_spec40, pat_spec40_fallback),
        "sens60": _first_float(pat_spec60, pat_spec60_fallback),
    }

# Read each listed log and turn it into one record: file name + parsed metrics
rows = []
for path in log_files:
    with open(path, "r", encoding="utf-8") as f:
        rows.append({"file": path, **extract_from_last_test_block(f.read())})

df_runs = pd.DataFrame(rows)
print("\nPer-run results (from LAST [TEST] block):")
print(df_runs)

# Aggregate each metric over the runs in which it was found (missing values are
# dropped).  The sample std (ddof=1) needs at least two values, so it is NaN
# for a single run; with no values at all both statistics are NaN.
summary = {}
for metric in ["auc", "bacc", "sens20", "sens40", "sens60"]:
    found = df_runs[metric].dropna().to_numpy(dtype=float)
    n_found = len(found)
    if n_found >= 2:
        stats = {"mean": float(np.mean(found)), "std": float(np.std(found, ddof=1))}
    elif n_found == 1:
        stats = {"mean": float(found[0]), "std": np.nan}
    else:
        stats = {"mean": np.nan, "std": np.nan}
    summary[metric] = stats

# One row per metric, columns "mean" and "std"
df_summary = pd.DataFrame(summary).T
print("\nSummary (mean ± std) across seeds:")
print(df_summary.round(4))

Using log files:
 - tabpfn_default_seed0.log
 - tabpfn_default_seed2.log
 - tabpfn_default_seed12.log
 - tabpfn_default_seed15.log
 - tabpfn_default_seed21.log

Per-run results (from LAST [TEST] block):
                        file  seed     auc    bacc  sens20  sens40  sens60
0   tabpfn_default_seed0.log     0  0.7242  0.6541  0.9553  0.8949  0.7114
1   tabpfn_default_seed2.log     2  0.7137  0.6643  0.9463  0.8949  0.7181
2  tabpfn_default_seed12.log    12  0.7212  0.6642  0.9620  0.9060  0.7047
3  tabpfn_default_seed15.log    15  0.7324  0.6663  0.9642  0.8949  0.7293
4  tabpfn_default_seed21.log    21  0.7227  0.6653  0.9508  0.8993  0.7181

Summary (mean ± std) across seeds:
          mean     std
auc     0.7228  0.0067
bacc    0.6628  0.0050
sens20  0.9557  0.0075
sens40  0.8980  0.0049
sens60  0.7163  0.0091
