In [1]:
import re
import glob
import numpy as np
import pandas as pd

# Log files to aggregate: one training run per seed, listed explicitly.
log_files = [
    "tabpfn_mlp_bce_seed0.log",
    "tabpfn_mlp_bce_seed2.log",
    "tabpfn_mlp_bce_seed12.log",
    "tabpfn_mlp_bce_seed15.log",
    "tabpfn_mlp_bce_seed21.log",
]

# Uncompiled pattern strings keyed by metric name.
# NOTE: neither `patterns` nor `all_results` is read by any code visible in this
# notebook; both are kept only so the names stay defined.
patterns = {
    "seed": r"Running with seed:\s*(\d+)",
    "auc": r"ROC AUC:\s*([0-9.]+)",
    "bacc": r"Balanced Accuracy:\s*([0-9.]+)",
    **{
        f"sens_spec{pct}": rf"Spec {pct}% → Sens:\s*([0-9.]+), Spec:\s*([0-9.]+), Thr"
        for pct in (20, 40, 60)
    },
}
all_results = []

# One header line, then one " - <file>" line per log.
print("Using log files:", *log_files, sep="\n - ")

# Compiled single-value matchers; capture group 1 holds the number.
pat_seed = re.compile(r"Running with seed:\s*(\d+)")
pat_auc = re.compile(r"ROC AUC:\s*([0-9.]+)")
pat_bacc = re.compile(r"Balanced Accuracy:\s*([0-9.]+)")

# "Spec NN% → Sens: x, Spec: y, Thr" lines written with the unicode arrow.
# Group 1 is the Sens value, group 2 the Spec value.
pat_spec20, pat_spec40, pat_spec60 = (
    re.compile(rf"Spec\s*{pct}%\s*→\s*Sens:\s*([0-9.]+),\s*Spec:\s*([0-9.]+),\s*Thr")
    for pct in (20, 40, 60)
)

# Same lines written with the ASCII arrow "->", tried when the unicode form is absent.
pat_spec20_fallback, pat_spec40_fallback, pat_spec60_fallback = (
    re.compile(rf"Spec\s*{pct}%\s*->\s*Sens:\s*([0-9.]+),\s*Spec:\s*([0-9.]+),\s*Thr")
    for pct in (20, 40, 60)
)

def extract_from_last_test_block(text: str):
    """
    Parse the metrics reported after the last '[TEST]' marker of one log.

    Parameters
    ----------
    text : str
        Full contents of a log file.

    Returns
    -------
    dict
        Keys 'seed', 'auc', 'bacc', 'sens20', 'sens40', 'sens60'.
        'seed' comes from the first 'Running with seed:' match anywhere in
        `text`. The remaining values come from the text that follows the last
        '[TEST]' marker (the whole text when there is no marker). 'sens20/40/60'
        are the 'Sens:' numbers of the 'Spec 20%/40%/60%' lines, accepting
        either the unicode arrow or the ASCII '->' form. A value that is
        missing, or whose captured text is not a valid number, is None.
    """
    # rpartition returns the text after the final marker, or the whole text
    # when the marker never occurs.
    last_block = text.rpartition("[TEST]")[2]

    def _first_match(*candidates):
        """Return the first successful search over `last_block`, else None."""
        for pat in candidates:
            found = pat.search(last_block)
            if found:
                return found
        return None

    def _as_float(found):
        """Convert capture group 1 to float; None if absent or not numeric."""
        if found is None:
            return None
        # `[0-9.]+` also swallows a sentence-ending period ("0.72."), which
        # float() rejects, so trailing dots are removed before converting.
        raw = found.group(1).rstrip(".")
        try:
            return float(raw)
        except ValueError:
            # e.g. "1.2.3" or a lone "." -- treated like a missing metric so a
            # single odd line cannot abort the whole aggregation.
            return None

    # The seed is looked up in the full text, not only in the last block.
    seed_m = pat_seed.search(text)

    return {
        "seed": int(seed_m.group(1)) if seed_m else None,
        "auc": _as_float(_first_match(pat_auc)),
        "bacc": _as_float(_first_match(pat_bacc)),
        "sens20": _as_float(_first_match(pat_spec20, pat_spec20_fallback)),
        "sens40": _as_float(_first_match(pat_spec40, pat_spec40_fallback)),
        "sens60": _as_float(_first_match(pat_spec60, pat_spec60_fallback)),
    }

# Build one record per log: the file name followed by its parsed metrics.
rows = []
for path in log_files:
    with open(path, "r", encoding="utf-8") as f:
        txt = f.read()
    rows.append({"file": path, **extract_from_last_test_block(txt)})

df_runs = pd.DataFrame(rows)
print("\nPer-run results (from LAST [TEST] block):")
print(df_runs)

# Aggregate every metric over the runs that actually reported it.
summary = {}
for metric in ("auc", "bacc", "sens20", "sens40", "sens60"):
    vals = df_runs[metric].dropna().to_numpy(dtype=float)
    n_vals = len(vals)
    # The mean needs at least one value; the sample std (ddof=1) needs at least two.
    mean = float(np.mean(vals)) if n_vals > 0 else np.nan
    std = float(np.std(vals, ddof=1)) if n_vals > 1 else np.nan
    summary[metric] = {"mean": mean, "std": std}

# Transpose so that metrics become rows and mean/std become columns.
df_summary = pd.DataFrame(summary).T
print("\nSummary (mean ± std) across seeds:")
print(df_summary.round(4))

Using log files:
 - tabpfn_mlp_bce_seed0.log
 - tabpfn_mlp_bce_seed2.log
 - tabpfn_mlp_bce_seed12.log
 - tabpfn_mlp_bce_seed15.log
 - tabpfn_mlp_bce_seed21.log

Per-run results (from LAST [TEST] block):
                        file  seed     auc    bacc  sens20  sens40  sens60
0   tabpfn_mlp_bce_seed0.log     0  0.7210  0.6654  0.9575  0.8523  0.7472
1   tabpfn_mlp_bce_seed2.log     2  0.6717  0.6440  0.9485  0.8814  0.0022
2  tabpfn_mlp_bce_seed12.log    12  0.7162  0.6620  0.9553  0.8859  0.7383
3  tabpfn_mlp_bce_seed15.log    15  0.7061  0.6542  0.9508  0.8300  0.7136
4  tabpfn_mlp_bce_seed21.log    21  0.7146  0.6396  0.9508  0.8949  0.6846

Summary (mean ± std) across seeds:
          mean     std
auc     0.7059  0.0199
bacc    0.6530  0.0111
sens20  0.9526  0.0037
sens40  0.8689  0.0270
sens60  0.5772  0.3223


In [None]:
import re
import glob
import numpy as np
import pandas as pd

# Log files to aggregate: one training run per seed, listed explicitly.
log_files = [
    "tabpfn_mlp_bce_seed0.log",
    "tabpfn_mlp_bce_seed3.log",
    "tabpfn_mlp_bce_seed12.log",
    "tabpfn_mlp_bce_seed15.log",
    "tabpfn_mlp_bce_seed21.log",
]

# Uncompiled pattern strings keyed by metric name.
# NOTE: neither `patterns` nor `all_results` is read by any code visible in this
# notebook; both are kept only so the names stay defined.
patterns = {
    "seed": r"Running with seed:\s*(\d+)",
    "auc": r"ROC AUC:\s*([0-9.]+)",
    "bacc": r"Balanced Accuracy:\s*([0-9.]+)",
    **{
        f"sens_spec{pct}": rf"Spec {pct}% → Sens:\s*([0-9.]+), Spec:\s*([0-9.]+), Thr"
        for pct in (20, 40, 60)
    },
}
all_results = []

# One header line, then one " - <file>" line per log.
print("Using log files:", *log_files, sep="\n - ")

# Compiled single-value matchers; capture group 1 holds the number.
pat_seed = re.compile(r"Running with seed:\s*(\d+)")
pat_auc = re.compile(r"ROC AUC:\s*([0-9.]+)")
pat_bacc = re.compile(r"Balanced Accuracy:\s*([0-9.]+)")

# "Spec NN% → Sens: x, Spec: y, Thr" lines written with the unicode arrow.
# Group 1 is the Sens value, group 2 the Spec value.
pat_spec20, pat_spec40, pat_spec60 = (
    re.compile(rf"Spec\s*{pct}%\s*→\s*Sens:\s*([0-9.]+),\s*Spec:\s*([0-9.]+),\s*Thr")
    for pct in (20, 40, 60)
)

# Same lines written with the ASCII arrow "->", tried when the unicode form is absent.
pat_spec20_fallback, pat_spec40_fallback, pat_spec60_fallback = (
    re.compile(rf"Spec\s*{pct}%\s*->\s*Sens:\s*([0-9.]+),\s*Spec:\s*([0-9.]+),\s*Thr")
    for pct in (20, 40, 60)
)

def extract_from_last_test_block(text: str):
    """
    Parse the metrics reported after the last '[TEST]' marker of one log.

    Parameters
    ----------
    text : str
        Full contents of a log file.

    Returns
    -------
    dict
        Keys 'seed', 'auc', 'bacc', 'sens20', 'sens40', 'sens60'.
        'seed' comes from the first 'Running with seed:' match anywhere in
        `text`. The remaining values come from the text that follows the last
        '[TEST]' marker (the whole text when there is no marker). 'sens20/40/60'
        are the 'Sens:' numbers of the 'Spec 20%/40%/60%' lines, accepting
        either the unicode arrow or the ASCII '->' form. A value that is
        missing, or whose captured text is not a valid number, is None.
    """
    # rpartition returns the text after the final marker, or the whole text
    # when the marker never occurs.
    last_block = text.rpartition("[TEST]")[2]

    def _first_match(*candidates):
        """Return the first successful search over `last_block`, else None."""
        for pat in candidates:
            found = pat.search(last_block)
            if found:
                return found
        return None

    def _as_float(found):
        """Convert capture group 1 to float; None if absent or not numeric."""
        if found is None:
            return None
        # `[0-9.]+` also swallows a sentence-ending period ("0.72."), which
        # float() rejects, so trailing dots are removed before converting.
        raw = found.group(1).rstrip(".")
        try:
            return float(raw)
        except ValueError:
            # e.g. "1.2.3" or a lone "." -- treated like a missing metric so a
            # single odd line cannot abort the whole aggregation.
            return None

    # The seed is looked up in the full text, not only in the last block.
    seed_m = pat_seed.search(text)

    return {
        "seed": int(seed_m.group(1)) if seed_m else None,
        "auc": _as_float(_first_match(pat_auc)),
        "bacc": _as_float(_first_match(pat_bacc)),
        "sens20": _as_float(_first_match(pat_spec20, pat_spec20_fallback)),
        "sens40": _as_float(_first_match(pat_spec40, pat_spec40_fallback)),
        "sens60": _as_float(_first_match(pat_spec60, pat_spec60_fallback)),
    }

# Build one record per log: the file name followed by its parsed metrics.
rows = []
for path in log_files:
    with open(path, "r", encoding="utf-8") as f:
        txt = f.read()
    rows.append({"file": path, **extract_from_last_test_block(txt)})

df_runs = pd.DataFrame(rows)
print("\nPer-run results (from LAST [TEST] block):")
print(df_runs)

# Aggregate every metric over the runs that actually reported it.
summary = {}
for metric in ("auc", "bacc", "sens20", "sens40", "sens60"):
    vals = df_runs[metric].dropna().to_numpy(dtype=float)
    n_vals = len(vals)
    # The mean needs at least one value; the sample std (ddof=1) needs at least two.
    mean = float(np.mean(vals)) if n_vals > 0 else np.nan
    std = float(np.std(vals, ddof=1)) if n_vals > 1 else np.nan
    summary[metric] = {"mean": mean, "std": std}

# Transpose so that metrics become rows and mean/std become columns.
df_summary = pd.DataFrame(summary).T
print("\nSummary (mean ± std) across seeds:")
print(df_summary.round(4))

Using log files:
 - tabpfn_mlp_bce_seed0.log
 - tabpfn_mlp_bce_seed3.log
 - tabpfn_mlp_bce_seed12.log
 - tabpfn_mlp_bce_seed15.log
 - tabpfn_mlp_bce_seed21.log

Per-run results (from LAST [TEST] block):
                        file  seed     auc    bacc  sens20  sens40  sens60
0   tabpfn_mlp_bce_seed0.log     0  0.7210  0.6654  0.9575  0.8523  0.7472
1   tabpfn_mlp_bce_seed3.log     3  0.7160  0.6611  0.9508  0.8725  0.7025
2  tabpfn_mlp_bce_seed12.log    12  0.7162  0.6620  0.9553  0.8859  0.7383
3  tabpfn_mlp_bce_seed15.log    15  0.7061  0.6542  0.9508  0.8300  0.7136
4  tabpfn_mlp_bce_seed21.log    21  0.7146  0.6396  0.9508  0.8949  0.6846

Summary (mean ± std) across seeds:
          mean     std
auc     0.7148  0.0054
bacc    0.6565  0.0103
sens20  0.9530  0.0032
sens40  0.8671  0.0262
sens60  0.7172  0.0257


In [3]:
import re
import numpy as np
import pandas as pd

# ---- input logs: one file per seed, listed by hand ----
log_files = [
    "tabpfn_mlp_bce_seed0.log",
    "tabpfn_mlp_bce_seed3.log",
    "tabpfn_mlp_bce_seed12.log",
    "tabpfn_mlp_bce_seed15.log",
    "tabpfn_mlp_bce_seed21.log",
]

# One header line, then one " - <file>" line per log.
print("Using log files:", *log_files, sep="\n - ")

# ---- single-value regexes (capture group 1 holds the number) ----
pat_seed, pat_auc, pat_bacc = map(
    re.compile,
    (
        r"Running with seed:\s*(\d+)",
        r"ROC AUC:\s*([0-9.]+)",
        r"Balanced Accuracy:\s*([0-9.]+)",
    ),
)

def _mk_spec_pat(pct: int):
    """
    Build the two regexes for a 'Spec <pct>% <arrow> Sens: x, Spec: y, Thr' line.

    Returns a 2-tuple of compiled patterns: the first expects the unicode
    arrow '→', the second the ASCII arrow '->'. In both, group 1 captures the
    'Sens:' number and group 2 the 'Spec:' number.
    """
    head = rf"Spec\s*{pct}%\s*"
    tail = r"\s*Sens:\s*([0-9.]+),\s*Spec:\s*([0-9.]+),\s*Thr"
    return tuple(re.compile(head + arrow + tail) for arrow in ("→", "->"))
PAT_20, PAT_40, PAT_60 = map(_mk_spec_pat, (20, 40, 60))

def _grab(block: str, pats):
    """Try one or multiple regexes, return the first match."""
    if not isinstance(pats, (list, tuple)):
        pats = (pats,)
    for p in pats:
        m = p.search(block)
        if m:
            return m
    return None

def parse_block(block: str):
    """
    Extract metrics from a contiguous metrics block.

    Parameters
    ----------
    block : str
        Text of one metrics block.

    Returns
    -------
    dict
        Keys 'auc', 'bacc', 'sens20', 'sens40', 'sens60'. 'sens20/40/60' are the
        'Sens:' numbers (capture group 1) of the 'Spec 20%/40%/60%' lines. A
        value that is missing from `block`, or whose captured text is not a
        valid number, is None.
    """
    def _as_float(found):
        """Convert capture group 1 to float; None if absent or not numeric."""
        if found is None:
            return None
        # `[0-9.]+` also swallows a sentence-ending period ("0.72."), which
        # float() rejects, so trailing dots are removed before converting.
        raw = found.group(1).rstrip(".")
        try:
            return float(raw)
        except ValueError:
            # e.g. "1.2.3" or a lone "." -- treated like a missing metric so a
            # single odd line cannot abort the whole aggregation.
            return None

    return {
        "auc":    _as_float(_grab(block, pat_auc)),
        "bacc":   _as_float(_grab(block, pat_bacc)),
        "sens20": _as_float(_grab(block, PAT_20)),
        "sens40": _as_float(_grab(block, PAT_40)),
        "sens60": _as_float(_grab(block, PAT_60)),
    }

def find_metric_blocks(text: str):
    """
    Cut `text` at every 'ROC AUC:' match and parse each piece.

    A piece runs from the start of one 'ROC AUC:' match up to the start of the
    next one; the last piece runs to the end of the text. Each piece is passed
    to parse_block, and the parsed dicts are returned in file order, keeping
    only those in which at least one of the five metrics is not None.
    Returns an empty list when the text contains no 'ROC AUC:' match.
    """
    cut_points = [hit.start() for hit in pat_auc.finditer(text)]
    # Sentinel so the final piece extends to the end of the text; with no
    # matches the pairing below is empty and nothing gets parsed.
    cut_points.append(len(text))

    metric_names = ("auc", "bacc", "sens20", "sens40", "sens60")
    parsed = (
        parse_block(text[lo:hi])
        for lo, hi in zip(cut_points, cut_points[1:])
    )
    # Drop results in which every metric came back as None.
    return [
        metrics
        for metrics in parsed
        if not all(metrics.get(key) is None for key in metric_names)
    ]

# ---- parse all logs: keep the FIRST and the LAST metric block of each ----
rows = []
for path in log_files:
    with open(path, "r", encoding="utf-8") as f:
        txt = f.read()

    # Seed: first "Running with seed:" match anywhere in the file, if any.
    seed_m = pat_seed.search(txt)
    seed = int(seed_m.group(1)) if seed_m else None

    blocks = find_metric_blocks(txt)
    # A log without any metric block gets empty dicts, so every .get() below is None.
    first, last = (blocks[0], blocks[-1]) if blocks else ({}, {})

    # Column order: file, seed, all *_first metrics, then all *_last metrics.
    row = {"file": path, "seed": seed}
    for tag, picked in (("first", first), ("last", last)):
        for key in ("auc", "bacc", "sens20", "sens40", "sens60"):
            row[f"{key}_{tag}"] = picked.get(key)
    rows.append(row)

df_runs = pd.DataFrame(rows)
print("\nPer-run results (FIRST and LAST metrics blocks):")
print(df_runs)

# ---- summarize first vs last separately ----
def summarize(df, suffix: str):
    """
    Compute mean and sample std of the '<metric>_<suffix>' columns of `df`.

    The metrics are auc, bacc, sens20, sens40 and sens60. Missing values are
    dropped per column before aggregating. A column with no values yields
    NaN for both statistics; a column with exactly one value yields that value
    as the mean and NaN as the std (the std uses ddof=1).

    Returns a DataFrame indexed by column name with columns 'mean' and 'std'.
    """
    def _mean_std(values):
        # Guard the degenerate sizes first; the sample std needs two points.
        if values.size == 0:
            return np.nan, np.nan
        if values.size == 1:
            return float(values[0]), np.nan
        return float(np.mean(values)), float(np.std(values, ddof=1))

    stats = {}
    for name in ("auc", "bacc", "sens20", "sens40", "sens60"):
        column = f"{name}_{suffix}"
        observed = df[column].dropna().to_numpy(dtype=float)
        stats[column] = dict(zip(("mean", "std"), _mean_std(observed)))
    # Transpose so that each metric column becomes a row.
    return pd.DataFrame(stats).T

# Summarize the FIRST and the LAST metric blocks separately, then print both tables.
df_summary_first, df_summary_last = (
    summarize(df_runs, which) for which in ("first", "last")
)

for heading, table in (
    ("\nSummary of FIRST metrics (mean ± std):", df_summary_first),
    ("\nSummary of LAST metrics (mean ± std):", df_summary_last),
):
    print(heading)
    print(table.round(4))

Using log files:
 - tabpfn_mlp_bce_seed0.log
 - tabpfn_mlp_bce_seed3.log
 - tabpfn_mlp_bce_seed12.log
 - tabpfn_mlp_bce_seed15.log
 - tabpfn_mlp_bce_seed21.log

Per-run results (FIRST and LAST metrics blocks):
                        file  seed  auc_first  bacc_first  sens20_first  \
0   tabpfn_mlp_bce_seed0.log     0     0.7242      0.6541        0.9553   
1   tabpfn_mlp_bce_seed3.log     3     0.7375      0.6832        0.9620   
2  tabpfn_mlp_bce_seed12.log    12     0.7212      0.6642        0.9620   
3  tabpfn_mlp_bce_seed15.log    15     0.7324      0.6663        0.9642   
4  tabpfn_mlp_bce_seed21.log    21     0.7227      0.6653        0.9508   

   sens40_first  sens60_first  auc_last  bacc_last  sens20_last  sens40_last  \
0        0.8949        0.7114    0.7210     0.6654       0.9575       0.8523   
1        0.9016        0.7696    0.7160     0.6611       0.9508       0.8725   
2        0.9060        0.7047    0.7162     0.6620       0.9553       0.8859   
3        0.8949    