In [11]:
import os, json
from pathlib import Path

# Report where the kernel is running so relative-path problems are easy to spot.
print("CWD:", Path.cwd())

# The notebook may be launched from the project root or from notebooks/,
# so probe both relative locations and keep the first one that exists.
candidates = [Path("reports/experiments"), Path("../reports/experiments")]
metrics_dir = next(filter(Path.exists, candidates), None)

if metrics_dir is not None:
    print("Using metrics dir:", metrics_dir.resolve())
else:
    # Nothing matched: list whichever parent reports/ folders do exist, then fail loudly.
    print("Existing reports dirs:", list(filter(Path.exists, [Path("reports"), Path("../reports")])))
    raise FileNotFoundError("Could not find reports/experiments from current CWD.")



CWD: /Users/tksiankop/Desktop/ai-mentalhealth-research/notebooks
Using metrics dir: /Users/tksiankop/Desktop/ai-mentalhealth-research/reports/experiments


In [12]:
import json, pandas as pd
from pathlib import Path

# Pick the right directory whether the notebook runs from root or /notebooks
candidates = [Path("reports/experiments"), Path("../reports/experiments")]
metrics_dir = next((p for p in candidates if p.exists()), None)
assert metrics_dir is not None, "Could not find reports/experiments/"

# Sorted so the table order is stable across runs.
paths = sorted(metrics_dir.glob("xlmr_metrics*.json"))
print("Found files:", [p.name for p in paths])


def _loss_only_row(file_name, eval_loss):
    """Return a placeholder row for a file that carries no per-class report.

    All per-class columns are None and the class label is "N/A"; only
    ``eval_loss`` (which may itself be None) is filled in.
    """
    return {
        "file": file_name, "class": "N/A",
        "precision": None, "recall": None, "f1-score": None, "support": None,
        "eval_loss": eval_loss,
    }


def _metric_rows(file_name, data):
    """Flatten one decoded metrics payload into a list of table rows.

    Args:
        file_name: Name of the source file, copied into every row.
        data: The object produced by ``json.load`` for that file.

    Returns:
        A list of row dicts with keys file, class, precision, recall,
        f1-score, support and eval_loss. The list is empty when the payload
        has an unsupported top-level type (a warning is printed in that case).
    """
    # A bare number is treated as an eval_loss-only file. bool is a subclass
    # of int in Python, so exclude it explicitly: a file containing just
    # `true`/`false` is not a loss value.
    if isinstance(data, (int, float)) and not isinstance(data, bool):
        return [_loss_only_row(file_name, float(data))]

    if not isinstance(data, dict):
        print(f"[WARN] Skipping {file_name}: unexpected top-level type {type(data)}")
        return []

    eval_loss = data.get("eval_loss")
    report = data.get("report")

    # If report is missing or not a dict, still record the loss row.
    if not isinstance(report, dict):
        return [_loss_only_row(file_name, eval_loss)]

    # Normal case: per-class stats inside 'report'. Entries whose value is
    # not a dict (e.g. a scalar summary value) are skipped.
    return [
        {
            "file": file_name,
            "class": klass,
            "precision": stats.get("precision"),
            "recall": stats.get("recall"),
            "f1-score": stats.get("f1-score"),
            "support": stats.get("support"),
            "eval_loss": eval_loss,
        }
        for klass, stats in report.items()
        if isinstance(stats, dict)
    ]


rows = []
for p in paths:
    try:
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # default text encoding, which is not UTF-8 everywhere.
        with open(p, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception as e:
        # Best-effort load: one unreadable file must not abort the whole table.
        print(f"[WARN] Skipping {p.name}: not valid JSON ({e})")
        continue

    rows.extend(_metric_rows(p.name, data))

df = pd.DataFrame(rows)
print("Parsed rows:", len(df))

# Show the main table: only the listed class labels and averages, plus any
# loss-only placeholder rows ("N/A"); other labels are filtered out.
if not df.empty:
    display(
        df[df["class"].isin(["neutral","distress","macro avg","weighted avg","N/A"])]
        .sort_values(["file","class"])
        .reset_index(drop=True)
    )
else:
    print("No usable metrics found.")


Found files: ['xlmr_metrics.json', 'xlmr_metrics_20251005_025500.json']
Parsed rows: 8


Unnamed: 0,file,class,precision,recall,f1-score,support,eval_loss
0,xlmr_metrics.json,distress,0.0,0.0,0.0,3.0,0.714077
1,xlmr_metrics.json,macro avg,0.2,0.5,0.285714,5.0,0.714077
2,xlmr_metrics.json,neutral,0.4,1.0,0.571429,2.0,0.714077
3,xlmr_metrics.json,weighted avg,0.16,0.4,0.228571,5.0,0.714077
4,xlmr_metrics_20251005_025500.json,distress,0.0,0.0,0.0,3.0,0.714077
5,xlmr_metrics_20251005_025500.json,macro avg,0.2,0.5,0.285714,5.0,0.714077
6,xlmr_metrics_20251005_025500.json,neutral,0.4,1.0,0.571429,2.0,0.714077
7,xlmr_metrics_20251005_025500.json,weighted avg,0.16,0.4,0.228571,5.0,0.714077
