# EANM-AI-QC — Quickstart (Jupyter)

This notebook runs the CLI scripts and then visualizes the produced CSV artifacts under `Results/`.

Key point: the notebook kernel must use the same Python environment as your terminal `.venv`.

In [None]:
from pathlib import Path
import sys, os

# The notebook is expected to be launched from the repository root; resolve the
# current working directory once and verify the entry script sits next to it.
REPO = Path(".").resolve()
_entry_script = REPO / "qnm_qai.py"
assert _entry_script.exists(), "Run Jupyter from the repository root (folder containing qnm_qai.py)"
print("Repo root:", REPO)
print("Python:", sys.executable)

# Best-effort: create the output folder up front (no error if it already exists)
_results_dir = Path("Results")
_results_dir.mkdir(exist_ok=True)


In [None]:
# Make sure matplotlib is installed before the plotting helpers below are used.
# The check is done inline because ensure_matplotlib() is only defined at the end
# of this cell, so calling it here raises NameError on a freshly started kernel.
import importlib
import importlib.util
import subprocess
import sys

if importlib.util.find_spec("matplotlib") is None:
    # argv-list form targets the kernel's interpreter even if its path has spaces.
    _pip = subprocess.run(
        [sys.executable, "-m", "pip", "install", "matplotlib"],
        capture_output=True,
        text=True,
    )
    # Subprocess output does not reach the notebook on its own; echo it.
    print(_pip.stdout + _pip.stderr)
    # Let the import system see the newly installed package in this session.
    importlib.invalidate_caches()
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, roc_auc_score, accuracy_score, balanced_accuracy_score

def _safe_div(num, den):
    return float(num) / float(den) if float(den) != 0.0 else float("nan")

def cm_metrics_from_preds(y_true, prob1, threshold=0.5):
    """Compute binary classification metrics from labels and class-1 scores.

    Scores greater than or equal to ``threshold`` are predicted as class 1,
    everything else as class 0.

    Parameters
    ----------
    y_true : array-like
        True labels; cast to ``int``. The confusion matrix is built for the
        label set ``[0, 1]``.
    prob1 : array-like
        Scores for class 1 (presumably predicted probabilities); cast to ``float``.
    threshold : float, default 0.5
        Decision cut-off applied to ``prob1``.

    Returns
    -------
    (dict, ndarray)
        The metrics dictionary (threshold, tn/fp/fn/tp, sensitivity,
        specificity, ppv, npv, accuracy, balanced_accuracy, auc) and the
        confusion matrix returned by ``confusion_matrix``. Ratios with a zero
        denominator are NaN; ``auc`` is NaN unless exactly two distinct labels
        occur in ``y_true``.
    """
    labels_true = np.asarray(y_true, dtype=int)
    scores = np.asarray(prob1, dtype=float)
    cutoff = float(threshold)
    labels_pred = (scores >= cutoff).astype(int)

    cm = confusion_matrix(labels_true, labels_pred, labels=[0, 1])
    # With labels=[0, 1] the flattened matrix is ordered tn, fp, fn, tp.
    tn, fp, fn, tp = cm.ravel()

    metrics = {
        "threshold": cutoff,
        "tn": int(tn), "fp": int(fp), "fn": int(fn), "tp": int(tp),
        "sensitivity": _safe_div(tp, tp + fn),
        "specificity": _safe_div(tn, tn + fp),
        "ppv": _safe_div(tp, tp + fp),
        "npv": _safe_div(tn, tn + fn),
        "accuracy": float(accuracy_score(labels_true, labels_pred)),
        "balanced_accuracy": float(balanced_accuracy_score(labels_true, labels_pred)),
    }

    # AUC is undefined if only one class is present
    if len(np.unique(labels_true)) == 2:
        metrics["auc"] = float(roc_auc_score(labels_true, scores))
    else:
        metrics["auc"] = float("nan")

    return metrics, cm

def show_confusion_matrix(cm, title="Confusion matrix", labels=("0", "1")):
    """Draw a confusion matrix as a heatmap with the count written in each cell.

    Parameters
    ----------
    cm : array-like
        Confusion matrix; cast to ``int``. Rows are labelled "True" and columns
        "Predicted"; tick positions are fixed at 0 and 1.
    title : str
        Title placed above the plot.
    labels : sequence of str
        Tick labels used on both axes.
    """
    import matplotlib.pyplot as plt
    import numpy as np

    counts = np.asarray(cm, dtype=int)
    fig, ax = plt.subplots(figsize=(4.2, 3.6))
    ax.imshow(counts)

    ax.set(title=title, xlabel="Predicted", ylabel="True")
    tick_positions = [0, 1]
    ax.set_xticks(tick_positions, labels=labels)
    ax.set_yticks(tick_positions, labels=labels)

    # Array index is (row, col) while text coordinates are (x, y), hence the swap.
    for (row, col), count in np.ndenumerate(counts):
        ax.text(col, row, str(count), ha="center", va="center")

    fig.tight_layout()
    plt.show()

def show_roc_curve(y_true, prob1, title="ROC curve"):
    """Plot the ROC curve for ``prob1`` against ``y_true``.

    Prints a notice and returns without plotting when ``y_true`` holds fewer
    than two distinct values.
    """
    import matplotlib.pyplot as plt
    from sklearn.metrics import RocCurveDisplay

    distinct_labels = np.unique(y_true)
    if distinct_labels.size < 2:
        print("ROC: only one class present in y_true; skipping.")
        return

    roc_display = RocCurveDisplay.from_predictions(y_true, prob1)
    # Title the axes the display just drew on.
    roc_display.ax_.set_title(title)
    plt.show()

def ensure_matplotlib():
    """Install matplotlib into the running interpreter's environment if missing.

    Tries to import ``matplotlib.pyplot``. Only when that raises ``ImportError``
    is ``<sys.executable> -m pip install matplotlib`` executed; other import
    failures propagate because installing the package would not fix them.

    The install is best-effort: pip's output is echoed, a non-zero exit status
    is reported but does not raise. Returns ``None``.
    """
    try:
        import matplotlib.pyplot as _plt  # noqa: F401
        return
    except ImportError:
        pass

    import importlib
    import subprocess
    import sys

    # argv-list form (no shell) keeps working when the interpreter path contains
    # spaces, and the function stays valid Python outside IPython.
    completed = subprocess.run(
        [sys.executable, "-m", "pip", "install", "matplotlib"],
        capture_output=True,
        text=True,
    )
    # Subprocess output does not reach the notebook on its own; echo it.
    if completed.stdout:
        print(completed.stdout, end="")
    if completed.returncode != 0:
        print(completed.stderr, end="", file=sys.stderr)
        print("pip install matplotlib failed with exit code", completed.returncode, file=sys.stderr)
    # Let the import system see the newly installed package in this session.
    importlib.invalidate_caches()


## Run the demo (no SHAP/LIME)

This trains models and writes metrics + predictions to `Results/`.

In [None]:
# Shell out to bash to execute examples/run_all_examples.sh. The script path is
# relative, so this only works when the kernel's working directory is the
# repository root; it also requires a `bash` executable on PATH.
!bash examples/run_all_examples.sh

## Summary tables (`Results/*__results.csv`)

In [None]:
from pathlib import Path
import pandas as pd

# Display every summary table found directly under Results/, in path order.
summary_paths = sorted(Path("Results").glob("*__results.csv"))
for summary_path in summary_paths:
    print("\n", summary_path)
    summary_table = pd.read_csv(summary_path)
    display(summary_table)


## Confusion matrices (test set) per method

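For each dataset and method, this reads `Results/<dataset>/<method>/predictions/test.csv` (columns `true_label` and `prob_1`), prints summary metrics, and plots the confusion matrix at threshold 0.5 together with the ROC curve.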

In [None]:
from pathlib import Path
import pandas as pd

# Decision threshold applied to prob_1; also shown in the confusion-matrix title.
THRESHOLD = 0.5
# Metrics echoed next to each plot (subset of what cm_metrics_from_preds returns).
REPORTED_METRICS = ["accuracy", "auc", "balanced_accuracy", "sensitivity", "specificity"]

# Every sub-directory of Results/ is treated as one dataset.
datasets = [p for p in Path("Results").iterdir() if p.is_dir()]
for ds in sorted(datasets):
    print("\n=== DATASET:", ds.name, "===")
    for method_dir in sorted(ds.iterdir()):
        pred_path = method_dir / "predictions" / "test.csv"
        if not pred_path.exists():
            # Not a method folder, or no test predictions were written for it.
            continue
        dfp = pd.read_csv(pred_path)
        if "true_label" not in dfp.columns:
            print(method_dir.name, "test.csv has no true_label; skipping confusion matrix.")
            continue
        if "prob_1" not in dfp.columns:
            # Skip instead of raising KeyError so one malformed file does not
            # abort the plots for all remaining methods and datasets.
            print(method_dir.name, "test.csv has no prob_1; skipping confusion matrix.")
            continue
        y = dfp["true_label"].astype(int).to_numpy()
        prob1 = dfp["prob_1"].astype(float).to_numpy()

        metrics, cm = cm_metrics_from_preds(y, prob1, threshold=THRESHOLD)
        print("\n", method_dir.name, {k: metrics[k] for k in REPORTED_METRICS})
        show_confusion_matrix(cm, title=f"{ds.name} / {method_dir.name} — test CM (thr={THRESHOLD})")
        show_roc_curve(y, prob1, title=f"{ds.name} / {method_dir.name} — test ROC")
