# Tabular radiomics demo

Trains/evaluates:
- `pl_kernel_svm`
- `pl_qcnn_alt`
- `pl_qcnn_muw`

Then visualizes **test-set** confusion matrices and ROC curves from the stored prediction CSVs.

In [None]:
from pathlib import Path
import sys, os

# The notebook must be started from the repository root: every later cell
# uses paths relative to the working directory.
REPO = Path.cwd().resolve()
assert (REPO / "qnm_qai.py").exists(), "Run Jupyter from the repository root (folder containing qnm_qai.py)"
print(f"Repo root: {REPO}")
print(f"Python: {sys.executable}")

# Create the output folder up front; a no-op when it already exists.
Path("Results").mkdir(exist_ok=True)



In [None]:
# NOTE: `ensure_matplotlib()` was previously called on this line, but the
# function is only defined at the bottom of this cell. On a fresh kernel the
# call raised NameError and aborted the cell before any helper was defined.
# The plotting helpers below import matplotlib lazily, so nothing here needs
# it yet; if matplotlib is missing, call `ensure_matplotlib()` once this cell
# has finished running.
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, roc_auc_score, accuracy_score, balanced_accuracy_score

def _safe_div(num, den):
    return float(num) / float(den) if float(den) != 0.0 else float("nan")

def cm_metrics_from_preds(y_true, prob1, threshold=0.5):
    """Compute confusion-matrix based metrics from labels and scores.

    A score is turned into a predicted label of 1 when it is greater than or
    equal to ``threshold``, otherwise 0.

    Args:
        y_true: Ground-truth labels; converted to an integer array.
        prob1: Scores for class 1; converted to a float array.
        threshold: Decision cut-off applied to ``prob1``.

    Returns:
        A ``(metrics, cm)`` tuple. ``metrics`` is a dict with the threshold,
        the four confusion-matrix counts, sensitivity, specificity, PPV, NPV,
        accuracy, balanced accuracy and AUC. Ratios with a zero denominator
        are NaN, and AUC is NaN unless exactly two distinct labels occur in
        ``y_true``. ``cm`` is the confusion matrix computed for labels
        ``[0, 1]``.
    """
    labels = np.asarray(y_true, dtype=int)
    scores = np.asarray(prob1, dtype=float)
    cutoff = float(threshold)
    predicted = (scores >= cutoff).astype(int)

    # Fixing labels=[0, 1] keeps the matrix 2x2 even if a class is absent.
    cm = confusion_matrix(labels, predicted, labels=[0, 1])
    tn, fp, fn, tp = (int(count) for count in cm.ravel())

    metrics = {
        "threshold": cutoff,
        "tn": tn,
        "fp": fp,
        "fn": fn,
        "tp": tp,
        "sensitivity": _safe_div(tp, tp + fn),
        "specificity": _safe_div(tn, tn + fp),
        "ppv": _safe_div(tp, tp + fp),
        "npv": _safe_div(tn, tn + fn),
        "accuracy": float(accuracy_score(labels, predicted)),
        "balanced_accuracy": float(balanced_accuracy_score(labels, predicted)),
    }

    # AUC is only computed when both classes are present in the labels.
    both_classes_present = len(np.unique(labels)) == 2
    if both_classes_present:
        metrics["auc"] = float(roc_auc_score(labels, scores))
    else:
        metrics["auc"] = float("nan")

    return metrics, cm

def show_confusion_matrix(cm, title="Confusion matrix", labels=("0", "1")):
    """Draw a confusion matrix as an image with each cell's count overlaid.

    Args:
        cm: Confusion matrix; converted to an integer array.
        title: Title placed above the plot.
        labels: Tick labels used on both axes at positions 0 and 1.
    """
    import matplotlib.pyplot as plt
    import numpy as np

    counts = np.asarray(cm, dtype=int)
    figure, axes = plt.subplots(figsize=(4.2, 3.6))
    axes.imshow(counts)

    axes.set(title=title, xlabel="Predicted", ylabel="True")
    tick_positions = [0, 1]
    axes.set_xticks(tick_positions, labels=labels)
    axes.set_yticks(tick_positions, labels=labels)

    # Text coordinates are (x, y), i.e. (column, row) of the matrix.
    for (row, col), count in np.ndenumerate(counts):
        axes.text(col, row, str(count), ha="center", va="center")

    figure.tight_layout()
    plt.show()

def show_roc_curve(y_true, prob1, title="ROC curve"):
    """Plot a ROC curve from true labels and class-1 scores.

    When ``y_true`` contains fewer than two distinct values, a notice is
    printed and nothing is plotted.

    Args:
        y_true: Ground-truth labels.
        prob1: Scores passed to ``RocCurveDisplay.from_predictions``.
        title: Title placed above the plot.
    """
    import matplotlib.pyplot as plt
    from sklearn.metrics import RocCurveDisplay

    distinct_labels = np.unique(y_true)
    if len(distinct_labels) >= 2:
        RocCurveDisplay.from_predictions(y_true, prob1)
        plt.title(title)
        plt.show()
    else:
        print("ROC: only one class present in y_true; skipping.")

def ensure_matplotlib():
    """Make sure matplotlib is importable, installing it with pip if needed.

    Tries to import ``matplotlib.pyplot``. If that raises ``ImportError``,
    runs ``pip install matplotlib`` with the interpreter executing this code
    (``sys.executable``) and prints pip's output. A failed installation is
    reported but does not raise, so the helper stays best-effort.

    Returns:
        None.
    """
    try:
        import matplotlib.pyplot as _plt  # noqa: F401
    except ImportError:
        import importlib
        import subprocess
        import sys

        # An argument list without a shell keeps an interpreter path that
        # contains spaces intact, and is plain Python rather than the
        # IPython-only `!` escape.
        completed = subprocess.run(
            [sys.executable, "-m", "pip", "install", "matplotlib"],
            capture_output=True,
            text=True,
            check=False,
        )
        # Re-emit pip's output through print so it appears with the caller's
        # own output instead of being lost in the child process's streams.
        print(completed.stdout, end="")
        if completed.returncode != 0:
            print(completed.stderr, end="")
            print("pip install matplotlib failed with exit code", completed.returncode)
        # Let the import system pick up the freshly installed package.
        importlib.invalidate_caches()


## Build tabular CSVs from raw FDB/LDB (demo_data/tabular/raw)

Produces:
- `demo_data/tabular/real_train.csv`
- `demo_data/tabular/real_infer.csv`

In [None]:
!python examples/build_tabular_from_fdb_ldb.py

## Train + evaluate (no SHAP/LIME)

In [None]:
!python qnm_qai.py run   --input demo_data/tabular/real_train.csv   --infer demo_data/tabular/real_infer.csv   --input-type tabular   --methods pl_kernel_svm,pl_qcnn_alt,pl_qcnn_muw   --results-dir Results   --test-size 0.25   --max-samples-per-method 80   --qcnn-epochs 15   --qcnn-lr 0.02   --qcnn-batch-size 16   --qcnn-init-scale 0.1   --seed 0   --no-explain


## Summary metrics table

In [None]:
import pandas as pd
from pathlib import Path

# Summary CSV expected under Results/ (presumably produced by the training
# run above; confirm against qnm_qai.py).
summary = Path("Results", "real_train__results.csv")
df = pd.read_csv(summary)
# A bare expression on the last line makes the notebook render the table.
df


## Per-method test confusion matrices + ROC

Uses `Results/real_train/<method>/predictions/test.csv`.

In [None]:
from pathlib import Path
import pandas as pd

# For each method: load its stored test predictions, show the first rows,
# then the metrics, confusion matrix and ROC curve at a fixed 0.5 threshold.
base = Path("Results") / "real_train"
for method in ["pl_kernel_svm", "pl_qcnn_alt", "pl_qcnn_muw"]:
    pred_path = base / method / "predictions" / "test.csv"
    print("\nMETHOD:", method)

    # A method with no predictions file must not abort the loop for the
    # remaining methods; report it and move on.
    if not pred_path.exists():
        print("No test predictions found at", pred_path, "- skipping.")
        continue

    dfp = pd.read_csv(pred_path)
    display(dfp.head(10))

    y = dfp["true_label"].astype(int).to_numpy()
    prob1 = dfp["prob_1"].astype(float).to_numpy()

    metrics, cm = cm_metrics_from_preds(y, prob1, threshold=0.5)
    display(pd.DataFrame([metrics]))
    show_confusion_matrix(cm, title=f"real_train / {method} — test CM (thr=0.5)")
    show_roc_curve(y, prob1, title=f"real_train / {method} — test ROC")


## If the run stored an optimized decision threshold

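Some configurations store a decision threshold other than 0.5 in the model metadata, chosen to maximize balanced accuracy on the TRAIN split. This cell prints the stored `decision_threshold` for each method whose `metadata.json` exists; methods without a metadata file are skipped.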

In [None]:
import json
from pathlib import Path

# Print the decision threshold recorded in each method's model metadata.
# Methods without a metadata file are silently skipped; a metadata file
# without the key prints None.
base = Path("Results") / "real_train"
for method in ("pl_kernel_svm", "pl_qcnn_alt", "pl_qcnn_muw"):
    meta = base / method / "model" / "metadata.json"
    if meta.exists():
        j = json.loads(meta.read_text())
        thr = j.get("decision_threshold")
        print(method, "decision_threshold:", thr)
