In [22]:
%pip install tabulate

Collecting tabulate
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Downloading tabulate-0.9.0-py3-none-any.whl (35 kB)
Installing collected packages: tabulate
Successfully installed tabulate-0.9.0
Note: you may need to restart the kernel to use updated packages.


In [1]:
import json
import pandas as pd
from joblib import load
import sys
import os
from sklearn.metrics import classification_report, fbeta_score, precision_score, recall_score, accuracy_score
sys.path.append(os.path.abspath(".."))
def numeric_selector_after_poly(df):
    """Return the column labels of ``df`` that are not in ``categorical_vars``.

    ``categorical_vars`` is looked up as a global at call time and is not
    defined in this cell, so it must exist before the function is called
    on a frame that has columns.
    NOTE(review): presumably this definition is kept here so ``joblib.load``
    can resolve a reference stored inside the saved pipeline — confirm.
    """
    kept_columns = []
    for column in df.columns:
        if column in categorical_vars:
            continue
        kept_columns.append(column)
    return kept_columns
# --- Training artifacts: pipeline, decision threshold and expected columns.
# joblib.load unpickles arbitrary Python objects; only point ARTI_DIR at
# artifacts you trust.
ARTI_DIR = "artifacts/final_oldpeak_yj_poly_tuned"
pipeline = load(f"{ARTI_DIR}/pipeline.joblib")

with open(f"{ARTI_DIR}/threshold.json") as f:
    threshold_payload = json.load(f)
threshold = float(threshold_payload["best_threshold"])

saved_cols = pd.read_csv(f"{ARTI_DIR}/columns.csv")["columns"].tolist()

# --- Test split. RestingBP and RestingECG are dropped before scoring; the
# assert below checks the remaining columns against the saved column list.
dropped_columns = ["RestingBP", "RestingECG"]
X_test = pd.read_csv("../data/testing_features.csv").drop(columns=dropped_columns)
y_test = pd.read_csv("../data/testing_labels.csv")

assert list(X_test.columns) == saved_cols, "Column order/names mismatch with training artifacts."


In [2]:

# Echo the decision threshold read from threshold.json.
threshold_label = "Threshold used:"
print(threshold_label, threshold)

Threshold used: 0.22479016604179114


In [4]:
# Positive-class probability for every test row, then a hard label at `threshold`.
p_test = pipeline.predict_proba(X_test)[:, 1]
y_test_pred = (p_test >= threshold).astype(int)

# Headline metrics, printed one per line in this order.
headline_metrics = (
    ("Recall (pos class):", recall_score(y_test, y_test_pred, zero_division=0)),
    ("Precision (pos class):", precision_score(y_test, y_test_pred, zero_division=0)),
    ("Accuracy:", accuracy_score(y_test, y_test_pred)),
    ("F2:", fbeta_score(y_test, y_test_pred, beta=2, zero_division=0)),
)
for metric_label, metric_value in headline_metrics:
    print(metric_label, metric_value)

print("\nClassification report (test):")
print(classification_report(y_test, y_test_pred, zero_division=0))

Recall (pos class): 0.9803921568627451
Precision (pos class): 0.7299270072992701
Accuracy: 0.7880434782608695
F2: 0.9174311926605505

Classification report (test):
              precision    recall  f1-score   support

           0       0.96      0.55      0.70        82
           1       0.73      0.98      0.84       102

    accuracy                           0.79       184
   macro avg       0.84      0.76      0.77       184
weighted avg       0.83      0.79      0.77       184



In [5]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import (
    classification_report, confusion_matrix,
    precision_score, recall_score, accuracy_score, fbeta_score,
    roc_auc_score, average_precision_score,
    precision_recall_curve, roc_curve
)

# --- Output folder for the figures and the metrics CSV written later in this cell
os.makedirs("assets", exist_ok=True)

# --- Inputs: p_test and y_test_pred are reused from the earlier prediction
# cell (positive-class probability, and its hard label at `threshold`).

# --- Metrics at the chosen operating point
point_kwargs = {"zero_division": 0}
rec = recall_score(y_test, y_test_pred, **point_kwargs)
prec = precision_score(y_test, y_test_pred, **point_kwargs)
f2 = fbeta_score(y_test, y_test_pred, beta=2, **point_kwargs)
acc = accuracy_score(y_test, y_test_pred)

# --- Metrics computed from the probabilities directly (no threshold involved)
pr_auc = average_precision_score(y_test, p_test)
roc_auc = roc_auc_score(y_test, p_test)

# --- Confusion counts; labels=[0, 1] fixes the layout to [[TN, FP], [FN, TP]]
cm = confusion_matrix(y_test, y_test_pred, labels=[0, 1])
(tn, fp), (fn, tp) = cm

# Specificity = TN / (TN + FP); reported as 0.0 when there are no actual negatives.
actual_negatives = tn + fp
specificity = 0.0 if actual_negatives <= 0 else tn / actual_negatives

# --- One-row summary table, printed as Markdown (e.g. for pasting into a README)
summary_row = {
    "threshold": round(float(threshold), 3),
    "recall_pos": rec,
    "precision_pos": prec,
    "accuracy": acc,
    "F2": f2,
    "roc_auc": roc_auc,
    "pr_auc": pr_auc,
    "specificity": specificity,
    "TP": tp,
    "FP": fp,
    "TN": tn,
    "FN": fn,
}
metrics_df = pd.DataFrame([summary_row]).round(4)

table_markdown = metrics_df.to_markdown(index=False)  # needs the tabulate package
report_text = classification_report(y_test, y_test_pred, zero_division=0)

print("=== Final Test Metrics ===")
print(table_markdown)
print("\nClassification report (test):")
print(report_text)

# ===========================
# Figures
# ===========================

# --- 1) Confusion matrix heatmap with the raw count written in each cell
fig, ax = plt.subplots(figsize=(4.8, 4.2))
im = ax.imshow(cm, cmap="Blues")
ax.set(
    xlabel="Predicted label",
    ylabel="True label",
    title=f"Confusion Matrix (Test @ threshold={threshold:.3f})",
)
class_ticks = [0, 1]
class_names = ["0", "1"]
ax.set_xticks(class_ticks)
ax.set_xticklabels(class_names)
ax.set_yticks(class_ticks)
ax.set_yticklabels(class_names)

# Cells above half of the largest count get white text, the rest black.
half_of_max = cm.max() / 2
n_rows, n_cols = cm.shape
for row in range(n_rows):
    for col in range(n_cols):
        count = cm[row, col]
        text_color = "white" if count > half_of_max else "black"
        # imshow puts columns on x and rows on y, hence (col, row).
        ax.text(col, row, f"{count}", ha="center", va="center",
                fontsize=11, color=text_color)

fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
fig.tight_layout()
fig.savefig("assets/fig_confusion_test.png", dpi=220)
plt.close(fig)  # saved to disk only; not rendered inline

# --- 2) Precision–recall curve, with the thresholded operating point marked
prec_curve, rec_curve, pr_thresh = precision_recall_curve(y_test, p_test)
op_prec, op_rec = prec, rec  # precision/recall already computed at `threshold`

fig, ax = plt.subplots(figsize=(5.6, 4.2))
ax.plot(rec_curve, prec_curve, label=f"PR curve (AP={pr_auc:.3f})")
ax.scatter([op_rec], [op_prec], s=50, marker="o",
           label=f"Operating point @ {threshold:.3f}")
ax.set(
    xlabel="Recall",
    ylabel="Precision",
    title="Precision–Recall (Test)",
    xlim=(0, 1),
    ylim=(0, 1),
)
ax.legend(loc="lower left")
fig.tight_layout()
fig.savefig("assets/fig_prcurve_test.png", dpi=220)
plt.close(fig)  # saved to disk only; not rendered inline

# --- 3) ROC curve, with the thresholded operating point marked
fpr, tpr, roc_thresh = roc_curve(y_test, p_test)

# Operating point: FPR = FP / (FP + TN), 0.0 when there are no actual negatives;
# TPR is the recall already computed at `threshold`.
negatives_total = fp + tn
op_fpr = 0.0 if negatives_total <= 0 else fp / negatives_total
op_tpr = rec

fig, ax = plt.subplots(figsize=(5.6, 4.2))
ax.plot(fpr, tpr, label=f"ROC (AUC={roc_auc:.3f})")
ax.plot([0, 1], [0, 1], linestyle="--", linewidth=1)  # diagonal = chance level
ax.scatter([op_fpr], [op_tpr], s=50, marker="o",
           label=f"Operating point @ {threshold:.3f}")
ax.set(
    xlabel="False Positive Rate",
    ylabel="True Positive Rate (Recall)",
    title="ROC (Test)",
    xlim=(0, 1),
    ylim=(0, 1),
)
ax.legend(loc="lower right")
fig.tight_layout()
fig.savefig("assets/fig_roc_test.png", dpi=220)
plt.close(fig)  # saved to disk only; not rendered inline

# --- Persist the one-row metrics table as CSV for reproducibility
metrics_csv_path = "assets/test_metrics.csv"
metrics_df.to_csv(metrics_csv_path, index=False)


=== Final Test Metrics ===
|   threshold |   recall_pos |   precision_pos |   accuracy |     F2 |   roc_auc |   pr_auc |   specificity |   TP |   FP |   TN |   FN |
|------------:|-------------:|----------------:|-----------:|-------:|----------:|---------:|--------------:|-----:|-----:|-----:|-----:|
|       0.225 |       0.9804 |          0.7299 |      0.788 | 0.9174 |    0.9275 |   0.9377 |        0.5488 |  100 |   37 |   45 |    2 |

Classification report (test):
              precision    recall  f1-score   support

           0       0.96      0.55      0.70        82
           1       0.73      0.98      0.84       102

    accuracy                           0.79       184
   macro avg       0.84      0.76      0.77       184
weighted avg       0.83      0.79      0.77       184

