In [1]:
from pathlib import Path
import re, json
import numpy as np
import pandas as pd
import joblib
from sklearn.metrics import roc_auc_score, average_precision_score, classification_report, confusion_matrix

# -----------------------------
# Configuration (hard-coded; edit to match your machine)
# -----------------------------
THRESHOLD = 0.5  # scores at or above this value are flagged as fraud
OUT_DIR = Path(r"C:\Users\sagni\Downloads\FraudLens")
CSV_PATH = Path(r"C:\Users\sagni\Downloads\FraudLens\archive\creditcard_2023.csv")
MODEL_PATH = Path(r"C:\Users\sagni\Downloads\FraudLens\fraudlens_model.pkl")

# Make sure the output folder exists before anything is written into it.
OUT_DIR.mkdir(exist_ok=True, parents=True)

# -----------------------------
# Load the persisted model and the CSV to score
# -----------------------------
# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code -- only point MODEL_PATH at a model file from a trusted source.
pipe = joblib.load(MODEL_PATH)
df = pd.read_csv(CSV_PATH)

# Detect label if present
def find_label_col(cols):
    """Return the column in *cols* most likely to hold the ground-truth label.

    Each name is lower-cased and every run of non-alphanumeric characters is
    collapsed to a single space before it is compared with the keywords
    ``class``, ``label``, ``target`` and ``fraud``.  Candidates are ranked so
    that a feature which merely contains a keyword (``merchant_class_code``,
    ``subclass``) cannot shadow a real label column that appears later:

    1. the normalised name *is* a keyword (``Class``, ``LABEL``);
    2. a keyword is one whole word of the name (``is_fraud``);
    3. a keyword occurs anywhere in the name (``isFraud``).

    Within a tier the first column in *cols* wins.

    Args:
        cols: Iterable of column names; non-string names go through ``str``.

    Returns:
        The matching element of *cols*, unmodified, or ``None`` when no name
        contains any keyword.
    """
    keywords = ("class", "label", "target", "fraud")
    # Materialise once: *cols* is scanned up to three times below.
    normalised = [
        (col, re.sub(r"[^a-z0-9]+", " ", str(col).lower()).strip())
        for col in cols
    ]

    def is_exact(name):
        return name in keywords

    def has_word(name):
        return any(word in keywords for word in name.split())

    def has_substring(name):
        return any(kw in name for kw in keywords)

    # Strictest rule first; the loosest one is the plain substring search, so
    # any input with a keyword somewhere in a name still yields a column.
    for matches in (is_exact, has_word, has_substring):
        for col, name in normalised:
            if matches(name):
                return col
    return None

# Locate the ground-truth column (if any) so it can be kept out of the
# feature matrix and reused for the evaluation at the bottom.
label_col = find_label_col(df.columns)
y_true = None
if label_col is not None:
    # Binarise: any value > 0 becomes 1, everything else 0.
    # NOTE: a missing label is NaN after the cast and NaN > 0 is False, so
    # rows without a label are counted as class 0.
    y_true = (df[label_col].astype(float) > 0).astype(int).values

# Features: every numeric column except the label, gaps filled with 0.0.
# NOTE(review): this keeps *all* numeric columns (identifier-like ones too);
# presumably that matches what the model was fitted on -- confirm.
num_df = df.select_dtypes(include=[np.number])
if label_col in num_df.columns:
    X = num_df.drop(columns=[label_col])
else:
    X = num_df
X = X.fillna(0.0).astype(float)

# -----------------------------
# Predict
# -----------------------------
# Column 1 of predict_proba is taken as the fraud probability.
# NOTE(review): assumes the model's second class is the fraud class -- verify
# against pipe.classes_.
probs = pipe.predict_proba(X.values)[:, 1]
preds = (probs >= THRESHOLD).astype(int)

out = pd.DataFrame({"prob_fraud": probs, "y_pred": preds})
if y_true is not None:
    out["y_true"] = y_true

out.to_csv(OUT_DIR / "fraudlens_scored.csv", index=False)
print("[SAVED]", OUT_DIR / "fraudlens_scored.csv")

# Metrics if labels available
if y_true is not None:
    # cm is computed but not printed; it stays available for inspection.
    cm      = confusion_matrix(y_true, preds, labels=[0,1])
    report  = classification_report(y_true, preds, digits=4)
    if np.unique(y_true).size == 2:
        roc_auc = roc_auc_score(y_true, probs)
        pr_auc  = average_precision_score(y_true, probs)
        print("ROC-AUC:", roc_auc, " PR-AUC:", pr_auc)
    else:
        # Ranking metrics are undefined when the labels hold a single class
        # (e.g. a batch without any fraud); skip them rather than lose the
        # report below to an exception.
        roc_auc = pr_auc = float("nan")
        print("[WARN] Only one class present in labels; ROC-AUC / PR-AUC skipped.")
    print(report)


[SAVED] C:\Users\sagni\Downloads\FraudLens\fraudlens_scored.csv
ROC-AUC: 0.999850007272637  PR-AUC: 0.9998920620685758
              precision    recall  f1-score   support

           0     0.9970    0.9991    0.9981    284315
           1     0.9991    0.9970    0.9981    284315

    accuracy                         0.9981    568630
   macro avg     0.9981    0.9981    0.9981    568630
weighted avg     0.9981    0.9981    0.9981    568630

