In [1]:
# ==============================================
# FraudLens — Batch Inference (score new CSVs)
# ==============================================
# Usage example (Windows cmd.exe; `^` continues a line — in PowerShell use a backtick ` instead):
# python fraudlens_batch_infer.py ^
#   --model "C:\Users\sagni\Downloads\FraudLens\fraudlens_model.pkl" ^
#   --csv   "C:\path\to\new_transactions.csv" ^
#   --outdir "C:\Users\sagni\Downloads\FraudLens"
# ==============================================

import re
import json
import argparse
from pathlib import Path
import numpy as np
import pandas as pd
import joblib
from sklearn.metrics import roc_auc_score, average_precision_score, classification_report, confusion_matrix

# Regexes tried against a normalized column name (lower-cased, with every run
# of non-alphanumeric characters collapsed to a single space).
LABEL_PATTERNS = [
    r"\bclass\b",
    r"\blabel\b",
    r"\btarget\b",
    r"\bis[_\-\s]*fraud\b",
    r"\bfraud\b",
    r"^y$",
]


def find_label_col(cols):
    """Return the first column whose name looks like a label column.

    Each name is lower-cased, runs of characters outside ``a-z0-9`` are
    replaced by one space, and surrounding whitespace is stripped. The
    result is then searched with every regex in ``LABEL_PATTERNS``.

    Args:
        cols: Iterable of column names (any hashable objects; ``str()`` is
            applied before matching).

    Returns:
        The original (un-normalized) name of the first matching column, in
        iteration order of ``cols``, or ``None`` when nothing matches.
    """
    # dict.fromkeys walks the names in their given order, visiting each
    # distinct name once.
    for column in dict.fromkeys(cols):
        cleaned = re.sub(r"[^a-z0-9]+", " ", str(column).lower()).strip()
        if any(re.search(pattern, cleaned) for pattern in LABEL_PATTERNS):
            return column
    return None

def _binarize_labels(y_raw):
    """Convert a raw label column (pandas Series) into a 0/1 integer array."""
    if pd.api.types.is_numeric_dtype(y_raw):
        # Any strictly positive value counts as the positive class; NaN
        # compares False and therefore maps to 0.
        return (y_raw.astype(float) > 0).astype(int).values
    positive_tokens = {"1", "true", "t", "yes", "y", "fraud"}
    normalized = y_raw.astype(str).str.lower().str.strip()
    return normalized.isin(positive_tokens).astype(int).values


def _compute_metrics(y_true, probs, preds, threshold):
    """Build the JSON-serializable metrics dictionary for a scored batch."""
    if np.unique(y_true).size > 1:
        roc_auc = float(roc_auc_score(y_true, probs))
    else:
        # ROC AUC is undefined when y_true holds a single class and
        # roc_auc_score raises ValueError there; report null instead of
        # aborting the run after the predictions were already written.
        print("[WARN] Only one class present in labels — ROC AUC is undefined and reported as null.")
        roc_auc = None
    pr_auc = average_precision_score(y_true, probs)
    cm = confusion_matrix(y_true, preds, labels=[0, 1])
    report = classification_report(y_true, preds, digits=4)

    return {
        "threshold": threshold,
        "roc_auc": roc_auc,
        "pr_auc": float(pr_auc),
        "confusion_matrix": {
            "tn": int(cm[0, 0]), "fp": int(cm[0, 1]),
            "fn": int(cm[1, 0]), "tp": int(cm[1, 1]),
        },
        "classification_report": report,
    }


def main(argv=None):
    """Score a CSV with a saved model and write predictions (and metrics).

    Writes ``fraudlens_scored.csv`` into the output directory. When a label
    column is auto-detected in the CSV, ``fraudlens_scored_metrics.json`` is
    written as well.

    Args:
        argv: Optional list of command-line arguments, without the program
            name. ``None`` (the default) makes argparse read ``sys.argv``,
            which is the behaviour of a plain ``main()`` call. Passing a list
            allows the function to be driven from a notebook or other code,
            e.g. ``main(["--model", m, "--csv", c, "--outdir", o])``.

    Raises:
        SystemExit: Raised by argparse when required options are missing or
            invalid.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--model", required=True, help="Path to fraudlens_model.pkl")
    ap.add_argument("--csv", required=True, help="CSV to score")
    ap.add_argument("--outdir", required=True, help="Output directory")
    ap.add_argument("--threshold", type=float, default=0.5, help="Decision threshold (default 0.5)")
    args = ap.parse_args(argv)

    model_path = Path(args.model)
    csv_path   = Path(args.csv)
    out_dir    = Path(args.outdir)
    out_dir.mkdir(parents=True, exist_ok=True)

    print(f"[INFO] Loading model: {model_path}")
    # SECURITY: joblib.load unpickles the file and can execute arbitrary code
    # while doing so. Only load model files that come from a trusted source.
    pipe = joblib.load(model_path)

    print(f"[INFO] Reading: {csv_path}")
    df = pd.read_csv(csv_path, low_memory=False)
    print("[INFO] Shape:", df.shape)

    # Auto-detect label if present (optional)
    label_col = find_label_col(df.columns)
    y_true = None if label_col is None else _binarize_labels(df[label_col])

    # Use only numeric features; a numeric label must not leak into X.
    num_df = df.select_dtypes(include=[np.number])
    if label_col is not None and label_col in num_df.columns:
        num_df = num_df.drop(columns=[label_col])
    X = num_df.fillna(0.0).astype(float)

    # Score
    print("[INFO] Scoring...")
    probs = pipe.predict_proba(X.values)[:, 1]
    preds = (probs >= args.threshold).astype(int)

    # Build output frame
    out = pd.DataFrame({
        "row_id": np.arange(len(probs)),
        "prob_fraud": probs,
        "y_pred": preds
    })
    if y_true is not None:
        out["y_true"] = y_true

    scored_csv = out_dir / "fraudlens_scored.csv"
    out.to_csv(scored_csv, index=False)
    print(f"[SAVED] {scored_csv}")

    # Metrics if labels available
    if y_true is None:
        print("[INFO] No label column detected — metrics not computed.")
        return

    metrics = _compute_metrics(y_true, probs, preds, args.threshold)
    metrics_json = out_dir / "fraudlens_scored_metrics.json"
    with open(metrics_json, "w", encoding="utf-8") as f:
        json.dump(metrics, f, indent=2)
    print(f"[SAVED] {metrics_json}")

# Run the CLI only when this file is executed directly, not when it is imported.
# main() parses sys.argv, so it exits with an argparse error if the required
# --model/--csv/--outdir options are not supplied on the command line.
if __name__ == "__main__":
    main()


usage: ipykernel_launcher.py [-h] --model MODEL --csv CSV --outdir OUTDIR [--threshold THRESHOLD]
ipykernel_launcher.py: error: the following arguments are required: --model, --csv, --outdir


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
