In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import joblib
import sys
from preprocessing import prepare_data

from sklearn.metrics import (
    f1_score,
    precision_score,
    recall_score,
    accuracy_score,
    confusion_matrix,
    classification_report,
)

In [2]:
# ====================================================================
# 1. DEFINE FILE PATHS
# ====================================================================
# Locations of the test CSV and the two saved artifacts (model, encoders).
# All three are relative paths, resolved against the working directory.
TEST_DATA_PATH, MODEL_PATH, ENCODERS_PATH = (
    "fraudTest.csv",
    "models/xgb_champion.json",
    "models/encoders.joblib",
)

# Echo the configuration, one item per line, followed by a 60-char rule.
print(
    "Starting model evaluation script...",
    f"  Loading test data from: {TEST_DATA_PATH}",
    f"  Loading model from: {MODEL_PATH}",
    f"  Loading encoders from: {ENCODERS_PATH}",
    "=" * 60,
    sep="\n",
)

Starting model evaluation script...
  Loading test data from: fraudTest.csv
  Loading model from: models/xgb_champion.json
  Loading encoders from: models/encoders.joblib


In [3]:
# ====================================================================
# 2. LOAD ARTIFACTS
# ====================================================================
# Loads, in order: the XGBoost classifier from MODEL_PATH, the encoders
# from ENCODERS_PATH, and the test set from TEST_DATA_PATH.
# If any step fails, a diagnostic is printed and execution stops with a
# non-zero exit status so the failure is not reported as a success
# (a bare sys.exit() exits with status 0).
try:
    # Load the XGBoost model
    model = xgb.XGBClassifier()
    model.load_model(MODEL_PATH)
    print("✅ Champion model loaded successfully.")

    # Load the encoders.
    # SECURITY: joblib.load unpickles the file and can therefore execute
    # arbitrary code -- only load encoder files from a trusted source.
    encoders = joblib.load(ENCODERS_PATH)
    print("✅ Encoders loaded successfully.")

    # Load test data
    test_df = pd.read_csv(TEST_DATA_PATH)
    print(f"✅ Test data loaded ({len(test_df)} records).")

except FileNotFoundError as e:
    # A missing file gets a dedicated hint pointing at the producing script.
    print("\n🚨 ERROR: File not found.")
    print(f"  Details: {e}")
    print("  Please ensure 'save_model.py' ran successfully and all files are in the correct paths.")
    sys.exit(1)  # Stop execution (with a failure status) if files are missing
except Exception as e:
    # Any other load failure (corrupt model, unreadable CSV, ...) also
    # stops the run, since later cells depend on all three objects.
    print(f"\n🚨 ERROR loading artifacts: {e}")
    sys.exit(1)

✅ Champion model loaded successfully.
✅ Encoders loaded successfully.


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


✅ Test data loaded (555719 records).


In [4]:
# ====================================================================
# 3. PREPROCESS TEST DATA
# ====================================================================
print("\nRunning preprocessing on test data...")

# Evaluation reuses the encoders loaded from disk and requests no fitting
# (fit=False, training=False); the intent is to apply the same preprocessing
# as training. "tree" mode must match the training configuration, and per
# the original author's note the scalers are unused in that mode, so an
# empty dict is passed.
out_test = prepare_data(
    test_df,
    encoders=encoders,
    scalers={},
    mode="tree",
    training=False,
    fit=False,
)

# Split the processed frame into the label column and the feature matrix.
df_test = out_test["df"]
y_test = df_test["is_fraud"]
X_test = df_test.drop(columns=["is_fraud"])

# Defensive cleanup: +/-inf become NaN, NaN becomes 0, and every value is
# then limited to the range [-1e6, 1e6].
X_test = (
    X_test
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
    .clip(lower=-1e6, upper=1e6)
)

print(f"  Test data preprocessed. Final shape: {X_test.shape}")


Running preprocessing on test data...
  Test data preprocessed. Final shape: (555719, 13)


In [5]:
# ====================================================================
# 4. RUN PREDICTIONS & EVALUATE
# ====================================================================
# Predicts on X_test, computes the binary-classification metrics against
# y_test, and prints a summary, the confusion matrix, and the per-class
# report.
print("\nRunning predictions on test set...")
y_pred = model.predict(X_test)
print("  Predictions complete.")

# --- Calculate Metrics ---
f1 = f1_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
rec = recall_score(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)
# The label set is pinned to [0, 1] so the matrix is always 2x2 and the
# report always has two rows. Without it the labels are inferred from the
# data, and a test set or prediction vector containing a single class
# would break the cm[i][j] indexing and the target_names below.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
report = classification_report(
    y_test,
    y_pred,
    labels=[0, 1],
    target_names=["Not Fraud (0)", "Fraud (1)"],
)

# --- Display Results ---
print("\n" + "=" * 60)
print("          FINAL MODEL PERFORMANCE (TEST SET)")
print("=" * 60)

print(f"  F1-Score:  {f1:.5f}")
print(f"  Precision: {prec:.5f}")
print(f"  Recall:    {rec:.5f}")
print(f"  Accuracy:  {acc:.5f}")

# Rows of cm are actual classes, columns are predicted classes:
# cm[0][1] = false positives, cm[1][0] = false negatives, cm[1][1] = true positives.
print("\n" + "-" * 60)
print("  CONFUSION MATRIX")
print("-" * 60)
print("          [ Predicted: 0 ] [ Predicted: 1 ]")
print(f" [ Actual: 0 ]  {cm[0][0]:<12}   {cm[0][1]:<12} (FP)")
print(f" [ Actual: 1 ]  {cm[1][0]:<12} (FN)   {cm[1][1]:<12} (TP)")
print("-" * 60)
print(f"\n  True Positives (Frauds caught): {cm[1][1]}")
print(f"  False Positives (Alerts):     {cm[0][1]}")
print(f"  False Negatives (Frauds missed): {cm[1][0]}")


print("\n" + "-" * 60)
print("  CLASSIFICATION REPORT")
print("-" * 60)
print(report)
print("=" * 60)


Running predictions on test set...
  Predictions complete.

          FINAL MODEL PERFORMANCE (TEST SET)
  F1-Score:  0.85402
  Precision: 0.91344
  Recall:    0.80186
  Accuracy:  0.99894

------------------------------------------------------------
  CONFUSION MATRIX
------------------------------------------------------------
          [ Predicted: 0 ] [ Predicted: 1 ]
 [ Actual: 0 ]  553411         163          (FP)
 [ Actual: 1 ]  425          (FN)   1720         (TP)
------------------------------------------------------------

  True Positives (Frauds caught): 1720
  False Positives (Alerts):     163
  False Negatives (Frauds missed): 425

------------------------------------------------------------
  CLASSIFICATION REPORT
------------------------------------------------------------
               precision    recall  f1-score   support

Not Fraud (0)       1.00      1.00      1.00    553574
    Fraud (1)       0.91      0.80      0.85      2145

     accuracy                  

In [9]:
# Display the preprocessed test frame returned by prepare_data (it still
# contains the is_fraud label column alongside the features).
df_test

Unnamed: 0,merchant,category,amt,gender,state,city_pop,is_fraud,hour,day,month,weekday,is_weekend,age,distance_from_home
0,319,10,2.86,1,40,333497,0,12,21,6,6,1,52,24.561462
1,591,10,29.84,0,44,302,0,12,21,6,6,1,30,104.925092
2,611,5,41.28,0,34,34496,0,12,21,6,6,1,50,59.080078
3,222,9,60.05,1,9,54767,0,12,21,6,6,1,33,27.698567
4,292,13,3.19,1,22,1126,0,12,21,6,6,1,65,104.335106
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
555714,507,5,43.77,1,24,519,0,23,31,12,3,0,54,77.026148
555715,264,7,111.84,1,43,28739,0,23,31,12,3,0,21,100.074420
555716,496,7,86.88,0,47,3684,0,23,31,12,3,0,39,80.759302
555717,75,13,7.99,1,13,129,0,23,31,12,3,0,55,52.933240


In [12]:
# List the keys under which the loaded encoders are stored.
print(encoders.keys())

dict_keys(['merchant_enc', 'state_enc', 'gender_enc', 'category_enc'])


In [16]:
# Print the values each loaded encoder learned, labelled by the encoder
# they actually come from (the previous labels were rotated: state values
# were printed as "Merchants" and category values as "States").
# gender_enc exposes `classes_`; the other encoders expose `categories_`.
# The merchant list stays disabled, as before -- presumably because it is
# long; re-enable the next line to inspect it.
# print(f"Merchants: {list(encoders['merchant_enc'].categories_[0])}")
print(f"States: {list(encoders['state_enc'].categories_[0])}")
print(f"Genders: {list(encoders['gender_enc'].classes_)}")
print(f"Categories: {list(encoders['category_enc'].categories_[0])}")

Merchants: ['AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY']
Genders: ['F', 'M']
States: ['entertainment', 'food_dining', 'gas_transport', 'grocery_net', 'grocery_pos', 'health_fitness', 'home', 'kids_pets', 'misc_net', 'misc_pos', 'personal_care', 'shopping_net', 'shopping_pos', 'travel']
