In [1]:
# main.py
import pandas as pd
from models.feature_engineering import preprocess
from models.data_utils import load_and_prepare_data
from models.baseline_models import train_and_save_baselines
from models.ensemble_fraud_detector import run_ensemble_model

def main():
    """
    Run the credit card fraud detection pipeline from start to finish.

    The stages execute strictly in order, each one announced on stdout:

    1. ``load_and_prepare_data`` is called with ``percent=1``,
       ``test_size=0.2`` and ``seed=42`` and yields the train/test split.
    2. ``train_and_save_baselines`` receives all four pieces of the split to
       train and save the baseline models.
    3. ``run_ensemble_model`` receives only the test features and labels to
       evaluate the ensemble.

    Returns nothing; all results are reported by the helpers themselves.
    """
    print("🚀 Starting Credit Card Fraud Detection Pipeline...")

    # Stage 1 -- build the train/test split with fixed settings.
    print("\n📦 Loading and preparing data...")
    features_train, features_test, labels_train, labels_test = load_and_prepare_data(
        percent=1,
        test_size=0.2,
        seed=42,
    )
    print("Data loaded and split into training and testing sets.")

    # Stage 2 -- baselines get both halves of the split.
    print("\n🏋️ Training and saving baseline models...")
    train_and_save_baselines(
        features_train,
        features_test,
        labels_train,
        labels_test,
    )
    print("Baseline models trained and saved.")

    # Stage 3 -- the ensemble is evaluated on the test half only.
    print("\n🤝 Running ensemble fraud detector...")
    run_ensemble_model(features_test, labels_test)
    print("Ensemble model evaluated. Pipeline complete!")

# Run the pipeline only when this file is executed as the top-level script;
# importing it as a module defines main() without starting the pipeline.
if __name__ == "__main__":
    main()

🚀 Starting Credit Card Fraud Detection Pipeline...

📦 Loading and preparing data...
Data loaded and split into training and testing sets.

🏋️ Training and saving baseline models...

Evaluation results for Logistic Regression:
ROC AUC Score: 0.9722
Confusion Matrix:
[[55475  1389]
 [    8    90]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.98      0.99     56864
           1       0.06      0.92      0.11        98

    accuracy                           0.98     56962
   macro avg       0.53      0.95      0.55     56962
weighted avg       1.00      0.98      0.99     56962

Saved Logistic Regression model to C:\Users\segev\code_notebooks\fraud_ml_project\models\logistic_regression.joblib

Evaluation results for Random Forest:
ROC AUC Score: 0.9581
Confusion Matrix:
[[56861     3]
 [   24    74]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     5