In [None]:
# main.ipynb

# Purpose: End-to-end execution overview for the entire project
# Includes ETL, feature engineering, model training, and evaluation summary

# Standard library
from pathlib import Path

# Third-party
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split

# Project scripts
from scripts.etl_loader import run_etl
from scripts.feature_engineering import load_and_engineer
from scripts.model_utils import load_data, balance_data, train_models, evaluate_model

# Step 1: Run the ETL pipeline to merge and store data
print("Running ETL pipeline...")
run_etl()

# Step 2: Run feature engineering to create model-ready file
# NOTE(review): the returned frame is not used further in this cell; Step 3
# re-reads 'data/model_ready.csv', presumably written by load_and_engineer()
# -- confirm against scripts/feature_engineering.
print("Running feature engineering...")
df_model_ready = load_and_engineer()

# Step 3: Load features and labels
X, y = load_data(file_path='data/model_ready.csv', label_col='PotentialFraud')

# Step 4: Train-test split on the ORIGINAL (not yet resampled) data.
# The split has to happen before balancing: resampling the full dataset first
# lets synthetic samples derived from future test rows end up in the training
# set (data leakage) and puts synthetic rows into the test set, which inflates
# every reported metric. `stratify=y` keeps the class ratio the same in both
# splits so the minority class is represented in the held-out data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Step 5: Balance the TRAINING split only (SMOTE, per the original notebook
# comment). The test split is left untouched so evaluation reflects the real
# class distribution.
X_res, y_res = balance_data(X_train, y_train)

# Step 6: Train models on the balanced training data
print("Training models...")
models = train_models(X_res, y_res)

# Step 7: Score each trained model on the held-out split.
# Results are collected per model name so they can be tabulated afterwards.
summary_results = {}
for model_label in models:
    print(f"\nEvaluating {model_label}...")
    summary_results[model_label] = evaluate_model(
        models[model_label],
        X_test,
        y_test,
        model_name=model_label,
    )

# Step 8: Build the comparison table.
# The outer keys of summary_results (model names) become columns, so the frame
# is transposed to get one row per model, then restricted to a fixed metric
# column order. Assumes each evaluate_model result provides all six keys
# below -- a missing key raises KeyError here.
metric_columns = ['accuracy', 'precision', 'recall', 'f1', 'auc', 'specificity']
per_model_frame = pd.DataFrame(summary_results).T
summary_df = per_model_frame[metric_columns]

print("\nFinal Model Comparison:")
display(summary_df)  # rich notebook rendering of the table

# Step 9: Save results and plot
# Neither DataFrame.to_csv nor plt.savefig creates missing parent directories,
# so make sure the output folder exists before writing; otherwise the whole
# pipeline fails at the very last step on a fresh checkout.
output_dir = Path("outputs")
output_dir.mkdir(parents=True, exist_ok=True)

# index=True keeps the model names (the row index) in the CSV.
summary_df.to_csv(output_dir / "final_model_summary.csv", index=True)

# Bar chart of the four headline metrics, one group of bars per model.
summary_df[['accuracy', 'precision', 'recall', 'f1']].plot(kind='bar', figsize=(10, 6))
plt.title("Final Model Comparison")
plt.ylabel("Score")
plt.xticks(rotation=45)
plt.tight_layout()
# Save before show(): the figure is written to disk first, then displayed.
plt.savefig(output_dir / "final_model_comparison.png")
plt.show()

print("End-to-end pipeline completed successfully.")
