In [None]:
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    confusion_matrix, 
    precision_recall_curve, 
    roc_curve,
    average_precision_score,
    roc_auc_score
)
from config.settings import DATA_PATHS

# Load the processed dataset and split it into a feature matrix X and target y.
df = pd.read_parquet(DATA_PATHS['processed'])
y = df['is_fraud']

# The target and the two identifier/date columns are excluded from the features.
non_feature_cols = ['is_fraud', 'transaction_id', 'transaction_date']
categorical_cols = ['merchant_category', 'country', 'gender']

# One-hot encode only the listed categorical columns.
# NOTE(review): the resulting column set depends on the categories present in
# this file — confirm it matches the columns the saved models were trained on.
X = pd.get_dummies(df.drop(columns=non_feature_cols), columns=categorical_cols)

# Load the persisted models, keyed by the display name used in the report and plots.
# NOTE: joblib.load unpickles the file, so only trusted model files belong here.
model_files = {
    'Random Forest': 'models/baseline_rf.pkl',
    'XGBoost': 'models/xgboost.pkl',
    'LightGBM': 'models/lightgbm.pkl',
    'Isolation Forest': 'models/isolationforest.pkl',
    'Tuned XGBoost': 'models/xgboost_tuned.pkl',
}
models = {label: joblib.load(path) for label, path in model_files.items()}

# Evaluate all models
# Every model is scored on X so that a higher score means "more likely fraud",
# then summarised with ROC AUC and average precision (reported as 'PR AUC').
# NOTE(review): the metrics are computed on the whole processed dataset; if the
# models were fitted on part of it the numbers will be optimistic — confirm
# whether a held-out split should be used here.
results = []
for name, model in models.items():
    # Dispatch on what the estimator exposes instead of on its display name,
    # so renaming an entry in `models` or adding a detector needs no change here.
    if hasattr(model, 'predict_proba'):
        # Classifier: probability of the positive (second) class.
        y_scores = model.predict_proba(X)[:, 1]
    elif hasattr(model, 'score_samples'):
        # Anomaly detector: scikit-learn outlier scores are lower for more
        # abnormal samples, so negate to make anomalies score highest.
        y_scores = -model.score_samples(X)
    else:
        # Fallback for detectors that only expose decision_function
        # (same sign convention as above).
        y_scores = -model.decision_function(X)

    auc = roc_auc_score(y, y_scores)
    ap = average_precision_score(y, y_scores)

    results.append({
        'Model': name,
        'ROC AUC': auc,
        'PR AUC': ap
    })

# Best model (highest PR AUC) first; later steps read row 0 of this table.
results_df = pd.DataFrame(results).sort_values('PR AUC', ascending=False)
print("Model Performance Comparison:")
print(results_df)

# Horizontal bar chart comparing the models by PR AUC, one bar per model.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=results_df, x='PR AUC', y='Model', palette='viridis', ax=ax)
ax.set_title('Model Comparison by PR AUC Score')
ax.set_xlim(0, 1)  # fixed 0-1 axis so the bars are read on the full metric range
plt.show()

# Detailed evaluation of best model
# results_df is sorted by PR AUC descending, so row 0 is the top-ranked model.
best_model_name = results_df.iloc[0]['Model']
best_model = models[best_model_name]

# Pick the scoring method from the estimator's capabilities, not from its
# display name: a substring test such as `'Forest' in best_model_name` also
# matches 'Random Forest' and would route a classifier to decision_function,
# which scikit-learn's random-forest classifier does not provide.
if hasattr(best_model, 'predict_proba'):
    # Classifier: probability of the positive (second) class.
    y_scores = best_model.predict_proba(X)[:, 1]
elif hasattr(best_model, 'score_samples'):
    # Anomaly detector: negate so that more abnormal samples score higher.
    y_scores = -best_model.score_samples(X)
else:
    # Detector exposing only decision_function (same sign convention).
    y_scores = -best_model.decision_function(X)

# ROC curve (left panel) and precision-recall curve (right panel) for the best model.
fpr, tpr, _ = roc_curve(y, y_scores)
precision, recall, _ = precision_recall_curve(y, y_scores)
roc_label = f'AUC = {roc_auc_score(y, y_scores):.2f}'
pr_label = f'AP = {average_precision_score(y, y_scores):.2f}'

fig, (roc_ax, pr_ax) = plt.subplots(1, 2, figsize=(15, 5))

# Left: ROC curve with the dashed chance-level diagonal for reference.
roc_ax.plot(fpr, tpr, label=roc_label)
roc_ax.plot([0, 1], [0, 1], 'k--')
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('ROC Curve')
roc_ax.legend()

# Right: precision as a function of recall.
pr_ax.plot(recall, precision, label=pr_label)
pr_ax.set_xlabel('Recall')
pr_ax.set_ylabel('Precision')
pr_ax.set_title('Precision-Recall Curve')
pr_ax.legend()

fig.tight_layout()
plt.show()

# Feature importance chart — drawn only when the best model exposes
# `feature_importances_`; other models skip this step silently.
if hasattr(best_model, 'feature_importances_'):
    # Pair each feature column with its importance, then keep the 20 largest.
    importance_table = pd.DataFrame({
        'Feature': X.columns,
        'Importance': best_model.feature_importances_,
    })
    feature_imp = importance_table.sort_values('Importance', ascending=False).head(20)

    fig, ax = plt.subplots(figsize=(10, 8))
    sns.barplot(data=feature_imp, x='Importance', y='Feature', ax=ax)
    ax.set_title('Top 20 Important Features')
    plt.show()