# Credit Card Fraud Detection - Ensemble Methods

This notebook implements ensemble learning techniques to improve fraud detection performance:
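- Voting Classifier (Hard, Soft, and Weighted Voting)
- Stacking Classifier (Logistic Regression and Random Forest meta-learners)
- Boosting (AdaBoost and Gradient Boosting)
- Model combination strategies
- Performance comparison with individual models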

In [None]:
# Import Required Libraries
import pickle
import time
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Machine Learning Libraries
from sklearn.base import clone
from sklearn.ensemble import (
    RandomForestClassifier, VotingClassifier, 
    StackingClassifier, AdaBoostClassifier,
    GradientBoostingClassifier
)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    classification_report, confusion_matrix, 
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, roc_curve, precision_recall_curve
)
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Settings
# NOTE(review): this silences every warning category for the rest of the
# kernel session, including library deprecation warnings. Consider filtering
# specific categories instead.
warnings.filterwarnings('ignore')
plt.style.use('default')
# Seeds NumPy's legacy global RNG (the np.random.* functions).
np.random.seed(42)

print("Libraries imported successfully")

## Load Preprocessed Data and Trained Models

In [None]:
# Load preprocessed data
# Both locations are relative paths, so they resolve against the kernel's
# current working directory.
processed_dir = Path('../data/processed')
models_dir = Path('../models')

def load_data(data_dir=None):
    """Load the preprocessed train/test splits from pickle files.

    Parameters
    ----------
    data_dir : str or pathlib.Path, optional
        Directory that holds the ``<name>.pkl`` files. When omitted, the
        notebook-level ``processed_dir`` is used.

    Returns
    -------
    dict
        Maps each dataset name whose file was found to the unpickled object.
        Missing files are reported and skipped, so the result may be partial
        or empty; callers must check which keys are present.
    """
    source_dir = Path(processed_dir if data_dir is None else data_dir)
    files_to_load = [
        'X_train_smote', 'X_test', 'y_train_smote', 'y_test'
    ]

    datasets = {}
    for filename in files_to_load:
        try:
            # SECURITY: pickle.load can execute arbitrary code embedded in the
            # file. Only point this at artefacts produced by this project.
            with open(source_dir / f"{filename}.pkl", 'rb') as f:
                datasets[filename] = pickle.load(f)
        except FileNotFoundError:
            print(f"Warning: {filename}.pkl not found")
            continue

        # Not every picklable object exposes .shape; do not let the progress
        # message crash a load that already succeeded.
        shape = getattr(datasets[filename], 'shape', 'n/a')
        print(f"Loaded {filename}: {shape}")

    return datasets

def load_trained_models(model_dir=None):
    """Load the individually tuned models saved as pickle files.

    Parameters
    ----------
    model_dir : str or pathlib.Path, optional
        Directory that holds the ``<name>.pkl`` files. When omitted, the
        notebook-level ``models_dir`` is used.

    Returns
    -------
    dict
        Maps each model name whose file was found (``best_knn``, ``best_svm``,
        ``best_dt``, ``best_rf``) to the unpickled object. Missing files are
        reported and skipped, so the result may be partial or empty.
    """
    source_dir = Path(models_dir if model_dir is None else model_dir)
    model_files = ['best_knn', 'best_svm', 'best_dt', 'best_rf']

    models = {}
    for model_name in model_files:
        try:
            # SECURITY: pickle.load can execute arbitrary code embedded in the
            # file. Only load model artefacts produced by this project.
            with open(source_dir / f"{model_name}.pkl", 'rb') as f:
                models[model_name] = pickle.load(f)
        except FileNotFoundError:
            print(f"Warning: {model_name}.pkl not found")
        else:
            print(f"Loaded {model_name}")

    return models

# Load data
data = load_data()
trained_models = load_trained_models()

# load_data() skips missing files, so a non-empty dict can still be incomplete.
# Require all four splits before using the real data; otherwise the lookups
# below would raise KeyError.
required_datasets = ('X_train_smote', 'X_test', 'y_train_smote', 'y_test')
missing_datasets = [name for name in required_datasets if name not in data]

if not missing_datasets:
    X_train_smote = data['X_train_smote']
    X_test = data['X_test']
    y_train_smote = data['y_train_smote']
    y_test = data['y_test']
    
    print(f"\nData loaded successfully!")
    print(f"Training samples: {X_train_smote.shape[0]}")
    print(f"Test samples: {X_test.shape[0]}")
    print(f"Features: {X_train_smote.shape[1]}")
else:
    if data:
        print(f"Incomplete preprocessed data (missing: {', '.join(missing_datasets)})")
    print("Creating sample data for demonstration...")
    n_samples = 1000
    n_test_samples = 200
    n_features = 20
    
    # Pure noise: metrics computed on this data are meaningless and only
    # exercise the pipeline end to end.
    X_train_smote = pd.DataFrame(np.random.randn(n_samples, n_features))
    X_test = pd.DataFrame(np.random.randn(n_test_samples, n_features))
    y_train_smote = pd.Series(np.random.choice([0, 1], n_samples))
    y_test = pd.Series(np.random.choice([0, 1], n_test_samples, p=[0.8, 0.2]))
    
    # Any loaded models are dropped so the base-model cell builds fresh
    # estimators for the synthetic data.
    trained_models = {}
    print(f"Sample data created for demonstration")

## Model Evaluation Framework

In [None]:
def evaluate_ensemble(model, X_train, y_train, X_test, y_test, model_name, cv=3):
    """Fit an ensemble, score it on the test split and print a full report.

    Parameters
    ----------
    model : estimator
        Unfitted scikit-learn style classifier; it is fitted in place.
    X_train, y_train : array-like
        Training features and labels.
    X_test, y_test : array-like
        Hold-out features and labels used for every reported test metric.
    model_name : str
        Display name used in the printed report and in the returned dict.
    cv : int, default 3
        Number of folds for the F1 cross-validation on the training data.

    Returns
    -------
    dict
        Keys: ``model_name``, ``model`` (the fitted estimator), ``accuracy``,
        ``precision``, ``recall``, ``f1_score``, ``roc_auc``, ``cv_mean``,
        ``cv_std``, ``training_time``, ``prediction_time``,
        ``confusion_matrix``, ``y_pred`` and ``y_pred_proba``.
        ``roc_auc``, ``cv_mean`` and ``cv_std`` hold the string ``"N/A"`` when
        they could not be computed; ``y_pred_proba`` is then ``None``.
    """
    banner = '=' * 60
    print(f"\n{banner}")
    print(f"EVALUATING {model_name.upper()}")
    print(banner)

    # perf_counter is monotonic, so the intervals cannot be skewed by
    # wall-clock adjustments.
    start_time = time.perf_counter()
    model.fit(X_train, y_train)
    training_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    y_pred = model.predict(X_test)
    prediction_time = time.perf_counter() - start_time

    # Hold-out metrics (binary, positive class = 1 by scikit-learn default)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # ROC AUC needs class probabilities, which not every ensemble provides
    # (e.g. hard voting). Catch Exception rather than using a bare except so
    # that KeyboardInterrupt still stops the notebook, and report the reason.
    try:
        y_pred_proba = model.predict_proba(X_test)[:, 1]
        roc_auc = roc_auc_score(y_test, y_pred_proba)
    except Exception as exc:
        print(f"ROC AUC unavailable for {model_name}: {exc}")
        roc_auc = "N/A"
        y_pred_proba = None

    # Cross-validated F1 on the training data.
    # NOTE(review): callers pass SMOTE-resampled training data; if resampling
    # happened before this split into folds, these scores are likely
    # optimistic - confirm, and consider resampling inside each fold.
    try:
        cv_scores = cross_val_score(model, X_train, y_train, cv=cv, scoring='f1')
        cv_mean = cv_scores.mean()
        cv_std = cv_scores.std()
    except Exception as exc:
        print(f"Cross-validation unavailable for {model_name}: {exc}")
        cv_mean = cv_std = "N/A"

    has_roc_auc = not isinstance(roc_auc, str)
    has_cv = not isinstance(cv_mean, str)

    # Print results
    print(f"Training Time: {training_time:.3f} seconds")
    print(f"Prediction Time: {prediction_time:.3f} seconds")
    print(f"\nPerformance Metrics:")
    print(f"Accuracy:  {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"F1 Score:  {f1:.4f}")
    roc_auc_text = f"{roc_auc:.4f}" if has_roc_auc else "N/A"
    print(f"ROC AUC:   {roc_auc_text}")

    if has_cv:
        print(f"CV F1 Score: {cv_mean:.4f} (+/- {cv_std * 2:.4f})")

    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    print(f"\nConfusion Matrix:")
    print(cm)

    # Classification Report
    print(f"\nClassification Report:")
    print(classification_report(y_test, y_pred))

    return {
        'model_name': model_name,
        'model': model,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'roc_auc': roc_auc,
        'cv_mean': cv_mean,
        'cv_std': cv_std,
        'training_time': training_time,
        'prediction_time': prediction_time,
        'confusion_matrix': cm,
        'y_pred': y_pred,
        'y_pred_proba': y_pred_proba
    }

def plot_confusion_matrix(cm, model_name):
    """Draw an annotated heatmap of a 2x2 confusion matrix.

    ``cm`` is the matrix to draw (rows labelled "Actual", columns labelled
    "Predicted"); ``model_name`` is appended to the figure title.
    """
    class_labels = ['Legitimate', 'Fraud']

    _, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_labels,
        yticklabels=class_labels,
        ax=ax,
    )
    ax.set_title(f'Confusion Matrix - {model_name}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    plt.show()

print("Ensemble evaluation framework ready")

## Base Models for Ensemble

In [None]:
# Create base models for ensemble
def _with_probability_enabled(estimator):
    """Return ``estimator`` with any ``probability=False`` switch turned on.

    Soft and weighted voting call ``predict_proba`` on every base model. An
    SVC pickled with ``probability=False`` would only fail at predict time,
    after the whole ensemble has been trained. The change is made on an
    unfitted clone so the loaded model is left untouched; the ensembles refit
    their base models anyway.
    """
    to_enable = {
        name: True
        for name, value in estimator.get_params().items()
        if name.split('__')[-1] == 'probability' and value is False
    }
    if not to_enable:
        return estimator
    return clone(estimator).set_params(**to_enable)


if trained_models:
    # Use pre-trained optimized models; fall back to a default estimator for
    # any model file that was not found.
    base_models = [
        ('knn', trained_models.get('best_knn', KNeighborsClassifier(n_neighbors=5))),
        ('svm', trained_models.get('best_svm', SVC(probability=True, random_state=42))),
        ('dt', trained_models.get('best_dt', DecisionTreeClassifier(random_state=42))),
        ('rf', trained_models.get('best_rf', RandomForestClassifier(random_state=42)))
    ]
    base_models = [
        (name, _with_probability_enabled(estimator))
        for name, estimator in base_models
    ]
    print("Using pre-trained optimized models")
else:
    # Create new base models with good default parameters
    base_models = [
        ('knn', KNeighborsClassifier(n_neighbors=5, weights='distance')),
        ('svm', SVC(kernel='rbf', C=1.0, probability=True, random_state=42)),
        ('dt', DecisionTreeClassifier(max_depth=10, min_samples_split=10, random_state=42)),
        ('rf', RandomForestClassifier(n_estimators=100, max_depth=15, random_state=42, n_jobs=-1))
    ]
    print("Using new base models with default parameters")

print(f"Base models for ensemble: {[name for name, _ in base_models]}")

## 1. Voting Classifier

In [None]:
# Hard Voting Classifier: the predicted label is the majority vote of the base models
print("Training Hard Voting Classifier...")

hard_voting = VotingClassifier(estimators=base_models, voting='hard', n_jobs=-1)

hard_voting_results = evaluate_ensemble(
    model=hard_voting,
    X_train=X_train_smote,
    y_train=y_train_smote,
    X_test=X_test,
    y_test=y_test,
    model_name="Hard Voting Classifier",
)

plot_confusion_matrix(
    cm=hard_voting_results['confusion_matrix'],
    model_name="Hard Voting Classifier",
)

In [None]:
# Soft Voting Classifier: averages the base models' predicted class probabilities
print("\nTraining Soft Voting Classifier...")

soft_voting = VotingClassifier(estimators=base_models, voting='soft', n_jobs=-1)

soft_voting_results = evaluate_ensemble(
    model=soft_voting,
    X_train=X_train_smote,
    y_train=y_train_smote,
    X_test=X_test,
    y_test=y_test,
    model_name="Soft Voting Classifier",
)

plot_confusion_matrix(
    cm=soft_voting_results['confusion_matrix'],
    model_name="Soft Voting Classifier",
)

## 2. Stacking Classifier

In [None]:
# Stacking Classifier with Logistic Regression as meta-learner
# (the meta-learner is trained on 3-fold cross-validated base-model predictions)
print("\nTraining Stacking Classifier...")

lr_meta_learner = LogisticRegression(random_state=42)
stacking = StackingClassifier(
    estimators=base_models,
    final_estimator=lr_meta_learner,
    cv=3,
    n_jobs=-1,
)

stacking_results = evaluate_ensemble(
    model=stacking,
    X_train=X_train_smote,
    y_train=y_train_smote,
    X_test=X_test,
    y_test=y_test,
    model_name="Stacking Classifier (LR)",
)

plot_confusion_matrix(
    cm=stacking_results['confusion_matrix'],
    model_name="Stacking Classifier (LR)",
)

In [None]:
# Stacking Classifier with Random Forest as meta-learner
# (same base models and 3-fold scheme as the LR stack; only the final estimator differs)
print("\nTraining Stacking Classifier with RF Meta-learner...")

rf_meta_learner = RandomForestClassifier(n_estimators=50, random_state=42)
stacking_rf = StackingClassifier(
    estimators=base_models,
    final_estimator=rf_meta_learner,
    cv=3,
    n_jobs=-1,
)

stacking_rf_results = evaluate_ensemble(
    model=stacking_rf,
    X_train=X_train_smote,
    y_train=y_train_smote,
    X_test=X_test,
    y_test=y_test,
    model_name="Stacking Classifier (RF)",
)

plot_confusion_matrix(
    cm=stacking_rf_results['confusion_matrix'],
    model_name="Stacking Classifier (RF)",
)

## 3. Additional Ensemble Methods

In [None]:
# AdaBoost Classifier
print("\nTraining AdaBoost Classifier...")

# Depth-3 trees as the weak learner.
ada_weak_learner = DecisionTreeClassifier(max_depth=3)
ada_params = dict(n_estimators=100, learning_rate=1.0, random_state=42)

# scikit-learn renamed `base_estimator` to `estimator` in 1.2 and removed the
# old name in 1.4, where passing it raises TypeError. Prefer the current name
# and fall back only on releases that predate it.
try:
    ada_boost = AdaBoostClassifier(estimator=ada_weak_learner, **ada_params)
except TypeError:
    ada_boost = AdaBoostClassifier(base_estimator=ada_weak_learner, **ada_params)

ada_boost_results = evaluate_ensemble(
    ada_boost, X_train_smote, y_train_smote, X_test, y_test, "AdaBoost Classifier"
)

plot_confusion_matrix(ada_boost_results['confusion_matrix'], "AdaBoost Classifier")

In [None]:
# Gradient Boosting Classifier: 100 boosting stages of depth-5 trees
print("\nTraining Gradient Boosting Classifier...")

gb_classifier = GradientBoostingClassifier(
    n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42
)

gb_results = evaluate_ensemble(
    model=gb_classifier,
    X_train=X_train_smote,
    y_train=y_train_smote,
    X_test=X_test,
    y_test=y_test,
    model_name="Gradient Boosting",
)

plot_confusion_matrix(
    cm=gb_results['confusion_matrix'],
    model_name="Gradient Boosting",
)

## 4. Weighted Voting Classifier

In [None]:
# Weighted Voting based on individual model performance
print("\nTraining Weighted Voting Classifier...")

# Hand-picked weights (a larger weight gives a model more say in the averaged
# probabilities). In practice, these would be determined from validation
# performance. Keyed by estimator name so each weight stays attached to its
# model; the list handed to VotingClassifier follows the order of base_models.
weight_by_model = {'knn': 1.0, 'svm': 1.2, 'dt': 1.1, 'rf': 1.3}
model_weights = [weight_by_model[name] for name, _ in base_models]

weighted_voting = VotingClassifier(
    estimators=base_models, voting='soft', weights=model_weights, n_jobs=-1
)

weighted_voting_results = evaluate_ensemble(
    model=weighted_voting,
    X_train=X_train_smote,
    y_train=y_train_smote,
    X_test=X_test,
    y_test=y_test,
    model_name="Weighted Voting Classifier",
)

plot_confusion_matrix(
    cm=weighted_voting_results['confusion_matrix'],
    model_name="Weighted Voting Classifier",
)

## Ensemble Methods Comparison

In [None]:
# Compile all ensemble results (order here is the row order of the table)
ensemble_results = [
    hard_voting_results,
    soft_voting_results,
    stacking_results,
    stacking_rf_results,
    ada_boost_results,
    gb_results,
    weighted_voting_results,
]

# One record per ensemble; the "N/A" sentinels from evaluate_ensemble become
# NaN so the columns stay numeric.
ensemble_comparison_data = [
    {
        'Model': result['model_name'],
        'Accuracy': result['accuracy'],
        'Precision': result['precision'],
        'Recall': result['recall'],
        'F1 Score': result['f1_score'],
        'ROC AUC': np.nan if result['roc_auc'] == "N/A" else result['roc_auc'],
        'CV F1 Mean': np.nan if result['cv_mean'] == "N/A" else result['cv_mean'],
        'Training Time (s)': result['training_time'],
        'Prediction Time (s)': result['prediction_time'],
    }
    for result in ensemble_results
]

ensemble_comparison_df = pd.DataFrame(ensemble_comparison_data)

rule = "=" * 90
print("\n" + rule)
print("ENSEMBLE METHODS COMPARISON")
print(rule)
print(ensemble_comparison_df.to_string(index=False, float_format='%.4f'))

# Best ensemble = highest F1 on the test split
best_ensemble_idx = ensemble_comparison_df['F1 Score'].idxmax()
best_ensemble_name = ensemble_comparison_df.at[best_ensemble_idx, 'Model']
best_ensemble_f1 = ensemble_comparison_df.at[best_ensemble_idx, 'F1 Score']

print(f"\nBest Ensemble Model: {best_ensemble_name} (F1 Score: {best_ensemble_f1:.4f})")

In [None]:
# Visualization of ensemble comparison: a 2x2 grid of summary panels
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
(ax_f1, ax_pr), (ax_auc, ax_time) = axes

# Top-left: F1 score per model
sns.barplot(data=ensemble_comparison_df, x='F1 Score', y='Model', ax=ax_f1)
ax_f1.set_title('Ensemble Models F1 Score Comparison')

# Top-right: precision/recall trade-off, one labelled point per model
ax_pr.scatter(ensemble_comparison_df['Precision'], ensemble_comparison_df['Recall'], s=100)
for model, precision_value, recall_value in zip(
    ensemble_comparison_df['Model'],
    ensemble_comparison_df['Precision'],
    ensemble_comparison_df['Recall'],
):
    ax_pr.annotate(
        model,
        (precision_value, recall_value),
        xytext=(5, 5),
        textcoords='offset points',
        fontsize=8,
    )
ax_pr.set_xlabel('Precision')
ax_pr.set_ylabel('Recall')
ax_pr.set_title('Precision vs Recall - Ensemble Methods')
ax_pr.grid(True, alpha=0.3)

# Bottom-left: ROC AUC, only for models that produced one
roc_data = ensemble_comparison_df.dropna(subset=['ROC AUC'])
if not roc_data.empty:
    sns.barplot(data=roc_data, x='ROC AUC', y='Model', ax=ax_auc)
    ax_auc.set_title('ROC AUC Comparison')

# Bottom-right: training time on a log axis
sns.barplot(data=ensemble_comparison_df, x='Training Time (s)', y='Model', ax=ax_time)
ax_time.set_title('Training Time Comparison')
ax_time.set_xscale('log')

plt.tight_layout()
plt.show()

## ROC and Precision-Recall Curves for Ensembles

In [None]:
# Plot ROC and PR curves for ensemble methods
# Only models that produced probability scores can be drawn.
prob_ensembles = [result for result in ensemble_results if result['y_pred_proba'] is not None]

if prob_ensembles:
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

    # One pass per model: ROC curve on the left axis, PR curve on the right
    for result in prob_ensembles:
        scores = result['y_pred_proba']

        fpr, tpr, _ = roc_curve(y_test, scores)
        auc_score = roc_auc_score(y_test, scores)
        ax1.plot(fpr, tpr, label=f"{result['model_name']} (AUC: {auc_score:.3f})")

        precision_curve, recall_curve, _ = precision_recall_curve(y_test, scores)
        ax2.plot(recall_curve, precision_curve, label=result['model_name'])

    # Diagonal = chance-level reference for the ROC panel
    ax1.plot([0, 1], [0, 1], 'k--', label='Random')

    panel_labels = (
        (ax1, 'False Positive Rate', 'True Positive Rate', 'ROC Curves - Ensemble Methods'),
        (ax2, 'Recall', 'Precision', 'Precision-Recall Curves - Ensemble Methods'),
    )
    for ax, x_label, y_label, title in panel_labels:
        ax.set_xlabel(x_label)
        ax.set_ylabel(y_label)
        ax.set_title(title)
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

## Compare with Individual Models

In [None]:
# Load individual models comparison if available

# Display names that count as individual models; anything else is treated as
# an ensemble. Defined before the try block because the summary cell reads it
# whether or not the CSV exists.
individual_models = ['KNN', 'Optimized KNN', 'SVM', 'Optimized SVM', 
                    'Decision Tree', 'Optimized Decision Tree', 
                    'Random Forest', 'Optimized Random Forest']

try:
    individual_comparison_df = pd.read_csv(models_dir / 'individual_models_comparison.csv')
except FileNotFoundError:
    print("Individual models comparison file not found. Showing ensemble results only.")
    # Keep the names later cells read defined: an empty frame with the same
    # columns stands in for the missing individual results.
    individual_comparison_df = ensemble_comparison_df.iloc[0:0].copy()
    combined_comparison = ensemble_comparison_df.copy()
    combined_comparison['Type'] = 'Ensemble'
    overall_best_idx = combined_comparison.loc[combined_comparison['F1 Score'].idxmax()]
else:
    # Combine individual and ensemble results
    combined_comparison = pd.concat([individual_comparison_df, ensemble_comparison_df], ignore_index=True)
    
    print("\n" + "="*100)
    print("INDIVIDUAL vs ENSEMBLE MODELS COMPARISON")
    print("="*100)
    
    # Sort by F1 Score
    combined_comparison_sorted = combined_comparison.sort_values('F1 Score', ascending=False)
    print(combined_comparison_sorted.to_string(index=False, float_format='%.4f'))
    
    # Find overall best model (despite the name, this is the best row, not an index)
    overall_best_idx = combined_comparison_sorted.iloc[0]
    print(f"\nOverall Best Model: {overall_best_idx['Model']} (F1 Score: {overall_best_idx['F1 Score']:.4f})")
    
    # Visualization
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
    
    # F1 Score comparison - top 10
    top_10 = combined_comparison_sorted.head(10)
    sns.barplot(data=top_10, x='F1 Score', y='Model', ax=axes[0])
    axes[0].set_title('Top 10 Models - F1 Score Comparison')
    
    # Model type comparison (Individual vs Ensemble)
    combined_comparison['Type'] = np.where(
        combined_comparison['Model'].isin(individual_models), 'Individual', 'Ensemble'
    )
    
    type_comparison = combined_comparison.groupby('Type')[['Accuracy', 'Precision', 'Recall', 'F1 Score']].mean()
    
    type_comparison.plot(kind='bar', ax=axes[1])
    axes[1].set_title('Individual vs Ensemble Models - Average Performance')
    axes[1].set_ylabel('Score')
    axes[1].legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    axes[1].tick_params(axis='x', rotation=0)
    
    plt.tight_layout()
    plt.show()

## Save Best Ensemble Models

In [None]:
# Save ensemble models
ensemble_models = {
    'hard_voting': hard_voting_results['model'],
    'soft_voting': soft_voting_results['model'],
    'stacking_lr': stacking_results['model'],
    'stacking_rf': stacking_rf_results['model'],
    'ada_boost': ada_boost_results['model'],
    'gradient_boost': gb_results['model'],
    'weighted_voting': weighted_voting_results['model']
}

# The loaders tolerate a missing models directory, so it may not exist yet.
# Create it up front; otherwise the first open(..., 'wb') raises
# FileNotFoundError.
models_dir.mkdir(parents=True, exist_ok=True)

for model_name, model in ensemble_models.items():
    model_path = models_dir / f"{model_name}.pkl"
    with open(model_path, 'wb') as f:
        pickle.dump(model, f)
    print(f"Saved {model_name} to {model_path}")

# Save ensemble comparison results
ensemble_comparison_path = models_dir / 'ensemble_models_comparison.csv'
ensemble_comparison_df.to_csv(ensemble_comparison_path, index=False)
print(f"\nEnsemble comparison results saved to {ensemble_comparison_path}")

# Save combined comparison if the comparison cell has been run
if 'combined_comparison' in globals():
    all_models_path = models_dir / 'all_models_comparison.csv'
    combined_comparison.to_csv(all_models_path, index=False)
    print(f"Combined comparison results saved to {all_models_path}")

print("\nAll ensemble models saved successfully!")

## Ensemble Methods Summary

In [None]:
# Final ensemble summary
print("\n" + "="*80)
print("ENSEMBLE METHODS TRAINING SUMMARY")
print("="*80)

print(f"\n1. ENSEMBLE METHODS IMPLEMENTED:")
print(f"   ✓ Hard Voting Classifier")
print(f"   ✓ Soft Voting Classifier")
print(f"   ✓ Stacking Classifier (Logistic Regression)")
print(f"   ✓ Stacking Classifier (Random Forest)")
print(f"   ✓ AdaBoost Classifier")
print(f"   ✓ Gradient Boosting Classifier")
print(f"   ✓ Weighted Voting Classifier")

print(f"\n2. BEST ENSEMBLE MODEL:")
print(f"   Model: {best_ensemble_name}")
print(f"   F1 Score: {best_ensemble_f1:.4f}")
print(f"   Accuracy: {ensemble_comparison_df.loc[best_ensemble_idx, 'Accuracy']:.4f}")
print(f"   Precision: {ensemble_comparison_df.loc[best_ensemble_idx, 'Precision']:.4f}")
print(f"   Recall: {ensemble_comparison_df.loc[best_ensemble_idx, 'Recall']:.4f}")

# The comparison cell may not define the individual-model names (for example
# when individual_models_comparison.csv is missing), so look each one up
# defensively instead of assuming it exists.
summary_overall_best = globals().get('overall_best_idx')
summary_individual_df = globals().get('individual_comparison_df')
summary_individual_names = globals().get('individual_models', [])

if summary_overall_best is not None:
    print(f"\n3. OVERALL BEST MODEL (Individual + Ensemble):")
    print(f"   Model: {summary_overall_best['Model']}")
    print(f"   F1 Score: {summary_overall_best['F1 Score']:.4f}")
    print(f"   Type: {'Individual' if summary_overall_best['Model'] in summary_individual_names else 'Ensemble'}")

has_individual_results = (
    summary_individual_df is not None
    and not summary_individual_df.empty
    and 'F1 Score' in summary_individual_df
)
if has_individual_results:
    # Difference of mean F1 scores scaled by 100, i.e. percentage points.
    ensemble_improvement = (
        ensemble_comparison_df['F1 Score'].mean() - 
        summary_individual_df['F1 Score'].mean()
    ) * 100
    print(f"\n4. ENSEMBLE IMPROVEMENT:")
    print(f"   Average F1 improvement: {ensemble_improvement:+.2f}%")

print(f"\n5. KEY INSIGHTS:")
best_precision_ens = ensemble_comparison_df.loc[ensemble_comparison_df['Precision'].idxmax()]
best_recall_ens = ensemble_comparison_df.loc[ensemble_comparison_df['Recall'].idxmax()]
fastest_ens = ensemble_comparison_df.loc[ensemble_comparison_df['Training Time (s)'].idxmin()]

print(f"   - Best Precision (Ensemble): {best_precision_ens['Model']} ({best_precision_ens['Precision']:.4f})")
print(f"   - Best Recall (Ensemble): {best_recall_ens['Model']} ({best_recall_ens['Recall']:.4f})")
print(f"   - Fastest Training: {fastest_ens['Model']} ({fastest_ens['Training Time (s)']:.3f}s)")

print(f"\n6. ENSEMBLE ADVANTAGES:")
print(f"   - Reduced overfitting through model diversity")
print(f"   - Improved generalization performance")
print(f"   - More robust predictions")
print(f"   - Better handling of model weaknesses")

print(f"\n7. NEXT STEPS:")
print(f"   - Final model evaluation and selection")
print(f"   - Business impact analysis")
print(f"   - Model deployment recommendations")
print(f"   - Performance monitoring setup")

print(f"\n" + "="*80)
print("ENSEMBLE METHODS COMPLETED - Ready for final evaluation")
print("="*80)