## Task 3 - Model Explainability with SHAP
### Complete Implementation for Model Interpretation

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import make_pipeline
from sklearn.metrics import classification_report, roc_auc_score
import shap
import matplotlib.pyplot as plt

# 1. Load and Prepare Data (Example with synthetic data)
def load_data():
    """Create synthetic data for demonstration"""
    # E-commerce data
    ecom_data = pd.DataFrame({
        'purchase_value': np.random.exponential(100, 1000),
        'time_since_signup': np.random.uniform(0, 168, 1000),
        'country_risk': np.random.choice([0.1, 0.5, 1.0, 5.0], 1000),
        'device_age': np.random.randint(0, 365, 1000),
        'class': np.random.choice([0, 1], 1000, p=[0.98, 0.02])
    })
    
    # Credit card data
    credit_data = pd.DataFrame({
        'V1': np.random.normal(0, 1, 1000),
        'V2': np.random.normal(0, 1, 1000),
        'V3': np.random.normal(0, 1, 1000),
        'Amount': np.random.exponential(50, 1000),
        'Time_Delta': np.random.exponential(300, 1000),
        'Class': np.random.choice([0, 1], 1000, p=[0.995, 0.005])
    })
    
    return ecom_data, credit_data

# 2. Train Models Function
def train_models(X_train, y_train, X_test, y_test, dataset_name):
    """Train and evaluate XGBoost model with SMOTE"""
    model = make_pipeline(
        SMOTE(random_state=42),
        XGBClassifier(
            scale_pos_weight=sum(y_train==0)/sum(y_train==1),
            eval_metric='logloss',
            random_state=42
        )
    )
    
    model.fit(X_train, y_train)
    
    # Evaluate
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:,1]
    
    print(f"\n{dataset_name} Model Performance:")
    print(classification_report(y_test, y_pred))
    print(f"ROC AUC: {roc_auc_score(y_test, y_proba):.4f}")
    
    return model

# 3. SHAP Explanation Function
def explain_model(model, X_train, X_test, dataset_name):
    """Generate SHAP explanations with error handling"""
    try:
        # Create explainer
        explainer = shap.TreeExplainer(model.named_steps['xgbclassifier'])
        shap_values = explainer.shap_values(X_train)
        
        # Global feature importance
        plt.figure(figsize=(10, 6))
        shap.summary_plot(shap_values, X_train, show=False, plot_type="bar")
        plt.title(f"{dataset_name} - Global Feature Importance")
        plt.tight_layout()
        plt.savefig(f"{dataset_name}_shap_summary.png")
        plt.close()
        
        # Detailed summary plot
        plt.figure(figsize=(10, 6))
        shap.summary_plot(shap_values, X_train, show=False)
        plt.title(f"{dataset_name} - Feature Impact Direction")
        plt.tight_layout()
        plt.savefig(f"{dataset_name}_shap_detailed.png")
        plt.close()
        
        # Sample force plot
        sample_idx = np.random.choice(X_test.index, 1)[0]
        plt.figure()
        shap.force_plot(
            explainer.expected_value,
            shap_values[sample_idx,:],
            X_test.loc[sample_idx,:],
            matplotlib=True,
            show=False
        )
        plt.title(f"{dataset_name} - Local Explanation")
        plt.tight_layout()
        plt.savefig(f"{dataset_name}_force_plot.png")
        plt.close()
        
        return {
            "status": "success",
            "plots": [
                f"{dataset_name}_shap_summary.png",
                f"{dataset_name}_shap_detailed.png",
                f"{dataset_name}_force_plot.png"
            ]
        }
        
    except Exception as e:
        print(f"SHAP error: {str(e)}")
        return {
            "status": "error",
            "message": str(e)
        }

# 4. Main Execution
def main():
    # Load data
    ecom_data, credit_data = load_data()
    
    # Prepare e-commerce data
    X_ecom = ecom_data.drop('class', axis=1)
    y_ecom = ecom_data['class']
    X_train_ecom, X_test_ecom, y_train_ecom, y_test_ecom = train_test_split(
        X_ecom, y_ecom, test_size=0.2, random_state=42, stratify=y_ecom)
    
    # Prepare credit card data
    X_credit = credit_data.drop('Class', axis=1)
    y_credit = credit_data['Class']
    X_train_credit, X_test_credit, y_train_credit, y_test_credit = train_test_split(
        X_credit, y_credit, test_size=0.2, random_state=42, stratify=y_credit)
    
    # Train models
    print("Training e-commerce model...")
    ecom_model = train_models(X_train_ecom, y_train_ecom, X_test_ecom, y_test_ecom, "E-commerce")
    
    print("\nTraining credit card model...")
    credit_model = train_models(X_train_credit, y_train_credit, X_test_credit, y_test_credit, "Credit Card")
    
    # Generate explanations
    print("\nExplaining e-commerce model...")
    ecom_explanations = explain_model(ecom_model, X_train_ecom, X_test_ecom, "ecommerce")
    
    print("\nExplaining credit card model...")
    credit_explanations = explain_model(credit_model, X_train_credit, X_test_credit, "creditcard")
    
    print("\nProcess completed successfully!")
    print("Saved explanation plots in current directory")

if __name__ == "__main__":
    main()

  from .autonotebook import tqdm as notebook_tqdm


Training e-commerce model...

E-commerce Model Performance:
              precision    recall  f1-score   support

           0       0.97      0.96      0.96       194
           1       0.00      0.00      0.00         6

    accuracy                           0.93       200
   macro avg       0.48      0.48      0.48       200
weighted avg       0.94      0.93      0.93       200

ROC AUC: 0.5498

Training credit card model...

Credit Card Model Performance:
              precision    recall  f1-score   support

           0       0.99      0.92      0.96       198
           1       0.00      0.00      0.00         2

    accuracy                           0.92       200
   macro avg       0.49      0.46      0.48       200
weighted avg       0.98      0.92      0.95       200

ROC AUC: 0.2449

Explaining e-commerce model...
SHAP error: index 864 is out of bounds for axis 0 with size 800

Explaining credit card model...

Process completed successfully!
Saved explanation plots in cu

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>