# 🔍 Model Explainability with SHAP

## Objectives:
1. Load the trained model
2. Generate SHAP explanations
3. Visualize feature importance
4. Analyze individual predictions
5. Create summary plots

In [None]:
# Import libraries
import sys
# Make ../src importable before the project imports below run (presumably
# where data_preprocessing and model_training live — confirm against the repo).
sys.path.append('../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import shap
import joblib
import warnings
# NOTE(review): this suppresses every warning category for the whole session,
# including deprecation warnings from the libraries above; narrow the filter
# if those warnings matter.
warnings.filterwarnings('ignore')

# Project-local helpers; these imports rely on the sys.path entry added above.
from data_preprocessing import prepare_for_modeling, get_feature_target_split
from model_training import split_data

# Initialize SHAP JavaScript for interactive plots
shap.initjs()

print("✓ Libraries imported successfully")

## 1. Load Model and Data

In [None]:
# Load the persisted artifacts in one pass: the best model, its feature names
# and the metadata mapping that holds the reported scores.
model, feature_names, metadata = (
    joblib.load(artifact_path)
    for artifact_path in (
        '../models/best_model.pkl',
        '../models/feature_names.pkl',
        '../models/model_metadata.pkl',
    )
)

# Report which model was loaded together with its stored metrics.
print(
    f"Model: {metadata['model_name']}",
    f"Accuracy: {metadata['accuracy']:.4f}",
    f"ROC-AUC: {metadata['roc_auc']:.4f}",
    sep="\n",
)

In [None]:
# Read the cleaned dataset and run it through the project's preprocessing
# helpers to obtain the feature matrix X and the target y.
df = pd.read_csv('../data/spacex_cleaned.csv')
df_model = prepare_for_modeling(df)
X, y = get_feature_target_split(df_model)

# Hold out 20% of the rows as the test set, with a fixed random_state.
X_train, X_test, y_train, y_test = split_data(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print(
    f"Training samples: {len(X_train)}",
    f"Test samples: {len(X_test)}",
    sep="\n",
)

## 2. Create SHAP Explainer

In [None]:
# Create SHAP explainer
# TreeExplainer is SHAP's explainer for tree-based models, so this assumes the
# loaded `model` is a tree model/ensemble — TODO confirm against the metadata.
print("Creating SHAP explainer...")
explainer = shap.TreeExplainer(model)
print("✓ Explainer created")

In [None]:
# Calculate SHAP values for test set
print("Calculating SHAP values...")
shap_values = explainer.shap_values(X_test)

# Depending on the model type and SHAP version, a classifier can yield one set
# of attributions per class: either a list of (n_samples, n_features) arrays
# or a single (n_samples, n_features, n_classes) array. The rest of this
# notebook indexes `shap_values` as a 2-D samples x features matrix
# (`shap_values[sample_idx]`, `np.abs(shap_values).mean(axis=0)`), so keep only
# the last class in that case — presumably label 1 / "success"; verify against
# the model's class order. A plain 2-D result passes through unchanged.
# NOTE: `explainer.expected_value` is not modified here and may still hold one
# base value per class.
if isinstance(shap_values, list):
    shap_values = shap_values[-1]
shap_values = np.asarray(shap_values)
if shap_values.ndim == 3:
    shap_values = shap_values[:, :, -1]

print("✓ SHAP values calculated")

## 3. SHAP Summary Plots

In [None]:
# Summary plot (bar): one bar per feature showing its mean |SHAP value|.
plt.figure(figsize=(10, 8))
# plot_size=None tells summary_plot to leave the current figure's size alone.
# With the default plot_size="auto" it resizes the current figure, so the
# figsize requested above would be silently discarded.
shap.summary_plot(shap_values, X_test, plot_type="bar", plot_size=None, show=False)
# show=False above keeps the figure open so the title can still be added.
plt.title('SHAP Feature Importance', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

In [None]:
# Summary plot (beeswarm): one point per sample and feature, positioned by its
# SHAP value.
plt.figure(figsize=(10, 8))
# plot_size=None tells summary_plot to leave the current figure's size alone.
# With the default plot_size="auto" it resizes the current figure, so the
# figsize requested above would be silently discarded.
shap.summary_plot(shap_values, X_test, plot_size=None, show=False)
# show=False above keeps the figure open so the title can still be added.
plt.title('SHAP Summary Plot - Feature Impact on Predictions', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

## 4. Individual Feature Analysis

In [None]:
# Rank features by their mean absolute SHAP value over the test set and keep
# the five highest-scoring ones (indices sorted in descending order).
feature_importance = np.mean(np.abs(shap_values), axis=0)
top_features_idx = np.argsort(feature_importance)[::-1][:5]
top_features = X_test.columns[top_features_idx]

print("Top 5 Most Important Features:")
# Pair each selected column name with the score found at the same position.
top_scores = feature_importance[top_features_idx]
for rank, (feat, score) in enumerate(zip(top_features, top_scores), start=1):
    print(f"{rank}. {feat}: {score:.4f}")

In [None]:
# Dependence plots for top features: a 2x3 grid, one panel per top feature.
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
axes = axes.ravel()

# Pair each feature with its own axis; show=False keeps the shared figure open
# until every panel has been drawn.
for ax, feat in zip(axes, top_features):
    shap.dependence_plot(feat, shap_values, X_test, ax=ax, show=False)
    ax.set_title(f'Dependence Plot: {feat}')

# Hide every panel that did not receive a plot. With five top features this is
# just the 6th subplot, but a dataset with fewer features would otherwise
# leave extra empty axes visible.
for ax in axes[len(top_features):]:
    ax.axis('off')

plt.tight_layout()
plt.show()

## 5. Individual Prediction Explanations

In [None]:
# Explain the prediction for a launch whose actual label is 1 (success): take
# the first such entry of y_test and translate its index label into a row
# position within X_test.
success_idx = y_test.loc[y_test == 1].index[0]
sample_idx = X_test.index.get_loc(success_idx)

print(f"Explaining successful launch prediction (Index: {success_idx})")
print("\nFeature values:")
print(X_test.iloc[sample_idx])
print("\nActual: Success")

# Single-row frame (double brackets keep it 2-D) reused for both model calls.
success_row = X_test.iloc[[sample_idx]]
predicted_label = 'Success' if model.predict(success_row)[0] == 1 else 'Failure'
print(f"Predicted: {predicted_label}")
success_probability = model.predict_proba(success_row)[0][1]
print(f"Probability: {success_probability:.2%}")

In [None]:
# Force plot for the successful launch selected above (row `sample_idx`).
# show=False is required here: with matplotlib=True and the default show=True,
# force_plot displays the figure before returning, so the title and layout
# calls below would act on a new, empty figure instead of the force plot.
shap.force_plot(
    explainer.expected_value,
    shap_values[sample_idx],
    X_test.iloc[sample_idx],
    matplotlib=True,
    show=False,
)
plt.title('SHAP Force Plot - Successful Launch')
plt.tight_layout()
plt.show()

In [None]:
# Explain the prediction for a failed launch (actual label 0), if the test set
# contains one.
if (y_test == 0).any():
    # First failed launch in y_test, translated to a row position in X_test.
    failure_idx = y_test[y_test == 0].index[0]
    sample_idx_fail = X_test.index.get_loc(failure_idx)

    print(f"Explaining failed launch prediction (Index: {failure_idx})")
    print(f"\nFeature values:")
    print(X_test.iloc[sample_idx_fail])
    print(f"\nActual: Failure")
    print(f"Predicted: {'Success' if model.predict(X_test.iloc[[sample_idx_fail]])[0] == 1 else 'Failure'}")
    print(f"Probability: {model.predict_proba(X_test.iloc[[sample_idx_fail]])[0][1]:.2%}")

    # Force plot. show=False is required: with matplotlib=True and the default
    # show=True, force_plot displays the figure before returning, so the title
    # and layout calls below would act on a new, empty figure.
    shap.force_plot(explainer.expected_value, shap_values[sample_idx_fail],
                    X_test.iloc[sample_idx_fail], matplotlib=True, show=False)
    plt.title('SHAP Force Plot - Failed Launch')
    plt.tight_layout()
    plt.show()

## 6. Waterfall Plots

In [None]:
# Waterfall plot for a single prediction: wrap the SHAP values, base value and
# feature values of row `sample_idx` in an Explanation object, then plot it.
single_explanation = shap.Explanation(
    values=shap_values[sample_idx],
    base_values=explainer.expected_value,
    data=X_test.iloc[sample_idx],
    feature_names=X_test.columns.tolist(),
)
shap.waterfall_plot(single_explanation)

## 7. Save SHAP Values

In [None]:
# Bundle the SHAP values, the explainer's expected value and the test-set
# column names into one dictionary and persist it for future use.
shap_data = dict(
    shap_values=shap_values,
    expected_value=explainer.expected_value,
    feature_names=X_test.columns.tolist(),
)

joblib.dump(shap_data, '../models/shap_values.pkl')
print("✓ SHAP values saved to ../models/shap_values.pkl")

## 8. Key Insights

### From SHAP Analysis:
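- The most important features are identified by ranking them on mean absolute SHAP value (Section 4)
- Feature interactions are visualized with dependence plots for the top features (Section 4)
- Individual predictions are explained with force and waterfall plots (Sections 5–6)
- Together, these views make the model's behavior interpretable and transparent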