# 08 - Model Interpretation

**Objective**: Explain model predictions for business insights

**Deliverables**:
- SHAP summary plots
- Coefficient odds ratios (LR)
- Business translations

In [None]:
# Data handling, plotting and persistence libraries used by the cells below
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import joblib
from pathlib import Path
import warnings
# No category or module filter is given, so every warning is suppressed from here on
warnings.filterwarnings('ignore')

import shap
# Load SHAP's JavaScript support so its interactive plots can render in the notebook
shap.initjs()

# Style sheet name with the "seaborn-v0_8-" prefix (the spelling used by Matplotlib 3.6+)
plt.style.use('seaborn-v0_8-whitegrid')
print('Libraries loaded!')

In [None]:
# Locations: trained artifacts are read from MODEL_PATH, report outputs are
# written to REPORTING_PATH (created on demand, parents included).
MODEL_PATH = Path('../data/06_models')
REPORTING_PATH = Path('../data/08_reporting')
REPORTING_PATH.mkdir(parents=True, exist_ok=True)

# Deserialize the fitted estimators and the scaler.
# NOTE(review): joblib.load unpickles arbitrary objects; only point this at
# files produced by the project's own training step.
lr_model = joblib.load(MODEL_PATH.joinpath('logistic_regression.pkl'))
xgb_model = joblib.load(MODEL_PATH.joinpath('xgboost.pkl'))
lgb_model = joblib.load(MODEL_PATH.joinpath('lightgbm.pkl'))
scaler = joblib.load(MODEL_PATH.joinpath('scaler.pkl'))

# Held-out rows plus the ordered list of feature column names
test_df = pd.read_csv(MODEL_PATH.joinpath('test_set.csv'))
FEATURES = json.loads(MODEL_PATH.joinpath('feature_list.json').read_text())

# Split the test frame into the feature matrix and the label column
TARGET = 'Churn'
X_test, y_test = test_df[FEATURES], test_df[TARGET]

print(f"Features: {len(FEATURES)}")
print(f"Test samples: {len(X_test):,}")

## 1. Logistic Regression Coefficients

In [None]:
# Tabulate each feature's logistic-regression weight next to exp(weight),
# i.e. its odds ratio, ordered by absolute weight (largest first).
coef_df = (
    pd.DataFrame({'Feature': FEATURES, 'Coefficient': lr_model.coef_[0]})
    .assign(Odds_Ratio=lambda frame: np.exp(frame['Coefficient']))
    .sort_values(by='Coefficient', key=abs, ascending=False)
)

print("üìä TOP 15 FEATURES BY COEFFICIENT MAGNITUDE:")
display(coef_df.head(15))

In [None]:
# Horizontal bar chart of the top_n largest-magnitude coefficients,
# re-sorted by signed value so bars run from most negative to most positive.
top_n = 20
plot_df = coef_df.head(top_n).sort_values('Coefficient')

# Red for weights above zero, green for everything else
colors = (
    plot_df['Coefficient']
    .gt(0)
    .map({True: '#e74c3c', False: '#2ecc71'})
    .tolist()
)

fig, ax = plt.subplots(figsize=(10, 8))
ax.barh(
    y=plot_df['Feature'],
    width=plot_df['Coefficient'],
    color=colors,
    edgecolor='black',
)
ax.axvline(0, color='black', linewidth=0.5)  # zero reference line
ax.set_xlabel('Coefficient (Log Odds)')
ax.set_title(
    'Logistic Regression - Feature Coefficients',
    fontsize=14,
    fontweight='bold',
)

fig.tight_layout()
fig.savefig(REPORTING_PATH / 'lr_coefficients.png', dpi=150)
plt.show()

In [None]:
# Translate the ten largest-magnitude coefficients into percentage changes
# in churn odds.
# NOTE(review): "per unit" means per unit of whatever inputs the model was
# fitted on; a scaler is loaded next to the model, so these may be
# standardized units - confirm before quoting the numbers.
print("\nüìä BUSINESS INTERPRETATION (Odds Ratios):", "=" * 70, sep="\n")

strongest = coef_df.head(10)
for feat, odds in zip(strongest['Feature'], strongest['Odds_Ratio']):
    if odds > 1:
        # Odds ratio above 1: odds grow by (ratio - 1)
        arrow, sign, pct_change = "‚Üë", "+", (odds - 1) * 100
    else:
        # Otherwise: odds shrink by (1 - ratio)
        arrow, sign, pct_change = "‚Üì", "-", (1 - odds) * 100
    print(f"{arrow} {feat}: {sign}{pct_change:.1f}% churn odds per unit increase")

## 2. SHAP Analysis - XGBoost

In [None]:
# Explain a reproducible random subset of the test set (capped at 1000 rows)
# so the SHAP pass stays quick.
sample_size = min(len(X_test), 1000)
X_sample = X_test.sample(sample_size, random_state=42)

print(f"Calculating SHAP values on {sample_size} samples...")

# The explainer is kept in its own variable because the single-customer
# explanation further down calls it again.
explainer = shap.TreeExplainer(xgb_model)
shap_values = explainer.shap_values(X_sample)

print("‚úÖ SHAP values calculated")

In [None]:
# SHAP Summary Plot
# The calls below act on pyplot's current figure, so their order matters:
# create the figure, let SHAP draw into it, decorate, save, then display.
plt.figure(figsize=(12, 10))
# show=False stops SHAP from displaying the plot itself so it can still be
# titled and saved; max_display=20 limits the plot to 20 features.
shap.summary_plot(shap_values, X_sample, show=False, max_display=20)
plt.title('SHAP Feature Importance (XGBoost)', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig(REPORTING_PATH / 'shap_summary.png', dpi=150, bbox_inches='tight')
plt.show()

print("üíæ Saved: shap_summary.png")

In [None]:
# Global importance per feature: average of |SHAP| over the sampled rows,
# listed from most to least influential.
mean_abs_shap = np.mean(np.abs(shap_values), axis=0)
shap_importance = pd.DataFrame(
    {'Feature': FEATURES, 'Mean_SHAP': mean_abs_shap}
).sort_values(by='Mean_SHAP', ascending=False)

print("üìä TOP 15 FEATURES BY SHAP IMPORTANCE:")
display(shap_importance.head(15))

In [None]:
# Bar plot of importance
# plot_type='bar' requests SHAP's bar variant of the summary plot;
# max_display=15 caps the number of features shown. show=False keeps the
# figure open so the title can be added and the image saved before plt.show().
plt.figure(figsize=(10, 8))
shap.summary_plot(shap_values, X_sample, plot_type='bar', show=False, max_display=15)
plt.title('Mean |SHAP| Value (Feature Importance)', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig(REPORTING_PATH / 'shap_importance_bar.png', dpi=150)
plt.show()

## 3. SHAP Dependence Plots

In [None]:
# SHAP dependence plots (drawn with shap.dependence_plot, not classical
# partial dependence) for the first 4 rows of shap_importance, which is
# sorted by Mean_SHAP in descending order when it is built.
top_features = shap_importance.head(4)['Feature'].tolist()

# 2x2 grid: one panel per selected feature
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

for i, feat in enumerate(top_features):
    # Flatten the 2x2 axes array so panel i can be addressed with one index
    ax = axes.flatten()[i]
    # Draw into the pre-made axes; show=False leaves the figure open for saving
    shap.dependence_plot(feat, shap_values, X_sample, ax=ax, show=False)
    ax.set_title(f'SHAP Dependence: {feat}', fontsize=12, fontweight='bold')

plt.tight_layout()
plt.savefig(REPORTING_PATH / 'shap_dependence.png', dpi=150)
plt.show()

## 4. Individual Prediction Explanations

In [None]:
# Score every test row with the XGBoost model and keep the positions of the
# five largest positive-class probabilities, highest first.
y_proba = xgb_model.predict_proba(X_test)[:, 1]
ascending_order = np.argsort(y_proba)
high_risk_idx = ascending_order[::-1][:5]

print("üìä TOP 5 HIGH-RISK CUSTOMERS:")
for position, row_pos in enumerate(high_risk_idx, start=1):
    # Compare the predicted probability with the recorded outcome
    status = "Churned" if y_test.iloc[row_pos] == 1 else "Retained"
    print(f"{position}. Probability: {y_proba[row_pos]:.3f}, Actual: {status}")

In [None]:
# Explain the single highest-probability prediction as a ranked table of
# per-feature SHAP contributions (no force plot is rendered here).
idx = high_risk_idx[0]
customer_data = X_test.iloc[[idx]]  # one-row frame, as the explainer expects 2-D input

# SHAP values for just this row
customer_shap = explainer.shap_values(customer_data)

actual_label = 'Churned' if y_test.iloc[idx] == 1 else 'Retained'
print("\nüîç EXPLANATION FOR HIGHEST RISK CUSTOMER:")
print(f"   Churn Probability: {y_proba[idx]:.3f}")
print(f"   Actual: {actual_label}")

# Pair each feature with the customer's value and its SHAP contribution,
# then order by absolute contribution.
contrib_df = pd.DataFrame({
    'Feature': FEATURES,
    'Value': customer_data.to_numpy()[0],
    'SHAP': customer_shap[0],
})
contrib_df = contrib_df.sort_values(by='SHAP', key=abs, ascending=False)

print("\nüìä TOP 5 CONTRIBUTING FACTORS:")
for feature, value, contribution in contrib_df.head(5).itertuples(index=False, name=None):
    # Up arrow: pushes the prediction towards churn; down arrow: away from it
    direction = "‚Üë" if contribution > 0 else "‚Üì"
    print(f"   {direction} {feature}: {value:.2f} (SHAP: {contribution:+.3f})")

## 5. Business Insights Summary

In [None]:
# Generate business insights: for three hand-picked feature domains, print
# each feature's SHAP importance rank and mean |SHAP| value.
print("="*70)
print("üìã BUSINESS INSIGHTS FOR RETENTION STRATEGY")
print("="*70)

# Group features by domain
service_quality = ['DroppedCalls', 'BlockedCalls', 'CallFailureRate', 'UnansweredCalls']
engagement = ['CustomerCareCalls', 'CustomerCareIntensity', 'CareCallsPerRevenue']
tenure = ['MonthsInService', 'CurrentEquipmentDays', 'EquipmentAgeRatio']

# Rank lookup: feature name -> (1-based rank by Mean_SHAP, Mean_SHAP).
# The rank has to come from the row's *position* after sorting. sort_values
# keeps the original index labels, so reading `.index[0] + 1` from
# shap_importance would report the feature's position in FEATURES instead of
# its importance rank. A stable sort keeps tied rows in their existing order.
ranked_importance = shap_importance.sort_values(
    'Mean_SHAP', ascending=False, kind='stable'
).reset_index(drop=True)

feature_stats = {}
for position, (name, impact) in enumerate(
    zip(ranked_importance['Feature'], ranked_importance['Mean_SHAP']), start=1
):
    # setdefault keeps the first (highest-ranked) entry should a name repeat
    feature_stats.setdefault(name, (position, impact))

# One (heading, feature list) pair per domain; the printing logic is shared
domain_groups = [
    ("\nüìû SERVICE QUALITY DRIVERS:", service_quality),
    ("\nüéß CUSTOMER ENGAGEMENT:", engagement),
    ("\nüìÖ TENURE & EQUIPMENT:", tenure),
]

for heading, group_features in domain_groups:
    print(heading)
    for feat in group_features:
        # Features that are not in the importance table are skipped silently
        if feat not in feature_stats:
            continue
        rank, shap_val = feature_stats[feat]
        print(f"   ‚Ä¢ {feat} (Rank #{rank}, Impact: {shap_val:.4f})")

In [None]:
# Print the retention playbook under a banner.
# NOTE: the recommendation texts (including the ">180 days" and "months 0-6"
# thresholds) are fixed strings; nothing here is computed from the models.
print("\n" + "=" * 70, "üéØ ACTIONABLE RECOMMENDATIONS", "=" * 70, sep="\n")

recommendations = [
    "1. NETWORK QUALITY: Prioritize reducing dropped/blocked calls in high-churn areas",
    "2. PROACTIVE SUPPORT: Customers with high care call intensity need dedicated account managers",
    "3. EQUIPMENT UPGRADE: Target customers with old equipment (>180 days) for upgrade offers",
    "4. EARLY TENURE: Focus retention efforts on customers in months 0-6 (highest risk period)",
    "5. REVENUE EFFICIENCY: Monitor customers with declining revenue-per-minute ratios",
]

# Each entry is preceded by a blank line
for rec in recommendations:
    print("\n" + rec)

## 6. Save Interpretation Report

In [None]:
# Save artifacts
# Persist the two tables built in this notebook; index=False leaves the row
# labels out of the CSV files.
coef_df.to_csv(REPORTING_PATH / 'lr_coefficients.csv', index=False)
shap_importance.to_csv(REPORTING_PATH / 'shap_importance.csv', index=False)

# Summary of what this notebook writes to REPORTING_PATH. The PNG files are
# saved by the plotting cells; lr_coefficients.png is listed too because the
# coefficient chart is saved to the same directory.
print("üíæ Saved:")
print("   - lr_coefficients.csv")
print("   - shap_importance.csv")
print("   - lr_coefficients.png")
print("   - shap_summary.png")
print("   - shap_importance_bar.png")
print("   - shap_dependence.png")

In [None]:
# Closing banner: report where the models and the report files live.
banner = "=" * 70
closing_lines = [
    "\n" + banner,
    "üéâ CHURN PREDICTION PIPELINE COMPLETE!",
    banner,
    "\nüìÅ ALL ARTIFACTS SAVED IN:",
    f"   Models: {MODEL_PATH}",
    f"   Reports: {REPORTING_PATH}",
    "\n‚úÖ Ready for deployment!",
    banner,
]
print("\n".join(closing_lines))