# Customer Churn Prediction - Part 3: Model Evaluation

## Overview
This notebook covers:
1. Detailed Model Evaluation
2. Performance Metrics
3. Visualization (Confusion Matrix, ROC Curve, Precision-Recall Curve)
4. Feature Importance Analysis
5. Model Interpretation & Insights

## Step 1: Import Libraries

In [ ]:
# Data manipulation
import pandas as pd
import numpy as np

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Machine Learning Metrics
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, roc_curve, precision_recall_curve, auc,
    confusion_matrix, classification_report
)

# Load models
import joblib

# Global plotting defaults: apply the matplotlib style sheet first, then set
# the seaborn "husl" colour palette.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Suppress every warning for the rest of the session.
# NOTE(review): this blanket filter hides all warning categories, including
# ones that may point at real problems; consider narrowing it by category.
import warnings
warnings.filterwarnings('ignore')

# Confirmation message printed once the cell has run.
print("Libraries imported successfully!")

## Step 2: Load Data and Models

In [ ]:
# Load the held-out test split.
X_test = pd.read_csv('data/X_test.csv')
# squeeze("columns") turns the single-column frame into a Series while still
# returning a Series (not a scalar) if the file happens to hold one row.
y_test = pd.read_csv('data/y_test.csv').squeeze("columns")

# Load the fitted scaler and build a scaled copy of the test features that
# keeps the original column names and row index.
scaler = joblib.load('models/scaler.pkl')
X_test_scaled = pd.DataFrame(
    scaler.transform(X_test),
    columns=X_test.columns,
    index=X_test.index,
)

# Required models: a missing file here is an error and should stop the run.
# NOTE: joblib.load unpickles arbitrary objects; only load trusted files.
lr_model = joblib.load('models/logistic_regression.pkl')
rf_model = joblib.load('models/random_forest.pkl')


def _load_optional_model(path):
    """Return the model stored at ``path``, or None if it cannot be loaded.

    Optional models are best-effort: any ordinary loading error (missing
    file, missing library, incompatible pickle) is reported and skipped.
    ``Exception`` is caught rather than using a bare ``except`` so that
    KeyboardInterrupt and SystemExit still propagate.
    """
    try:
        return joblib.load(path)
    except Exception as exc:
        print(f"Skipping optional model '{path}': {type(exc).__name__}: {exc}")
        return None


# Optional models: later cells branch on the *_AVAILABLE flags.
rf_tuned_model = _load_optional_model('models/random_forest_tuned.pkl')
RF_TUNED_AVAILABLE = rf_tuned_model is not None

xgb_model = _load_optional_model('models/xgboost.pkl')
XGBOOST_AVAILABLE = xgb_model is not None

print("Data and models loaded successfully!")
print(f"Test set size: {X_test.shape[0]} samples")

## Step 3: Model Evaluation Function

In [ ]:
def evaluate_model(model, X_test, y_test, model_name, scaled=False):
    """
    Compute classification metrics and curve data for one fitted model.

    Parameters
    ----------
    model : estimator
        Fitted classifier exposing ``predict`` and ``predict_proba``.
    X_test : array-like
        Feature matrix handed to the model unchanged. The caller decides
        whether to pass scaled or unscaled features.
    y_test : array-like
        True labels for ``X_test``.
    model_name : str
        Display name stored under the ``'Model'`` key of the result.
    scaled : bool, default False
        Informational only. It is kept for backward compatibility and does
        not change what this function does; no scaling is applied here.

    Returns
    -------
    dict
        Scalar metrics (``'Accuracy'``, ``'Precision'``, ``'Recall'``,
        ``'F1-Score'``, ``'ROC-AUC'``, ``'PR-AUC'``), the
        ``'Confusion Matrix'``, ROC curve points (``'FPR'``, ``'TPR'``),
        precision-recall curve points (``'Precision Curve'``,
        ``'Recall Curve'``), plus the raw ``'Predictions'`` and
        ``'Probabilities'``.
    """
    # Hard class predictions and the score of the second predict_proba column.
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]

    # Threshold-dependent metrics use the hard predictions.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Ranking metric uses the predicted probabilities.
    roc_auc = roc_auc_score(y_test, y_pred_proba)

    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred)

    # ROC curve points (thresholds are not needed downstream)
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)

    # Precision-Recall curve and the area under it (recall on the x-axis)
    precision_curve, recall_curve, _ = precision_recall_curve(y_test, y_pred_proba)
    pr_auc = auc(recall_curve, precision_curve)

    return {
        'Model': model_name,
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1-Score': f1,
        'ROC-AUC': roc_auc,
        'PR-AUC': pr_auc,
        'Confusion Matrix': cm,
        'FPR': fpr,
        'TPR': tpr,
        'Precision Curve': precision_curve,
        'Recall Curve': recall_curve,
        'Predictions': y_pred,
        'Probabilities': y_pred_proba
    }

print("Evaluation function created!")

## Step 4: Evaluate All Models

In [ ]:
# Logistic Regression is scored on the scaled copy of the test features.
lr_results = evaluate_model(
    lr_model, X_test_scaled, y_test,
    model_name='Logistic Regression', scaled=True,
)

# The remaining models are scored on the unscaled test features.
rf_results = evaluate_model(
    rf_model, X_test, y_test,
    model_name='Random Forest', scaled=False,
)

# Tuned Random Forest, only when its file was loaded
if RF_TUNED_AVAILABLE:
    rf_tuned_results = evaluate_model(
        rf_tuned_model, X_test, y_test,
        model_name='Random Forest (Tuned)', scaled=False,
    )

# XGBoost, only when its file was loaded
if XGBOOST_AVAILABLE:
    xgb_results = evaluate_model(
        xgb_model, X_test, y_test,
        model_name='XGBoost', scaled=False,
    )

print("All models evaluated!")

## Step 5: Performance Metrics Comparison

In [ ]:
# Result dicts to compare, in display order; optional models are appended
# only when they were loaded and evaluated.
evaluated_results = [lr_results, rf_results]
if RF_TUNED_AVAILABLE:
    evaluated_results.append(rf_tuned_results)
if XGBOOST_AVAILABLE:
    evaluated_results.append(xgb_results)

# Scalar entries of each result dict that become table columns.
summary_columns = [
    'Model', 'Accuracy', 'Precision', 'Recall',
    'F1-Score', 'ROC-AUC', 'PR-AUC',
]

# One list per column, one entry per evaluated model.
comparison_data = {
    column: [result[column] for result in evaluated_results]
    for column in summary_columns
}
comparison_df = pd.DataFrame(comparison_data)

divider = "=" * 80

# Full-precision table
print("Model Performance Comparison:")
print(divider)
print(comparison_df.to_string(index=False))

# Same table rounded to four decimals for display
comparison_df_rounded = comparison_df.round(4)
print("\n" + divider)
print(comparison_df_rounded.to_string(index=False))

## Step 6: Confusion Matrix Visualization

In [ ]:
def plot_confusion_matrix(cm, model_name, ax):
    """Draw ``cm`` on ``ax`` as an annotated heatmap titled with ``model_name``."""
    class_names = ['No Churn', 'Churn']

    # Integer-annotated heatmap without a colour bar.
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax, cbar=False)

    ax.set_title(f'{model_name}\nConfusion Matrix', fontweight='bold')

    # Axis captions
    ax.set_xlabel('Predicted Label', fontsize=10)
    ax.set_ylabel('True Label', fontsize=10)

    # The same class names label both axes.
    ax.set_xticklabels(class_names)
    ax.set_yticklabels(class_names)

# Result dicts to draw, in panel order; optional models are included only
# when they were loaded and evaluated.
cm_results = [lr_results, rf_results]
if RF_TUNED_AVAILABLE:
    cm_results.append(rf_tuned_results)
if XGBOOST_AVAILABLE:
    cm_results.append(xgb_results)

num_models = len(cm_results)

# squeeze=False makes plt.subplots return a 2-D array of Axes for any panel
# count, so no special case is needed; ravel() flattens it to one row.
fig, axes = plt.subplots(1, num_models, figsize=(6*num_models, 5), squeeze=False)
axes = axes.ravel()

# One panel per model, titled with the name stored in its result dict.
for ax, model_results in zip(axes, cm_results):
    plot_confusion_matrix(model_results['Confusion Matrix'], model_results['Model'], ax)

plt.tight_layout()
plt.savefig('models/confusion_matrices.png', dpi=300, bbox_inches='tight')
plt.show()

## Step 7: ROC Curve Visualization

In [ ]:
# (legend name, result dict) pairs in plotting order; optional models are
# added only when they were evaluated.
roc_series = [
    ('Logistic Regression', lr_results),
    ('Random Forest', rf_results),
]
if RF_TUNED_AVAILABLE:
    roc_series.append(('Random Forest Tuned', rf_tuned_results))
if XGBOOST_AVAILABLE:
    roc_series.append(('XGBoost', xgb_results))

plt.figure(figsize=(10, 8))

# One ROC curve per model, with its ROC-AUC in the legend entry.
for curve_name, curve_results in roc_series:
    plt.plot(
        curve_results['FPR'],
        curve_results['TPR'],
        label=f"{curve_name} (AUC = {curve_results['ROC-AUC']:.4f})",
        linewidth=2,
    )

# Dashed diagonal as the random-classifier reference
plt.plot([0, 1], [0, 1], 'k--', label='Random Classifier (AUC = 0.5000)', linewidth=1)

# Axis limits, captions and legend
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate', fontsize=12)
plt.ylabel('True Positive Rate', fontsize=12)
plt.title('ROC Curve Comparison', fontsize=14, fontweight='bold')
plt.legend(loc='lower right', fontsize=10)
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('models/roc_curves.png', dpi=300, bbox_inches='tight')
plt.show()

## Step 8: Precision-Recall Curve

In [ ]:
# (legend name, result dict) pairs in plotting order; optional models are
# added only when they were evaluated.
pr_series = [
    ('Logistic Regression', lr_results),
    ('Random Forest', rf_results),
]
if RF_TUNED_AVAILABLE:
    pr_series.append(('Random Forest Tuned', rf_tuned_results))
if XGBOOST_AVAILABLE:
    pr_series.append(('XGBoost', xgb_results))

plt.figure(figsize=(10, 8))

# One precision-recall curve per model (recall on x, precision on y), with
# its PR-AUC in the legend entry.
for curve_name, curve_results in pr_series:
    plt.plot(
        curve_results['Recall Curve'],
        curve_results['Precision Curve'],
        label=f"{curve_name} (AUC = {curve_results['PR-AUC']:.4f})",
        linewidth=2,
    )

# Captions and legend
plt.xlabel('Recall', fontsize=12)
plt.ylabel('Precision', fontsize=12)
plt.title('Precision-Recall Curve Comparison', fontsize=14, fontweight='bold')
plt.legend(loc='lower left', fontsize=10)
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('models/pr_curves.png', dpi=300, bbox_inches='tight')
plt.show()

## Step 9: Feature Importance Analysis

In [ ]:
# Pair each test-set column name with the Random Forest importance score,
# then order the table from most to least important.
# NOTE(review): assumes X_test columns are in the order the forest was
# trained on; confirm against the training notebook.
importance_table = pd.DataFrame({
    'Feature': X_test.columns,
    'Importance': rf_model.feature_importances_
})
feature_importance = importance_table.sort_values('Importance', ascending=False)

# The first 15 rows feed both the printed table and the chart.
top_features = feature_importance.head(15)

print("Top 15 Most Important Features:")
print("="*50)
print(top_features.to_string(index=False))

# Horizontal bar chart of the top features
bar_positions = range(len(top_features))
plt.figure(figsize=(12, 8))
plt.barh(bar_positions, top_features['Importance'].values)
plt.yticks(bar_positions, top_features['Feature'].values)
plt.xlabel('Importance', fontsize=12)
plt.title('Top 15 Feature Importance (Random Forest)', fontsize=14, fontweight='bold')
# Flip the y-axis so the first (largest) bar is drawn at the top.
plt.gca().invert_yaxis()
plt.tight_layout()
plt.savefig('models/feature_importance.png', dpi=300, bbox_inches='tight')
plt.show()

## Step 10: Classification Report

In [ ]:
# Per-class precision/recall/F1 report for the Random Forest predictions
class_labels = ['No Churn', 'Churn']

print("Classification Report - Random Forest:")
print("="*60)
rf_report = classification_report(
    y_test,
    rf_results['Predictions'],
    target_names=class_labels,
)
print(rf_report)

## Step 11: Model Insights and Recommendations

In [ ]:
# Fixed-text summary followed by the Random Forest metrics computed above.
banner = "=" * 80

key_findings = (
    "Contract type is the strongest predictor of churn",
    "Tenure (customer loyalty) inversely related to churn",
    "Payment method affects churn probability",
    "Internet service type influences churn",
    "Monthly charges correlate with churn risk",
)

recommendations = (
    "   a) Target Month-to-month customers for retention campaigns",
    "   b) Offer incentives to long-tenure customers",
    "   c) Improve service quality for fiber optic internet users",
    "   d) Promote automatic payment methods to reduce churn",
    "   e) Create loyalty programs for customers with high tenure",
    "   f) Monitor customers with high monthly charges",
)

# Metrics reported as percentages; ROC-AUC is printed separately as a
# four-decimal value.
percent_metrics = ('Accuracy', 'Precision', 'Recall', 'F1-Score')

print(banner)
print("MODEL INSIGHTS AND BUSINESS RECOMMENDATIONS")
print(banner)

print("\n1. KEY FINDINGS:")
for finding in key_findings:
    print(f"   - {finding}")

print("\n2. BUSINESS RECOMMENDATIONS:")
for recommendation in recommendations:
    print(recommendation)

print("\n3. MODEL PERFORMANCE:")
print("   - Best Model: Random Forest")
for metric_name in percent_metrics:
    print(f"   - {metric_name}: {rf_results[metric_name]:.2%}")
print(f"   - ROC-AUC: {rf_results['ROC-AUC']:.4f}")

print("\n" + banner)

## Summary

### Evaluation Complete:
1. All models evaluated with comprehensive metrics
2. Visualizations created (ROC, PR curves, confusion matrices)
3. Feature importance analyzed
4. Business insights and recommendations generated

### Next Steps:
- Use the trained model to predict churn for new customers