# Model Training and Evaluation

This notebook demonstrates model development, training, evaluation, and business impact analysis.

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import joblib

from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (classification_report, confusion_matrix, roc_auc_score,
                             roc_curve, precision_recall_curve, f1_score)
from sklearn.utils import resample

import warnings
warnings.filterwarnings('ignore')

print('Libraries imported successfully!')

## 1. Load Processed Data

Load the engineered features from the previous notebook.

In [None]:
# Load the engineered feature table written by 02-feature-engineering.ipynb.
data_path = Path('../data/processed/customer_data_features.csv')

if data_path.exists():
    data = pd.read_csv(data_path)
else:
    # Fall back to synthetic data so the notebook still runs end to end.
    # The message states this explicitly: when this branch is taken, every
    # metric and the saved model below are based on random noise.
    print(f'Warning: processed data not found at {data_path}. '
          'Please run 02-feature-engineering.ipynb first.')
    print('Continuing with randomly generated sample data (demonstration only).')
    np.random.seed(42)
    n_samples = 1000
    data = pd.DataFrame({
        # Five standard-normal features, drawn in order feature_1..feature_5
        **{f'feature_{i}': np.random.randn(n_samples) for i in range(1, 6)},
        # Binary target with roughly a 27% positive (churn) rate
        'churn': np.random.choice([0, 1], n_samples, p=[0.73, 0.27]),
    })

# Fail early with a readable message; the original surfaced a bare KeyError
# from the churn-rate print below when the target column was absent.
if 'churn' not in data.columns:
    raise KeyError(
        f"Target column 'churn' not found in {data_path}; "
        f'available columns: {list(data.columns)}'
    )

print(f'Dataset shape: {data.shape}')
print(f'Churn rate: {data["churn"].mean():.2%}')

## 2. Prepare Training and Test Sets

Split the data into training (80%) and test (20%) sets, stratifying on the target so that both sets keep the original churn rate.

In [None]:
# Target vector, and feature matrix with the target (and the identifier
# column, when the table has one) removed
y = data['churn']
X = data.drop(columns=['churn', 'customer_id'], errors='ignore')

# Hold out 20% of the rows for testing; stratifying on y keeps the churn
# rate the same in both partitions, and the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

print(f'Training set: {X_train.shape}')
print(f'Test set: {X_test.shape}')
print('\nClass distribution in training set:')
print(y_train.value_counts(normalize=True))

## 3. Handle Class Imbalance

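Address class imbalance by randomly oversampling the minority class (with replacement) in the training set only; the test set keeps its original class distribution.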

In [None]:
# Attach the target to the training features so rows and labels are resampled
# together. `assign` always returns a new frame, so X_train itself can never
# pick up the extra 'churn' column.
X_train_df = X_train.assign(churn=y_train.values)

# Work out which label is the minority from the data instead of assuming it
# is the churners (label 1). On a tie label 1 is treated as the minority,
# which matches the previous hard-coded behaviour.
class_counts = X_train_df['churn'].value_counts()
minority_label = 1 if class_counts.get(1, 0) <= class_counts.get(0, 0) else 0
majority_label = 1 - minority_label

majority = X_train_df[X_train_df['churn'] == majority_label]
minority = X_train_df[X_train_df['churn'] == minority_label]

# Sample minority rows with replacement until both classes have the same size
minority_upsampled = resample(
    minority,
    replace=True,
    n_samples=len(majority),
    random_state=42,
)

# Majority rows first, then the upsampled minority rows. The combined frame
# gets its own name so X_train_balanced only ever means "features, no target".
train_balanced = pd.concat([majority, minority_upsampled])

# Separate features and target again
y_train_balanced = train_balanced['churn']
X_train_balanced = train_balanced.drop('churn', axis=1)

print(f'Balanced training set: {X_train_balanced.shape}')
print('\nClass distribution after balancing:')
print(y_train_balanced.value_counts(normalize=True))

## 4. Train Multiple Models

Train and compare different algorithms.

In [None]:
# Candidate classifiers; each one is seeded so repeated runs give the same fit
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000, random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(n_estimators=100, random_state=42),
}

# Fit every candidate on the balanced training data and score it on the
# untouched test set. Each entry keeps the fitted model, its predictions
# and the two headline metrics.
results = {}

for name, model in models.items():
    print(f'\nTraining {name}...')

    model.fit(X_train_balanced, y_train_balanced)

    # Hard labels for F1, positive-class probabilities for ROC AUC
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]

    auc_value = roc_auc_score(y_test, y_pred_proba)
    f1_value = f1_score(y_test, y_pred)

    results[name] = {
        'model': model,
        'y_pred': y_pred,
        'y_pred_proba': y_pred_proba,
        'roc_auc': auc_value,
        'f1': f1_value,
    }

    print(f'{name} - ROC AUC: {auc_value:.4f}, F1 Score: {f1_value:.4f}')

print('\nModel training completed!')

## 5. Model Comparison

Compare model performance using multiple metrics.

In [None]:
# One row per model, ordered from best to worst ROC AUC
comparison_rows = [
    {'Model': model_name, 'ROC AUC': entry['roc_auc'], 'F1 Score': entry['f1']}
    for model_name, entry in results.items()
]
comparison_df = pd.DataFrame(
    comparison_rows, columns=['Model', 'ROC AUC', 'F1 Score']
).sort_values('ROC AUC', ascending=False)

print('Model Performance Comparison:')
print(comparison_df)

# Side-by-side bar charts: (metric column, bar colour, panel title, y-range)
panel_specs = [
    ('ROC AUC', 'steelblue', 'ROC AUC Score by Model', [0.5, 1.0]),
    ('F1 Score', 'coral', 'F1 Score by Model', [0.0, 1.0]),
]

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

for ax, (metric, bar_color, panel_title, y_limits) in zip(axes, panel_specs):
    comparison_df.plot(x='Model', y=metric, kind='bar', ax=ax, color=bar_color, legend=False)
    ax.set_title(panel_title, fontsize=12, fontweight='bold')
    ax.set_ylabel(metric)
    ax.set_ylim(y_limits)
    ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

## 6. Select Best Model and Detailed Evaluation

Perform comprehensive evaluation of the best performing model.

In [None]:
# The comparison table is sorted by ROC AUC (descending), so its first row
# names the winning model
best_model_name = comparison_df['Model'].iloc[0]
best_entry = results[best_model_name]

best_model = best_entry['model']
y_pred = best_entry['y_pred']
y_pred_proba = best_entry['y_pred_proba']

banner = '=' * 60

print(f'Best Model: {best_model_name}')
print('\n' + banner)
print('CLASSIFICATION REPORT')
print(banner)
print(classification_report(y_test, y_pred, target_names=['No Churn', 'Churn']))

## 7. Confusion Matrix Analysis

Visualize prediction errors and their types.

In [None]:
# Confusion matrix for the selected model's test-set predictions.
# The label order is pinned to [0, 1] so the matrix is always 2x2 and its
# rows/columns line up with the 'No Churn' / 'Churn' tick labels and with the
# tn, fp, fn, tp unpacking below, even if a class is absent from y_test.
class_names = ['No Churn', 'Churn']
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False,
            xticklabels=class_names,
            yticklabels=class_names)
plt.title(f'Confusion Matrix - {best_model_name}', fontsize=14, fontweight='bold')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.tight_layout()
plt.show()

# Row-major flattening of [[tn, fp], [fn, tp]]; these four counts are reused
# by the business-impact and summary cells
tn, fp, fn, tp = cm.ravel()
print(f'\nTrue Negatives: {tn}')
print(f'False Positives: {fp}')
print(f'False Negatives: {fn}')
print(f'True Positives: {tp}')

## 8. ROC Curve and Precision-Recall Curve

Analyze model performance across different thresholds.

In [None]:
# Threshold sweeps over the selected model's churn probabilities
fpr, tpr, roc_thresholds = roc_curve(y_test, y_pred_proba)
precision, recall, pr_thresholds = precision_recall_curve(y_test, y_pred_proba)

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
roc_ax, pr_ax = axes

# Left panel: ROC curve against the chance diagonal
roc_label = f'ROC curve (AUC = {results[best_model_name]["roc_auc"]:.3f})'
roc_ax.plot(fpr, tpr, color='darkorange', lw=2, label=roc_label)
roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Random')
roc_ax.set(
    xlim=[0.0, 1.0],
    ylim=[0.0, 1.05],
    xlabel='False Positive Rate',
    ylabel='True Positive Rate',
)
roc_ax.set_title('ROC Curve', fontsize=12, fontweight='bold')
roc_ax.legend(loc='lower right')
roc_ax.grid(alpha=0.3)

# Right panel: precision as a function of recall
pr_ax.plot(recall, precision, color='green', lw=2, label='Precision-Recall curve')
pr_ax.set(
    xlim=[0.0, 1.0],
    ylim=[0.0, 1.05],
    xlabel='Recall',
    ylabel='Precision',
)
pr_ax.set_title('Precision-Recall Curve', fontsize=12, fontweight='bold')
pr_ax.legend(loc='lower left')
pr_ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

## 9. Feature Importance Analysis

Identify the most influential features (for tree-based models).

In [None]:
# Only estimators that expose `feature_importances_` can be ranked here
if not hasattr(best_model, 'feature_importances_'):
    print('Feature importance not available for this model type.')
else:
    # Pair each feature name with its importance, most important first
    feature_importance = (
        pd.DataFrame({
            'feature': X.columns,
            'importance': best_model.feature_importances_,
        })
        .sort_values('importance', ascending=False)
    )

    # Show at most 15 features (fewer when the model has fewer inputs)
    top_n = min(15, len(feature_importance))
    top_features = feature_importance.head(top_n)
    bar_positions = range(top_n)

    plt.figure(figsize=(10, 8))
    plt.barh(bar_positions, top_features['importance'], color='steelblue')
    plt.yticks(bar_positions, top_features['feature'])
    plt.xlabel('Importance')
    plt.title(f'Top {top_n} Feature Importances - {best_model_name}', fontsize=12, fontweight='bold')
    # Flip the axis so the most important feature is drawn at the top
    plt.gca().invert_yaxis()
    plt.tight_layout()
    plt.show()

    print(f'\nTop {top_n} Most Important Features:')
    print(top_features)

## 10. Business Impact Analysis

Translate technical metrics into business value.

In [None]:
# Business assumptions (currency units per customer)
customer_lifetime_value = 1000  # value attributed to each retained customer
retention_campaign_cost = 50    # cost of targeting one customer

# Confusion-matrix counts under business-facing names
true_positives = tp
false_positives = fp
false_negatives = fn

# Every predicted churner (correct or not) receives a campaign
campaigns_sent = true_positives + false_positives

# Financial impact: each true positive is counted as fully retained,
# each false negative as a fully lost customer
revenue_saved = true_positives * customer_lifetime_value
campaign_cost = campaigns_sent * retention_campaign_cost
missed_revenue = false_negatives * customer_lifetime_value
net_benefit = revenue_saved - campaign_cost

# ROI in percent; reported as 0 when no campaign was sent
if campaign_cost > 0:
    roi = (net_benefit / campaign_cost) * 100
else:
    roi = 0

heavy_rule = '=' * 60
light_rule = '-' * 60

report_lines = [
    heavy_rule,
    'BUSINESS IMPACT ANALYSIS',
    heavy_rule,
    f'\nCustomers correctly identified as churning: {true_positives}',
    f'Revenue saved from retention: ${revenue_saved:,.0f}',
    f'\nTotal retention campaigns sent: {campaigns_sent}',
    f'Total campaign cost: ${campaign_cost:,.0f}',
    f'\nCustomers missed (false negatives): {false_negatives}',
    f'Potential revenue loss: ${missed_revenue:,.0f}',
    '\n' + light_rule,
    f'NET BENEFIT: ${net_benefit:,.0f}',
    f'ROI: {roi:.1f}%',
    heavy_rule,
]
for report_line in report_lines:
    print(report_line)

## 11. Model Hyperparameter Tuning

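Optimize the best model using grid search. The example grid is defined for Random Forest only; if another model was selected, this step is skipped and the model is left untuned.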

In [None]:
# Grid search is only set up for Random Forest; any other winning model is
# left untuned.
if best_model_name == 'Random Forest':
    param_grid = {
        'n_estimators': [100, 200],
        'max_depth': [10, 20, None],
        'min_samples_split': [2, 5],
        'min_samples_leaf': [1, 2]
    }

    print('Performing hyperparameter tuning...')
    # NOTE(review): the search runs on the oversampled training set, so copies
    # of the same minority row can land in both the fitting and the validation
    # folds; best_score_ is therefore likely optimistic - confirm before
    # relying on it.
    grid_search = GridSearchCV(
        RandomForestClassifier(random_state=42, n_jobs=-1),
        param_grid,
        cv=3,
        scoring='roc_auc',
        n_jobs=-1,
        verbose=1
    )

    grid_search.fit(X_train_balanced, y_train_balanced)

    print(f'\nBest parameters: {grid_search.best_params_}')
    print(f'Best cross-validation score: {grid_search.best_score_:.4f}')

    # Evaluate the tuned model on the same test set as the baseline
    tuned_model = grid_search.best_estimator_
    y_pred_tuned = tuned_model.predict(X_test)
    y_pred_proba_tuned = tuned_model.predict_proba(X_test)[:, 1]

    tuned_roc_auc = roc_auc_score(y_test, y_pred_proba_tuned)
    tuned_f1 = f1_score(y_test, y_pred_tuned)

    print(f'\nTuned model ROC AUC: {tuned_roc_auc:.4f}')
    print(f'Tuned model F1 Score: {tuned_f1:.4f}')

    # Adopt the tuned model only if it is at least as good as the baseline on
    # ROC AUC, the metric used to pick the best model. Previously the tuned
    # model replaced the baseline unconditionally, so a worse model could be
    # saved. Like the model selection itself, this comparison reuses the test
    # set.
    baseline_model = results[best_model_name]['model']
    baseline_roc_auc = results[best_model_name]['roc_auc']

    if tuned_roc_auc >= baseline_roc_auc:
        best_model = tuned_model
        print(f'Tuned model adopted (baseline ROC AUC: {baseline_roc_auc:.4f}).')
    else:
        # Rebind explicitly so re-running this cell always ends in the same state
        best_model = baseline_model
        print(f'Tuned model discarded; keeping baseline (ROC AUC: {baseline_roc_auc:.4f}).')
else:
    print('Hyperparameter tuning example shown for Random Forest only.')

## 12. Save Model

Save the trained model for deployment.

In [None]:
# Make sure the output directory exists (no error if it already does)
models_dir = Path('../models')
models_dir.mkdir(parents=True, exist_ok=True)

# Persist the selected model; the file name is derived from the model's
# display name, e.g. 'Random Forest' -> 'random_forest_model.pkl'
model_path = models_dir / f'{best_model_name.lower().replace(" ", "_")}_model.pkl'
joblib.dump(best_model, model_path)
print(f'Model saved to: {model_path}')

# Save the feature names, one per line, in training column order.
# Labels are converted with str() so non-string column labels do not make
# join() raise, and the encoding is fixed so the file reads back the same
# way on every platform.
feature_names_path = models_dir / 'feature_names.txt'
with open(feature_names_path, 'w', encoding='utf-8') as f:
    f.write('\n'.join(map(str, X.columns)))
print(f'Feature names saved to: {feature_names_path}')

## 13. Model Summary

Document key findings and recommendations.

In [None]:
# Score the model object that `best_model` currently refers to (the one that
# gets saved). If the tuning step replaced it after the evaluation cells ran,
# the values stored in `results` describe the untuned model and would be stale.
final_pred = best_model.predict(X_test)
final_proba = best_model.predict_proba(X_test)[:, 1]
final_roc_auc = roc_auc_score(y_test, final_proba)
final_f1 = f1_score(y_test, final_pred)

# True when `best_model` is no longer the object that was evaluated earlier
model_was_replaced = best_model is not results[best_model_name]['model']
model_note = ' (tuned)' if model_was_replaced else ''
# The business figures and confusion counts were computed before tuning;
# label them as such instead of presenting them as the saved model's results.
stale_note = ' (from the evaluation before tuning)' if model_was_replaced else ''

summary = f"""
MODEL TRAINING SUMMARY:
======================

1. Best Model: {best_model_name}{model_note}
   - ROC AUC: {final_roc_auc:.4f}
   - F1 Score: {final_f1:.4f}

2. Business Impact{stale_note}:
   - Net Benefit: ${net_benefit:,.0f}
   - ROI: {roi:.1f}%
   - Customers Saved: {true_positives}

3. Model Performance{stale_note}:
   - True Positives: {tp}
   - False Positives: {fp}
   - False Negatives: {fn}
   - True Negatives: {tn}

4. Recommendations:
   - Deploy model with threshold optimization for business goals
   - Monitor model performance monthly
   - Retrain quarterly or when performance degrades
   - Implement A/B testing for retention campaigns
   - Track actual retention rates vs predictions

5. Next Steps:
   - Create model card documentation
   - Set up monitoring dashboard
   - Implement prediction API
   - Establish retraining pipeline
"""

print(summary)