# Model Evaluation - Patient Appointment Prediction

This notebook provides a comprehensive evaluation of the best trained model, including detailed metrics, visualizations, and business insights.

## Table of Contents
1. [Model Loading and Setup](#model-loading)
2. [Performance Metrics](#performance-metrics)
3. [Visualization Analysis](#visualization)
4. [Business Impact Analysis](#business-impact)
5. [Model Interpretability](#interpretability)
6. [Recommendations](#recommendations)


In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.metrics import (
    classification_report, confusion_matrix, roc_auc_score,
    average_precision_score, accuracy_score, precision_score,
    recall_score, f1_score, roc_curve, precision_recall_curve, auc
)
from sklearn.model_selection import validation_curve, learning_curve
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

# Set plotting style
plt.style.use('default')
sns.set_palette("husl")

# Import utility functions
import sys
sys.path.append('../src')
from utils import plot_confusion_matrix, plot_roc_curve, plot_feature_importance

print("Libraries imported successfully!")


## 1. Model Loading and Setup <a id="model-loading"></a>


In [None]:
# Restore the persisted best model; `model` is set to None when the artifact is missing
try:
    model = joblib.load('../models/best_model.joblib')
except FileNotFoundError:
    print("❌ Model file not found. Please run the training pipeline first.")
    print("Run: python src/train.py")
    model = None
else:
    print("✅ Best model loaded successfully!")

# Read the preprocessed dataset; `df` is set to None when the CSV is missing
try:
    df = pd.read_csv('../data/processed/cleaned_dataset.csv')
except FileNotFoundError:
    print("❌ Processed data not found. Please run preprocessing first.")
    print("Run: python src/preprocess.py")
    df = None
else:
    print("✅ Processed data loaded successfully!")

if model is None or df is None:
    print("⚠️ Cannot proceed without model and data. Please run the training pipeline.")
else:
    # The label column is 'NoShow' when present, otherwise 'No-show_encoded'
    if 'NoShow' in df.columns:
        target_col = 'NoShow'
    else:
        target_col = 'No-show_encoded'

    # Split the label from the feature matrix
    y = df[target_col]
    X = df.drop(columns=[target_col])

    dataset_summary = [
        f"\n📊 Dataset Info:",
        f"Features: {X.shape[1]}",
        f"Samples: {X.shape[0]:,}",
        f"No-show rate: {y.mean():.1%}",
    ]
    print("\n".join(dataset_summary))

    # Score every row of the loaded dataset: hard labels plus the probability
    # taken from column 1 of predict_proba.
    # NOTE(review): if this CSV contains rows the model was trained on, the
    # metrics computed in later cells will be optimistic — confirm.
    y_pred = model.predict(X)
    y_prob = model.predict_proba(X)[:, 1]

    prediction_summary = [
        f"\n🎯 Model Predictions:",
        f"Predicted no-show rate: {y_pred.mean():.1%}",
        f"Average predicted probability: {y_prob.mean():.3f}",
    ]
    print("\n".join(prediction_summary))


## 2. Performance Metrics <a id="performance-metrics"></a>


In [None]:
# Headline classification metrics for the predictions produced in the loading cell
if model is None or df is None:
    print("⚠️ Cannot evaluate without model and data.")
else:
    print("📈 COMPREHENSIVE MODEL EVALUATION")
    print("=" * 50)

    # Scores based on the hard predictions
    accuracy = accuracy_score(y, y_pred)
    precision = precision_score(y, y_pred, zero_division=0)
    recall = recall_score(y, y_pred)
    f1 = f1_score(y, y_pred)

    # Scores based on the predicted probabilities
    roc_auc = roc_auc_score(y, y_prob)
    pr_auc = average_precision_score(y, y_prob)

    # Unpack the 2x2 confusion matrix in row-major order
    cm = confusion_matrix(y, y_pred)
    tn, fp, fn, tp = cm.ravel()

    # Specificity = TN / (TN + FP); reported as 0 when there are no actual negatives
    if (tn + fp) > 0:
        specificity = tn / (tn + fp)
    else:
        specificity = 0

    metric_report = [
        f"\n🎯 Classification Metrics:",
        f"Accuracy:     {accuracy:.4f}",
        f"Precision:    {precision:.4f}",
        f"Recall:       {recall:.4f}",
        f"F1-Score:     {f1:.4f}",
        f"Specificity:  {specificity:.4f}",
        f"\n📊 Area Under Curves:",
        f"ROC-AUC:      {roc_auc:.4f}",
        f"PR-AUC:       {pr_auc:.4f}",
        f"\n🔍 Confusion Matrix:",
        f"True Negatives:  {tn:,}",
        f"False Positives: {fp:,}",
        f"False Negatives: {fn:,}",
        f"True Positives:  {tp:,}",
    ]
    print("\n".join(metric_report))

    # Per-class precision / recall / F1 table
    print(f"\n📋 Detailed Classification Report:")
    print(classification_report(y, y_pred, target_names=['Show', 'No Show']))


## 3. Visualization Analysis <a id="visualization"></a>


In [None]:
# Generate comprehensive visualizations
# Draws a 2x2 diagnostic panel from y, y_pred and y_prob: ROC curve,
# precision-recall curve, confusion matrix, and predicted-probability histograms.
if model is not None and df is not None:
    print("📊 Generating comprehensive visualizations...")

    # Create subplots
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    # 1. ROC Curve
    fpr, tpr, _ = roc_curve(y, y_prob)
    # Curve areas are kept in plot-local names so the roc_auc / pr_auc values
    # computed in the metrics cell are not overwritten by this cell.
    roc_curve_area = auc(fpr, tpr)

    axes[0, 0].plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC Curve (AUC = {roc_curve_area:.3f})')
    axes[0, 0].plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Random')
    axes[0, 0].set_xlim([0.0, 1.0])
    axes[0, 0].set_ylim([0.0, 1.05])
    axes[0, 0].set_xlabel('False Positive Rate')
    axes[0, 0].set_ylabel('True Positive Rate')
    axes[0, 0].set_title('ROC Curve')
    axes[0, 0].legend(loc="lower right")
    axes[0, 0].grid(True, alpha=0.3)

    # 2. Precision-Recall Curve
    precision_vals, recall_vals, _ = precision_recall_curve(y, y_prob)
    # Trapezoidal area under the PR curve; this is a different summary from the
    # average precision reported as PR-AUC in the metrics cell.
    pr_curve_area = auc(recall_vals, precision_vals)

    axes[0, 1].plot(recall_vals, precision_vals, color='darkgreen', lw=2, label=f'PR Curve (AUC = {pr_curve_area:.3f})')
    axes[0, 1].set_xlabel('Recall')
    axes[0, 1].set_ylabel('Precision')
    axes[0, 1].set_title('Precision-Recall Curve')
    axes[0, 1].legend(loc="lower left")
    axes[0, 1].grid(True, alpha=0.3)

    # 3. Confusion Matrix
    cm = confusion_matrix(y, y_pred)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[1, 0],
                xticklabels=['Show', 'No Show'], yticklabels=['Show', 'No Show'])
    axes[1, 0].set_title('Confusion Matrix')
    axes[1, 0].set_xlabel('Predicted')
    axes[1, 0].set_ylabel('Actual')

    # 4. Prediction Distribution
    # Plain-array labels make the boolean masks independent of any pandas index
    actual_labels = np.asarray(y)
    # Both classes share the same 50 bin edges over [0, 1] so the bars are comparable
    prob_bins = np.linspace(0, 1, 51)
    axes[1, 1].hist(y_prob[actual_labels == 0], bins=prob_bins, alpha=0.7, label='Actual Show', color='blue')
    axes[1, 1].hist(y_prob[actual_labels == 1], bins=prob_bins, alpha=0.7, label='Actual No Show', color='red')
    axes[1, 1].set_xlabel('Predicted Probability')
    axes[1, 1].set_ylabel('Frequency')
    axes[1, 1].set_title('Prediction Distribution')
    axes[1, 1].legend()
    axes[1, 1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    print("✅ Visualizations generated successfully!")
else:
    print("⚠️ Cannot generate visualizations without model and data.")


## 4. Business Impact Analysis <a id="business-impact"></a>


In [None]:
# Business Impact Analysis
# Estimates the cost of no-shows and the net benefit of sending an intervention
# to every appointment whose predicted probability reaches the threshold.
# All monetary figures rest on the example cost assumptions below.
if model is not None and df is not None:
    print("💼 BUSINESS IMPACT ANALYSIS")
    print("=" * 40)

    # Calculate business metrics
    total_appointments = len(y)
    actual_no_shows = y.sum()
    predicted_no_shows = y_pred.sum()

    # Cost assumptions (example values)
    cost_per_no_show = 150  # Cost of missed appointment slot
    cost_per_intervention = 5  # Cost of SMS reminder

    # Current situation (no model)
    current_no_show_cost = actual_no_shows * cost_per_no_show

    # With model predictions
    high_risk_threshold = 0.5
    flagged = y_prob >= high_risk_threshold
    high_risk_patients = int(flagged.sum())

    # An intervention can only prevent a no-show that would really have happened,
    # so the effectiveness applies to flagged appointments that were actual
    # no-shows (true positives), not to every flagged appointment.
    flagged_actual_no_shows = int((flagged & (np.asarray(y) == 1)).sum())

    # Assume intervention reduces no-show rate by 30% for high-risk patients
    intervention_effectiveness = 0.3
    prevented_no_shows = flagged_actual_no_shows * intervention_effectiveness
    # Every flagged appointment receives (and pays for) the intervention
    intervention_cost = high_risk_patients * cost_per_intervention
    cost_savings = prevented_no_shows * cost_per_no_show
    net_savings = cost_savings - intervention_cost

    print(f"\n📊 Current Situation:")
    print(f"Total appointments: {total_appointments:,}")
    print(f"Actual no-shows: {actual_no_shows:,} ({actual_no_shows/total_appointments:.1%})")
    print(f"Current no-show cost: ${current_no_show_cost:,}")

    print(f"\n🎯 With Model Intervention:")
    print(f"High-risk patients identified: {high_risk_patients:,} ({high_risk_patients/total_appointments:.1%})")
    print(f"Actual no-shows among them: {flagged_actual_no_shows:,}")
    print(f"Intervention cost: ${intervention_cost:,}")
    print(f"Prevented no-shows: {prevented_no_shows:.0f}")
    # Float amounts are rounded to whole dollars so no floating-point noise is printed
    print(f"Cost savings: ${cost_savings:,.0f}")
    print(f"Net savings: ${net_savings:,.0f}")

    print(f"\n💰 ROI Analysis:")
    roi = (net_savings / intervention_cost) * 100 if intervention_cost > 0 else 0
    print(f"Return on Investment: {roi:.1f}%")
    if prevented_no_shows > 0:
        print(f"Cost per prevented no-show: ${intervention_cost/prevented_no_shows:.2f}")
    else:
        # Keep the label so the line is still readable when nothing is prevented
        print("Cost per prevented no-show: N/A")

    # Risk stratification
    print(f"\n🎯 Risk Stratification:")
    low_risk = (y_prob < 0.3).sum()
    medium_risk = ((y_prob >= 0.3) & (y_prob < 0.7)).sum()
    high_risk = (y_prob >= 0.7).sum()

    print(f"Low risk (< 30%): {low_risk:,} patients")
    print(f"Medium risk (30-70%): {medium_risk:,} patients")
    print(f"High risk (> 70%): {high_risk:,} patients")
else:
    print("⚠️ Cannot perform business analysis without model and data.")


## 5. Model Interpretability <a id="interpretability"></a>


In [None]:
# Feature Importance Analysis
# Reports impurity-style feature importances (when the estimator exposes
# `feature_importances_`) and a few complexity hyper-parameters.
if model is not None and df is not None:
    print("🔍 MODEL INTERPRETABILITY ANALYSIS")
    print("=" * 40)

    # When the model is a pipeline with a 'clf' step, that step is the estimator
    # carrying importances and hyper-parameters; otherwise use the model itself.
    estimator = model
    if hasattr(model, 'named_steps') and 'clf' in model.named_steps:
        estimator = model.named_steps['clf']

    # Try to get feature importance
    try:
        if hasattr(estimator, 'feature_importances_'):
            importances = estimator.feature_importances_
            if estimator is model:
                feature_names = X.columns
            else:
                # Importances refer to the columns produced by the 'prep' step
                feature_names = model.named_steps['prep'].get_feature_names_out()
        else:
            print("⚠️ Model does not support feature importance analysis")
            importances = None
            feature_names = None

        if importances is not None:
            # Create importance DataFrame
            importance_df = pd.DataFrame({
                'feature': feature_names,
                'importance': importances
            }).sort_values('importance', ascending=False)

            print("\n🏆 Top 10 Most Important Features:")
            for i, (_, row) in enumerate(importance_df.head(10).iterrows(), 1):
                print(f"{i:2d}. {row['feature']:<30} {row['importance']:.4f}")

            # Plot feature importance
            plt.figure(figsize=(12, 8))
            top_features = importance_df.head(15)
            plt.barh(range(len(top_features)), top_features['importance'], color='skyblue')
            plt.yticks(range(len(top_features)), top_features['feature'])
            plt.xlabel('Feature Importance')
            plt.title('Top 15 Feature Importance')
            plt.gca().invert_yaxis()
            plt.tight_layout()
            plt.show()

            # Feature importance insights
            print(f"\n💡 Key Insights:")
            print(f"Most important feature: {importance_df.iloc[0]['feature']}")
            print(f"Least important feature: {importance_df.iloc[-1]['feature']}")

            # Cumulative importance
            cumulative_importance = importance_df['importance'].cumsum()
            total_importance = importance_df['importance'].sum()
            if total_importance > 0:
                # Normalise so the 80% cut-off is meaningful even when the
                # importances do not sum to 1.
                cumulative_share = cumulative_importance / total_importance
                # Features still below 80%, plus the one that crosses it
                features_80_percent = min(
                    int((cumulative_share < 0.8).sum()) + 1,
                    len(importance_df),
                )
                print(f"Features explaining 80% of importance: {features_80_percent}")
            else:
                print("Features explaining 80% of importance: N/A")

    except Exception as e:
        # Best-effort section: report the problem and continue with the rest of the cell
        print(f"❌ Error analyzing feature importance: {e}")

    # Model complexity analysis
    print(f"\n🔧 Model Complexity:")
    if hasattr(estimator, 'n_estimators'):
        print(f"Number of estimators: {estimator.n_estimators}")
    if hasattr(estimator, 'max_depth'):
        print(f"Maximum depth: {estimator.max_depth}")
    # Read from the top-level model so the reported count is unchanged for pipelines
    if hasattr(model, 'n_features_in_'):
        print(f"Number of features: {model.n_features_in_}")

else:
    print("⚠️ Cannot analyze interpretability without model and data.")


## 6. Recommendations <a id="recommendations"></a>


In [None]:
# Closing summary: qualitative grades for the headline metrics, fixed business
# and technical guidance, and the figures carried over from the business-impact cell.
if model is None or df is None:
    print("⚠️ Cannot provide recommendations without model evaluation.")
    print("Please run the complete training pipeline first.")
else:
    print("📋 FINAL RECOMMENDATIONS & NEXT STEPS")
    print("=" * 50)

    # Each metric is printed with a coarse grade chosen by fixed cut-offs
    print("\n🎯 Model Performance Summary:")
    print(f"• ROC-AUC Score: {roc_auc:.3f} ({'Excellent' if roc_auc > 0.8 else 'Good' if roc_auc > 0.7 else 'Fair'})")
    print(f"• Precision: {precision:.3f} ({'High' if precision > 0.3 else 'Moderate'})")
    print(f"• Recall: {recall:.3f} ({'High' if recall > 0.6 else 'Moderate'})")
    print(f"• F1-Score: {f1:.3f} ({'Good' if f1 > 0.4 else 'Moderate'})")

    # Static guidance text, emitted in one call; the output is line-for-line
    # the same as printing each entry separately.
    guidance_text = (
        "\n💼 Business Recommendations:",
        "1. 🎯 Implement risk-based intervention strategy:",
        "   • High-risk patients (>70%): Multiple reminders + phone calls",
        "   • Medium-risk patients (30-70%): SMS reminders",
        "   • Low-risk patients (<30%): Standard confirmation",
        "\n2. 📱 Optimize intervention timing:",
        "   • Send reminders 1-2 days before appointment",
        "   • Follow up on same day for high-risk patients",
        "\n3. 📊 Monitor and iterate:",
        "   • Track intervention effectiveness",
        "   • Retrain model monthly with new data",
        "   • A/B test different intervention strategies",
        "\n🔧 Technical Recommendations:",
        "1. 🚀 Model Deployment:",
        "   • Deploy model as REST API",
        "   • Integrate with appointment scheduling system",
        "   • Implement real-time scoring",
        "\n2. 📈 Model Improvement:",
        "   • Collect additional features (weather, traffic, etc.)",
        "   • Implement ensemble methods",
        "   • Use deep learning for complex patterns",
        "\n3. 🛡️ Model Monitoring:",
        "   • Monitor prediction drift",
        "   • Track model performance over time",
        "   • Implement automated retraining",
        "\n📊 Expected Business Impact:",
    )
    print("\n".join(guidance_text))

    # Figures computed in the business-impact cell
    print(f"• Potential cost savings: ${net_savings:,.0f}")
    print(f"• ROI: {roi:.1f}%")
    print(f"• Patients to target: {high_risk_patients:,}")

    print("\n✅ Model is ready for production deployment!")
