# Notebook 05: Model Evaluation and Visualization

## Objective
Evaluate both models — the scraped-data model (predicting views) and the API model (predicting engagement rate) — and create comprehensive visualizations:
1. Model performance comparison
2. Prediction accuracy plots
3. Residual analysis
4. Feature importance rankings
5. Engagement trend analysis

In [None]:
import sys
import os

# Make the parent of the current working directory importable so that the
# `src` package can be found when the notebook is run from its own folder.
sys.path.append(os.path.abspath(os.pardir))

from src import evaluate
import matplotlib.pyplot as plt

# Title banner: an 80-character rule above and below the heading.
print("=" * 80, "MODEL EVALUATION & VISUALIZATION", "=" * 80, sep="\n")

## Generate Full Evaluation Report

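This single call generates all 8 visualizations, saves them under `../reports/figures`, and compares both models. The returned `results` are used in the cells below.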

In [None]:
# Locations of the two trained models, their processed datasets, and the
# folder that receives the figures. All paths are relative to the notebook's
# working directory.
report_inputs = {
    "scraped_model_path": "../models/scraped_model.pkl",
    "api_model_path": "../models/api_model.pkl",
    "scraped_data_path": "../data/processed/scraped_processed.csv",
    "api_data_path": "../data/processed/api_processed.csv",
    "output_dir": "../reports/figures",
}

# Run the complete evaluation; `results` is consumed by the cells below.
results = evaluate.generate_full_report(**report_inputs)

## Detailed Results

In [None]:
# Print a text summary of `results` (returned by evaluate.generate_full_report
# in an earlier cell): per-model metrics, the leading features of each model,
# and the names of the generated figure files.
print("\n" + "="*80)
print("DETAILED RESULTS")
print("="*80)

print("\nScraped Model (Predicting Views):")
print(f"  Training R-squared: {results['scraped_metrics']['train_r2']:.4f}")
print(f"  Test R-squared: {results['scraped_metrics']['test_r2']:.4f}")
# Thousands separators here; the API model below uses six decimals instead.
print(f"  Test RMSE: {results['scraped_metrics']['test_rmse']:,.2f}")
print(f"  Test MAE: {results['scraped_metrics']['test_mae']:,.2f}")

print("\nAPI Model (Predicting Engagement Rate):")
print(f"  Training R-squared: {results['api_metrics']['train_r2']:.4f}")
print(f"  Test R-squared: {results['api_metrics']['test_r2']:.4f}")
print(f"  Test RMSE: {results['api_metrics']['test_rmse']:.6f}")
print(f"  Test MAE: {results['api_metrics']['test_mae']:.6f}")

# Top-5 feature listing for each model. An importance entry of None means
# only the heading is printed.
# NOTE(review): head(5) is only the "top 5" if the importance table is already
# sorted by descending importance -- confirm in evaluate.
for model_label, importance_key in (
    ("Scraped Model", "scraped_importance"),
    ("API Model", "api_importance"),
):
    print(f"\nTop 5 Features for {model_label}:")
    importance = results[importance_key]
    if importance is None:
        continue
    # Number the rows by position rather than by index label: iterrows()
    # yields the original index labels, which are no longer 0..4 after a sort
    # without reset_index() and may not even be integers.
    for rank, (_, row) in enumerate(importance.head(5).iterrows(), start=1):
        print(f"  {rank}. {row['feature']:<30} {row['importance']:.4f}")

print("\n" + "="*80)
print("EVALUATION COMPLETE")
print("="*80)
print("\nAll visualizations saved to: reports/figures/")
print("\nGenerated visualizations:")
generated_figures = (
    "model_comparison.png",
    "scraped_actual_vs_predicted.png",
    "api_actual_vs_predicted.png",
    "scraped_residuals.png",
    "api_residuals.png",
    "scraped_feature_importance.png",
    "api_feature_importance.png",
    "engagement_trends.png",
)
for number, figure_name in enumerate(generated_figures, start=1):
    print(f"  {number}. {figure_name}")
print("="*80)

## Display Visualizations

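View the key generated plots inline in the notebook. Six of the eight figures are shown; the two residual plots are saved to the figures directory but not displayed here.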

In [None]:
from IPython.display import Image, display

# Show a selection of the saved figures inline, each under a short heading.
figures_dir = "../reports/figures"

# (heading, file name) pairs, in display order.
key_figures = [
    ("Model Comparison", "model_comparison.png"),
    ("Scraped Model: Actual vs Predicted", "scraped_actual_vs_predicted.png"),
    ("API Model: Actual vs Predicted", "api_actual_vs_predicted.png"),
    ("Scraped Feature Importance", "scraped_feature_importance.png"),
    ("API Feature Importance", "api_feature_importance.png"),
    ("Engagement Trends", "engagement_trends.png"),
]

for title, filename in key_figures:
    path = os.path.join(figures_dir, filename)
    # A missing file is reported and skipped, so one absent figure does not
    # stop the rest from being shown.
    if not os.path.exists(path):
        print(f"Warning: {filename} not found")
        continue
    print(f"\n{title}:", "=" * 60, sep="\n")
    display(Image(filename=path, width=800))