# 05 - Evaluation

This notebook summarizes model results and creates visualizations:
- Overall model comparison
- Time series plots
- Residual analysis
- Feature importance

In [None]:
import os
import sys

# Put the parent of the current working directory at the front of sys.path so
# that modules located there take precedence on import.
# NOTE(review): presumably the notebook runs from a subdirectory of the project
# root, making the parent the project root — confirm.
sys.path.insert(0, os.path.dirname(os.getcwd()))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Plot appearance: apply the matplotlib style sheet first, then seaborn's
# colour palette (kept in this order so the palette is set last).
MPL_STYLE = 'seaborn-v0_8-whitegrid'
SNS_PALETTE = 'husl'

plt.style.use(MPL_STYLE)
sns.set_palette(SNS_PALETTE)

In [None]:
# Read the per-protocol metrics table and report how many distinct protocols
# and models it contains.
metrics_path = '../results/tables/metrics_per_protocol.csv'
metrics_df = pd.read_csv(metrics_path)

n_protocols = metrics_df['protocol'].nunique()
n_models = metrics_df['model'].nunique()
print(f"Loaded metrics for {n_protocols} protocols, {n_models} models")

In [None]:
# Overall model comparison: average each metric over all rows of a model,
# rounded to four decimals, giving one row per model.
metric_columns = ['rmse_usd', 'mae_usd', 'mape', 'r2', 'directional_accuracy']

print("\n=== OVERALL MODEL COMPARISON ===")
overall = (
    metrics_df
    .groupby('model')[metric_columns]
    .mean()
    .round(4)
)
overall

In [None]:
# Persist the per-model averages; reset_index() turns the 'model' index into a
# regular column so it is written to the CSV.
overall_out = overall.reset_index()
overall_out.to_csv('../results/tables/metrics_overall.csv', index=False)
print("Overall metrics saved to results/tables/metrics_overall.csv")

In [None]:
# Visualization: Model comparison bar chart
# One panel per metric, all drawn by the same loop. Every panel uses the same
# x-axis order (models sorted by overall R², highest first) so bars line up
# across panels.
panels = [
    ('r2', 'R² Score by Model'),
    ('mape', 'MAPE by Model'),
    ('directional_accuracy', 'Directional Accuracy by Model'),
]

fig, axes = plt.subplots(1, len(panels), figsize=(14, 4))
model_order = overall.sort_values('r2', ascending=False).index

for ax, (metric, title) in zip(axes, panels):
    # barplot aggregates all rows of each model into a single bar.
    sns.barplot(data=metrics_df, x='model', y=metric, order=model_order, ax=ax)
    ax.set_title(title)
    ax.set_xlabel('')  # model names on the ticks are self-explanatory
    ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
# Make sure the output directory exists before saving.
os.makedirs('../results/figures', exist_ok=True)
plt.savefig('../results/figures/model_comparison.png', dpi=150)
plt.show()

In [None]:
# Visualization: Model performance by protocol
# Grouped bars: one cluster per protocol, one bar per model, bar height = R².
fig, ax = plt.subplots(figsize=(12, 6))

# pivot() requires each (protocol, model) pair to appear at most once and
# raises ValueError if the table contains duplicates.
pivot_r2 = metrics_df.pivot(index='protocol', columns='model', values='r2')
pivot_r2.plot(kind='bar', ax=ax)
ax.set_title('R² Score by Protocol and Model')
ax.set_xlabel('Protocol')
ax.set_ylabel('R²')
# Pin the legend's upper-left corner just right of the axes. Without an
# explicit `loc`, matplotlib falls back to its default placement (normally
# 'best'), so which legend corner lands on the anchor point -- and therefore
# whether the legend covers the bars -- would depend on the data being plotted.
ax.legend(title='Model', loc='upper left', bbox_to_anchor=(1.02, 1))
ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
# Make sure the output directory exists before saving.
os.makedirs('../results/figures', exist_ok=True)
plt.savefig('../results/figures/r2_by_protocol.png', dpi=150)
plt.show()

In [None]:
# Summary statistics: name the winning model for each headline metric, based
# on the per-model averages in `overall` (higher is better for R² and
# directional accuracy, lower is better for MAPE).
r2_by_model = overall['r2']
mape_by_model = overall['mape']
diracc_by_model = overall['directional_accuracy']

print("\n=== SUMMARY STATISTICS ===")
print(f"\nBest model by R²: {r2_by_model.idxmax()} (R² = {r2_by_model.max():.4f})")
print(f"Best model by MAPE: {mape_by_model.idxmin()} (MAPE = {mape_by_model.min():.4f})")
print(f"Best model by Directional Accuracy: {diracc_by_model.idxmax()} ({diracc_by_model.max():.2%})")

# Per-protocol best models: for every protocol keep the row with the highest R².
print("\n=== BEST MODEL PER PROTOCOL ===")
# Rows without an R² value cannot win. Dropping them first also prevents a
# protocol whose R² is missing for every model from producing an invalid
# idxmax label that would make the .loc lookup below fail; such a protocol is
# simply left out of the table.
scored = metrics_df.dropna(subset=['r2'])
best_rows = scored.groupby('protocol')['r2'].idxmax()
best_per_proto = scored.loc[best_rows, ['protocol', 'model', 'r2', 'mape']]
best_per_proto

In [None]:
# Optional artifact: show the first 20 rows of the top-features table when the
# CSV exists; otherwise print a short notice instead of failing.
top_features_path = '../results/tables/top_features.csv'
try:
    top_features = pd.read_csv(top_features_path)
    print("\n=== TOP FEATURES ===")
    display(top_features.head(20))
except FileNotFoundError:
    print("Top features file not found")