In [None]:
import warnings
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
warnings.filterwarnings('ignore')

# Model imports
from sklearn.model_selection import TimeSeriesSplit
from sklearn.linear_model import PoissonRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

# Fix the seed of NumPy's global random number generator so that re-running
# the notebook from a fresh kernel draws the same random numbers.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

In [None]:
# =============================================================================
# STEP 9: GENERATE VISUALIZATIONS
# =============================================================================
# Renders four diagnostic figures (model comparison, predicted-vs-actual,
# feature importance, residuals) and writes each one to disk as a 300-dpi PNG.
#
# NOTE(review): this cell reads results_df, baseline_rmse, y_test_final,
# rf_final_pred, feature_cols and final_rf. None of them are defined in the
# cells visible here, so they must already exist from earlier cells.
print("\n[STEP 9] Generating visualizations...")

# The figures go to ../OUTPUT -- the directory the notebook's closing message
# points the reader to and where final_predictions.csv is written -- instead of
# the current working directory. The directory is created first because
# savefig does not create missing parent directories.
output_dir = Path('../OUTPUT')
output_dir.mkdir(parents=True, exist_ok=True)

# Text styling shared by every figure in this cell.
label_style = {'fontsize': 12, 'fontweight': 'bold'}
title_style = {'fontsize': 14, 'fontweight': 'bold'}


def _save_and_close(fig, filename):
    """Finalize one figure: tighten its layout, save it, report it, close it.

    Parameters
    ----------
    fig : matplotlib.figure.Figure
        The figure to write.
    filename : str
        File name (not a path) of the PNG; it is created inside ``output_dir``.
    """
    fig.tight_layout()
    fig.savefig(output_dir / filename, dpi=300, bbox_inches='tight')
    print(f"Saved: {filename}")
    # Close explicitly so the four figures do not accumulate in memory.
    plt.close(fig)


# 1. Model Comparison Bar Chart
models = results_df['Model']
rmse_values = results_df['Avg RMSE']
# One colour per bar; presumably results_df holds four models -- TODO confirm.
colors = ['#d62728', '#ff7f0e', '#2ca02c', '#1f77b4']

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(models, rmse_values, color=colors, alpha=0.7, edgecolor='black')
# Horizontal reference line at the baseline RMSE for visual comparison.
ax.axhline(y=baseline_rmse, color='red', linestyle='--', label='Naive Baseline')
ax.set_xlabel('Model', **label_style)
ax.set_ylabel('Average RMSE', **label_style)
ax.set_title('Model Performance Comparison (Cross-Validation)', **title_style)
# Slant the model names so longer labels do not overlap.
plt.setp(ax.get_xticklabels(), rotation=15, ha='right')
ax.legend()
_save_and_close(fig, 'model_comparison_rmse.png')

# 2. Predictions vs Actual (Random Forest)
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test_final, rf_final_pred, alpha=0.5, s=20)
# Diagonal y = x spanning the observed range: points on it are exact predictions.
actual_range = [y_test_final.min(), y_test_final.max()]
ax.plot(actual_range, actual_range, 'r--', lw=2, label='Perfect Prediction')
ax.set_xlabel('Actual Ticket Count', **label_style)
ax.set_ylabel('Predicted Ticket Count', **label_style)
ax.set_title('Random Forest: Predicted vs Actual Ticket Counts', **title_style)
ax.legend()
_save_and_close(fig, 'rf_predictions_vs_actual.png')

# 3. Feature Importance (Random Forest)
# Kept as a module-level name: the STEP 10 cell writes this frame to CSV.
feature_importance = pd.DataFrame({
    'Feature': feature_cols,
    'Importance': final_rf.feature_importances_
}).sort_values('Importance', ascending=False)

fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(feature_importance['Feature'], feature_importance['Importance'], color='steelblue')
ax.set_xlabel('Importance Score', **label_style)
ax.set_ylabel('Feature', **label_style)
ax.set_title('Random Forest Feature Importance', **title_style)
# barh draws the first row at the bottom; flip so the most important is on top.
ax.invert_yaxis()
_save_and_close(fig, 'feature_importance.png')

# 4. Residual Plot
# Residual = actual - predicted, so positive values are under-predictions.
residuals = y_test_final - rf_final_pred

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(rf_final_pred, residuals, alpha=0.5, s=20)
ax.axhline(y=0, color='r', linestyle='--', lw=2)
ax.set_xlabel('Predicted Ticket Count', **label_style)
ax.set_ylabel('Residuals', **label_style)
ax.set_title('Random Forest Residual Plot', **title_style)
_save_and_close(fig, 'residual_plot.png')

In [None]:
# =============================================================================
# STEP 10: SAVE RESULTS
# =============================================================================
# Writes the tabular results of the analysis to CSV: the model metrics, the
# feature importances, and the per-sample test predictions with their errors.
#
# NOTE(review): this cell reads results_df, feature_importance, y_test_final,
# poisson_final_pred and rf_final_pred, which must exist from earlier cells.
print("\n[STEP 10] Saving results...")

# Every CSV goes to the same directory so the completion message below is
# accurate. The directory is created up front because DataFrame.to_csv does
# not create missing parent directories and would raise OSError instead.
output_dir = Path('../OUTPUT')
output_dir.mkdir(parents=True, exist_ok=True)

# Save model performance metrics
results_df.to_csv(output_dir / 'model_performance_metrics.csv', index=False)
print("Saved: model_performance_metrics.csv")

# Save feature importance
feature_importance.to_csv(output_dir / 'feature_importance.csv', index=False)
print("Saved: feature_importance.csv")

# Save final predictions
# .values drops the Series index so the columns line up by position with the
# prediction arrays rather than by index label.
actual_values = y_test_final.values
predictions_df = pd.DataFrame({
    'Actual': actual_values,
    'Poisson_Predicted': poisson_final_pred,
    'RandomForest_Predicted': rf_final_pred,
    # Errors are actual - predicted, so positive means the model under-predicted.
    'Poisson_Error': actual_values - poisson_final_pred,
    'RF_Error': actual_values - rf_final_pred
})
predictions_df.to_csv(output_dir / 'final_predictions.csv', index=False)
print("Saved: final_predictions.csv")

banner = "=" * 80
print("\n" + banner)
print("ANALYSIS COMPLETE")
print(banner)
print("\nAll outputs saved to ../OUTPUT/ directory")
print("Review the visualizations and metrics to assess model performance.")
print("\nNext steps:")
print("1. Review OUTPUT folder for all generated figures and tables")
print("2. Update README.md with reproduction instructions")
print("3. Prepare presentation materials using these results")
print(banner)