# 03 - Model Training & Evaluation
## Fresh Flow Markets - Demand Forecasting

This notebook trains and evaluates demand forecasting models:
- Baseline models (Naive, Moving Average)
- XGBoost
- Prophet
- Ensemble

In [None]:
import sys
sys.path.insert(0, '..')

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from src.data.loader import load_key_tables, load_config
from src.data.cleaner import clean_all
from src.features.builder import build_features
from src.models.trainer import train_all_models, time_series_split
from src.models.evaluator import evaluate_predictions, evaluate_by_store

# Pipeline: config -> key tables -> clean_all -> feature matrix.
config = load_config()
# `tables` ends up bound to the output of clean_all applied to the loaded tables.
tables = clean_all(load_key_tables(config))
features = build_features(tables, top_n_items=30)
# Quick sanity check on the size of the resulting feature matrix.
print(f'Feature matrix: {features.shape}')

## Train/Validation/Test Split

In [None]:
train, val, test = time_series_split(features)

# One summary line per partition: row count plus the first and last value of
# its `date` column. Prefixes carry their own padding so the counts line up.
print('\n'.join(
    f'{prefix}{len(part)} rows ({part["date"].min()} to {part["date"].max()})'
    for prefix, part in (('Train: ', train), ('Val:   ', val), ('Test:  ', test))
))

## Train All Models

In [None]:
results = train_all_models(features, config)

# Comparison table: one row of overall metrics per trained model.
# The column list is pinned explicitly so the frame keeps its schema even when
# `results` is empty; a column-less DataFrame would make sort_values('MAE')
# raise KeyError instead of showing an empty table.
comparison = [
    {'Model': name, 'MAE': overall['mae'], 'RMSE': overall['rmse'], 'MAPE': overall['mape']}
    for name, overall in (
        (name, res['metrics']['overall']) for name, res in results.items()
    )
]

# Ascending sort: the model with the lowest MAE is listed first.
comp_df = pd.DataFrame(
    comparison, columns=['Model', 'MAE', 'RMSE', 'MAPE']
).sort_values('MAE')
comp_df

## Feature Importance (XGBoost)

In [None]:
# Only drawn when an 'xgboost' entry is present in the training results.
if 'xgboost' in results:
    xgb_model = results['xgboost']['model']
    importance = xgb_model.get_feature_importance(top_n=20)

    # Horizontal bars: importance on x, feature name on y.
    fig = px.bar(
        importance,
        x='importance',
        y='feature',
        orientation='h',
        title='XGBoost Feature Importance (Top 20)',
    )
    # Order the y-axis categories by their total bar value.
    fig.update_layout(
        template='plotly_white',
        yaxis=dict(categoryorder='total ascending'),
    )
    fig.show()

## Forecast Visualization

In [None]:
# Plot XGBoost predictions against actuals over the test split (daily totals).
# NOTE(review): `test` comes from the earlier time_series_split(features) call;
# presumably it is the same hold-out used inside train_all_models — confirm,
# otherwise this chart may include rows the model was trained on.
if 'xgboost' in results:
    model = results['xgboost']['model']
    test_preds = model.predict(test)
    test_with_preds = test.copy()  # leave the `test` split itself unmodified
    # np.asarray accepts pandas objects and plain ndarrays alike, so this works
    # whether predict() returns a Series or a NumPy array (`.values` exists only
    # on pandas objects). The values are assigned by position, not by index.
    test_with_preds['predicted'] = np.asarray(test_preds)

    # Aggregate daily: sum actual and predicted quantities per date.
    daily_actual = test_with_preds.groupby('date', as_index=False)['quantity_sold'].sum()
    daily_pred = test_with_preds.groupby('date', as_index=False)['predicted'].sum()
    daily_combined = pd.merge(daily_actual, daily_pred, on='date')

    fig = go.Figure()
    # Solid blue line for actuals, dashed red line for the forecast.
    fig.add_trace(go.Scatter(
        x=daily_combined['date'],
        y=daily_combined['quantity_sold'],
        name='Actual',
        line=dict(color='blue'),
    ))
    fig.add_trace(go.Scatter(
        x=daily_combined['date'],
        y=daily_combined['predicted'],
        name='Forecast',
        line=dict(color='red', dash='dash'),
    ))
    fig.update_layout(
        title='Test Period: Actual vs Forecast (Daily Total)',
        template='plotly_white',
        xaxis_title='Date',
        yaxis_title='Total Quantity',
    )
    fig.show()