# 06: Forecast Validation

This notebook evaluates forecast accuracy in two ways: rolling-origin cross-validation over time, and grouped (leave-one-state-out) cross-validation across states.

In [None]:
import sys
import os

# Run from the project root so relative paths such as 'data/...' and 'src' resolve.
# A notebook kernel has no __file__, so the kernel's current directory is the
# starting point; when it was launched inside notebooks/, step up one level.
if os.path.basename(os.getcwd()) == 'notebooks':
    os.chdir('..')
print(f"Working dir: {os.getcwd()}")

# Register the source directory by absolute path so imports keep working even if
# the working directory changes later; the membership check stops re-runs of this
# cell from stacking duplicate sys.path entries.
SRC_DIR = os.path.abspath('src')
if SRC_DIR not in sys.path:
    sys.path.insert(0, SRC_DIR)

import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit, LeaveOneGroupOut
import plotly.express as px

from validation_forecast import (
    rolling_origin_cv, 
    grouped_cv_leave_one_state_out,
    calculate_mape
)

Working dir: d:\UIDAI\code


## Load Data

In [None]:
# Read the processed feature table from disk.
features = pd.read_parquet('data/processed/model_features.parquet')

# Columns excluded from the feature count reported below.
id_columns = ('state', 'district', 'year', 'week_number')
n_feature_columns = sum(1 for col in features.columns if col not in id_columns)

print(f"Loaded {len(features):,} rows")
print(f"Features: {n_feature_columns}")

Loaded 21,337 rows
Features: 19


## Rolling-Origin Cross-Validation

In [None]:
# Forecast target: the 'bio_update_child' column with missing values replaced by 0.
# NOTE(review): zero-valued targets may make percentage errors undefined, depending
# on how calculate_mape treats them — confirm if the mean MAPE comes back as NaN.
target = features['bio_update_child'].fillna(0)

# Rolling-origin evaluation with 5 splits; the result is read by key below.
rolling_results = rolling_origin_cv(features, target, n_splits=5)
rolling_mean_mape = rolling_results['mean_mape']
print(f"\nMean MAPE: {rolling_mean_mape:.2f}%")

ROLLING-ORIGIN CROSS-VALIDATION
LightGBM not available. Using simple baseline.

Mean MAPE: nan%


## Grouped CV: Leave-One-State-Out

In [None]:
# Grouped cross-validation, using each row's state as its group label.
grouped_results = grouped_cv_leave_one_state_out(
    features, target, features['state']
)

# A chart is drawn only when a non-empty per-state table came back.
has_state_scores = (
    'group_results' in grouped_results
    and not grouped_results['group_results'].empty
)
if not has_state_scores:
    print("No grouped results to plot")
else:
    fig = px.bar(
        grouped_results['group_results'],
        x='state',
        y='mape',
        title='MAPE by State (Leave-One-Out CV)',
    )
    fig.show()

## Fold Results Visualization

In [None]:
# Plot the per-fold metrics from the rolling-origin run, when any were produced.
fold_table_available = (
    'fold_results' in rolling_results
    and not rolling_results['fold_results'].empty
)
if not fold_table_available:
    print("No fold results to plot")
else:
    fig = px.line(
        rolling_results['fold_results'],
        x='fold',
        y=['mape', 'rmse'],
        title='CV Metrics by Fold',
    )
    fig.show()

## Summary

In [None]:
# Accuracy target for the rolling-origin mean MAPE, in percent.
MAPE_TARGET = 20

rolling_mape = rolling_results['mean_mape']
# .get keeps the summary printable when the grouped run has no aggregate score.
grouped_mape = grouped_results.get('mean_mape', float('nan'))

print("FORECAST VALIDATION SUMMARY")
print("="*40)
print(f"Rolling CV MAPE: {rolling_mape:.2f}%")
print(f"Grouped CV MAPE: {grouped_mape:.2f}%")
print(f"State Disparity: {grouped_results.get('disparity', 0):.2f}%")

# NaN compares False against any threshold, so it has to be checked first;
# otherwise a score that was never computed is reported as merely missing the target.
if pd.isna(rolling_mape):
    print("\n⚠️ INCONCLUSIVE: rolling CV MAPE is NaN (could not be computed)")
elif rolling_mape <= MAPE_TARGET:
    print(f"\n✅ PASS: MAPE ≤ {MAPE_TARGET}%")
else:
    print(f"\n⚠️ MAPE > {MAPE_TARGET}% target")