# WeatherVane Model Validation — Reproducible Runbook

Use this notebook to execute the standardized validation pipeline on cross-validated MMM results and produce audit-ready artifacts.

## Checklist
- Refresh or generate cross-validation results before running.
- Execute the notebook top-to-bottom without skipping cells.
- Attach generated artifacts when submitting validation evidence.

### Prerequisites
1. Python 3.11 environment with WeatherVane dependencies installed.
2. Cross-validation metrics JSON (default: `state/analytics/mmm_training_results_cv.json`; set the environment variable `CV_RESULTS_PATH` to point at a different file).
3. Optional: set the environment variable `VALIDATION_RUN_ID` to control artifact folder naming.

In [None]:
from __future__ import annotations

import json
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict, Any

import numpy as np
import pandas as pd

# PYTHONHASHSEED is read once, at interpreter startup, so setting it here cannot
# change str/bytes hashing in this already-running kernel. The default is still
# exported so that child processes launched from the notebook inherit a fixed
# seed. For a fully deterministic kernel, export PYTHONHASHSEED before starting
# Jupyter.
os.environ.setdefault("PYTHONHASHSEED", "42")

# Seed NumPy's global RNG so any downstream np.random sampling is repeatable.
np.random.seed(42)

# Locate the repository root: the nearest directory, starting at the working
# directory and walking up through every ancestor, that contains both `apps/`
# and `notebooks/`. Searching all ancestors (rather than only the parent and
# grandparent) lets the notebook run from any depth inside the repo; the first
# three candidates are tried in the same order as before.
cwd = Path.cwd().resolve()
candidate_roots = [cwd, *cwd.parents]
PROJECT_ROOT: Path | None = None
for candidate in candidate_roots:
    if (candidate / "apps").is_dir() and (candidate / "notebooks").is_dir():
        PROJECT_ROOT = candidate
        break

if PROJECT_ROOT is None:
    raise RuntimeError("Unable to locate project root. Start Jupyter from the repo root or notebooks directory.")

# Put the repo root first on sys.path so the `apps.*` imports in later cells resolve.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

print(f"Project root: {PROJECT_ROOT}")


In [None]:
from apps.model.mmm_lightweight_weather import load_cv_results_from_json
from apps.model.validate_model_performance import (
    ValidationThresholds,
    export_validation_report,
    generate_validation_report,
    validate_all_models,
)

# Imported here because this cell is the only one that needs an aware UTC clock.
from datetime import timezone

# Input file: the repo default unless CV_RESULTS_PATH is set to a non-empty value.
# An empty value falls back to the default (same rule as VALIDATION_RUN_ID below)
# instead of resolving to the current working directory.
DEFAULT_CV_PATH = PROJECT_ROOT / "state/analytics/mmm_training_results_cv.json"
CV_RESULTS_PATH = Path(os.environ.get("CV_RESULTS_PATH") or str(DEFAULT_CV_PATH)).expanduser().resolve()

# Run identifier: VALIDATION_RUN_ID when provided, otherwise a UTC timestamp.
# datetime.now(timezone.utc) replaces datetime.utcnow(), which is deprecated
# since Python 3.12; the formatted string is the same.
RUN_ID = os.environ.get("VALIDATION_RUN_ID") or datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
ARTIFACT_DIR = PROJECT_ROOT / "artifacts/validation_runs" / RUN_ID

print(f"Using CV results: {CV_RESULTS_PATH}")
print(f"Artifacts directory: {ARTIFACT_DIR}")

# Validate the input before touching the filesystem so a failed run does not
# leave an empty artifact folder behind.
if not CV_RESULTS_PATH.exists():
    raise FileNotFoundError(
        "Cross-validation results were not found. "
        "Generate them with `python scripts/train_mmm_synthetic_cv.py --n-folds 5` before proceeding."
    )

ARTIFACT_DIR.mkdir(parents=True, exist_ok=True)


In [None]:
# Read the cross-validation metrics from the JSON file selected in the previous cell.
cv_results = load_cv_results_from_json(CV_RESULTS_PATH)

# An empty result leaves nothing to validate, so stop here with a clear error.
if cv_results:
    print(f"Loaded cross-validation metrics for {len(cv_results)} tenants.")
else:
    raise ValueError("No cross-validation results loaded; ensure the training pipeline produced data.")


In [None]:
# Run validation over the loaded CV results using default-constructed thresholds.
thresholds = ValidationThresholds()
validation_results = validate_all_models(cv_results, thresholds)

# Build the aggregate report and write it, with the per-model results, into
# this run's artifact directory.
report = generate_validation_report(validation_results, thresholds)
report_path = ARTIFACT_DIR / "validation_report.json"
export_validation_report(validation_results, report, report_path)
print(f"Validation report saved to {report_path}")

# One-line headline: pass rate followed by passing/total model counts.
overview = report["validation_summary"]
pass_rate_text = f"Pass rate: {overview['pass_rate']:.1%} "
model_counts_text = f"({overview['passing_models']}/{overview['total_models']})"
print(pass_rate_text + model_counts_text)


In [None]:
# Flatten each tenant's validation result into one plain row for tabular review.
# Numeric metrics are coerced to built-in floats; failure reasons are joined
# into a single "; "-separated string (empty when there are none).
records = [
    {
        "tenant": tenant_name,
        "mean_r2": float(result.mean_r2),
        "std_r2": float(result.std_r2),
        "mean_rmse": float(result.mean_rmse),
        "passes_all_checks": result.passes_all_checks,
        "failure_reasons": "; ".join(result.failure_reasons) if result.failure_reasons else "",
    }
    for tenant_name, result in validation_results.items()
]

# Order tenants from highest to lowest mean R², then renumber the rows from zero.
unsorted_df = pd.DataFrame.from_records(records)
validation_df = unsorted_df.sort_values(by="mean_r2", ascending=False).reset_index(drop=True)

# Trailing bare expression so the notebook renders the table as the cell output.
validation_df


In [None]:
# Select the rows whose overall check flag is False.
failed_mask = ~validation_df["passes_all_checks"]
failing = validation_df[failed_mask]

if failing.empty:
    print("All tenants pass the validation thresholds.")
else:
    print("Failing tenants:")
    # NOTE(review): `display` is not imported in this notebook; presumably it is
    # the helper the IPython/Jupyter kernel injects. Confirm if run outside Jupyter.
    display(failing[["tenant", "mean_r2", "failure_reasons"]])


In [None]:
# Pull the aggregate figures and the thresholds that produced them out of the report.
summary = report["validation_summary"]
thresholds_used = report["thresholds"]

# Minimum share of passing models required to meet the exit criteria. Defined
# once so the comparison and the printed target cannot drift apart.
EXIT_PASS_RATE_TARGET = 0.80

exit_criteria_met = summary["pass_rate"] >= EXIT_PASS_RATE_TARGET
status = "✅" if exit_criteria_met else "⚠️"

print(f"{status} Pass rate {summary['pass_rate']:.1%} (target >= {EXIT_PASS_RATE_TARGET:.0%}).")
print(
    f"Passing {summary['passing_models']} / {summary['total_models']} models | "
    f"Failing {summary['failing_models']} models."
)

# Persist the summary next to the full report. The encoding is explicit so the
# file does not depend on the platform's locale default.
summary_path = ARTIFACT_DIR / "validation_summary.json"
with open(summary_path, "w", encoding="utf-8") as f:
    json.dump({"summary": summary, "thresholds": thresholds_used}, f, indent=2)

print(f"Summary written to {summary_path}")


## Next Steps
- Share `validation_report.json` and `validation_summary.json` from this run's artifacts folder (`artifacts/validation_runs/<run id>/`).
- Investigate failing tenants (if any) using the failure reasons above.
- Update remediation plans or thresholds only with reviewer approval.