In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
import joblib
from sklearn.metrics import mean_squared_error

# Directory layout, resolved relative to the parent of the working directory.
PROJECT_ROOT = Path("..")
DATA_PROCESSED = PROJECT_ROOT.joinpath("data", "processed")
ARTIFACTS = PROJECT_ROOT.joinpath("artifacts")

# Processed parquet tables, read in order: model inputs, targets, benchmarks.
Xdf, ydf, bmk = (
    pd.read_parquet(DATA_PROCESSED / parquet_name)
    for parquet_name in (
        "vinv_inputs_raw.parquet",
        "vinv_targets_monthly.parquet",
        "vinv_benchmarks_monthly.parquet",
    )
)

# Persisted estimators stored under artifacts/models.
# NOTE(review): joblib.load unpickles arbitrary objects — only load artifacts
# produced by this project.
lin, tree = (
    joblib.load(ARTIFACTS / "models" / pickle_name)
    for pickle_name in ("linear_model.pkl", "tree_model.pkl")
)

In [None]:
# Join and prep (same keys as modeling notebook)
# Coerce the join key to datetime in all three frames so the merges line up.
Xdf["date"] = pd.to_datetime(Xdf["date"])
ydf["date"] = pd.to_datetime(ydf["date"])
bmk["date"] = pd.to_datetime(bmk["date"])

# Inner join: keep only dates present in both inputs and targets.
# Left join: attach benchmark columns without dropping dates that lack them.
df = Xdf.merge(ydf, on="date", how="inner").merge(bmk, on="date", how="left")

target_col = "target_value_spread_fwd_1m"
# Candidate features: every non-object column except the date key and the target.
feature_cols = [c for c in df.columns if c not in {"date", target_col} and df[c].dtype != "object"]

# Rows without a target cannot be scored; order the remainder chronologically.
df = df.dropna(subset=[target_col]).sort_values("date")
# Keep numeric columns only and zero-fill missing values.
# NOTE(review): assumes this column set/order and the zero-fill match what the
# pickled models were fitted on in the modeling notebook — confirm.
X = df[feature_cols].select_dtypes(include=[np.number]).fillna(0.0)
y = df[target_col].astype(float)

pred_lin = lin.predict(X)
pred_tree = tree.predict(X)

# One row per scored date: realised target next to each model's prediction.
out = pd.DataFrame({
    "date": df["date"].values,
    "y_true": y.values,
    "pred_ridge": pred_lin,
    "pred_tree": pred_tree,
})
out.to_csv(ARTIFACTS / "walkforward_results.csv", index=False)

# RMSE computed as sqrt(MSE). The `squared=False` keyword was deprecated in
# scikit-learn 1.4 and removed in 1.6, so taking the root explicitly keeps this
# cell working on every scikit-learn version.
rmse_ridge = float(np.sqrt(mean_squared_error(out["y_true"], out["pred_ridge"])))
rmse_tree = float(np.sqrt(mean_squared_error(out["y_true"], out["pred_tree"])))

print("RMSE ridge:", rmse_ridge)
print("RMSE tree :", rmse_tree)


In [None]:
# Model card v1 (minimal but credible)
# Markdown summary of the baseline run; the two RMSE figures are interpolated
# from the values computed in the scoring cell.
# NOTE(review): the "Evaluation" section describes walk-forward validation, while
# the scoring cell predicts over the full joined history with pre-loaded models —
# confirm the wording matches how the models were actually fitted.
card = f"""# VinV ML Extension — Model Card (v1)

## Purpose
Predict `target_value_spread_fwd_1m` (Value – Growth forward spread) using VinV features and Tier-1 macro controls.

## Data
- Features: `vinv_inputs_raw.parquet` (point-in-time monthly)
- Targets: `vinv_targets_monthly.parquet`
- Benchmarks: `vinv_benchmarks_monthly.parquet`

## Models (Baselines)
- Ridge Regression
- Random Forest Regressor

## Evaluation
- Time-ordered validation (walk-forward style)
- Primary metric: RMSE on full history (Tranche 1 baseline)

## Results
- RMSE (Ridge): {rmse_ridge:.6f}
- RMSE (RF): {rmse_tree:.6f}

## Known Limitations (Tranche 1)
- Benchmark/target definitions may be placeholders until total return series are finalized.
- No transaction cost modeling in Tranche 1.
- Universe governance hardening deferred to Tranche 3.

## Next Steps
- Add portfolio translation + turnover + cost haircuts (Tranche 2)
- Formal universe membership & point-in-time rules (Tranche 3)
- Regime gating + ablation tests (Tranche 4)
"""
# The card contains non-ASCII punctuation (em/en dashes); write it as UTF-8
# explicitly so the file's bytes do not depend on the platform's default locale.
(ARTIFACTS / "vinv_model_card_v1.md").write_text(card, encoding="utf-8")
print("Wrote model card.")