# Export final model bundle
Load the winning `best_overall_model` artifacts and export a standardized bundle to `final_model/` containing hyperparameters (`model_config.json`) and metrics (`benchmark_results.json`).

In [4]:
import json
from datetime import datetime, timezone
from pathlib import Path

# Walk upward from the current working directory until an ancestor containing
# the best_overall_model artifacts is found (same pattern as other notebooks).
_start = Path.cwd().resolve()
_candidates = [_start, *_start.parents]
REPO_ROOT = None
for p in _candidates:
    if (p / 'notebooks/model_evaluation_final/artifacts/best_overall_model').exists():
        REPO_ROOT = p
        break
else:
    # The loop ended without a break: no candidate holds the artifacts folder.
    raise FileNotFoundError('Cannot locate repo root with best_overall_model artifacts.')

# Input and output locations, all derived from the discovered root.
ARTIFACTS_DIR = REPO_ROOT / 'notebooks/model_evaluation_final/artifacts'
BEST_OVERALL_DIR = ARTIFACTS_DIR / 'best_overall_model'
FINAL_DIR = REPO_ROOT / 'final_model'
FINAL_DIR.mkdir(parents=True, exist_ok=True)  # create the export folder if absent
ARTIFACTS_DIR, BEST_OVERALL_DIR, FINAL_DIR

(PosixPath('/Users/aayushrijal/Documents/GitHub/volatility_forecast/notebooks/model_evaluation_final/artifacts'),
 PosixPath('/Users/aayushrijal/Documents/GitHub/volatility_forecast/notebooks/model_evaluation_final/artifacts/best_overall_model'),
 PosixPath('/Users/aayushrijal/Documents/GitHub/volatility_forecast/final_model'))

In [5]:
# Load best_overall metrics and hyperparams.
# JSON is read with an explicit UTF-8 encoding so the result does not depend on
# the platform's default text encoding.
with open(BEST_OVERALL_DIR / 'metrics.json', encoding='utf-8') as f:
    best_metrics = json.load(f)
with open(BEST_OVERALL_DIR / 'hyperparams.json', encoding='utf-8') as f:
    best_hyper = json.load(f)

# The metrics block is mandatory; fail loudly if it is absent.
best_overall_block = best_metrics.get('best_overall')
if best_overall_block is None:
    raise ValueError('best_overall missing in metrics.json')
# The hyperparameter block is optional: it stays None when the key is missing.
best_hyper_block = best_hyper.get('best_overall')

# datetime.utcnow() is deprecated (Python 3.12+). Take an aware UTC time and
# drop tzinfo before formatting so the string keeps the same '<iso>Z' shape.
timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + 'Z'
best_overall_block, best_hyper_block

({'type': 'individual',
  'name': 'xgb_tuned',
  'source': 'xgboost/xgb_tuned',
  'train': {'rmse': 0.008045408197754638,
   'mae': 0.005604441560017746,
   'r2': 0.7170392862840006},
  'val': {'rmse': 0.00907279671175083,
   'mae': 0.00558345122421248,
   'r2': 0.453820241414309},
  'test': {'rmse': 0.004869681452443808,
   'mae': 0.0041195188750506005,
   'r2': -0.7500920503923685}},
 {'type': 'individual',
  'hyperparams': {'best_model_name': 'xgb_tuned',
   'best_params': {'colsample_bytree': 0.7,
    'gamma': 0,
    'learning_rate': 0.03,
    'max_depth': 3,
    'min_child_weight': 4,
    'n_estimators': 300,
    'reg_lambda': 1.5,
    'subsample': 0.8}}})

In [6]:
# Build outputs

def prune_keys(obj, drop_keys=None):
    """Return a copy of ``obj`` with unwanted dictionary keys removed at every depth.

    Args:
        obj: Any value. Dicts and lists are traversed recursively; every other
            value is returned unchanged.
        drop_keys: Collection of keys to discard. When ``None``, the keys
            ``'type'``, ``'source'`` and ``'name'`` are discarded.

    Returns:
        A new dict/list structure without the dropped keys, or ``obj`` itself
        when it is neither a dict nor a list.
    """
    excluded = {'type', 'source', 'name'} if drop_keys is None else drop_keys
    if isinstance(obj, list):
        return [prune_keys(item, excluded) for item in obj]
    if not isinstance(obj, dict):
        return obj
    pruned = {}
    for key, value in obj.items():
        if key in excluded:
            continue
        pruned[key] = prune_keys(value, excluded)
    return pruned


def extract_metrics(block):
    """Collect the per-split metric dicts from a results block.

    Args:
        block: Mapping that may hold ``'train'``, ``'val'`` and ``'test'``
            entries plus an optional ``'components'`` mapping, or ``None``.

    Returns:
        ``None`` when ``block`` is ``None``. Otherwise a dict holding each
        split whose value is a dict (in train/val/test order) and, when
        ``'components'`` is a dict, a ``'components'`` entry mapping each
        non-``None`` component to its own recursively extracted metrics.
    """
    if block is None:
        return None
    collected = {
        split: block[split]
        for split in ('train', 'val', 'test')
        if isinstance(block.get(split), dict)
    }
    nested = block.get('components')
    if not isinstance(nested, dict):
        return collected
    # Recurse into each component, skipping entries that are None.
    collected['components'] = {
        label: extract_metrics(child)
        for label, child in nested.items()
        if child is not None
    }
    return collected


def normalize_hyperparams(block):
    """Reduce a hyperparameter block to a flat parameter mapping where possible.

    Args:
        block: Hyperparameter block as loaded from JSON, or ``None``.

    Returns:
        ``{}`` when ``block`` is ``None``. Otherwise the block is first passed
        through ``prune_keys``; if the result has a dict under
        ``'hyperparams'``, that dict (or its ``'best_params'`` dict when
        present) is returned with any ``'best_model_name'`` entry removed.
        In every other case the pruned block is returned as is.
    """
    if block is None:
        return {}
    stripped = prune_keys(block)
    inner = stripped.get('hyperparams') if isinstance(stripped, dict) else None
    if not isinstance(inner, dict):
        return stripped
    params = dict(inner)
    tuned = params.get('best_params')
    if isinstance(tuned, dict):
        # Prefer the tuned parameter set over its wrapper.
        params = tuned
    params.pop('best_model_name', None)
    return params


# Resolve the model name lazily: the metrics block first, then the
# hyperparameter block, and finally a fixed fallback label.
model_name = (best_overall_block or {}).get('name')
if not model_name:
    model_name = ((best_hyper_block or {}).get('hyperparams') or {}).get('best_model_name')
if not model_name:
    model_name = 'best_overall'

benchmark_metrics = extract_metrics(best_overall_block)
normalized_hyperparams = normalize_hyperparams(best_hyper_block)

# Both payloads carry the same timestamp.
model_config = {
    'timestamp': timestamp,
    'hyperparams': {'model_name': model_name, 'hyperparams': normalized_hyperparams},
}
benchmark_results = {'timestamp': timestamp, 'model_name': model_name, 'metrics': benchmark_metrics}

# Save each payload as indented JSON inside FINAL_DIR.
for _filename, _payload in (
    ('model_config.json', model_config),
    ('benchmark_results.json', benchmark_results),
):
    with open(FINAL_DIR / _filename, 'w') as f:
        json.dump(_payload, f, indent=2)

print('Saved model_config.json and benchmark_results.json to', FINAL_DIR)
(model_config, benchmark_results)

Saved model_config.json and benchmark_results.json to /Users/aayushrijal/Documents/GitHub/volatility_forecast/final_model


({'timestamp': '2026-02-03T23:19:53.919523Z',
  'hyperparams': {'model_name': 'xgb_tuned',
   'hyperparams': {'colsample_bytree': 0.7,
    'gamma': 0,
    'learning_rate': 0.03,
    'max_depth': 3,
    'min_child_weight': 4,
    'n_estimators': 300,
    'reg_lambda': 1.5,
    'subsample': 0.8}}},
 {'timestamp': '2026-02-03T23:19:53.919523Z',
  'model_name': 'xgb_tuned',
  'metrics': {'train': {'rmse': 0.008045408197754638,
    'mae': 0.005604441560017746,
    'r2': 0.7170392862840006},
   'val': {'rmse': 0.00907279671175083,
    'mae': 0.00558345122421248,
    'r2': 0.453820241414309},
   'test': {'rmse': 0.004869681452443808,
    'mae': 0.0041195188750506005,
    'r2': -0.7500920503923685}}})