# 02 — Exogenous Feature Ablation: BTC-only vs BTC+Exogenous

Compare two runs of the same pipeline: BTC-only versus BTC plus a synthetic gold series. The gold series is only a placeholder — replace it with real exogenous CSVs for a meaningful ablation.

In [None]:
import os, sys, yaml, pandas as pd, numpy as np
from pathlib import Path
# The project checkout is expected in a sub-directory of the current working directory.
PROJECT_ROOT = Path.cwd().joinpath('btc_return_forecasting_full')
# Give the project root top priority on the import path so `src` resolves from it.
sys.path.insert(0, os.fspath(PROJECT_ROOT))
from src.pipeline import run_pipeline


In [None]:
# Load the baseline experiment configuration that both ablation runs start from.
base_cfg_path = PROJECT_ROOT / 'configs' / 'example_daily.yaml'
# Explicit encoding: without it the YAML would be decoded with the platform locale.
with open(base_cfg_path, 'r', encoding='utf-8') as f:
    base_cfg = yaml.safe_load(f)

# Ensure BTC CSV exists
csv_path = PROJECT_ROOT / base_cfg['data']['main_csv']
csv_path.parent.mkdir(parents=True, exist_ok=True)
if not csv_path.exists():
    # No data on disk yet: write a synthetic random-walk price series so the
    # notebook can run end to end. An existing CSV is never overwritten.
    dates = pd.date_range('2022-01-01', periods=800, freq='D', tz='UTC')
    # A dedicated seeded generator keeps the demo data reproducible across
    # fresh checkouts without touching NumPy's global random state.
    rng = np.random.default_rng(42)
    price = 40000 + np.cumsum(rng.standard_normal(len(dates)) * 150)
    # `dates` is already UTC, so it can be formatted directly.
    pd.DataFrame({'timestamp': dates.strftime('%Y-%m-%d'), 'price': price}).to_csv(csv_path, index=False)


In [None]:
import copy

# Each experiment gets its own deep copy of the base config. A shallow
# `dict(base_cfg)` would share the nested 'run' and 'tuning' dicts, so setting
# the second run's name would silently overwrite the first run's name too.

# BTC-only
btc_only = copy.deepcopy(base_cfg)
btc_only['run']['run_name'] = 'nb_btc_only'
btc_only['exogenous'] = {'enabled': False, 'assets': []}
btc_only['tuning']['n_candidates'] = 5

# BTC + synthetic gold
gold_csv = PROJECT_ROOT / 'data' / 'gold_demo.csv'
# Make sure the target directory exists before writing the demo file.
gold_csv.parent.mkdir(parents=True, exist_ok=True)
if not gold_csv.exists():
    # Synthetic random-walk stand-in for a real gold price series; an existing
    # file is never overwritten.
    dates = pd.date_range('2022-01-01', periods=800, freq='D', tz='UTC')
    # A dedicated seeded generator keeps the demo series reproducible without
    # touching NumPy's global random state.
    rng = np.random.default_rng(7)
    gold = 1800 + np.cumsum(rng.standard_normal(len(dates)) * 2.0)
    # `dates` is already UTC, so it can be formatted directly.
    pd.DataFrame({'timestamp': dates.strftime('%Y-%m-%d'), 'price': gold}).to_csv(gold_csv, index=False)

btc_exo = copy.deepcopy(base_cfg)
btc_exo['run']['run_name'] = 'nb_btc_plus_exogenous'
btc_exo['exogenous'] = {'enabled': True, 'assets': [{'name':'gold','csv':str(gold_csv),'price_col':'price'}]}
btc_exo['tuning']['n_candidates'] = 5


## Run both experiments

In [None]:
# Run the pipeline once per configuration; each call yields an output value
# and a leaderboard DataFrame that the comparison cells below rely on.
out1, lb1 = run_pipeline(btc_only)
out2, lb2 = run_pipeline(btc_exo)

# Report what each run returned first ...
for label, run_output in (('BTC-only:', out1), ('BTC+exo:', out2)):
    print(label, run_output)

# ... then the top rows of each leaderboard, best R² first.
for leaderboard in (lb1, lb2):
    print(leaderboard.sort_values('r2', ascending=False).head())

## Compare R²

In [None]:
# Join the two leaderboards on model name; overlapping columns are suffixed
# by the run they came from.
merged = lb1.merge(lb2, on='model', suffixes=('_btc_only', '_btc_exo'))
# Keep only the R² columns and rank by the score of the exogenous run.
r2_columns = ['model', 'r2_btc_only', 'r2_btc_exo']
cmp = merged[r2_columns].sort_values('r2_btc_exo', ascending=False)
cmp.head(10)

### ΔR² bar chart

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Per-model change in R² when exogenous features are added. `assign` returns
# a new frame, so `cmp` itself is left untouched.
delta = cmp.assign(delta_r2=cmp['r2_btc_exo'] - cmp['r2_btc_only'])

# One bar per model, drawn on an explicit figure/axes pair.
fig = plt.figure()
ax = fig.gca()
ax.bar(delta['model'], delta['delta_r2'])
ax.set_title('ΔR² from adding exogenous features (higher is better)')
ax.set_xlabel('Model')
ax.set_ylabel('ΔR²')
fig.tight_layout()
plt.show()