# 01 Evaluation

This notebook is the final evaluation entrypoint for the BTCUSDT regime multi-strategy project.

It loads outputs from the pipeline and provides a compact review of:
- backtest metrics vs buy-and-hold baseline
- regime/strategy/cost diagnostics
- walk-forward summary
- stress test summary


In [None]:
from pathlib import Path
import json
import pandas as pd

# Current working directory of the kernel; every other path is derived from it.
NOTEBOOK_DIR = Path.cwd()

# If the parent directory contains a `configs` folder, the parent is used as the
# repository root; otherwise the working directory itself is used.
if (NOTEBOOK_DIR / '..' / 'configs').exists():
    REPO = (NOTEBOOK_DIR / '..').resolve()
else:
    REPO = NOTEBOOK_DIR

# Locations of the artefacts this notebook reads.
METRICS_DIR = REPO.joinpath('results', 'metrics')
REGIME_PLOT = REPO.joinpath('results', 'regime', 'plots', 'BTCUSDT_price_regime_1h.png')

print('repo:', REPO)
print('metrics_dir:', METRICS_DIR)


In [None]:
def read_json(path: Path):
    """Load a JSON file, degrading to an empty dict when it cannot be used.

    Args:
        path: Location of the JSON file to read (decoded as UTF-8).

    Returns:
        The parsed JSON payload, or ``{}`` when the file does not exist or
        its content cannot be decoded/parsed. In both fallback cases a short
        message naming the file is printed so the problem stays visible.
    """
    if not path.exists():
        print(f'missing: {path}')
        return {}
    try:
        with path.open('r', encoding='utf-8') as f:
            return json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        # An empty, truncated or mis-encoded file should not abort the whole
        # notebook; report it and let callers take their "no data" path.
        print(f'invalid json: {path} ({exc})')
        return {}

# Read each pipeline summary once. read_json returns {} for a file that does
# not exist, so every name below is always bound.
backtest_metrics = read_json(METRICS_DIR.joinpath('BTCUSDT_backtest_metrics.json'))
walk_forward = read_json(METRICS_DIR.joinpath('BTCUSDT_walk_forward_summary.json'))
stress = read_json(METRICS_DIR.joinpath('BTCUSDT_backtest_stress_summary.json'))
diag = read_json(METRICS_DIR.joinpath('BTCUSDT_backtest_diag_summary.json'))


## Backtest vs Buy-and-Hold

In [None]:
if not backtest_metrics:
    print('backtest metrics not found')
else:
    # Fields shown in the comparison table, in display order. The `bh_*` keys
    # are presumably the buy-and-hold baseline figures (confirm against the
    # pipeline that writes the metrics file).
    headline_keys = (
        'total_return', 'annual_return', 'sharpe', 'max_drawdown',
        'bh_total_return', 'bh_sharpe', 'bh_max_drawdown',
        'excess_total_return', 'excess_sharpe', 'outperform_buy_hold',
    )

    # Keys absent from the metrics file come through as None.
    headline_row = {}
    for key in headline_keys:
        headline_row[key] = backtest_metrics.get(key)

    # Build a one-row frame, then transpose it so each metric is its own row
    # under a single 'value' column.
    one_row = pd.DataFrame([headline_row])
    display(one_row.T.rename(columns={0: 'value'}))


## Diagnostics Snapshot

In [None]:
if not diag:
    print('diagnostics summary not found')
else:
    # Fields pulled from the diagnostics summary; any that are absent show as None.
    diag_fields = ('rows_joined', 'bars_per_year', 'position_lag_bars', 'default_regime')
    diag_row = {field: diag.get(field) for field in diag_fields}
    display(pd.DataFrame([diag_row]))


## Walk-Forward Summary

In [None]:
if walk_forward:
    # dict.get only falls back to its default when the key is absent. If the
    # summary stores "oos_metrics": null (or any non-dict value), the lookups
    # below would raise AttributeError, so normalise to an empty dict first.
    oos = walk_forward.get('oos_metrics')
    if not isinstance(oos, dict):
        oos = {}

    # One-row overview: fields read from the top level of the summary, followed
    # by the fields read from its `oos_metrics` entry.
    out = {
        'selection_metric': walk_forward.get('selection_metric'),
        'fold_count': walk_forward.get('fold_count'),
        'candidate_count': walk_forward.get('candidate_count'),
        'oos_total_return': oos.get('total_return'),
        'oos_sharpe': oos.get('sharpe'),
        'oos_max_drawdown': oos.get('max_drawdown'),
        'oos_bh_total_return': oos.get('bh_total_return'),
    }
    display(pd.DataFrame([out]))
else:
    print('walk-forward summary not found')


## Stress Test Summary

In [None]:
def _as_dict(value):
    """Return ``value`` if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


if stress:
    # dict.get only uses its default when the key is absent, so a JSON null
    # under 'baseline', 'metrics' or 'extremes' would otherwise surface as None
    # and break the chained lookups. Normalise each level to a dict.
    baseline = _as_dict(_as_dict(stress.get('baseline')).get('metrics'))
    extremes = _as_dict(stress.get('extremes'))

    # One-row overview: scenario count, the baseline entry's metrics, and the
    # values stored under `extremes`.
    out = {
        'scenario_count': stress.get('scenario_count'),
        'baseline_total_return': baseline.get('total_return'),
        'baseline_sharpe': baseline.get('sharpe'),
        'baseline_max_drawdown': baseline.get('max_drawdown'),
        'baseline_bh_total_return': baseline.get('bh_total_return'),
        'worst_total_return_scenario': extremes.get('worst_total_return_scenario'),
        'worst_drawdown_scenario': extremes.get('worst_drawdown_scenario'),
        'best_sharpe_scenario': extremes.get('best_sharpe_scenario'),
    }
    display(pd.DataFrame([out]))
else:
    print('stress summary not found')


## Optional: Regime Plot

In [None]:
if not REGIME_PLOT.exists():
    print('regime plot not found:', REGIME_PLOT)
else:
    # Best effort: any failure while importing or rendering is reported as a
    # message rather than raised.
    try:
        from IPython.display import Image, display
        regime_image = Image(filename=str(REGIME_PLOT))
        display(regime_image)
    except Exception as err:
        print('could not render image:', err)
