# Phase 1 Validation Harness (T26/T27)
This notebook exercises the Phase 1 validations: deterministic SIM runs (T26) and paper-trading safety rails (T27).

In [None]:
import json
import os
import sys
from pathlib import Path

from research.utils import (
    discover_run_artifacts,
    hash_json_normalized,
    hash_jsonl_normalized,
    run_cmd,
)

# Absolute path of the directory the notebook is started from; the CLI scripts
# are launched with this as their working directory.
PROJECT_ROOT = Path('.').resolve()
# Data root: QUANTO_DATA_ROOT when it holds a non-empty value, otherwise
# <project>/.quanto_data. An empty value is treated as unset because Path('')
# would otherwise resolve to the current working directory.
DATA_ROOT = Path(os.environ.get('QUANTO_DATA_ROOT') or PROJECT_ROOT / '.quanto_data').resolve()
# Directory scanned for candidate promotion records (*.json).
PROMOTION_DIR = DATA_ROOT / 'promotions' / 'candidate'


def _load_candidate_id() -> str:
    """Return the experiment id of the candidate to validate.

    The ``NOTEBOOK_CANDIDATE_ID`` environment variable wins when it holds a
    non-blank value. Otherwise the first ``*.json`` file (sorted by path) in
    ``PROMOTION_DIR`` is parsed and its ``experiment_id`` field is returned.

    Raises:
        RuntimeError: if no promotion record exists, the record is not a JSON
            object, or it has no ``experiment_id``.
    """
    # Strip before testing truthiness so a whitespace-only override is treated
    # as unset instead of yielding an empty candidate id.
    override = os.environ.get('NOTEBOOK_CANDIDATE_ID', '').strip()
    if override:
        return override
    promos = sorted(PROMOTION_DIR.glob('*.json'))
    if not promos:
        raise RuntimeError(f'No candidate promotions found in {PROMOTION_DIR}')
    record = promos[0]
    payload = json.loads(record.read_text(encoding='utf-8'))
    # A JSON list/scalar has no .get(); report it as a bad record rather than
    # letting an AttributeError escape.
    if not isinstance(payload, dict):
        raise RuntimeError(f'Promotion record {record} is not a JSON object')
    experiment_id = payload.get('experiment_id')
    if not experiment_id:
        raise RuntimeError(f'Promotion record {record} missing experiment_id')
    return experiment_id


# Resolve the experiment under test. The baseline is deliberately the same
# experiment, so later baseline comparisons compare the candidate to itself.
CANDIDATE_ID = _load_candidate_id()
BASELINE_ID = CANDIDATE_ID

# Load the candidate's experiment spec and pull out the replay window and the
# symbol list; a missing or null symbol list becomes an empty list.
SPEC_PATH = DATA_ROOT.joinpath('experiments', CANDIDATE_ID, 'spec', 'experiment_spec.json')
SPEC_PAYLOAD = json.loads(SPEC_PATH.read_text(encoding='utf-8'))
START_DATE, END_DATE = (SPEC_PAYLOAD.get(field) for field in ('start_date', 'end_date'))
SYMBOLS = SPEC_PAYLOAD.get('symbols') or []

print(f'Candidate: {CANDIDATE_ID}')
print(f'Window: {START_DATE} -> {END_DATE}')


In [None]:
import shutil


def run_shadow_once(output_dir: Path, *, max_steps: int | None = None, resume: bool = False, reset: bool = False):
    output_dir = Path(output_dir)
    output_dir.parent.mkdir(parents=True, exist_ok=True)
    cmd = [
        sys.executable,
        'scripts/run_shadow.py',
        '--experiment-id', CANDIDATE_ID,
        '--replay',
        '--start-date', START_DATE,
        '--end-date', END_DATE,
        '--output-dir', str(output_dir),
    ]
    if max_steps is not None:
        cmd += ['--max-steps', str(max_steps)]
    if resume:
        cmd.append('--resume')
    if reset:
        cmd.append('--reset')
    result = run_cmd(cmd, cwd=PROJECT_ROOT, check=False)
    if result.stdout:
        print(result.stdout.strip())
    if result.stderr:
        print(result.stderr.strip())
    return result


def gather_hashes(run_dir: Path) -> dict[str, str]:
    """Hash the artifacts discovered under ``run_dir`` and return them by name.

    The ``steps`` artifact is required (a missing entry raises ``KeyError``)
    and is hashed with ``hash_jsonl_normalized``. ``metrics`` and
    ``execution_metrics`` are hashed with ``hash_json_normalized`` only when
    they were discovered. The resulting mapping is also printed.
    """
    found = discover_run_artifacts(Path(run_dir))
    digests: dict[str, str] = {'steps': hash_jsonl_normalized(found['steps'])}
    # Optional artifacts, kept in the same insertion order as before.
    for optional_key in ('metrics', 'execution_metrics'):
        if optional_key in found:
            digests[optional_key] = hash_json_normalized(found[optional_key])
    print(f"Hashes for {run_dir} -> {digests}")
    return digests


In [None]:
# Smoke-check that each CLI entry point can be invoked with -h.
for script in ('scripts/run_shadow.py', 'scripts/run_paper.py', 'scripts/qualify_experiment.py'):
    print(f'=== {script} --help ===')
    result = run_cmd([sys.executable, script, '-h'], cwd=PROJECT_ROOT, check=False)
    print(result.stdout)
    # Surface failures: with only stdout printed, a script that fails to start
    # would show up as nothing more than an empty section.
    if result.returncode != 0:
        print(f'[exit code {result.returncode}]')
        print(result.stderr)


In [None]:
# T26 twin-run check: two independent replays of the same window must produce
# identical normalized step logs.
sim_root = DATA_ROOT / 'shadow' / CANDIDATE_ID
run_one = sim_root / 'notebook_sim_run1'
run_two = sim_root / 'notebook_sim_run2'
# Check exit codes before hashing: if a run fails, artifacts left over from an
# earlier notebook session could otherwise be hashed and compared instead.
result_one = run_shadow_once(run_one, reset=True)
assert result_one.returncode == 0, f'Shadow run 1 failed with exit code {result_one.returncode}'
result_two = run_shadow_once(run_two, reset=True)
assert result_two.returncode == 0, f'Shadow run 2 failed with exit code {result_two.returncode}'
hashes_one = gather_hashes(run_one)
hashes_two = gather_hashes(run_two)
assert hashes_one['steps'] == hashes_two['steps'], 'Twin-run determinism violated'


In [None]:
# T26 resume check: a replay stopped after a few steps and then resumed must
# produce the same normalized step log as one uninterrupted replay.
resume_run = sim_root / 'notebook_sim_resume'
reference_run = sim_root / 'notebook_sim_reference_full'
partial_steps = 2
# Check every exit code before hashing: if a run fails, artifacts left over
# from an earlier notebook session could otherwise be hashed and compared.
partial_result = run_shadow_once(resume_run, max_steps=partial_steps, reset=True)
assert partial_result.returncode == 0, f'Partial run failed with exit code {partial_result.returncode}'
resumed_result = run_shadow_once(resume_run, resume=True)
assert resumed_result.returncode == 0, f'Resumed run failed with exit code {resumed_result.returncode}'
reference_result = run_shadow_once(reference_run, reset=True)
assert reference_result.returncode == 0, f'Reference run failed with exit code {reference_result.returncode}'
hash_resume = gather_hashes(resume_run)
hash_reference = gather_hashes(reference_run)
assert hash_resume['steps'] == hash_reference['steps'], 'Resume hashes mismatch'


In [None]:
# T27 paper safety rails: run_paper.py must refuse to start (non-zero exit)
# (1) when credentials are absent and (2) when pointed at the live Alpaca URL.
# NOTE(review): both checks only assert a non-zero exit code, so any unrelated
# crash of the script also satisfies them; inspect the printed output.
paper_root = DATA_ROOT / 'paper' / CANDIDATE_ID
paper_root.mkdir(parents=True, exist_ok=True)
failfast_config = paper_root / 'notebook_failfast.json'
# Use the first two spec symbols when available, else a fixed fallback pair.
universe = SYMBOLS[:2] if len(SYMBOLS) >= 2 else ['AAPL', 'MSFT']
base_payload = {
    'experiment_id': CANDIDATE_ID,
    'execution_mode': 'alpaca_paper',
    'universe': universe,
    'broker': {'alpaca_base_url': 'https://paper-api.alpaca.markets'},
}
failfast_config.write_text(json.dumps(base_payload, indent=2), encoding='utf-8')

# Case 1: valid paper URL, but the credential variables are removed from the
# child process environment.
env_failfast = dict(os.environ)
env_failfast.pop('ALPACA_API_KEY', None)
env_failfast.pop('ALPACA_SECRET_KEY', None)
result_failfast = run_cmd([sys.executable, 'scripts/run_paper.py', '--config', str(failfast_config)], cwd=PROJECT_ROOT, env=env_failfast, check=False)
print(result_failfast.stdout)
print(result_failfast.stderr)
assert result_failfast.returncode != 0, 'Expected fail-fast without credentials'

# Case 2: credentials present, but the broker URL is the live endpoint.
live_payload = dict(base_payload)
live_payload['broker'] = {'alpaca_base_url': 'https://api.alpaca.markets'}
live_config = paper_root / 'notebook_live_url.json'
live_config.write_text(json.dumps(live_payload, indent=2), encoding='utf-8')
env_live = dict(os.environ)
# Always use placeholder credentials here. Reusing real keys from the parent
# environment would let a broken safety rail reach the live endpoint with
# them, and an empty inherited value would turn this into a second
# missing-credential test instead of a live-URL test.
env_live['ALPACA_API_KEY'] = 'demo'
env_live['ALPACA_SECRET_KEY'] = 'demo'
result_live = run_cmd([sys.executable, 'scripts/run_paper.py', '--config', str(live_config)], cwd=PROJECT_ROOT, env=env_live, check=False)
print(result_live.stdout)
print(result_live.stderr)
assert result_live.returncode != 0, 'Expected live URL rejection'


In [None]:
# Qualify the candidate against the baseline, then run the experiment
# comparison. BASELINE_ID is set to CANDIDATE_ID earlier in the notebook, so
# both commands compare the candidate with itself.
registry_root = DATA_ROOT / 'experiments'
qual_cmd = [
    sys.executable,
    'scripts/qualify_experiment.py',
    '--experiment-id', CANDIDATE_ID,
    '--baseline', BASELINE_ID,
    '--registry-root', str(registry_root),
]
qual_result = run_cmd(qual_cmd, cwd=PROJECT_ROOT, check=False)
print(qual_result.stdout)
print(qual_result.stderr)
assert qual_result.returncode == 0, f'Qualification failed with exit code {qual_result.returncode}'
report_path = registry_root / CANDIDATE_ID / 'promotion' / 'qualification_report.json'
print(f'Qualification report: {report_path}')
# The path above is constructed here, not reported by the script, so flag the
# case where nothing exists at that location.
if not report_path.exists():
    print('WARNING: qualification report not found at the expected path')
comp_cmd = [
    sys.executable,
    'scripts/compare_experiments.py',
    '--candidate', CANDIDATE_ID,
    '--baseline', BASELINE_ID,
    '--registry-root', str(registry_root),
]
comp_result = run_cmd(comp_cmd, cwd=PROJECT_ROOT, check=False)
print(comp_result.stdout)
print(comp_result.stderr)
# The comparison is informational (it is not asserted), but a failing exit
# code should still be visible in the cell output.
if comp_result.returncode != 0:
    print(f'WARNING: compare_experiments exited with code {comp_result.returncode}')
