# KTND-Finance: Full Experiment Pipeline

**Run everything in 2 cells:**
1. **Cell 1** (Setup) - Install deps + clone repo (~2 min)
2. **Cell 2** (Run All) - Downloads data, trains models, runs baselines/robustness/rolling, generates figures (~2-3 hours)

Set runtime to **GPU (T4)** before running: Runtime > Change runtime type > T4 GPU

Then hit **Runtime > Run all** and walk away.

In [None]:
#@title 1. Setup (install + clone + verify) - ~2 min

# Install missing dependencies (torch/numpy/pandas/scipy/sklearn/matplotlib are pre-installed)
!pip install -q yfinance>=1.0.0 hmmlearn>=0.3.0 statsmodels>=0.14.0 arch>=6.0.0 pyyaml>=6.0

# Clone repo
import os, sys
REPO_URL = "https://github.com/keshavkrishnan08/kind_finance.git"
REPO_DIR = "/content/ktnd_finance"

if os.path.exists(REPO_DIR):
    !cd {REPO_DIR} && git pull
else:
    !git clone {REPO_URL} {REPO_DIR}

os.chdir(REPO_DIR)
sys.path.insert(0, REPO_DIR)

# Verify
import torch, numpy as np
from src.model.vampnet import NonEquilibriumVAMPNet
print(f"Python {sys.version.split()[0]} | PyTorch {torch.__version__} | "
      f"CUDA: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")
print("Setup complete.")

In [None]:
#@title 2. Run ENTIRE experiment pipeline (~2-3 hours) - just run this and walk away

import subprocess, time, json, os, sys

# Stage commands are launched with the same interpreter that runs this kernel,
# from the root of the cloned repository.
python = sys.executable
REPO_DIR = "/content/ktnd_finance"
os.chdir(REPO_DIR)

def run(name, cmd):
    """Run one pipeline stage as a shell command and report whether it succeeded.

    Prints a banner with the stage name, executes ``cmd`` through the shell
    with ``REPO_DIR`` as the working directory, then prints the status and the
    elapsed wall-clock time in minutes.

    When ``bash`` is found on PATH the command runs under ``set -o pipefail``,
    so a pipeline such as ``pytest ... | tail -5`` fails if *any* command in
    the pipe fails. Without it the exit status is that of the last command
    only, and a failing stage piped through ``tail`` would be reported as OK.
    If ``bash`` is not available the command runs under the default shell.

    Args:
        name: Human-readable stage label used in the banner and status line.
        cmd: Shell command line to execute.

    Returns:
        True if the command exited with status 0, otherwise False.
    """
    import shutil  # local import so the function does not rely on cell-level imports

    print(f"\n{'='*70}")
    print(f"  STAGE: {name}")
    print(f"{'='*70}")

    bash = shutil.which("bash")
    t0 = time.time()
    if bash:
        result = subprocess.run(
            f"set -o pipefail; {cmd}", shell=True, executable=bash, cwd=REPO_DIR
        )
    else:
        # No bash: keep the original behaviour (exit status of the last pipe stage).
        result = subprocess.run(cmd, shell=True, cwd=REPO_DIR)
    elapsed = time.time() - t0

    succeeded = result.returncode == 0
    status = 'OK' if succeeded else 'FAILED'
    print(f"  >> {name}: {status} ({elapsed/60:.1f} min)")
    return succeeded

pipeline_start = time.time()

# Every stage as (result key, display label, shell command), in execution order.
# Stages run one after another regardless of earlier failures; the outcome of
# each is recorded under its key.
stages = [
    # Stage 1: Quick tests (skip slow synthetic tests)
    ('tests', 'Quick tests',
     f'{python} -m pytest tests/ -q --tb=short -k "not test_synthetic" 2>&1 | tail -5'),
    # Stage 2: Download data
    ('download', 'Download data',
     f'{python} data/download.py --mode all'),
    # Stage 3: Train univariate (SPY, 1993-2025)
    ('train_uni', 'Train univariate (SPY)',
     f'{python} experiments/run_main.py --config config/univariate.yaml --mode univariate --seed 42'),
    # Stage 4: Train multiasset (11 ETFs, 2007-2025)
    ('train_multi', 'Train multiasset (11 ETFs)',
     f'{python} experiments/run_main.py --config config/multiasset.yaml --mode multiasset --seed 42'),
    # Stage 5: Baselines (HMM, DMD, PCA, VIX threshold)
    ('baselines', 'Baselines',
     f'{python} experiments/run_baselines.py --config config/default.yaml'),
    # Stage 6: Robustness tests (CK, bootstrap, permutation, etc.)
    ('robustness', 'Robustness tests (univariate)',
     f'{python} experiments/run_robustness.py --config config/default.yaml --mode univariate'),
    # Stage 7: Rolling spectral analysis
    ('rolling', 'Rolling spectral analysis',
     f'{python} experiments/run_rolling.py --config config/default.yaml --mode univariate'),
    # Stage 8: Generate figures
    ('figures', 'Generate figures',
     f'{python} experiments/run_figures.py --results-dir outputs/results --figures-dir outputs/figures'),
]

results = {}
for stage_key, stage_label, stage_cmd in stages:
    results[stage_key] = run(stage_label, stage_cmd)

# --- Final report ---
total_min = (time.time() - pipeline_start) / 60
n_total = len(results)
n_ok = sum(1 for passed in results.values() if passed)
bar = '=' * 70

print(f"\n\n{bar}")
print(f"  PIPELINE COMPLETE: {n_ok}/{n_total} stages passed ({total_min:.1f} min total)")
print(bar)
for name, ok in results.items():
    verdict = 'OK' if ok else 'FAIL'
    print(f"  {verdict:6s}  {name}")

# Print key metrics, if the results JSON was written under outputs/results.
rpath = os.path.join(REPO_DIR, 'outputs', 'results', 'analysis_results.json')
if os.path.exists(rpath):
    with open(rpath) as f:
        r = json.load(f)

    def metric(key, missing='N/A'):
        """Return r[key], or the placeholder when the key is absent."""
        return r.get(key, missing)

    print("\n  Key Metrics:")
    print(f"    Spectral gap:         {metric('spectral_gap')}")
    entropy_ci = f"[{metric('entropy_ci_lower', '?')}, {metric('entropy_ci_upper', '?')}] 95% CI"
    print(f"    Entropy (empirical):  {metric('entropy_empirical')} {entropy_ci}")
    print(f"    Mean irreversibility: {metric('mean_irreversibility')}")
    print(f"    Irrev method:         {metric('irrev_method')}")
    print(f"    DB violation:         {metric('detailed_balance_violation')}")
    print(f"    Complex modes:        {metric('n_complex_modes')}/{metric('n_modes')}")
    print(f"    FT ratio:             {metric('fluctuation_theorem_ratio')}")
print(bar)

In [None]:
#@title 3. View figures (run after pipeline finishes)

from IPython.display import Image, display
import glob, os

# Show every PNG in the figures directory inline, in filename order.
fig_dir = "/content/ktnd_finance/outputs/figures"
pngs = glob.glob(f"{fig_dir}/*.png")
pngs.sort()

print(f"Generated {len(pngs)} figures:\n")
for png_path in pngs:
    caption = os.path.basename(png_path)
    print(f"--- {caption} ---")
    display(Image(filename=png_path, width=800))
    print()

In [None]:
#@title 4. Download all results as zip

!cd /content/ktnd_finance && zip -rq /content/ktnd_results.zip outputs/
from google.colab import files
files.download('/content/ktnd_results.zip')
print("Download started.")

In [None]:
#@title 5. Ablation study (~1-2 hours with 3 seeds) - RECOMMENDED for PRE submission

import subprocess, time, sys

python = sys.executable

# Tell the user what is about to run before the long-running subprocess starts.
intro_lines = [
    "Running 13 ablation variants x 3 seeds...",
    "This tests sensitivity to: architecture, n_modes, lag, embedding, dropout,",
    "window size, shared weights, loss components, standardization, linear features.\n",
]
print("\n".join(intro_lines))

# Launch the ablation script from the repo root and time it.
ablation_cmd = f'{python} experiments/run_ablations.py --config config/default.yaml --n-seeds 3 --n-jobs 1'
t0 = time.time()
result = subprocess.run(ablation_cmd, shell=True, cwd="/content/ktnd_finance")
elapsed = (time.time() - t0) / 60

if result.returncode == 0:
    status = 'OK'
else:
    status = 'FAILED'
print(f"\nAblations: {status} ({elapsed:.1f} min)")

# Show summary (only if the summary CSV exists)
import pandas as pd, os
summary_path = "/content/ktnd_finance/outputs/results/ablation_summary.csv"
if os.path.exists(summary_path):
    df = pd.read_csv(summary_path)
    summary_cols = ['name', 'vamp2_mean', 'spectral_gap_mean', 'entropy_total_mean']
    print(f"\n{len(df)} ablation variants completed:")
    print(df[summary_cols].to_string(index=False))
