In [1]:
# --- Path plumbing (point to src) + autoreload ---
from pathlib import Path
import sys
import pandas as pd

%load_ext autoreload
%autoreload 2

# Resolve the project layout relative to the directory the kernel was started in:
# the notebook directory's parent is treated as the repo root, with sources in <root>/src.
NB = Path.cwd()
ROOT, SRC = NB.parent, NB.parent / "src"

# Make the project packages importable; skip if the entry is already present
# so re-running the cell does not add duplicates.
_src_entry = str(SRC)
if _src_entry not in sys.path:
    sys.path.append(_src_entry)

from data_pipeline.config import ROOT as ROOT_CFG, PROCESSED_DIR, CLEANED
from data_pipeline.loaders import load_vol_surface_wrds, load_forward_prices_wrds, load_option_volume_wrds
# Echo the resolved locations so a wrong working directory is obvious at a glance.
for _label, _location in (
    ("CWD:", NB),
    ("SRC:", SRC),
    ("ROOT:", ROOT_CFG),
    ("PROC:", PROCESSED_DIR),
    ("CLEANED:", CLEANED),
):
    print(_label, _location)

CWD: /Users/ya/Desktop/deep-hedging-rl/notebooks
SRC: /Users/ya/Desktop/deep-hedging-rl/src
ROOT: /Users/ya/Desktop/deep-hedging-rl
PROC: /Users/ya/Desktop/deep-hedging-rl/data/processed
CLEANED: /Users/ya/Desktop/deep-hedging-rl/data/processed/cleaned


## Importing and checking the data

In [2]:
# Read the cleaned SPX market panel from parquet and preview its first rows.
fp_market_spx = CLEANED.joinpath("market_plus_panel_spx.parquet")
market_spx = pd.read_parquet(path=fp_market_spx)
display(market_spx.head(n=5))

Unnamed: 0_level_0,close_gspc,vix,rate_10y,hvol_10d,hvol_14d,hvol_30d,hvol_60d,hvol_91d,hvol_122d,hvol_152d,...,hvol_365d,hvol_547d,hvol_730d,hvol_1825d,fwd_front,index,high_gspc,low_gspc,open_gspc,rv_21d
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1996-01-04,617.700012,13.78,5.65,0.066934,0.06226,0.099637,0.091679,0.088536,0.082083,0.07694,...,0.078532,0.08396,0.089418,0.102954,619.154402,11573,624.48999,613.960022,621.320007,0.125956
1996-01-04,617.700012,13.78,5.65,0.066934,0.06226,0.099637,0.091679,0.088536,0.082083,0.07694,...,0.078532,0.08396,0.089418,0.102954,620.656417,11573,624.48999,613.960022,621.320007,0.125956
1996-01-04,617.700012,13.78,5.65,0.066934,0.06226,0.099637,0.091679,0.088536,0.082083,0.07694,...,0.078532,0.08396,0.089418,0.102954,621.805572,11573,624.48999,613.960022,621.320007,0.125956
1996-01-04,617.700012,13.78,5.65,0.066934,0.06226,0.099637,0.091679,0.088536,0.082083,0.07694,...,0.078532,0.08396,0.089418,0.102954,626.574252,11573,624.48999,613.960022,621.320007,0.125956
1996-01-04,617.700012,13.78,5.65,0.066934,0.06226,0.099637,0.091679,0.088536,0.082083,0.07694,...,0.078532,0.08396,0.089418,0.102954,630.078895,11573,624.48999,613.960022,621.320007,0.125956


In [3]:
# Per-column count of missing values in the SPX panel.
_spx_nan_counts = market_spx.isna().sum()
print(_spx_nan_counts)

close_gspc    0
vix           0
rate_10y      0
hvol_10d      0
hvol_14d      0
hvol_30d      0
hvol_60d      0
hvol_91d      0
hvol_122d     0
hvol_152d     0
hvol_182d     0
hvol_273d     0
hvol_365d     0
hvol_547d     0
hvol_730d     0
hvol_1825d    0
fwd_front     0
index         0
high_gspc     0
low_gspc      0
open_gspc     0
rv_21d        0
dtype: int64


In [4]:
# Read the cleaned SPY market panel from parquet and preview its first rows.
fp_market_spy = CLEANED.joinpath("market_plus_panel_spy.parquet")
market_spy = pd.read_parquet(path=fp_market_spy)
display(market_spy.head(n=5))

Unnamed: 0_level_0,close_spy,vix,rate_10y,hvol_10d,hvol_14d,hvol_30d,hvol_60d,hvol_91d,hvol_122d,hvol_152d,...,hvol_273d,hvol_365d,hvol_547d,hvol_730d,hvol_1825d,fwd_front,high_spy,low_spy,open_spy,rv_21d
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2005-01-21,79.452278,14.36,4.16,0.125623,0.118257,0.097512,0.089121,0.09803,0.10453,0.099892,...,0.108384,0.112556,0.112634,0.137524,0.203975,116.787588,80.282316,79.363833,80.139442,0.094577
2005-01-21,79.452278,14.36,4.16,0.125623,0.118257,0.097512,0.089121,0.09803,0.10453,0.099892,...,0.108384,0.112556,0.112634,0.137524,0.203975,117.001989,80.282316,79.363833,80.139442,0.094577
2005-01-21,79.452278,14.36,4.16,0.125623,0.118257,0.097512,0.089121,0.09803,0.10453,0.099892,...,0.108384,0.112556,0.112634,0.137524,0.203975,116.70621,80.282316,79.363833,80.139442,0.094577
2005-01-21,79.452278,14.36,4.16,0.125623,0.118257,0.097512,0.089121,0.09803,0.10453,0.099892,...,0.108384,0.112556,0.112634,0.137524,0.203975,117.040972,80.282316,79.363833,80.139442,0.094577
2005-01-21,79.452278,14.36,4.16,0.125623,0.118257,0.097512,0.089121,0.09803,0.10453,0.099892,...,0.108384,0.112556,0.112634,0.137524,0.203975,117.458144,80.282316,79.363833,80.139442,0.094577


In [5]:
# Per-column count of missing values in the SPY panel.
_spy_nan_counts = market_spy.isna().sum()
print(_spy_nan_counts)

close_spy     0
vix           0
rate_10y      0
hvol_10d      0
hvol_14d      0
hvol_30d      0
hvol_60d      0
hvol_91d      0
hvol_122d     0
hvol_152d     0
hvol_182d     0
hvol_273d     0
hvol_365d     0
hvol_547d     0
hvol_730d     0
hvol_1825d    0
fwd_front     0
high_spy      0
low_spy       0
open_spy      0
rv_21d        0
dtype: int64


In [6]:
import pyarrow.dataset as ds, pandas as pd, numpy as np
from pathlib import Path
from data_pipeline.config import ROOT

# Open every cleaned SPX option part as one dataset, reading only the columns inspected below.
D = ds.dataset(ROOT / "data/processed/cleaned/SPX", format="parquet")
df = D.to_table(columns=["date","tenor_d","put_call","delta"]).to_pandas()

_put_call_values = pd.Series(df["put_call"]).dropna().unique().tolist()
print("put_call uniques:", sorted(_put_call_values))
print(df["delta"].describe())   # check if around ±1 or ±100

# Coverage probe: rows within ±15 days of a 30-day tenor and within ±0.05 of |delta| = 0.25.
# Filtering first and attaching abs_delta via assign() leaves `df` untouched
# without materialising a second full-size frame.
_near_30d = df["tenor_d"].sub(30).abs() <= 15
x = df[_near_30d].assign(abs_delta=lambda frame: frame["delta"].abs())
_near_25_delta = x["abs_delta"].sub(0.25).abs() <= 0.05
cand = x[_near_25_delta]
print("rows near 30D & 25Δ:", len(cand), "| dates covered:", cand["date"].nunique())


put_call uniques: ['C', 'P']
count    2.950670e+07
mean     1.373999e-01
std      5.965577e-01
min     -9.999990e-01
25%     -1.969120e-01
50%     -1.986000e-03
75%      7.594440e-01
max      1.000000e+00
Name: delta, dtype: float64
rows near 30D & 25Δ: 192720 | dates covered: 5291


## Simulator

In [7]:
from pathlib import Path
import pyarrow.dataset as ds
import pandas as pd
import numpy as np
import shutil

# --- streaming ATM builder ---

def _pick_best_per_date(df, target_dte, delta_target, dte_tol, delta_tol):
    # df needs: date, tenor_d, delta, bid, ask, iv, put_call
    x = df.copy()
    x = x[(x["tenor_d"].sub(target_dte).abs() <= dte_tol)]
    x["abs_delta"] = x["delta"].abs()
    x = x[(x["abs_delta"].sub(delta_target).abs() <= delta_tol)]
    if x.empty:
        return x[["date","iv"]].iloc[0:0]

    x["dte_diff"] = (x["tenor_d"] - target_dte).abs()
    x["atm_diff"] = (x["abs_delta"] - delta_target).abs()
    x["spread"]   = (x["ask"] - x["bid"]).astype("float32")

    y = (x.sort_values(["date","dte_diff","atm_diff","spread"])
           .drop_duplicates("date", keep="first"))
    return y[["date","iv","dte_diff","atm_diff","spread"]]

def build_iv_atm_streaming(clean_dir: Path, target_dte: int, out_name: str,
                           delta_target=0.50, dte_tol=15, delta_tol=0.15) -> pd.DataFrame:
    """Build a one-row-per-date implied-vol series from partitioned option data.

    Every ``part_*.parquet`` file in ``clean_dir`` is loaded on its own and
    reduced to at most one candidate row per date by ``_pick_best_per_date``;
    the per-part candidates are then ranked against each other with the same
    keys (tenor distance, delta distance, spread) to pick the final row per date.

    Candidates are accumulated in memory instead of being spilled to a
    ``__tmp_<out_name>`` directory: the reduce step has to load all of them at
    once anyway, and the on-disk spill could pick up stale ``cand_*.parquet``
    files left behind by an interrupted earlier run and leaked the directory
    whenever a part failed to load.

    Parameters
    ----------
    clean_dir : Path
        Directory containing the ``part_*.parquet`` files.
    target_dte : int
        Target tenor, in the unit of the ``tenor_d`` column.
    out_name : str
        Name given to the implied-vol column of the result.
    delta_target, dte_tol, delta_tol
        Forwarded unchanged to ``_pick_best_per_date``.

    Returns
    -------
    pandas.DataFrame
        Columns ``["date", out_name]``, sorted by date with a fresh RangeIndex.
        Empty (same two columns) when no part yields a candidate.

    Raises
    ------
    AssertionError
        If a part lacks any of ``date, tenor_d, delta, bid, ask, iv``.
    """
    required = ["date", "tenor_d", "delta", "bid", "ask", "iv"]
    rank_cols = ["date", "dte_diff", "atm_diff", "spread"]

    # MAP: per part -> <=1 row per date. Only one part is resident at a time.
    per_part = []
    for part in sorted(clean_dir.glob("part_*.parquet")):
        dset = ds.dataset(part, format="parquet")
        missing = [c for c in required if c not in dset.schema.names]
        assert not missing, f"Missing cols in {part}: {missing}"

        df = dset.to_table(columns=required).to_pandas()
        cand = _pick_best_per_date(df, target_dte, delta_target, dte_tol, delta_tol)
        if len(cand):
            per_part.append(cand)

    if not per_part:
        return pd.DataFrame(columns=["date", out_name])

    # REDUCE: the same date can win in several parts; rank those winners again.
    allc = (pd.concat(per_part, ignore_index=True)
              .sort_values(rank_cols)
              .drop_duplicates("date", keep="first"))
    return (allc[["date", "iv"]]
            .rename(columns={"iv": out_name})
            .sort_values("date")
            .reset_index(drop=True))


In [8]:
# --- build features ---

from data_pipeline.config import ROOT
SPX_CLEAN = ROOT / "data/processed/cleaned/SPX"

# One pass over the cleaned SPX parts per tenor (default 0.50 delta target => ATM).
iv30_spx = build_iv_atm_streaming(clean_dir=SPX_CLEAN, target_dte=30, out_name="iv_atm_30d_spx")
iv91_spx = build_iv_atm_streaming(clean_dir=SPX_CLEAN, target_dte=91, out_name="iv_atm_91d_spx")

# Outer join keeps dates where only one tenor is available; the slope is NaN there.
fe_spx = pd.merge(iv30_spx, iv91_spx, on="date", how="outer")
fe_spx["iv_ts_slope_spx"] = fe_spx["iv_atm_91d_spx"].sub(fe_spx["iv_atm_30d_spx"])


In [9]:
# --- merge the SPX implied-vol features onto the SPY market panel ---

# Bring the date index back as a column. reset_index() names the column "index"
# only when the index is unnamed, so rename only if "date" is not already there
# (an unconditional rename could create a second "date" column).
mp = market_spy.reset_index()
if "date" not in mp.columns:
    mp = mp.rename(columns={"index": "date"})
mp["date"] = pd.to_datetime(mp["date"]).dt.normalize()

# market_spy carries several rows per date (they differ in fwd_front). Collapse to one
# row per date BEFORE forward-filling: ffill(limit=...) counts rows, so on the duplicated
# frame limit=2 would be used up inside a single trading day.
mp = mp.drop_duplicates("date", keep="first").sort_values("date", kind="stable")

panel = (mp.merge(fe_spx, on="date", how="left", validate="one_to_one")
           .sort_values("date", kind="stable"))

# forward-fill IV gaps of at most 2 consecutive trading days; only for columns that exist
ff_cols = [c for c in ["iv_atm_30d_spx","iv_atm_91d_spx","iv_ts_slope_spx"] if c in panel.columns]
panel[ff_cols] = panel[ff_cols].ffill(limit=2)

# sanity: one row per date, and returns not all zero
assert panel["date"].is_unique
assert panel["close_spy"].pct_change().abs().sum() > 0

In [11]:
# Guard: this cell rebinds `panel` (date becomes the index, last row is trimmed), so a
# second run on the already-processed frame would fail on drop_duplicates("date") and,
# if it did not, would trim one more row. Re-run the merge cell to rebuild from scratch.
if "ret_fwd" not in panel.columns:
    # ensure clean daily ordering: one row per date, date as index
    # (stable sort keeps the "first" duplicate deterministic)
    panel = (panel.sort_values("date", kind="stable")
                  .drop_duplicates("date")
                  .set_index("date"))

    # forward return used by the env: close[t+1] / close[t] - 1, stored on row t
    panel["ret_fwd"] = panel["close_spy"].pct_change().shift(-1)

    # drop the last row (no next-close to realize)
    panel = panel.iloc[:-1]


In [13]:
# --- env & baseline rollout (no double work) ---

from simulator.env import HedgingEnv
from simulator.rewards import pnl_only
from simulator.baselines import no_hedge_policy

# Observation features, restricted to the columns actually present in the panel.
_wanted_state_cols = ["iv_atm_30d_spx","iv_atm_91d_spx","iv_ts_slope_spx","vix","rate_10y","rv_21d"]
state_cols = [col for col in _wanted_state_cols if col in panel.columns]

# Standardisation statistics are estimated on rows dated before 2020-01-01 only.
split = panel.index < pd.Timestamp("2020-01-01")
_train_feats = panel.loc[split, state_cols].astype("float32")
mu    = _train_feats.mean(axis=0).values
sigma = _train_feats.std(axis=0).clip(lower=1e-6).values   # floor avoids division by ~0


def zscore_window(w):
    """Standardise a feature window with the pre-2020 mean/std (`mu`, `sigma`)."""
    return (w - mu) / sigma


env = HedgingEnv(
    df=panel,
    features=state_cols,
    reward_fn=pnl_only,
    window=60,
    txn_cost_bps=1.0,
    pos_limit=1.0,
    scaler=zscore_window,
    rng_seed=0,
)

# Baseline: roll the no-hedge policy through the environment and show the raw result.
_baseline_policy = no_hedge_policy()
result = env.rollout(_baseline_policy)
result


{'rewards': array([ 0.,  0.,  0.,  0.,  0., -0.,  0., -0., -0.,  0.,  0.,  0., -0.,
         0.,  0.,  0.,  0., -0., -0.,  0.,  0., -0.,  0.,  0.,  0.,  0.,
         0., -0., -0.,  0.,  0., -0.,  0.,  0., -0., -0.,  0., -0., -0.,
        -0., -0., -0.,  0., -0.,  0.,  0.,  0.,  0.,  0., -0., -0., -0.,
         0., -0., -0., -0., -0., -0., -0.,  0., -0., -0.,  0., -0.,  0.,
        -0., -0.,  0., -0., -0.,  0.,  0., -0., -0.,  0.,  0., -0., -0.,
         0., -0., -0.,  0., -0.,  0.,  0.,  0.,  0.,  0., -0.,  0.,  0.,
        -0., -0.,  0.,  0.,  0., -0., -0.,  0.,  0.,  0., -0., -0.,  0.,
         0.,  0.,  0.,  0., -0.,  0.,  0.,  0., -0., -0., -0.,  0., -0.,
         0.,  0., -0., -0.,  0.,  0.,  0.,  0.,  0.,  0., -0.,  0.,  0.,
         0.,  0.,  0., -0.,  0., -0., -0.,  0., -0.,  0., -0., -0.,  0.,
         0., -0., -0.,  0., -0.,  0.,  0., -0., -0., -0.,  0.,  0.,  0.,
        -0.,  0., -0.,  0., -0., -0., -0.,  0., -0.,  0., -0.,  0., -0.,
         0.,  0.,  0.,  0., -0.,  0., -0

In [25]:
# overlay: use the hedge-leg PnL computed by the env
def overlay_pnl(pnl, info):
    """Overlay reward: the PnL handed in by the env, converted to a plain float.

    `info` is accepted for signature compatibility and is not read here
    (per the original note it carries 'ret', 'pos', 'cost', 'nav', etc.).
    """
    reward = float(pnl)
    return reward

# residual: long 1x SPY hedged by h_t (pos); deduct cost once
def residual_pnl(_pnl, info):
    """Residual reward: ``(1 - pos) * ret - cost``.

    Reads ``info["ret"]`` (forward return), ``info["pos"]`` (hedge ratio after
    the action) and ``info["cost"]``; the incoming ``_pnl`` is ignored.
    Represents a long 1x position hedged by ``pos``, with cost deducted once.
    """
    ret_fwd, hedge, cost = (float(info[key]) for key in ("ret", "pos", "cost"))
    unhedged_fraction = 1.0 - hedge
    return unhedged_fraction * ret_fwd - cost


In [26]:
# Both environments share every setting except the reward function.
_env_common = dict(
    df=panel,
    features=state_cols,
    window=60,
    txn_cost_bps=1.0,
    pos_limit=1.0,
    scaler=zscore_window,
    rng_seed=0,
)

env_overlay = HedgingEnv(reward_fn=overlay_pnl, **_env_common)

env_resid = HedgingEnv(reward_fn=residual_pnl, **_env_common)


In [31]:
from simulator.baselines import momentum_policy, volatility_targeting, delta_hedge_policy

def ann_sharpe(r, freq=252):
    """Annualised mean-over-volatility ratio of a per-period return series.

    `r` needs `.mean()` and `.std(ddof=...)` (pandas Series or NumPy array);
    `freq` is the number of periods per year. Returns 0.0 when the sample
    standard deviation is zero or NaN (e.g. fewer than two observations).
    """
    mean_ret = r.mean()
    vol = r.std(ddof=1)
    if not (vol > 0):
        return 0.0
    return (mean_ret * freq) / (vol * np.sqrt(freq))

policy = momentum_policy(feature_idx=0, k=1.0)

# Run the same policy through both reward definitions.
# (per the original note, rollout returns a dict with 'rewards','nav','positions','rets','costs')
res_overlay = env_overlay.rollout(policy)
res_resid   = env_resid.rollout(policy)

# Overlay metrics
overlay_r = pd.Series(res_overlay["rewards"])
print("Overlay Sharpe:", ann_sharpe(overlay_r))

# Tracking error of the residual book against the unhedged forward return.
# NOTE(review): this pairs rewards[i] with panel row i; the envs are built with window=60,
# so the rollout may start later than row 0 - confirm the alignment against HedgingEnv.
n = len(res_resid["rewards"])
r_resid    = np.asarray(res_resid["rewards"], dtype=float)
r_unhedged = panel["ret_fwd"].iloc[:n].astype(float).to_numpy()

mask = np.isfinite(r_unhedged) & np.isfinite(r_resid)   # ignore NaN/inf on either side
diff = r_resid[mask] - r_unhedged[mask]
TE   = diff.std(ddof=1) * np.sqrt(252)

# Residual metrics
unhedged_r = pd.Series(panel["ret_fwd"].iloc[:n])
resid_r    = pd.Series(res_resid["rewards"])
print("Residual TE (annualized):", TE)
print("Residual Sharpe:", ann_sharpe(resid_r))


Overlay Sharpe: -0.4622416406316541
Residual TE (annualized): 0.6848671321047259
Residual Sharpe: 1.499590221220459


In [32]:
# Volatility-targeting baseline driven by feature 0 of the state, 15% annualised target.
policy = volatility_targeting(feature_idx=0, ann_vol_target=0.15)

# Same policy through both environments, overlay first and residual second.
res_overlay, res_resid = (
    _env.rollout(policy) for _env in (env_overlay, env_resid)
)

In [33]:
import numpy as np, pandas as pd

overlay_r = pd.Series(res_overlay['rewards'])
resid_r   = pd.Series(res_resid['rewards'])

# Use the notebook's ann_sharpe helper (defined in the momentum cell) rather than repeating
# the formula inline: same ddof=1 estimate, but it returns 0.0 instead of inf/NaN when the
# reward series has zero or undefined volatility.
print("Overlay Sharpe:", ann_sharpe(overlay_r))
print("Residual Sharpe:", ann_sharpe(resid_r))


Overlay Sharpe: 1.4762078728637804
Residual Sharpe: 0.8222117909828652


In [34]:
# Annualised tracking error of the residual book vs the unhedged forward return,
# computed the same way as in the momentum cell: cast to float and drop pairs where
# either side is non-finite, so a single NaN cannot turn the whole statistic into NaN.
# NOTE(review): pairs rewards[i] with panel row i; the envs use window=60, so the rollout
# may start later than row 0 - confirm the alignment against HedgingEnv.
r_resid    = np.asarray(res_resid['rewards'], dtype=float)
n          = len(r_resid)
r_unhedged = panel['ret_fwd'].iloc[:n].astype(float).to_numpy()

mask = np.isfinite(r_unhedged) & np.isfinite(r_resid)
diff = r_resid[mask] - r_unhedged[mask]
TE = diff.std(ddof=1) * np.sqrt(252)
print("Residual TE:", TE)


Residual TE: 0.4399224903377772
