# 06_Walk_Forward_Validation

Perform rolling walk-forward train/test splits and validate out-of-sample agent performance.


In [None]:
from src.agents.ppo_agent import PPOAgent
from src.agents.evaluation import evaluate_model
from src.environment.Multi_asset_env import MultiAsset21DeepHedgingEnv
from src.environment.option_pricing import create_synthetic_option_chain
from src.utils.data_utils import download_market_data
from src.config.settings import get_config

import pandas as pd
import matplotlib.pyplot as plt

# Load market data using the 'data' section of the project configuration.
cfg = get_config('data')
df = download_market_data(**cfg)

# Fetch the option settings once and reuse them below instead of calling
# get_config('option') separately for every field.
option_cfg = get_config('option')

# Option chain over the full sample (the walk-forward cell rebuilds chains per fold).
option_chain = create_synthetic_option_chain(df, option_cfg)

strikes = option_cfg['strike_offsets']
expiries = option_cfg['expiry_days']
types_ = option_cfg['option_types']

# One instrument spec per (expiry, strike offset, option type) combination,
# ordered expiry-major, then strike offset, then type.
asset_universe = [{'strike_offset': s, 'expiry_days': e, 'type': t}
                  for e in expiries for s in strikes for t in types_]


In [None]:
# Rolling walk-forward validation: train on `train_years` of data, evaluate on the
# following `test_years`, then slide both windows forward by `test_years`.
train_years = 3
test_years = 1

# Option settings are identical for every fold, so look them up once.
option_cfg = get_config('option')

# Columns of the per-fold summary; declared up front so the summary frame has a
# stable schema even when no fold completes.
wf_columns = ['train_start', 'train_end', 'test_start', 'test_end', 'sharpe', 'mean_reward']

all_dates = df['date'].sort_values().unique()
# Coerce to pandas Timestamps so every recorded fold boundary has the same type,
# whatever scalar type `.unique()` hands back.
first_date = pd.Timestamp(all_dates[0])
last_date = pd.Timestamp(all_dates[-1])

results_wf = []
current_start = first_date

while True:
    # Window boundaries are inclusive; the test window starts the day after training ends.
    train_start = current_start
    train_end = train_start + pd.DateOffset(years=train_years) - pd.Timedelta(days=1)
    test_start = train_end + pd.Timedelta(days=1)
    test_end = test_start + pd.DateOffset(years=test_years) - pd.Timedelta(days=1)

    # Stop once a full test window no longer fits inside the available data.
    if test_end > last_date:
        break

    df_train = df[(df['date'] >= train_start) & (df['date'] <= train_end)].reset_index(drop=True)
    df_test = df[(df['date'] >= test_start) & (df['date'] <= test_end)].reset_index(drop=True)

    # Validate fold sizes BEFORE building option chains so no work is spent on a
    # fold that is about to be rejected. A too-small fold ends the walk-forward.
    if len(df_train) < 100 or len(df_test) < 50:
        break

    chain_train = create_synthetic_option_chain(df_train, option_cfg)
    chain_test = create_synthetic_option_chain(df_test, option_cfg)

    # Train a fresh agent on this fold's training window only.
    env_train = MultiAsset21DeepHedgingEnv(df_train, chain_train, asset_universe)
    agent = PPOAgent(env_train)
    model = agent.create_model()
    model.learn(total_timesteps=15000)

    # Out-of-sample evaluation on the window immediately following training.
    env_test = MultiAsset21DeepHedgingEnv(df_test, chain_test, asset_universe)
    metrics = evaluate_model(model, env_test, episodes=5)
    results_wf.append({
        'train_start': train_start,
        'train_end': train_end,
        'test_start': test_start,
        'test_end': test_end,
        'sharpe': metrics['sharpe_ratio'],
        'mean_reward': metrics['mean_reward'],
    })

    # Slide the training window forward by one test period.
    current_start = current_start + pd.DateOffset(years=test_years)

# Explicit columns keep `wf_summary['test_end']` valid when `results_wf` is empty
# (e.g. the data span is shorter than train_years + test_years).
wf_summary = pd.DataFrame(results_wf, columns=wf_columns)
wf_summary['test_end'] = pd.to_datetime(wf_summary['test_end'])
wf_summary


## 6.2 Walk-Forward OOS Performance


In [None]:
# Out-of-sample Sharpe ratio per fold, plotted against the end date of each test window.
plt.plot(wf_summary['test_end'], wf_summary['sharpe'], marker='o', label='Walk-forward OOS Sharpe')
plt.title("Walk-forward Out-of-Sample Sharpe Ratio")
plt.xlabel("End of Test Period")
plt.ylabel("Sharpe Ratio")
# Without a legend call the `label` passed to plt.plot is never displayed.
plt.legend()
plt.grid()
plt.show()


In [None]:
# Out-of-sample mean reward per fold, plotted against the end date of each test window.
# NOTE(review): the series label says "PnL" while the plotted column is `mean_reward`;
# confirm the two are equivalent for this environment or rename the label.
plt.plot(wf_summary['test_end'], wf_summary['mean_reward'], marker='s', label='Walk-forward OOS PnL')
plt.title("Walk-forward Out-of-Sample Mean Reward")
plt.xlabel("End of Test Period")
plt.ylabel("Mean Reward")
# Without a legend call the `label` passed to plt.plot is never displayed.
plt.legend()
plt.grid()
plt.show()
