# Deep Hedging Replication

This notebook replicates the numerical experiments from:
- **Buehler et al. (Deep Hedging)** - Section 5
- **Kozyra (Oxford MSc thesis)** - RNN/LSTM and two-stage training

## Setup

In [None]:
import sys
sys.path.insert(0, '../src')

import numpy as np
import torch
import matplotlib.pyplot as plt
import seaborn as sns

from utils.config import Config, set_seed
from utils.logging_utils import ExperimentLogger
from env.data_generator import DataGenerator
from env.heston import HestonParams, HestonModel
from models.deep_hedging import DeepHedgingModel
from models.kozyra_models import HedgingRNN, HedgingLSTM
from models.baselines import BlackScholesHedge, LelandHedge, WhalleyWilmottHedge, evaluate_baseline
from train.trainer import DeepHedgingTrainer
from eval.evaluator import HedgingEvaluator
from eval.plotting import plot_pnl_histogram, plot_pnl_boxplot, plot_delta_paths

# Fix the seed through the project's helper so reruns are reproducible.
set_seed(42)

# Pick the compute device: use CUDA when PyTorch reports it, else the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Using device: {device}")

## 1. Configuration

Setting up parameters as specified in Buehler et al.

In [None]:
# --- Market parameters ---
n_steps = 30                 # time steps
T = 30 / 365                 # maturity in years (30 days)
S0, K = 100.0, 100.0         # spot and strike; K == S0, i.e. ATM
r = 0.0
cost_multiplier = 0.0        # start frictionless

# --- Heston parameters ---
heston_settings = dict(
    S0=S0,
    v0=0.04,     # initial variance
    r=r,
    kappa=1.0,   # mean reversion
    theta=0.04,  # long-term variance
    sigma=0.2,   # vol of vol
    rho=-0.7,    # correlation
)
heston_params = HestonParams(**heston_settings)

# --- Training parameters ---
n_train, n_val, n_test = 90000, 10000, 100000
batch_size = 256
learning_rate = 0.005
n_epochs = 50
lambda_risk = 1.0

# Echo the key settings in a single call, one item per output line.
print(
    "Configuration:",
    f"  Time steps: {n_steps}",
    f"  Maturity: {T:.4f} years ({T*365:.0f} days)",
    f"  Train/Val/Test: {n_train}/{n_val}/{n_test}",
    sep="\n",
)

## 2. Data Generation

In [None]:
# Build the data generator from the market and Heston settings defined above.
generator_settings = dict(
    n_steps=n_steps,
    T=T,
    S0=S0,
    K=K,
    r=r,
    cost_multiplier=cost_multiplier,
    model_type='heston',
    heston_params=heston_params,
)
generator = DataGenerator(**generator_settings)

# Produce the train / validation / test datasets in one call.
train_data, val_data, test_data = generator.generate_train_val_test(
    n_train=n_train, n_val=n_val, n_test=n_test, base_seed=42,
)

# Wrap the three datasets in loaders that share one batch size.
train_loader, val_loader, test_loader = generator.get_dataloaders(
    train_data, val_data, test_data, batch_size=batch_size,
)

# Report the training-set dimensions, one item per output line.
print(
    f"\nDataset info:",
    f"  Features shape: {train_data.features.shape}",
    f"  Stock paths shape: {train_data.stock_paths.shape}",
    f"  Input dim: {train_data.n_features}",
    sep="\n",
)

## 3. Visualize Sample Paths

In [None]:
# Two side-by-side panels: sample price paths (left) and the payoff
# histogram (right), both taken from the test set.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax1, ax2 = axes

# Sample stock paths. The slice caps the selection at n_sample rows, so
# iterate over the rows actually returned instead of indexing 0..n_sample-1;
# indexing by count would raise IndexError on a test set smaller than n_sample.
n_sample = 50
sample_paths = test_data.stock_paths[:n_sample].numpy()

for path in sample_paths:
    ax1.plot(path, alpha=0.3, linewidth=0.8)
ax1.axhline(K, color='red', linestyle='--', label=f'Strike K={K}')
ax1.set_xlabel('Time Step')
ax1.set_ylabel('Stock Price')
ax1.set_title('Sample Stock Price Paths (Heston Model)')
ax1.legend()

# Payoff distribution; the mean payoff is shown in the panel title.
payoffs = test_data.payoffs.numpy()
ax2.hist(payoffs, bins=50, edgecolor='black', alpha=0.7)
ax2.set_xlabel('Payoff')
ax2.set_ylabel('Frequency')
ax2.set_title(f'Option Payoff Distribution (Mean={payoffs.mean():.2f})')

plt.tight_layout()
plt.show()

## 4. Train Deep Hedging Model (Buehler et al.)

In [None]:
# The model's input width is the feature count reported by the training set.
input_dim = train_data.n_features

# Collect the constructor arguments first, then build the model from them.
deep_hedging_settings = dict(
    input_dim=input_dim,
    n_steps=n_steps,
    cost_multiplier=cost_multiplier,
    lambda_risk=lambda_risk,
    share_weights=False,
)
deep_hedging_model = DeepHedgingModel(**deep_hedging_settings)

# Total element count across all model parameters.
print(f"Model parameters: {sum(p.numel() for p in deep_hedging_model.parameters()):,}")

In [None]:
# Set up the trainer with the same risk and cost settings used to build the model.
trainer_settings = dict(
    model=deep_hedging_model,
    lambda_risk=lambda_risk,
    cost_multiplier=cost_multiplier,
    learning_rate=learning_rate,
    device=device,
)
trainer = DeepHedgingTrainer(**trainer_settings)

# Fit on the training loader while monitoring the validation loader; the
# returned history is indexed by metric name in the plotting cell that follows.
fit_options = dict(n_epochs=n_epochs, patience=10, verbose=True)
history = trainer.fit(train_loader, val_loader, **fit_options)

In [None]:
# Learning curves: losses on the left panel, validation mean P&L on the right.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
ax1, ax2 = axes

# Left panel: training and validation loss against epoch.
for history_key, curve_label in (('train_loss', 'Train'), ('val_loss', 'Validation')):
    ax1.plot(history[history_key], label=curve_label)
ax1.set_title('Learning Curves')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
ax1.legend()

# Right panel: validation mean P&L against epoch.
ax2.plot(history['val_mean_pnl'], label='Mean P&L')
ax2.set_title('Validation P&L')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Mean P&L')
ax2.legend()

plt.tight_layout()
plt.show()

## 5. Evaluate and Compare with Baselines

In [None]:
# Score the trained deep-hedging model on the test loader; the call yields
# a metrics mapping plus the P&L and delta outputs used by later cells.
dh_metrics, dh_pnl, dh_deltas = trainer.evaluate(test_loader)

# Header followed by one "name: value" line per metric, four decimals each.
print(
    "Deep Hedging Results:",
    *(f"  {k}: {v:.4f}" for k, v in dh_metrics.items()),
    sep="\n",
)

In [None]:
# Inputs shared by every baseline: test-set paths and payoffs as NumPy
# arrays, and a uniform time grid with n_steps + 1 points on [0, T].
stock_paths = test_data.stock_paths.numpy()
payoffs = test_data.payoffs.numpy()
time_grid = np.linspace(0, T, num=n_steps + 1)

# Positional arguments that follow the hedge object in evaluate_baseline.
baseline_args = (stock_paths, time_grid, K, T, cost_multiplier)

# Black-Scholes delta hedge.
bs_hedge = BlackScholesHedge(sigma=0.2, r=r)
bs_deltas = bs_hedge.compute_deltas_vectorized(stock_paths, time_grid, K, T)
bs_pnl, bs_info = evaluate_baseline(bs_hedge, *baseline_args)

# Leland hedge, constructed with the same cost level used for evaluation.
leland_hedge = LelandHedge(sigma=0.2, r=r, cost=cost_multiplier)
leland_pnl, leland_info = evaluate_baseline(leland_hedge, *baseline_args)

# Mean / standard deviation of P&L per strategy, deep hedging included for reference.
print(
    "\nBaseline Results:",
    f"  BS Delta:  mean={np.mean(bs_pnl):.4f}, std={np.std(bs_pnl):.4f}",
    f"  Leland:    mean={np.mean(leland_pnl):.4f}, std={np.std(leland_pnl):.4f}",
    f"  Deep Hedging: mean={np.mean(dh_pnl):.4f}, std={np.std(dh_pnl):.4f}",
    sep="\n",
)

In [None]:
# Map each strategy label to its P&L series; insertion order is
# Deep Hedging, BS Delta, Leland.
strategy_labels = ('Deep Hedging', 'BS Delta', 'Leland')
strategy_pnls = (dh_pnl, bs_pnl, leland_pnl)
pnl_dict = dict(zip(strategy_labels, strategy_pnls))

# Overlay the P&L distributions using the project's histogram helper.
fig = plot_pnl_histogram(pnl_dict, title='P&L Distribution Comparison')
plt.show()

In [None]:
# Boxplot comparison: pass the strategy -> P&L mapping (pnl_dict) to the
# project's boxplot helper and display the result.
# NOTE(review): the helper presumably returns the Matplotlib figure, since its
# result is bound to `fig` — confirm against eval.plotting.
fig = plot_pnl_boxplot(pnl_dict, title='P&L Comparison')
plt.show()

## 6. Train Kozyra RNN Model

In [None]:
# Recurrent hedging network (Kozyra specification): two layers, 50 hidden units.
rnn_settings = dict(state_dim=input_dim, hidden_size=50, num_layers=2)
rnn_model = HedgingRNN(**rnn_settings)

# Rebuild the loaders with the Kozyra batch size. The result is kept as one
# (train, val, test) sequence that later cells index by position.
kozyra_loader = generator.get_dataloaders(
    train_data, val_data, test_data, batch_size=200,
)[:3]

# Same trainer class as the deep-hedging run, but with the Kozyra learning rate.
rnn_trainer_settings = dict(
    model=rnn_model,
    lambda_risk=lambda_risk,
    cost_multiplier=cost_multiplier,
    learning_rate=0.0005,
    device=device,
)
rnn_trainer = DeepHedgingTrainer(**rnn_trainer_settings)

# Total element count across all RNN parameters.
print(f"RNN parameters: {sum(p.numel() for p in rnn_model.parameters()):,}")

In [None]:
# Fit the RNN on the Kozyra loaders: position 0 is the training loader and
# position 1 the validation loader.
rnn_train_loader, rnn_val_loader = kozyra_loader[0], kozyra_loader[1]
rnn_fit_options = dict(n_epochs=50, patience=10, verbose=True)
rnn_history = rnn_trainer.fit(rnn_train_loader, rnn_val_loader, **rnn_fit_options)

In [None]:
# Score the trained RNN on the test loader (position 2 of the Kozyra loaders).
rnn_metrics, rnn_pnl, rnn_deltas = rnn_trainer.evaluate(kozyra_loader[2])

# Header followed by one "name: value" line per metric, four decimals each.
print(
    "\nKozyra RNN Results:",
    *(f"  {k}: {v:.4f}" for k, v in rnn_metrics.items()),
    sep="\n",
)

## 7. Final Comparison

In [None]:
# Every strategy's P&L series, keyed by display label (table row order).
all_pnl = {
    'Deep Hedging': dh_pnl,
    'Kozyra RNN': rnn_pnl,
    'BS Delta': bs_pnl,
    'Leland': leland_pnl,
}

# Summary table: 70-character rules frame the header and the body.
heavy_rule = "=" * 70
light_rule = "-" * 70

print("\n" + heavy_rule)
print("FINAL RESULTS SUMMARY")
print(heavy_rule)
print(f"{'Strategy':<20} {'Mean P&L':>12} {'Std P&L':>12} {'VaR95':>12} {'CVaR95':>12}")
print(light_rule)

for name, pnl in all_pnl.items():
    # Work on losses (negated P&L): VaR95 is their 95th percentile and
    # CVaR95 the mean of the losses at or above that level.
    losses = -pnl
    var95 = np.percentile(losses, 95)
    tail_losses = losses[losses >= var95]
    cvar95 = np.mean(tail_losses)
    print(f"{name:<20} {np.mean(pnl):>12.4f} {np.std(pnl):>12.4f} {var95:>12.4f} {cvar95:>12.4f}")

print(heavy_rule)

In [None]:
# Final visualization: boxplot of every strategy's P&L, saved as a PDF and shown.
from pathlib import Path

fig = plot_pnl_boxplot(all_pnl, title='Strategy Comparison')

# savefig does not create missing directories, so make sure the output
# folder exists first; otherwise a fresh checkout fails with FileNotFoundError.
figure_path = Path('../figures/strategy_comparison.pdf')
figure_path.parent.mkdir(parents=True, exist_ok=True)

plt.savefig(figure_path, bbox_inches='tight')
plt.show()

## 8. Statistical Tests

In [None]:
from utils.statistics import paired_ttest, bootstrap_ci

# Paired t-test of Deep Hedging vs BS Delta P&L, flagged at the 5% level.
t_stat, p_val = paired_ttest(dh_pnl, bs_pnl)
print(
    f"Deep Hedging vs BS Delta:",
    f"  t-statistic: {t_stat:.4f}",
    f"  p-value: {p_val:.6f}",
    f"  Significant at 5%: {p_val < 0.05}",
    sep="\n",
)

# Bootstrap confidence interval (1000 resamples) for the mean of the
# element-wise P&L difference, deep hedging minus Black-Scholes.
diff = dh_pnl - bs_pnl
bootstrap_result = bootstrap_ci(diff, np.mean, n_bootstrap=1000)
mean_diff, ci_lower, ci_upper = bootstrap_result
print(f"\nMean difference: {mean_diff:.4f} [{ci_lower:.4f}, {ci_upper:.4f}]")

## Next Steps

1. Add transaction costs and re-run experiments
2. Implement two-stage training (Kozyra)
3. Test enhanced models (Transformer, Signatures)
4. Validate on real market data