In [2]:
%pip install torch

Defaulting to user installation because normal site-packages is not writeable
Collecting torch
  Downloading torch-2.6.0-cp39-none-macosx_11_0_arm64.whl (66.5 MB)
[K     |████████████████████████████████| 66.5 MB 9.2 MB/s eta 0:00:012     |████████████████▋               | 34.5 MB 20.4 MB/s eta 0:00:02
[?25hCollecting sympy==1.13.1
  Using cached sympy-1.13.1-py3-none-any.whl (6.2 MB)
Collecting filelock
  Downloading filelock-3.17.0-py3-none-any.whl (16 kB)
Collecting networkx
  Downloading networkx-3.2.1-py3-none-any.whl (1.6 MB)
[K     |████████████████████████████████| 1.6 MB 22.0 MB/s eta 0:00:01
[?25hCollecting fsspec
  Downloading fsspec-2025.2.0-py3-none-any.whl (184 kB)
[K     |████████████████████████████████| 184 kB 19.3 MB/s eta 0:00:01
[?25hCollecting jinja2
  Using cached jinja2-3.1.5-py3-none-any.whl (134 kB)
Collecting mpmath<1.4,>=1.1.0
  Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)
Collecting MarkupSafe>=2.0
  Downloading MarkupSafe-3.0.2-cp39-cp39-macosx

In [3]:
from typing import List, Optional, Tuple

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

class HedgingNetwork(nn.Module):
    """Small feed-forward policy mapping a state vector to one bounded action.

    The stack is Linear -> ReLU -> Linear -> ReLU -> Linear -> Tanh, so the
    single output always lies in [-1, 1]; callers scale it to a position change.

    Args:
        input_dim: Number of features in the state vector.
        hidden_dim: Width of both hidden layers.
    """

    def __init__(self, input_dim: int = 4, hidden_dim: int = 64):
        super().__init__()

        # Two identical hidden stages (Linear followed by ReLU).
        stages = []
        for fan_in, fan_out in ((input_dim, hidden_dim), (hidden_dim, hidden_dim)):
            stages.append(nn.Linear(fan_in, fan_out))
            stages.append(nn.ReLU())

        # Scalar head; Tanh bounds the output to limit position changes.
        stages.append(nn.Linear(hidden_dim, 1))
        stages.append(nn.Tanh())

        self.network = nn.Sequential(*stages)

    def forward(self, x):
        """Return the bounded action for state tensor ``x`` (last dim = input_dim)."""
        bounded_action = self.network(x)
        return bounded_action

class RLHedger:
    """Hedging policy trained by gradient ascent on a risk-adjusted episode P&L.

    For every row of a price path the policy network emits a bounded action,
    which is scaled by ``action_scale`` into a position adjustment (in
    contracts). One episode's objective is ``total_pnl - lambda_risk * risk``
    as defined by :meth:`calculate_pnl`.

    Args:
        initial_options: Multiplier applied to the option payoff term.
        learning_rate: Adam learning rate for the policy network.
        lambda_risk: Weight of the risk penalty in the objective.
        transaction_cost: Proportional cost rate; only applied when > 0.
        action_scale: Factor mapping the network output in [-1, 1] to an
            adjustment size (default 0.1, the value previously hard-coded).
    """

    def __init__(self,
                 initial_options: int = 100,
                 learning_rate: float = 1e-4,
                 lambda_risk: float = 0.5,
                 transaction_cost: float = 0.0,
                 action_scale: float = 0.1):

        self.initial_options = initial_options
        self.lambda_risk = lambda_risk
        self.transaction_cost = transaction_cost
        self.action_scale = action_scale

        # Initialize network and optimizer
        self.policy = HedgingNetwork()
        self.optimizer = optim.Adam(self.policy.parameters(), lr=learning_rate)

    def get_state(self,
                  df: pd.DataFrame,
                  t: int,
                  position: Optional[float] = None) -> torch.Tensor:
        """Build the 4-feature state vector for row ``t`` of ``df``.

        Args:
            df: Path with 'Asset_Price', 'Call_Price' and 'Time' columns.
            t: Zero-based row position (not an index label).
            position: Current cumulative hedge position. When omitted, the
                value is read from a 'Total_Adjustments' column if the frame
                has one, otherwise 0 is used.

        Returns:
            float32 tensor ``[asset_price, call_price, time, position]``.
        """
        if position is None:
            if 'Total_Adjustments' in df.columns:
                position = df['Total_Adjustments'].iloc[t]
            else:
                position = 0.0

        # .iloc keeps this positional, so frames whose index is not 0..n-1
        # (slices, datetime indexes) still return the t-th row.
        # NOTE(review): features are fed raw; large price levels may saturate
        # the Tanh head — consider normalising upstream.
        features = [
            df['Asset_Price'].iloc[t],
            df['Call_Price'].iloc[t],
            df['Time'].iloc[t],  # Time to maturity
            position,            # Current hedge position
        ]
        return torch.tensor([float(value) for value in features],
                            dtype=torch.float32)

    def calculate_pnl(self,
                      df: pd.DataFrame,
                      adjustments: List[float],
                      K: float) -> Tuple[float, float]:
        """Calculate total P&L and the risk measure for one episode.

        Args:
            df: Price path; not modified (a copy is used internally).
            adjustments: One position adjustment per row of ``df``.
            K: Option strike.

        Returns:
            ``(total_pnl, risk)`` where ``risk`` is the population standard
            deviation of the per-step adjustment cash flows.
        """
        df = df.copy()

        # Add hedging adjustments to dataframe
        df['Adjustments_(Contracts)'] = adjustments
        df['Total_Adjustments'] = np.cumsum(adjustments)
        df['Adjustment_Cash_Flow'] = -df['Adjustments_(Contracts)'] * df['Asset_Price']

        first_price = df['Asset_Price'].iloc[0]
        last_price = df['Asset_Price'].iloc[-1]

        # Calculate components of P&L
        adjustment_cash_flows = df['Adjustment_Cash_Flow'].sum()
        option_payoff = self.initial_options * (
            max(last_price - K, 0) - df['Call_Price'].iloc[0]
        )

        # The first adjustment is treated as the initial hedge position.
        hedge_pnl = adjustments[0] * (first_price - last_price)

        # Transaction costs: rate * total traded size * mean asset price
        if self.transaction_cost > 0:
            transaction_costs = (self.transaction_cost
                                 * np.abs(adjustments).sum()
                                 * df['Asset_Price'].mean())
        else:
            transaction_costs = 0

        total_pnl = adjustment_cash_flows + option_payoff + hedge_pnl - transaction_costs

        # Risk measure: standard deviation of the per-step cash flows
        risk = np.std(df['Adjustment_Cash_Flow'].values)

        return float(total_pnl), float(risk)

    def _episode_objective(self,
                           df: pd.DataFrame,
                           actions: torch.Tensor,
                           K: float) -> torch.Tensor:
        """Differentiable twin of ``calculate_pnl``: pnl - lambda_risk * risk.

        Uses the same formulas as :meth:`calculate_pnl`, but keeps everything
        that depends on ``actions`` as torch operations so gradients can flow
        back into the policy network.
        """
        prices = torch.as_tensor(df['Asset_Price'].to_numpy(dtype=np.float64))
        # float64 so the value matches the NumPy/pandas computation closely.
        actions = actions.to(torch.float64)

        first_price = float(df['Asset_Price'].iloc[0])
        last_price = float(df['Asset_Price'].iloc[-1])

        cash_flows = -actions * prices
        # Constant w.r.t. the actions; kept so the reported objective agrees
        # with calculate_pnl.
        option_payoff = self.initial_options * (
            max(last_price - K, 0) - float(df['Call_Price'].iloc[0])
        )
        hedge_pnl = actions[0] * (first_price - last_price)

        if self.transaction_cost > 0:
            transaction_costs = self.transaction_cost * actions.abs().sum() * prices.mean()
        else:
            transaction_costs = 0.0

        total_pnl = cash_flows.sum() + option_payoff + hedge_pnl - transaction_costs

        # Population variance (matches np.std's default ddof=0). sqrt has an
        # infinite slope at 0, which would turn the gradient into NaN, so a
        # zero variance is used directly (value 0, gradient 0).
        variance = cash_flows.var(unbiased=False)
        risk = torch.sqrt(variance) if variance.item() > 0 else variance

        return total_pnl - self.lambda_risk * risk

    def train_episode(self, df: pd.DataFrame, K: float) -> Tuple[float, List[float]]:
        """Run one gradient step on a single price path.

        Args:
            df: Price path with 'Asset_Price', 'Call_Price' and 'Time' columns.
            K: Option strike.

        Returns:
            ``(objective, adjustments)``: the objective value before the
            parameter update and the adjustments that produced it.

        Raises:
            ValueError: If ``df`` has no rows.
        """
        if len(df) == 0:
            raise ValueError("df must contain at least one time step")

        self.optimizer.zero_grad()

        actions = []
        position = 0.0

        # Generate hedging decisions
        for t in range(len(df)):
            state = self.get_state(df, t, position=position)
            # Keep the action as a tensor: calling .item() here would detach
            # it from the graph and leave nothing to backpropagate.
            action = self.policy(state).squeeze(-1) * self.action_scale
            actions.append(action)
            # The running position is fed back as a plain observation
            # (detached), so gradients flow only through the actions.
            position += action.item()

        objective = self._episode_objective(df, torch.stack(actions), K)

        # Backpropagate (maximise the objective by minimising its negative)
        (-objective).backward()
        self.optimizer.step()

        return objective.item(), [action.item() for action in actions]

    def hedge(self, df: pd.DataFrame) -> pd.DataFrame:
        """Apply the learned hedging strategy to a price path.

        Returns:
            A copy of ``df`` with 'Adjustments_(Contracts)',
            'Total_Adjustments' and 'Adjustment_Cash_Flow' columns added.
        """
        df = df.copy()
        adjustments = []
        position = 0.0

        with torch.no_grad():
            for t in range(len(df)):
                state = self.get_state(df, t, position=position)
                adjustment = self.policy(state).item() * self.action_scale
                adjustments.append(adjustment)
                position += adjustment

        df['Adjustments_(Contracts)'] = adjustments
        df['Total_Adjustments'] = np.cumsum(adjustments)
        df['Adjustment_Cash_Flow'] = -df['Adjustments_(Contracts)'] * df['Asset_Price']

        return df

# Training driver: builds an RLHedger and trains it on randomly sampled price paths
def train_hedger(price_paths: List[pd.DataFrame], 
                 K: float,
                 n_episodes: int = 1000,
                 **kwargs):
    """Create an ``RLHedger`` and train it on randomly sampled price paths.

    Args:
        price_paths: Candidate paths; one is drawn uniformly (with
            replacement, via ``np.random.randint``) for every episode.
        K: Option strike forwarded to ``RLHedger.train_episode``.
        n_episodes: Number of training episodes to run.
        **kwargs: Forwarded unchanged to the ``RLHedger`` constructor.

    Returns:
        ``(hedger, training_results)`` — the trained hedger and the list of
        per-episode objective values, in episode order.
    """
    agent = RLHedger(**kwargs)
    objective_history = []

    for episode_idx in range(n_episodes):
        # Draw this episode's path at random.
        chosen = np.random.randint(len(price_paths))
        episode_path = price_paths[chosen]

        # One training step; the adjustments themselves are not needed here.
        episode_objective, _ = agent.train_episode(episode_path, K)
        objective_history.append(episode_objective)

        # Progress line every 100th episode (including episode 0).
        if episode_idx % 100 == 0:
            print(f"Episode {episode_idx}, Objective: {episode_objective:.2f}")

    return agent, objective_history

# Example usage:
"""
# Generate multiple price paths
price_paths = [generate_price_path() for _ in range(100)]

# Train the hedger
hedger, results = train_hedger(
    price_paths=price_paths,
    K=100,
    n_episodes=1000,
    initial_options=100,
    lambda_risk=0.5,
    transaction_cost=0.001
)

# Apply to new data
test_df = generate_price_path()
hedged_df = hedger.hedge(test_df)

# Compare with BSM delta hedging
bsm_df = dynamic_delta_hedging(test_df)
"""

'\n# Generate multiple price paths\nprice_paths = [generate_price_path() for _ in range(100)]\n\n# Train the hedger\nhedger, results = train_hedger(\n    price_paths=price_paths,\n    K=100,\n    n_episodes=1000,\n    initial_options=100,\n    lambda_risk=0.5,\n    transaction_cost=0.001\n)\n\n# Apply to new data\ntest_df = generate_price_path()\nhedged_df = hedger.hedge(test_df)\n\n# Compare with BSM delta hedging\nbsm_df = dynamic_delta_hedging(test_df)\n'