# Neural Black-Scholes: Physics-Informed Neural Networks for Option Pricing

This notebook demonstrates how to train a neural network to price options while respecting the Black-Scholes PDE constraints (Physics-Informed Neural Networks - PINNs).

## Key Concepts
- **Standard NN**: Learn option prices from data alone
- **Physics-Informed NN**: Add PDE residual as a loss term, enforcing BS equation
- **Automatic Differentiation**: Use PyTorch autograd to compute Greeks automatically

## Requirements
```bash
pip install torch numpy matplotlib scipy
```

In [None]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

# Set random seeds for reproducibility
# torch.manual_seed fixes the PyTorch RNG (layer initialisation, torch.randperm);
# np.random.seed fixes the global NumPy RNG used for the synthetic data draws.
torch.manual_seed(42)
np.random.seed(42)

# Use a CUDA GPU when PyTorch reports one, otherwise fall back to the CPU.
# `device` is a module-level name that later cells read.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

## 1. Black-Scholes Analytical Solution (Ground Truth)

In [None]:
def black_scholes_call(S, K, T, r, sigma):
    """Analytical Black-Scholes price of a European call option.

    Every argument may be a Python scalar or a NumPy array; the arguments
    are broadcast against each other, so whole grids can be priced in one
    call instead of looping over scalars.

    Args:
        S: Spot price of the underlying.
        K: Strike price.
        T: Time to maturity. Wherever ``T <= 0`` the intrinsic value
            ``max(S - K, 0)`` is returned instead of the formula.
        r: Continuously compounded risk-free rate.
        sigma: Volatility of the underlying (expected to be positive).

    Returns:
        A NumPy float scalar when all inputs are scalars, otherwise an
        ndarray with the broadcast shape of the inputs.
    """
    S, K, T, r, sigma = np.broadcast_arrays(
        *(np.asarray(arg, dtype=float) for arg in (S, K, T, r, sigma))
    )

    live = T > 0
    # Expired entries get a placeholder maturity so the closed form does not
    # divide by zero; their result is replaced by the payoff below.
    T_safe = np.where(live, T, 1.0)

    vol_sqrt_t = sigma * np.sqrt(T_safe)
    d1 = (np.log(S / K) + (r + 0.5 * sigma ** 2) * T_safe) / vol_sqrt_t
    d2 = d1 - vol_sqrt_t
    formula = S * norm.cdf(d1) - K * np.exp(-r * T_safe) * norm.cdf(d2)

    price = np.where(live, formula, np.maximum(S - K, 0.0))
    # Unwrap 0-d results so scalar callers keep getting a plain scalar.
    return price[()] if price.ndim == 0 else price

def black_scholes_delta(S, K, T, r, sigma):
    """Analytical Black-Scholes delta of a European call option.

    Every argument may be a Python scalar or a NumPy array; the arguments
    are broadcast against each other.

    Args:
        S: Spot price of the underlying.
        K: Strike price.
        T: Time to maturity. Wherever ``T <= 0`` the delta of the payoff is
            returned: 1.0 if ``S > K`` else 0.0.
        r: Continuously compounded risk-free rate.
        sigma: Volatility of the underlying (expected to be positive).

    Returns:
        A NumPy float scalar when all inputs are scalars, otherwise an
        ndarray with the broadcast shape of the inputs.
    """
    S, K, T, r, sigma = np.broadcast_arrays(
        *(np.asarray(arg, dtype=float) for arg in (S, K, T, r, sigma))
    )

    live = T > 0
    # Placeholder maturity for expired entries avoids a division by zero;
    # those entries are overwritten with the payoff delta below.
    T_safe = np.where(live, T, 1.0)

    d1 = (np.log(S / K) + (r + 0.5 * sigma ** 2) * T_safe) / (sigma * np.sqrt(T_safe))
    delta = np.where(live, norm.cdf(d1), np.where(S > K, 1.0, 0.0))
    # Unwrap 0-d results so scalar callers keep getting a plain scalar.
    return delta[()] if delta.ndim == 0 else delta

# Test analytical solution
# At-the-money call (S = K = 100) with T = 1.0, r = 5%, sigma = 20%.
# These names are module-level; K, r and sigma are reassigned in the
# visualization cell further down.
S, K, T, r, sigma = 100, 100, 1.0, 0.05, 0.2
print(f"BS Call Price: ${black_scholes_call(S, K, T, r, sigma):.4f}")
print(f"BS Delta: {black_scholes_delta(S, K, T, r, sigma):.4f}")

## 2. Generate Training Data

In [None]:
def generate_training_data(n_samples=10000):
    """Draw random option parameters and label them with Black-Scholes prices.

    Parameters are sampled uniformly with the global NumPy RNG, in the
    order spot, strike, maturity, rate, volatility.

    Args:
        n_samples: Number of (parameters, price) pairs to generate.

    Returns:
        Tuple ``(X, y)`` of float32 tensors. ``X`` has shape
        ``(n_samples, 5)`` with columns ``S/100, K/100, T, r*10, sigma*2``;
        ``y`` has shape ``(n_samples, 1)`` and holds ``price / 100``.
    """
    draw = np.random.uniform
    spot = draw(50, 150, n_samples)      # stock price
    strike = draw(50, 150, n_samples)    # strike
    tau = draw(0.01, 2.0, n_samples)     # time to maturity
    rate = draw(0.01, 0.10, n_samples)   # risk-free rate
    vol = draw(0.1, 0.5, n_samples)      # volatility

    # Label every sample with the analytical call price, one sample at a time.
    call_values = np.array(list(map(black_scholes_call, spot, strike, tau, rate, vol)))

    # Rough rescaling so every feature is of order one.
    features = np.stack([spot / 100, strike / 100, tau, rate * 10, vol * 2], axis=1)
    targets = call_values / 100

    X = torch.tensor(features, dtype=torch.float32)
    y = torch.tensor(targets, dtype=torch.float32).unsqueeze(1)
    return X, y

# Two independent draws from the same sampler: 50,000 training samples and
# 5,000 held-out samples. Both consume the global NumPy RNG, so the call
# order determines which samples land in which set.
# NOTE(review): X_test / y_test are not referenced again in the visible cells.
X_train, y_train = generate_training_data(50000)
X_test, y_test = generate_training_data(5000)
print(f"Training samples: {len(X_train)}")
print(f"Test samples: {len(X_test)}")

## 3. Standard Neural Network for Option Pricing

In [None]:
class OptionPricingNN(nn.Module):
    """Standard feedforward neural network for option pricing.

    Each hidden stage is ``Linear -> ReLU -> BatchNorm1d``. The head is a
    ``Linear`` layer to one unit followed by ``Softplus`` so the predicted
    (normalized) price is always positive.

    Args:
        input_dim: Number of input features per sample.
        hidden_dims: Widths of the hidden layers, in order. Any iterable of
            ints is accepted; the default is an immutable tuple so it cannot
            be shared and mutated across instances.
    """
    def __init__(self, input_dim=5, hidden_dims=(64, 64, 32)):
        super().__init__()
        layers = []
        prev_dim = input_dim
        for hidden_dim in hidden_dims:
            layers.extend([
                nn.Linear(prev_dim, hidden_dim),
                nn.ReLU(),
                nn.BatchNorm1d(hidden_dim),
            ])
            prev_dim = hidden_dim
        layers.append(nn.Linear(prev_dim, 1))
        layers.append(nn.Softplus())  # Ensure positive prices
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Map a ``(batch, input_dim)`` tensor to ``(batch, 1)`` positive outputs."""
        return self.network(x)

# Train standard NN
# The model, Adam optimizer and MSE criterion are module-level names.
model_standard = OptionPricingNN().to(device)
optimizer = torch.optim.Adam(model_standard.parameters(), lr=0.001)
criterion = nn.MSELoss()

# Move the full training set to the training device once, up front.
X_train_dev = X_train.to(device)
y_train_dev = y_train.to(device)

# Full-batch training: every epoch is a single optimizer step computed on
# the entire training set (no mini-batching).
losses = []
for epoch in range(200):
    model_standard.train()
    optimizer.zero_grad()
    y_pred = model_standard(X_train_dev)
    loss = criterion(y_pred, y_train_dev)
    loss.backward()
    optimizer.step()
    losses.append(loss.item())
    # Progress report every 50 epochs.
    if (epoch + 1) % 50 == 0:
        print(f"Epoch {epoch+1}: Loss = {loss.item():.6f}")

# Training curve of the standard network, on a logarithmic loss axis.
loss_fig, loss_ax = plt.subplots(figsize=(10, 4))
loss_ax.plot(losses)
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('MSE Loss')
loss_ax.set_title('Standard NN Training Loss')
loss_ax.set_yscale('log')
plt.show()

## 4. Physics-Informed Neural Network (PINN)

The Black-Scholes PDE for a call option is:

$$\frac{\partial V}{\partial t} + \frac{1}{2}\sigma^2 S^2 \frac{\partial^2 V}{\partial S^2} + rS\frac{\partial V}{\partial S} - rV = 0$$

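We add the mean squared PDE residual as a regularization term in the loss function. Because the network takes time to maturity $T$ (rather than calendar time $t$) as an input, $\partial V/\partial t = -\partial V/\partial T$, so the residual is evaluated with $-\partial V/\partial T$ in place of the first term.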

In [None]:
class PhysicsInformedOptionNN(nn.Module):
    """PINN that respects the Black-Scholes PDE.

    A tanh MLP over the five normalized inputs ``S/100, K/100, T, r*10,
    sigma*2`` with a single linear output (the normalized price).

    Args:
        hidden_dims: Widths of the hidden layers, in order. Any iterable of
            ints is accepted; the default is an immutable tuple so it cannot
            be shared and mutated across instances.
    """
    def __init__(self, hidden_dims=(64, 64, 32)):
        super().__init__()
        layers = []
        prev_dim = 5  # S, K, T, r, sigma
        for hidden_dim in hidden_dims:
            layers.extend([
                nn.Linear(prev_dim, hidden_dim),
                nn.Tanh(),  # Smooth activation for better gradients
            ])
            prev_dim = hidden_dim
        layers.append(nn.Linear(prev_dim, 1))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Map a ``(batch, 5)`` tensor of normalized inputs to ``(batch, 1)`` outputs."""
        return self.network(x)

    def compute_pde_residual(self, S, K, T, r, sigma):
        """Compute the Black-Scholes PDE residual using automatic differentiation.

        Args:
            S, K, T, r, sigma: 1-D tensors of equal length holding the
                un-normalized spot, strike, time to maturity, rate and
                volatility. They are normalized here before being fed to
                the network.

        Returns:
            1-D tensor with one residual per sample, differentiable with
            respect to the network parameters.

        The caller's tensors are not modified: inputs that do not already
        track gradients are replaced by detached copies inside this method.
        Gradient tracking is forced on locally so the residual can also be
        evaluated from inside a ``torch.no_grad()`` block.
        """
        with torch.enable_grad():
            # Differentiate w.r.t. private copies instead of flipping
            # requires_grad on tensors owned by the caller.
            if not S.requires_grad:
                S = S.detach().requires_grad_(True)
            if not T.requires_grad:
                T = T.detach().requires_grad_(True)

            # Forward pass
            x = torch.stack([S / 100, K / 100, T, r * 10, sigma * 2], dim=1)
            V = self.network(x).squeeze(-1) * 100  # Denormalize

            # Samples are independent, so the gradient of the batch sum gives
            # the per-sample derivatives. Both first derivatives come from a
            # single backward pass.
            dV_dS, dV_dT = torch.autograd.grad(V.sum(), (S, T), create_graph=True)
            d2V_dS2 = torch.autograd.grad(
                dV_dS.sum(), S, create_graph=True, allow_unused=True
            )[0]
            if d2V_dS2 is None:
                # dV/dS does not depend on S (e.g. no hidden layers).
                d2V_dS2 = torch.zeros_like(S)

            # BS PDE: dV/dt + 0.5*sigma^2*S^2*d2V/dS2 + r*S*dV/dS - r*V = 0
            # Note: We use -dV/dT since T is time to maturity (decreasing)
            residual = (
                -dV_dT
                + 0.5 * sigma ** 2 * S ** 2 * d2V_dS2
                + r * S * dV_dS
                - r * V
            )

        return residual

# Train PINN
# Fresh PINN with its own Adam optimizer (same learning rate, 0.001, as the
# standard network's optimizer).
model_pinn = PhysicsInformedOptionNN().to(device)
optimizer_pinn = torch.optim.Adam(model_pinn.parameters(), lr=0.001)

def train_pinn(model, optimizer, X, y, n_epochs=200, pde_weight=0.1, n_pde=1000):
    """Train a PINN with a combined data + PDE-residual loss (full batch).

    Args:
        model: Module that maps normalized inputs to normalized prices and
            exposes ``compute_pde_residual(S, K, T, r, sigma)``.
        optimizer: Optimizer over ``model``'s parameters.
        X: ``(N, 5)`` tensor of normalized inputs with columns
            ``S/100, K/100, T, r*10, sigma*2``.
        y: ``(N, 1)`` tensor of normalized target prices.
        n_epochs: Number of full-batch optimizer steps.
        pde_weight: Multiplier on the PDE loss in the combined objective.
        n_pde: Maximum number of samples drawn per epoch for the PDE loss.

    Returns:
        Tuple ``(losses_data, losses_pde)`` of per-epoch float lists.
    """
    losses_data = []
    losses_pde = []

    for epoch in range(n_epochs):
        model.train()
        optimizer.zero_grad()

        # Data loss: mean squared error, computed inline so this function
        # does not rely on a loss object defined in another cell.
        y_pred = model(X)
        loss_data = torch.nn.functional.mse_loss(y_pred, y)

        # PDE loss on a fresh random subset each epoch (for efficiency).
        n_colloc = min(n_pde, len(X))
        idx = torch.randperm(len(X))[:n_colloc]
        # Undo the input normalization; compute_pde_residual re-applies it.
        S_pde = X[idx, 0] * 100
        K_pde = X[idx, 1] * 100
        T_pde = X[idx, 2]
        r_pde = X[idx, 3] / 10
        sigma_pde = X[idx, 4] / 2

        residual = model.compute_pde_residual(S_pde, K_pde, T_pde, r_pde, sigma_pde)
        loss_pde = (residual ** 2).mean()

        # Combined loss
        loss = loss_data + pde_weight * loss_pde
        loss.backward()
        optimizer.step()

        losses_data.append(loss_data.item())
        losses_pde.append(loss_pde.item())

        # Progress report every 50 epochs.
        if (epoch + 1) % 50 == 0:
            print(f"Epoch {epoch+1}: Data Loss = {loss_data.item():.6f}, PDE Loss = {loss_pde.item():.6f}")

    return losses_data, losses_pde

# Run PINN training on the device-resident training set and keep both
# per-epoch loss histories.
losses_data, losses_pde = train_pinn(model_pinn, optimizer_pinn, X_train_dev, y_train_dev)

# Side-by-side log-scale curves: data fit on the left, PDE residual on the right.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
loss_panels = (
    (losses_data, 'Data MSE Loss', 'PINN Data Loss'),
    (losses_pde, 'PDE Residual', 'PINN Physics Loss'),
)
for panel_ax, (history, y_label, panel_title) in zip(axes, loss_panels):
    panel_ax.plot(history)
    panel_ax.set_xlabel('Epoch')
    panel_ax.set_ylabel(y_label)
    panel_ax.set_title(panel_title)
    panel_ax.set_yscale('log')
plt.tight_layout()
plt.show()

## 5. Compare Models and Compute Greeks via Autodiff

In [None]:
def compute_greeks_autodiff(model, S, K, T, r, sigma):
    """Compute the option price and Greeks via automatic differentiation.

    Args:
        model: Network mapping the normalized inputs
            ``[S/100, K/100, T, r*10, sigma*2]`` to the price divided by 100.
        S, K, T, r, sigma: Scalar, un-normalized option parameters.

    Returns:
        Dict with keys ``'price'``, ``'delta'`` (dV/dS), ``'gamma'``
        (d2V/dS2), ``'theta'`` (-dV/dT divided by 365, i.e. per day) and
        ``'vega'`` (dV/dsigma divided by 100, i.e. per 1% vol change).
    """
    # Build the inputs on the model's own device instead of relying on a
    # module-level `device`; parameter-free models fall back to the CPU.
    first_param = next(iter(model.parameters()), None)
    dev = first_param.device if first_param is not None else torch.device('cpu')

    def _input(value, track=False):
        # Created directly on `dev` so differentiated inputs are graph leaves.
        return torch.tensor([value], dtype=torch.float32, device=dev, requires_grad=track)

    S_t = _input(S / 100, track=True)
    K_t = _input(K / 100)
    T_t = _input(T, track=True)
    r_t = _input(r * 10)
    sigma_t = _input(sigma * 2, track=True)

    x = torch.stack([S_t, K_t, T_t, r_t, sigma_t], dim=1)
    V = model(x) * 100  # Denormalize

    # All first-order derivatives in ONE call. Separate grad() calls without
    # retain_graph free the graph after the first of them, so a later call
    # on the same output fails with "backward through the graph a second time".
    dV_dSn, dV_dT, dV_dsign = torch.autograd.grad(
        V.sum(), (S_t, T_t, sigma_t), create_graph=True, allow_unused=True
    )

    def _or_zero(grad):
        # An input the model ignores yields None; report a zero sensitivity.
        return torch.zeros(1, device=dev) if grad is None else grad

    # Delta = dV/dS; the network sees S/100, hence the extra 1/100.
    delta = _or_zero(dV_dSn) / 100

    # Gamma = d2V/dS2; zero when delta does not depend on S at all.
    if delta.requires_grad:
        d_delta = torch.autograd.grad(delta.sum(), S_t, allow_unused=True)[0]
        gamma = _or_zero(d_delta) / 100
    else:
        gamma = torch.zeros(1, device=dev)

    # Theta = -dV/dT (negative because T is time to maturity)
    theta = -_or_zero(dV_dT)

    # Vega = dV/dsigma; the network sees 2*sigma, so the chain rule
    # multiplies by 2.
    vega = _or_zero(dV_dsign) * 2

    return {
        'price': V.item(),
        'delta': delta.item(),
        'gamma': gamma.item(),
        'theta': theta.item() / 365,  # Daily theta
        'vega': vega.item() / 100  # Per 1% vol change
    }

# Compare models
# Single at-the-money test point; the keys match the keyword parameters of
# the pricing and Greeks functions so the dict can be splatted with **.
test_params = {'S': 100, 'K': 100, 'T': 0.5, 'r': 0.05, 'sigma': 0.2}

# Analytical reference values (only price and delta have closed-form
# helpers in this notebook).
bs_price = black_scholes_call(**test_params)
bs_delta = black_scholes_delta(**test_params)

# Switch both networks to evaluation mode before querying a single point.
model_standard.eval()
model_pinn.eval()

greeks_standard = compute_greeks_autodiff(model_standard, **test_params)
greeks_pinn = compute_greeks_autodiff(model_pinn, **test_params)

# Fixed-width comparison table. The header text is hard-coded and must be
# kept in sync with test_params by hand.
print("\n" + "="*60)
print("COMPARISON: S=100, K=100, T=0.5y, r=5%, sigma=20%")
print("="*60)
print(f"{'Metric':<15} {'BS Analytical':<15} {'Standard NN':<15} {'PINN':<15}")
print("-"*60)
print(f"{'Price':<15} ${bs_price:<14.4f} ${greeks_standard['price']:<14.4f} ${greeks_pinn['price']:<14.4f}")
print(f"{'Delta':<15} {bs_delta:<15.4f} {greeks_standard['delta']:<15.4f} {greeks_pinn['delta']:<15.4f}")
print(f"{'Gamma':<15} {'N/A':<15} {greeks_standard['gamma']:<15.4f} {greeks_pinn['gamma']:<15.4f}")
print(f"{'Theta (daily)':<15} {'N/A':<15} {greeks_standard['theta']:<15.4f} {greeks_pinn['theta']:<15.4f}")
print(f"{'Vega':<15} {'N/A':<15} {greeks_standard['vega']:<15.4f} {greeks_pinn['vega']:<15.4f}")

## 6. Visualize Option Price Surface

In [None]:
# Evaluation grid: 50 spot prices x 50 maturities (rows follow T, columns follow S).
S_range = np.linspace(70, 130, 50)
T_range = np.linspace(0.05, 1.0, 50)
S_grid, T_grid = np.meshgrid(S_range, T_range)

# Contract parameters held fixed across the whole surface.
K, r, sigma = 100, 0.05, 0.2

# Analytical BS reference, priced point by point over the flattened grid.
bs_prices = np.array([
    black_scholes_call(spot, K, tau, r, sigma)
    for spot, tau in zip(S_grid.ravel(), T_grid.ravel())
]).reshape(S_grid.shape)

# Network inputs use the same normalization as the training data.
n_points = S_grid.size
viz_features = np.stack([
    S_grid.ravel() / 100,
    np.full(n_points, K / 100),
    T_grid.ravel(),
    np.full(n_points, r * 10),
    np.full(n_points, sigma * 2),
], axis=1)
X_viz = torch.tensor(viz_features, dtype=torch.float32).to(device)

# Inference only, so gradient tracking is switched off; outputs are
# de-normalized back to price units.
with torch.no_grad():
    pinn_prices = model_pinn(X_viz).cpu().numpy().reshape(S_grid.shape) * 100

# Pointwise absolute pricing error of the PINN against the analytical surface.
error = np.abs(pinn_prices - bs_prices)

# Three 3-D panels: analytical surface, PINN surface, absolute error.
fig = plt.figure(figsize=(15, 5))
surface_panels = (
    (131, bs_prices, 'viridis', 'Option Price', 'Black-Scholes Analytical'),
    (132, pinn_prices, 'viridis', 'Option Price', 'PINN Prediction'),
    (133, error, 'hot', 'Absolute Error', 'PINN Error vs BS'),
)
panel_axes = []
for position, surface, colormap, z_label, panel_title in surface_panels:
    panel = fig.add_subplot(position, projection='3d')
    panel.plot_surface(S_grid, T_grid, surface, cmap=colormap, alpha=0.8)
    panel.set_xlabel('Stock Price')
    panel.set_ylabel('Time to Maturity')
    panel.set_zlabel(z_label)
    panel.set_title(panel_title)
    panel_axes.append(panel)
ax1, ax2, ax3 = panel_axes

plt.tight_layout()
plt.show()

print(f"\nMean Absolute Error (PINN vs BS): ${error.mean():.4f}")
print(f"Max Absolute Error (PINN vs BS): ${error.max():.4f}")

## Summary

This notebook demonstrated:

1. **Standard Neural Network**: Learns option prices from data alone
2. **Physics-Informed Neural Network (PINN)**: Incorporates Black-Scholes PDE as a regularization term
3. **Automatic Differentiation**: Computes Greeks (Delta, Gamma, Theta, Vega) automatically from the network

### Key Advantages of PINNs for Finance:
- **Better Extrapolation**: PDE constraint helps in regions with sparse training data
- **Interpretability**: Network respects known financial physics
- **Consistent Greeks**: Derivatives are mathematically consistent with the pricing function

### Extensions to Try:
- Add jump-diffusion or stochastic volatility models
- Train on real market option prices (implied vol surfaces)
- Use boundary conditions (payoff at T=0) as additional constraints