In [3]:
# Imports and configuration
import math
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import matplotlib.pyplot as plt
from pathlib import Path
import importlib.util

print("PyTorch version:", torch.__version__)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

bs_utils = None
bs_utils_path = Path("d10122f4-7b4a-4370-a214-edef01eb206b.py")
if bs_utils_path.exists():
    spec = importlib.util.spec_from_file_location("bs_utils", bs_utils_path)
    bs_utils = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(bs_utils)
    print(f"Loaded bs_utils from {bs_utils_path}")
else:
    print("Optional bs_utils module not found; continuing without it.")

SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
plt.style.use("seaborn-v0_8")


PyTorch version: 2.5.1
Using device: cuda
Optional bs_utils module not found; continuing without it.


In [4]:
# Load the CSV and basic checks
CSV_PATH = "../options_SPY_calls.csv"  # adjust path if needed

try:
    df = pd.read_csv(CSV_PATH)
except FileNotFoundError as exc:
    raise FileNotFoundError(f"Could not find CSV at {CSV_PATH}. Update CSV_PATH if necessary.") from exc

print("Loaded shape:", df.shape)
display(df.head())

required_cols = ["S0", "K", "T", "C_mkt"]
for col in required_cols:
    if col not in df.columns:
        raise ValueError(f"Missing required column: {col}")

if "r" not in df.columns:
    df["r"] = 0.02
if "q" not in df.columns:
    df["q"] = 0.0

df = df.dropna(subset=["S0", "K", "T", "r", "q", "C_mkt"]).reset_index(drop=True)
df = df[df["T"] > 0.0].reset_index(drop=True)

print("Final number of rows:", len(df))
print(df[["S0", "K", "T", "r", "q", "C_mkt"]].describe())


Loaded shape: (2830, 6)


Unnamed: 0,S0,K,C_mkt,T,type,iv
0,672.65,530,156.6,0.0,C,1.88
1,672.65,540,126.43,0.0,C,1.72
2,672.65,545,134.2,0.0,C,1.66
3,672.65,555,116.71,0.0,C,1.57
4,672.65,560,104.69,0.0,C,1.47


Final number of rows: 2713
                 S0            K            T             r       q  \
count  2.713000e+03  2713.000000  2713.000000  2.713000e+03  2713.0   
mean   6.726500e+02   660.784003     0.412311  2.000000e-02     0.0   
std    1.137078e-13    68.803961     0.528264  3.470087e-18     0.0   
min    6.726500e+02   523.000000     0.010000  2.000000e-02     0.0   
25%    6.726500e+02   610.000000     0.060000  2.000000e-02     0.0   
50%    6.726500e+02   664.000000     0.210000  2.000000e-02     0.0   
75%    6.726500e+02   705.000000     0.590000  2.000000e-02     0.0   
max    6.726500e+02   820.000000     2.190000  2.000000e-02     0.0   

             C_mkt  
count  2713.000000  
mean     50.817564  
std      48.629587  
min       0.010000  
25%       6.180000  
50%      37.500000  
75%      85.500000  
max     209.480000  


In [5]:
# Heston characteristic function and call pricing utilities
INTEGRAL_U_MAX = 200.0
INTEGRAL_STEPS = 512

def heston_cf_torch(u, T, S0, r, q, kappa, theta, sigma, rho, v0):
    """Heston characteristic function Ï†(u) evaluated in PyTorch (vectorized)."""
    real_dtype = torch.float64
    complex_dtype = torch.complex128

    u = u.to(complex_dtype)
    S0 = S0.to(real_dtype)
    T = T.to(real_dtype)
    r = r.to(real_dtype)
    q = q.to(real_dtype)
    kappa = torch.clamp(kappa.to(real_dtype), min=1e-6)
    theta = torch.clamp(theta.to(real_dtype), min=1e-6)
    sigma = torch.clamp(sigma.to(real_dtype), min=1e-6)
    rho = torch.clamp(rho.to(real_dtype), min=-0.999, max=0.999)
    v0 = torch.clamp(v0.to(real_dtype), min=1e-8)

    i = torch.complex(torch.tensor(0.0, dtype=real_dtype, device=u.device),
                      torch.tensor(1.0, dtype=real_dtype, device=u.device))
    x0 = torch.log(S0)
    a = kappa * theta
    b = kappa - rho * sigma * i * u
    d = torch.sqrt(b * b + (sigma ** 2) * (u * u + i * u))
    g = (b - d) / (b + d)

    exp_neg_dT = torch.exp(-d * T)
    one_minus_g_exp = 1.0 - g * exp_neg_dT
    one_minus_g = 1.0 - g

    log_term = torch.log(one_minus_g_exp / one_minus_g)
    C = (r - q) * i * u * T + (a / (sigma ** 2)) * ((b - d) * T - 2.0 * log_term)
    D = ((b - d) / (sigma ** 2)) * ((1.0 - exp_neg_dT) / one_minus_g_exp)

    return torch.exp(C + D * v0 + i * u * x0)

def heston_call_price_torch(S0, K, T, r, q, kappa, theta, sigma, rho, v0,
                            u_max=INTEGRAL_U_MAX, n_u=INTEGRAL_STEPS):
    """Vectorized Heston call price via numerical integration (trapezoidal rule)."""
    device = S0.device
    real_dtype = torch.float64
    complex_dtype = torch.complex128

    def prep(x):
        return x.to(real_dtype).unsqueeze(0)

    S0_b = prep(S0)
    K_b = prep(K)
    T_b = prep(T)
    r_b = prep(r)
    q_b = prep(q)
    kappa_b = prep(kappa)
    theta_b = prep(theta)
    sigma_b = prep(sigma)
    rho_b = prep(rho)
    v0_b = prep(v0)

    u = torch.linspace(1e-6, u_max, n_u, device=device, dtype=real_dtype).unsqueeze(1)
    u_c = u.to(complex_dtype)

    cf1 = heston_cf_torch(u_c - 1j, T_b, S0_b, r_b, q_b, kappa_b, theta_b, sigma_b, rho_b, v0_b)
    numer1 = torch.exp(-1j * u_c * torch.log(K_b)) * cf1
    integrand1 = (numer1 / (1j * u_c)).real

    cf2 = heston_cf_torch(u_c, T_b, S0_b, r_b, q_b, kappa_b, theta_b, sigma_b, rho_b, v0_b)
    numer2 = torch.exp(-1j * u_c * torch.log(K_b)) * cf2
    integrand2 = (numer2 / (1j * u_c)).real

    du = u[1] - u[0]
    P1 = 0.5 + (du / math.pi) * integrand1.sum(dim=0)
    P2 = 0.5 + (du / math.pi) * integrand2.sum(dim=0)

    discount_stock = torch.exp(-q_b * T_b)
    discount_strike = torch.exp(-r_b * T_b)
    call = S0_b * discount_stock * P1 - K_b * discount_strike * P2
    return call.squeeze(0)


In [6]:
# Prepare neural-network inputs and targets
input_cols = ["T", "K", "S0", "r", "q"]
if "iv" in df.columns:
    input_cols.append("iv")

X_raw = df[input_cols].values.astype(np.float32)
y_price = df[["C_mkt"]].values.astype(np.float32)

X_mean = X_raw.mean(axis=0, keepdims=True)
X_std = X_raw.std(axis=0, keepdims=True) + 1e-8
X_norm = (X_raw - X_mean) / X_std

X_tensor = torch.from_numpy(X_norm)
y_price_tensor = torch.from_numpy(y_price)

print(f"Feature tensor shape: {X_tensor.shape}")
print(f"Price tensor shape: {y_price_tensor.shape}")


Feature tensor shape: torch.Size([2713, 6])
Price tensor shape: torch.Size([2713, 1])


In [7]:
# Dataset and loaders
class HestonDataset(Dataset):
    def __init__(self, X, price):
        self.X = X
        self.price = price

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        return self.X[idx], self.price[idx]

full_dataset = HestonDataset(X_tensor, y_price_tensor)
dataset_size = len(full_dataset)
val_size = max(1, int(0.2 * dataset_size)) if dataset_size >= 5 else max(1, dataset_size // 2)
train_size = dataset_size - val_size
if train_size <= 0:
    train_size, val_size = dataset_size, 0

if val_size > 0:
    train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])
else:
    train_dataset, val_dataset = full_dataset, None

batch_size = min(64, max(1, train_size)) if train_size > 0 else 1
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False) if val_dataset else None

print(f"Train size: {train_size}, Val size: {val_size}")
print(f"Batch size: {batch_size}")


Train size: 2171, Val size: 542
Batch size: 64


In [8]:
# Neural network that outputs Heston parameters
class HestonParamNet(nn.Module):
    def __init__(self, input_dim, hidden_dim=128):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 5),
        )

    def forward(self, x):
        raw_params = self.net(x)
        kappa_raw, theta_raw, sigma_raw, rho_raw, v0_raw = torch.unbind(raw_params, dim=-1)
        kappa = nn.functional.softplus(kappa_raw) + 1e-4
        theta = nn.functional.softplus(theta_raw) + 1e-4
        sigma = nn.functional.softplus(sigma_raw) + 1e-4
        rho = torch.tanh(rho_raw)
        v0 = nn.functional.softplus(v0_raw) + 1e-6
        return torch.stack([kappa, theta, sigma, rho, v0], dim=-1)

input_dim = X_tensor.shape[1]
model = HestonParamNet(input_dim=input_dim).to(device)
print(model)


HestonParamNet(
  (net): Sequential(
    (0): Linear(in_features=6, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=5, bias=True)
  )
)


In [9]:
# Loss, optimizer, and normalization helpers
def rmse_loss(pred, target):
    mse = torch.mean((pred - target) ** 2)
    return torch.sqrt(mse + 1e-8)

optimizer = optim.Adam(model.parameters(), lr=1e-3)

X_mean_tensor = torch.from_numpy(X_mean).to(device)
X_std_tensor = torch.from_numpy(X_std).to(device)
column_index = {col: idx for idx, col in enumerate(input_cols)}

idx_T = column_index["T"]
idx_K = column_index["K"]
idx_S0 = column_index["S0"]
idx_r = column_index["r"]
idx_q = column_index["q"]


In [10]:
# Training loop
num_epochs = 100
history = {"train": [], "val": []}

for epoch in range(1, num_epochs + 1):
    model.train()
    train_losses = []
    for X_batch, price_batch in train_loader:
        X_batch = X_batch.to(device)
        price_batch = price_batch.to(device).squeeze(-1).double()

        optimizer.zero_grad()
        params = model(X_batch)
        kappa, theta, sigma, rho, v0 = torch.unbind(params, dim=-1)

        X_batch_unnorm = X_batch * X_std_tensor + X_mean_tensor
        T_batch = X_batch_unnorm[:, idx_T].double()
        K_batch = X_batch_unnorm[:, idx_K].double()
        S0_batch = X_batch_unnorm[:, idx_S0].double()
        r_batch = X_batch_unnorm[:, idx_r].double()
        q_batch = X_batch_unnorm[:, idx_q].double()

        pred_price = heston_call_price_torch(
            S0_batch, K_batch, T_batch, r_batch, q_batch,
            kappa.double(), theta.double(), sigma.double(), rho.double(), v0.double()
        )

        loss = rmse_loss(pred_price, price_batch)
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())

    train_rmse = float(np.mean(train_losses)) if train_losses else float('nan')
    history["train"].append(train_rmse)

    model.eval()
    val_rmse = float('nan')
    if val_loader is not None:
        val_losses = []
        with torch.no_grad():
            for X_batch, price_batch in val_loader:
                X_batch = X_batch.to(device)
                price_batch = price_batch.to(device).squeeze(-1).double()

                params = model(X_batch)
                kappa, theta, sigma, rho, v0 = torch.unbind(params, dim=-1)

                X_batch_unnorm = X_batch * X_std_tensor + X_mean_tensor
                T_batch = X_batch_unnorm[:, idx_T].double()
                K_batch = X_batch_unnorm[:, idx_K].double()
                S0_batch = X_batch_unnorm[:, idx_S0].double()
                r_batch = X_batch_unnorm[:, idx_r].double()
                q_batch = X_batch_unnorm[:, idx_q].double()

                pred_price = heston_call_price_torch(
                    S0_batch, K_batch, T_batch, r_batch, q_batch,
                    kappa.double(), theta.double(), sigma.double(), rho.double(), v0.double()
                )

                loss = rmse_loss(pred_price, price_batch)
                val_losses.append(loss.item())
        val_rmse = float(np.mean(val_losses)) if val_losses else float('nan')
    history["val"].append(val_rmse)

    if epoch % 10 == 0 or epoch == 1:
        print(f"Epoch {epoch:4d} | Train RMSE: {train_rmse:.6f} | Val RMSE: {val_rmse:.6f}")


Epoch    1 | Train RMSE: 71519.743320 | Val RMSE: 50585.186560
Epoch   10 | Train RMSE: 9756.150221 | Val RMSE: 8482.829567
Epoch   20 | Train RMSE: 5982.017953 | Val RMSE: 4561.006856
Epoch   30 | Train RMSE: 4523.030701 | Val RMSE: 3316.353773
Epoch   40 | Train RMSE: 3496.186771 | Val RMSE: 2754.783018
Epoch   50 | Train RMSE: 3175.600325 | Val RMSE: 2810.920869
Epoch   60 | Train RMSE: 2720.796125 | Val RMSE: 2446.945312
Epoch   70 | Train RMSE: 2419.350311 | Val RMSE: 2257.896416
Epoch   80 | Train RMSE: 2267.978003 | Val RMSE: 2134.974899
Epoch   90 | Train RMSE: 2217.509110 | Val RMSE: 1938.526165
Epoch  100 | Train RMSE: 2355.983583 | Val RMSE: 2096.919636


In [11]:
# Evaluation and diagnostics
model.eval()
with torch.no_grad():
    X_full = X_tensor.to(device)
    price_full = y_price_tensor.squeeze(-1).to(device).double()
    params_full = model(X_full)
    kappa_full, theta_full, sigma_full, rho_full, v0_full = torch.unbind(params_full, dim=-1)

    X_full_unnorm = X_full * X_std_tensor + X_mean_tensor
    T_full = X_full_unnorm[:, idx_T].double()
    K_full = X_full_unnorm[:, idx_K].double()
    S0_full = X_full_unnorm[:, idx_S0].double()
    r_full = X_full_unnorm[:, idx_r].double()
    q_full = X_full_unnorm[:, idx_q].double()

    price_pred_full = heston_call_price_torch(
        S0_full, K_full, T_full, r_full, q_full,
        kappa_full.double(), theta_full.double(), sigma_full.double(), rho_full.double(), v0_full.double()
    )
    final_rmse = math.sqrt(torch.mean((price_pred_full - price_full) ** 2).item())

print(f"Final RMSE on full dataset: {final_rmse:.6f}")

params_np = {
    'kappa': kappa_full.cpu().numpy(),
    'theta': theta_full.cpu().numpy(),
    'sigma': sigma_full.cpu().numpy(),
    'rho': rho_full.cpu().numpy(),
    'v0': v0_full.cpu().numpy(),
}
for name, values in params_np.items():
    print(f"{name:>5}: mean={values.mean():.6f}, std={values.std():.6f}, min={values.min():.6f}, max={values.max():.6f}")

comparison = df.copy()
comparison["price_heston_nn"] = price_pred_full.cpu().numpy()
comparison["abs_error"] = np.abs(comparison["price_heston_nn"] - comparison["C_mkt"])
comparison["rel_error"] = comparison["abs_error"] / np.maximum(comparison["C_mkt"], 1e-8)

display_cols = ["S0", "K", "T", "C_mkt", "price_heston_nn", "abs_error", "rel_error"]
display(comparison[display_cols].head())

sample = comparison.sample(min(5, len(comparison)), random_state=SEED)
display(sample[display_cols])


Final RMSE on full dataset: 2325.760127
kappa: mean=25.010395, std=19.890818, min=0.666863, max=121.347252
theta: mean=1.427577, std=1.927889, min=0.001200, max=10.580199
sigma: mean=23.391321, std=17.802574, min=1.407637, max=111.390312
  rho: mean=0.999874, std=0.000256, min=0.998849, max=1.000000
   v0: mean=13.112293, std=12.450914, min=0.410428, max=88.609901


Unnamed: 0,S0,K,T,C_mkt,price_heston_nn,abs_error,rel_error
0,672.65,570,0.01,102.32,13631.574257,13529.254257,132.224924
1,672.65,575,0.01,102.28,12020.374471,11918.094471,116.524193
2,672.65,580,0.01,85.62,10498.175379,10412.555379,121.613588
3,672.65,590,0.01,75.65,7331.196906,7255.546906,95.909411
4,672.65,600,0.01,76.35,4852.168658,4775.818658,62.551652


Unnamed: 0,S0,K,T,C_mkt,price_heston_nn,abs_error,rel_error
506,672.65,583,0.04,96.99,-5397.331903,5494.321903,56.648334
1535,672.65,590,0.27,101.07,529.501209,428.431209,4.238955
1905,672.65,643,0.46,66.27,102.08876,35.81876,0.540497
930,672.65,663,0.1,23.25,2448.181186,2424.931186,104.298116
2676,672.65,640,2.19,119.9,1540.185814,1420.285814,11.845586
