In [83]:
# 1. Imports and configuration
import math
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import matplotlib.pyplot as plt

print("PyTorch version:", torch.__version__)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Optional Black-Scholes utilities (uncomment if needed)
# import d10122f4_7b4a_4370_a214_edef01eb206b as bs_utils

SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
plt.style.use("seaborn-v0_8")


PyTorch version: 2.5.1
Using device: cuda


In [84]:
# 2. Load the CSV and basic checks
CSV_PATH = "../Heston_call.csv"  # adjust if needed

df = pd.read_csv(CSV_PATH)
print("Loaded shape:", df.shape)
display(df.head())

required_cols = ["S0", "K", "T", "C_mkt"]
for col in required_cols:
    if col not in df.columns:
        raise ValueError(f"Missing required column: {col}")

if "r" not in df.columns:
    df["r"] = 0.02
if "q" not in df.columns:
    df["q"] = 0.0

df = df.dropna(subset=["S0", "K", "T", "r", "q", "C_mkt"]).reset_index(drop=True)
df = df[df["T"] > 0.0].reset_index(drop=True)

print("Final number of rows:", len(df))
print(df[["S0", "K", "T", "r", "q", "C_mkt"]].describe())


Loaded shape: (2725, 6)


Unnamed: 0,S0,K,C_mkt,T,type,iv
0,667.840027,570.0,93.78,0.006484,C,1e-05
1,667.840027,575.0,102.28,0.006484,C,1e-05
2,667.840027,580.0,85.62,0.006484,C,1e-05
3,667.840027,590.0,75.65,0.006484,C,1e-05
4,667.840027,600.0,76.35,0.006484,C,1e-05


Final number of rows: 2725
                S0            K            T             r       q  \
count  2725.000000  2725.000000  2725.000000  2.725000e+03  2725.0   
mean    667.840027   658.623119     0.407258  2.000000e-02     0.0   
std       0.000000    69.045304     0.526842  3.470084e-18     0.0   
min     667.840027   518.000000     0.006484  2.000000e-02     0.0   
25%     667.840027   609.000000     0.055799  2.000000e-02     0.0   
50%     667.840027   662.000000     0.209224  2.000000e-02     0.0   
75%     667.840027   703.000000     0.590045  2.000000e-02     0.0   
max     667.840027   815.000000     2.184566  2.000000e-02     0.0   

             C_mkt  
count  2725.000000  
mean     50.598261  
std      49.798740  
min       0.010000  
25%       4.760000  
50%      36.470000  
75%      86.110000  
max     205.280000  


In [85]:
# 3. Analytic Heston pricing utilities (characteristic function + call price)
INTEGRAL_U_MAX = 200.0
INTEGRAL_STEPS = 512

def heston_cf_torch(u, T, S0, r, q, kappa, theta, sigma, rho, v0):
    """Heston characteristic function Ï†(u) computed in PyTorch (broadcastable)."""
    dtype = torch.float64
    complex_dtype = torch.complex128

    u = u.to(complex_dtype)
    S0 = torch.clamp(S0.to(dtype), min=1e-8)
    T = torch.clamp(T.to(dtype), min=1e-8)
    r = r.to(dtype)
    q = q.to(dtype)
    kappa = torch.clamp(kappa.to(dtype), min=1e-6)
    theta = torch.clamp(theta.to(dtype), min=1e-6)
    sigma = torch.clamp(sigma.to(dtype), min=1e-6)
    rho = torch.clamp(rho.to(dtype), min=-0.999, max=0.999)
    v0 = torch.clamp(v0.to(dtype), min=1e-8)

    i = torch.complex(torch.tensor(0.0, dtype=dtype, device=u.device),
                      torch.tensor(1.0, dtype=dtype, device=u.device))
    x0 = torch.log(S0)
    a = kappa * theta
    b = kappa - rho * sigma * i * u
    d = torch.sqrt(b * b + (sigma ** 2) * (u * u + i * u))
    g = (b - d) / (b + d)

    exp_neg_dT = torch.exp(-d * T)
    one_minus_g_exp = 1.0 - g * exp_neg_dT
    one_minus_g = 1.0 - g

    C = (r - q) * i * u * T + (a / (sigma ** 2)) * ((b - d) * T - 2.0 * torch.log(one_minus_g_exp / one_minus_g))
    D = ((b - d) / (sigma ** 2)) * ((1.0 - exp_neg_dT) / one_minus_g_exp)
    return torch.exp(C + D * v0 + i * u * x0)

def heston_call_price_torch(S0, K, T, r, q, kappa, theta, sigma, rho, v0,
                            u_max=INTEGRAL_U_MAX, n_u=INTEGRAL_STEPS):
    """Semi-analytic Heston call price computed via numerical integration (RMSE loss driver)."""
    device = S0.device
    dtype = torch.float64
    complex_dtype = torch.complex128

    def _to_batch(x):
        return x.to(dtype).unsqueeze(0)

    S0_b = _to_batch(S0)
    K_b = _to_batch(K)
    T_b = _to_batch(T)
    r_b = _to_batch(r)
    q_b = _to_batch(q)
    kappa_b = _to_batch(kappa)
    theta_b = _to_batch(theta)
    sigma_b = _to_batch(sigma)
    rho_b = _to_batch(rho)
    v0_b = _to_batch(v0)

    u = torch.linspace(1e-6, u_max, n_u, device=device, dtype=dtype).unsqueeze(1)
    u_complex = u.to(complex_dtype)

    cf1 = heston_cf_torch(u_complex - 1j, T_b, S0_b, r_b, q_b, kappa_b, theta_b, sigma_b, rho_b, v0_b)
    numer1 = torch.exp(-1j * u_complex * torch.log(K_b)) * cf1
    integrand1 = (numer1 / (1j * u_complex)).real

    cf2 = heston_cf_torch(u_complex, T_b, S0_b, r_b, q_b, kappa_b, theta_b, sigma_b, rho_b, v0_b)
    numer2 = torch.exp(-1j * u_complex * torch.log(K_b)) * cf2
    integrand2 = (numer2 / (1j * u_complex)).real

    du = u[1] - u[0]
    P1 = 0.5 + (du / math.pi) * integrand1.sum(dim=0)
    P2 = 0.5 + (du / math.pi) * integrand2.sum(dim=0)

    discount_stock = torch.exp(-q_b * T_b)
    discount_strike = torch.exp(-r_b * T_b)
    call = S0_b * discount_stock * P1 - K_b * discount_strike * P2
    return call.squeeze(0).to(torch.float32)


In [86]:
# 4. Prepare inputs (features) and target prices
input_cols = ["T", "K", "S0", "r", "q"]
if "iv" in df.columns:
    input_cols.append("iv")

X_raw = df[input_cols].values.astype(np.float32)
y_price = df[["C_mkt"]].values.astype(np.float32)

X_mean = X_raw.mean(axis=0, keepdims=True)
X_std = X_raw.std(axis=0, keepdims=True) + 1e-8
X_norm = (X_raw - X_mean) / X_std

X_tensor = torch.from_numpy(X_norm)
y_price_tensor = torch.from_numpy(y_price)

print(f"Feature tensor shape: {X_tensor.shape}")
print(f"Price tensor shape: {y_price_tensor.shape}")


Feature tensor shape: torch.Size([2725, 6])
Price tensor shape: torch.Size([2725, 1])


In [87]:
# 5. Dataset and DataLoaders
class HestonDataset(Dataset):
    def __init__(self, X, price):
        self.X = X
        self.price = price

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        return self.X[idx], self.price[idx]

full_dataset = HestonDataset(X_tensor, y_price_tensor)
dataset_size = len(full_dataset)
val_size = max(1, int(0.2 * dataset_size)) if dataset_size >= 5 else max(1, dataset_size // 2)
train_size = dataset_size - val_size
if train_size <= 0:
    train_size, val_size = dataset_size, 0

if val_size > 0:
    train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])
else:
    train_dataset, val_dataset = full_dataset, None

batch_size = min(64, max(1, train_size)) if train_size > 0 else 1
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False) if val_dataset else None

print(f"Train size: {train_size}, Val size: {val_size}")
print(f"Batch size: {batch_size}")


Train size: 2180, Val size: 545
Batch size: 64


In [88]:
# 6. Neural network mapping inputs -> Heston parameters (kappa, theta, sigma, rho, v0)
class HestonParamNet(nn.Module):
    def __init__(self, input_dim, hidden_dim=128):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 5),
        )

    def forward(self, x):
        raw_params = self.net(x)
        kappa_raw, theta_raw, sigma_raw, rho_raw, v0_raw = torch.unbind(raw_params, dim=-1)
        kappa = nn.functional.softplus(kappa_raw) + 1e-4
        theta = nn.functional.softplus(theta_raw) + 1e-4
        sigma = nn.functional.softplus(sigma_raw) + 1e-4
        rho = torch.tanh(rho_raw)
        v0 = nn.functional.softplus(v0_raw) + 1e-6
        return torch.stack([kappa, theta, sigma, rho, v0], dim=-1)

input_dim = X_tensor.shape[1]
model = HestonParamNet(input_dim=input_dim).to(device)
print(model)


HestonParamNet(
  (net): Sequential(
    (0): Linear(in_features=6, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=5, bias=True)
  )
)


In [89]:
# 7. Loss function (RMSE on prices) and optimizer
def rmse_loss(pred, target):
    mse = torch.mean((pred - target) ** 2)
    return torch.sqrt(mse + 1e-8)

optimizer = optim.Adam(model.parameters(), lr=1e-3)

X_mean_tensor = torch.from_numpy(X_mean).to(device)
X_std_tensor = torch.from_numpy(X_std).to(device)
column_index = {col: idx for idx, col in enumerate(input_cols)}
idx_T = column_index["T"]
idx_K = column_index["K"]
idx_S0 = column_index["S0"]
idx_r = column_index["r"]
idx_q = column_index["q"]


In [90]:
# 8. Training loop (analytic Heston pricing inside)
num_epochs = 1000
history = {"train": [], "val": []}

for epoch in range(1, num_epochs + 1):
    model.train()
    train_losses = []
    for X_batch, price_batch in train_loader:
        X_batch = X_batch.to(device)
        price_batch = price_batch.to(device).squeeze(-1)

        optimizer.zero_grad()
        params = model(X_batch)
        kappa, theta, sigma, rho, v0 = torch.unbind(params, dim=-1)

        X_batch_unnorm = X_batch * X_std_tensor + X_mean_tensor
        T_batch = X_batch_unnorm[:, idx_T]
        K_batch = X_batch_unnorm[:, idx_K]
        S0_batch = X_batch_unnorm[:, idx_S0]
        r_batch = X_batch_unnorm[:, idx_r]
        q_batch = X_batch_unnorm[:, idx_q]

        pred_price = heston_call_price_torch(
            S0_batch, K_batch, T_batch, r_batch, q_batch,
            kappa, theta, sigma, rho, v0
        )

        loss = rmse_loss(pred_price, price_batch)
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())

    train_rmse = float(np.mean(train_losses)) if train_losses else float('nan')
    history["train"].append(train_rmse)

    model.eval()
    val_rmse = float('nan')
    if val_loader is not None:
        val_losses = []
        with torch.no_grad():
            for X_batch, price_batch in val_loader:
                X_batch = X_batch.to(device)
                price_batch = price_batch.to(device).squeeze(-1)

                params = model(X_batch)
                kappa, theta, sigma, rho, v0 = torch.unbind(params, dim=-1)

                X_batch_unnorm = X_batch * X_std_tensor + X_mean_tensor
                T_batch = X_batch_unnorm[:, idx_T]
                K_batch = X_batch_unnorm[:, idx_K]
                S0_batch = X_batch_unnorm[:, idx_S0]
                r_batch = X_batch_unnorm[:, idx_r]
                q_batch = X_batch_unnorm[:, idx_q]

                pred_price = heston_call_price_torch(
                    S0_batch, K_batch, T_batch, r_batch, q_batch,
                    kappa, theta, sigma, rho, v0
                )

                loss = rmse_loss(pred_price, price_batch)
                val_losses.append(loss.item())
        val_rmse = float(np.mean(val_losses)) if val_losses else float('nan')
    history["val"].append(val_rmse)

    if epoch % 5 == 0 or epoch == 1:
        print(f"Epoch {epoch:4d} | Train RMSE (C_Heston vs C_mkt): {train_rmse:.6f} | Val RMSE: {val_rmse:.6f}")


Epoch    1 | Train RMSE (C_Heston vs C_mkt): 70509.547545 | Val RMSE: 56275.167969
Epoch    5 | Train RMSE (C_Heston vs C_mkt): 18543.738630 | Val RMSE: 20254.123589
Epoch   10 | Train RMSE (C_Heston vs C_mkt): 11918.041309 | Val RMSE: 14523.875977
Epoch   15 | Train RMSE (C_Heston vs C_mkt): 9403.926060 | Val RMSE: 11471.277724
Epoch   20 | Train RMSE (C_Heston vs C_mkt): 10027.888149 | Val RMSE: 11043.816026
Epoch   25 | Train RMSE (C_Heston vs C_mkt): 6240.623807 | Val RMSE: 7626.850939
Epoch   30 | Train RMSE (C_Heston vs C_mkt): 5578.779164 | Val RMSE: 6656.103841
Epoch   35 | Train RMSE (C_Heston vs C_mkt): 4872.521756 | Val RMSE: 6005.335097
Epoch   40 | Train RMSE (C_Heston vs C_mkt): 4697.660393 | Val RMSE: 5705.691732
Epoch   45 | Train RMSE (C_Heston vs C_mkt): 4443.510812 | Val RMSE: 5283.103380
Epoch   50 | Train RMSE (C_Heston vs C_mkt): 4159.241985 | Val RMSE: 5551.345052
Epoch   55 | Train RMSE (C_Heston vs C_mkt): 4047.675642 | Val RMSE: 4830.551975
Epoch   60 | Train 

In [91]:
# 9. Evaluation and inspection of learned parameters
model.eval()
with torch.no_grad():
    X_full = X_tensor.to(device)
    price_full = y_price_tensor.squeeze(-1).to(device)
    params_full = model(X_full)
    kappa_full, theta_full, sigma_full, rho_full, v0_full = torch.unbind(params_full, dim=-1)

    X_full_unnorm = X_full * X_std_tensor + X_mean_tensor
    T_full = X_full_unnorm[:, idx_T]
    K_full = X_full_unnorm[:, idx_K]
    S0_full = X_full_unnorm[:, idx_S0]
    r_full = X_full_unnorm[:, idx_r]
    q_full = X_full_unnorm[:, idx_q]

    price_pred_full = heston_call_price_torch(
        S0_full, K_full, T_full, r_full, q_full,
        kappa_full, theta_full, sigma_full, rho_full, v0_full
    )

    final_rmse = math.sqrt(torch.mean((price_pred_full - price_full) ** 2).item())

print(f"Final RMSE on full dataset (C_Heston vs C_mkt): {final_rmse:.6f}")

params_np = {
    'kappa': kappa_full.cpu().numpy(),
    'theta': theta_full.cpu().numpy(),
    'sigma': sigma_full.cpu().numpy(),
    'rho': rho_full.cpu().numpy(),
    'v0': v0_full.cpu().numpy(),
}
for name, values in params_np.items():
    print(f"{name:>5}: mean={values.mean():.6f}, std={values.std():.6f}, min={values.min():.6f}, max={values.max():.6f}")

comparison = df.copy()
comparison["price_heston_nn"] = price_pred_full.cpu().numpy()
comparison["abs_error"] = np.abs(comparison["price_heston_nn"] - comparison["C_mkt"])
comparison["rel_error"] = comparison["abs_error"] / np.maximum(comparison["C_mkt"], 1e-8)

display_cols = ["S0", "K", "T", "C_mkt", "price_heston_nn", "abs_error", "rel_error"]
display(comparison[display_cols].head())

sample = comparison.sample(min(5, len(comparison)), random_state=SEED)
display(sample[display_cols])


Final RMSE on full dataset (C_Heston vs C_mkt): 1025.060181
kappa: mean=66.349487, std=59.386948, min=0.653494, max=248.257767
theta: mean=1.652600, std=3.379706, min=0.000101, max=17.967478
sigma: mean=48.314754, std=42.134430, min=1.315583, max=152.131012
  rho: mean=0.999979, std=0.000054, min=0.999699, max=1.000000
   v0: mean=34.225864, std=32.846226, min=0.476586, max=167.515839


Unnamed: 0,S0,K,T,C_mkt,price_heston_nn,abs_error,rel_error
0,667.840027,570.0,0.006484,93.78,24893.527344,24799.747344,264.446016
1,667.840027,575.0,0.006484,102.28,22456.355469,22354.075469,218.55764
2,667.840027,580.0,0.006484,85.62,20446.423828,20360.803828,237.804296
3,667.840027,590.0,0.006484,75.65,16923.255859,16847.605859,222.704638
4,667.840027,600.0,0.006484,76.35,13198.65332,13122.30332,171.870377


Unnamed: 0,S0,K,T,C_mkt,price_heston_nn,abs_error,rel_error
2228,667.840027,675.0,0.8421,48.88,268.829071,219.949071,4.499776
1344,667.840027,560.0,0.209224,112.96,1068.023071,955.063071,8.454878
1418,667.840027,648.0,0.209224,35.29,277.32016,242.03016,6.858321
1833,667.840027,576.0,0.373607,111.2,537.19104,425.99104,3.830855
605,667.840027,681.0,0.036621,3.36,-389.206329,392.566329,116.835217
