In [1]:
# Deep calibration notebook: Heston global fit + NN correction (Option C)
import math
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split

print("PyTorch version:", torch.__version__)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)


PyTorch version: 2.5.1
Using device: cuda


<torch._C.Generator at 0x7fc8e0220a50>

In [2]:
# Stage 0: Load option data and build normalized features
CSV_PATH = "../Heston_call.csv"  # adjust to your dataset

try:
    df = pd.read_csv(CSV_PATH)
except FileNotFoundError as exc:
    raise FileNotFoundError(f"CSV not found at {CSV_PATH}. Update the path.") from exc

required = ["S0", "K", "T", "C_mkt"]
for col in required:
    if col not in df.columns:
        raise ValueError(f"Missing required column: {col}")

if "r" not in df.columns:
    df["r"] = 0.02
if "q" not in df.columns:
    df["q"] = 0.0

df = df.dropna(subset=["S0", "K", "T", "C_mkt", "r", "q"]).reset_index(drop=True)
df = df[df["T"] > 0].reset_index(drop=True)

print(f"Dataset cleaned: {len(df)} rows")
display(df.head())

print(df[["S0", "K", "T", "r", "q", "C_mkt"]].describe())

# Create model-friendly features
df["log_moneyness"] = np.log(df["K"] / df["S0"])
df["C_norm"] = df["C_mkt"] / df["S0"]

feature_cols = ["log_moneyness", "T", "r", "q"]
X = df[feature_cols].values.astype(np.float32)
y_price = df["C_mkt"].values.astype(np.float32)

X_mean = X.mean(axis=0, keepdims=True)
X_std = X.std(axis=0, keepdims=True) + 1e-8
X_norm = (X - X_mean) / X_std

X_tensor = torch.from_numpy(X_norm).to(torch.float32).to(device)
y_price_tensor = torch.from_numpy(y_price).to(torch.float32).to(device)

S0_all = torch.from_numpy(df["S0"].values.astype(np.float32)).to(device)
K_all = torch.from_numpy(df["K"].values.astype(np.float32)).to(device)
T_all = torch.from_numpy(df["T"].values.astype(np.float32)).to(device)
r_all = torch.from_numpy(df["r"].values.astype(np.float32)).to(device)
q_all = torch.from_numpy(df["q"].values.astype(np.float32)).to(device)


Dataset cleaned: 2713 rows


Unnamed: 0,S0,K,C_mkt,T,iv,r,q
0,672.88,570.0,93.78,0.0064,0.8926,0.02,0.0
1,672.88,575.0,102.28,0.0064,0.8535,0.02,0.0
2,672.88,580.0,85.62,0.0064,0.8179,0.02,0.0
3,672.88,590.0,75.65,0.0064,0.7671,0.02,0.0
4,672.88,600.0,71.34,0.0064,0.687,0.02,0.0


            S0            K            T             r       q        C_mkt
count  2713.00  2713.000000  2713.000000  2.713000e+03  2713.0  2713.000000
mean    672.88   660.784003     0.408908  2.000000e-02     0.0    49.291489
std       0.00    68.803961     0.528502  3.470087e-18     0.0    48.306460
min     672.88   523.000000     0.006400  2.000000e-02     0.0     0.010000
25%     672.88   610.000000     0.055700  2.000000e-02     0.0     5.350000
50%     672.88   664.000000     0.209100  2.000000e-02     0.0    33.920000
75%     672.88   705.000000     0.589900  2.000000e-02     0.0    84.040000
max     672.88   820.000000     2.184400  2.000000e-02     0.0   200.050000


In [3]:
# Stage 1: Heston characteristic function and pricing utilities
DTYPE = torch.float64
U_MAX = 60.0
N_U = 600

def _to_dtype(x):
    return x.to(DTYPE)

def heston_cf(u, T, S0, r, q, kappa, theta, sigma, rho, v0):
    i = torch.complex(torch.tensor(0.0, dtype=DTYPE, device=u.device),
                      torch.tensor(1.0, dtype=DTYPE, device=u.device))
    x0 = torch.log(S0)
    a = kappa * theta
    b = kappa - rho * sigma * i * u
    d = torch.sqrt(b * b + (sigma ** 2) * (u * u + i * u))
    g = (b - d) / (b + d)
    exp_neg_dT = torch.exp(-d * T)
    one_minus_g_exp = 1.0 - g * exp_neg_dT
    one_minus_g = 1.0 - g
    C = (r - q) * i * u * T + (a / (sigma ** 2)) * ((b - d) * T - 2.0 * torch.log(one_minus_g_exp / one_minus_g))
    D = ((b - d) / (sigma ** 2)) * ((1.0 - exp_neg_dT) / one_minus_g_exp)
    return torch.exp(C + D * v0 + i * u * x0)

def heston_call_price_batch(S0, K, T, r, q, kappa, theta, sigma, rho, v0,
                             u_max=U_MAX, n_u=N_U):
    S0 = _to_dtype(S0)
    K = _to_dtype(K)
    T = _to_dtype(T)
    r = _to_dtype(r)
    q = _to_dtype(q)
    kappa = _to_dtype(kappa)
    theta = _to_dtype(theta)
    sigma = _to_dtype(sigma)
    rho = _to_dtype(rho)
    v0 = _to_dtype(v0)

    u = torch.linspace(1e-6, u_max, n_u, device=S0.device, dtype=DTYPE).unsqueeze(1)
    cf1 = heston_cf(u - 1j, T.unsqueeze(0), S0.unsqueeze(0), r.unsqueeze(0), q.unsqueeze(0),
                    kappa, theta, sigma, rho, v0)
    cf2 = heston_cf(u, T.unsqueeze(0), S0.unsqueeze(0), r.unsqueeze(0), q.unsqueeze(0),
                    kappa, theta, sigma, rho, v0)
    numer1 = torch.exp(-1j * u * torch.log(K.unsqueeze(0))) * cf1
    numer2 = torch.exp(-1j * u * torch.log(K.unsqueeze(0))) * cf2
    integrand1 = (numer1 / (1j * u)).real
    integrand2 = (numer2 / (1j * u)).real
    du = u[1] - u[0]
    P1 = 0.5 + (du / math.pi) * integrand1.sum(dim=0)
    P2 = 0.5 + (du / math.pi) * integrand2.sum(dim=0)
    discount_stock = torch.exp(-q * T)
    discount_strike = torch.exp(-r * T)
    price = S0 * discount_stock * P1 - K * discount_strike * P2
    return price.to(torch.float32)


In [None]:
# Stage 1: Global Heston calibration via RMSE minimization
max_calib_points = 500
idx = np.random.choice(len(df), size=min(max_calib_points, len(df)), replace=False)

S0_cal = S0_all[idx]
K_cal = K_all[idx]
T_cal = T_all[idx]
r_cal = r_all[idx]
q_cal = q_all[idx]
C_mkt_cal = y_price_tensor[idx]

kappa_raw = nn.Parameter(torch.tensor(1.5, device=device, dtype=torch.float32))
theta_raw = nn.Parameter(torch.tensor(0.04, device=device, dtype=torch.float32))
sigma_raw = nn.Parameter(torch.tensor(0.3, device=device, dtype=torch.float32))
rho_raw = nn.Parameter(torch.tensor(-0.3, device=device, dtype=torch.float32))
v0_raw = nn.Parameter(torch.tensor(0.04, device=device, dtype=torch.float32))

params = [kappa_raw, theta_raw, sigma_raw, rho_raw, v0_raw]
optimizer_calib = optim.Adam(params, lr=5e-2)


def project_params():
    kappa = torch.nn.functional.softplus(kappa_raw) + 1e-5
    theta = torch.nn.functional.softplus(theta_raw) + 1e-5
    sigma = torch.nn.functional.softplus(sigma_raw) + 1e-5
    rho = torch.tanh(rho_raw)
    v0 = torch.nn.functional.softplus(v0_raw) + 1e-6
    return kappa, theta, sigma, rho, v0


def calibration_step():
    params_proj = project_params()
    pred = heston_call_price_batch(S0_cal, K_cal, T_cal, r_cal, q_cal, *params_proj)
    loss = torch.sqrt(torch.mean((pred - C_mkt_cal) ** 2) + 1e-12)
    return loss, params_proj

num_epochs_calib = 5000
for epoch in range(1, num_epochs_calib + 1):
    optimizer_calib.zero_grad()
    loss, projected = calibration_step()
    loss.backward()
    optimizer_calib.step()
    if epoch % 10 == 0 or epoch == 1:
        kappa, theta, sigma, rho, v0 = projected
        print(f"[Calib] Epoch {epoch:03d} | RMSE={loss.item():.5f} | "
              f"kappa={kappa.item():.4f}, theta={theta.item():.4f}, sigma={sigma.item():.4f}, "
              f"rho={rho.item():.4f}, v0={v0.item():.4f}")

with torch.no_grad():
    kappa_star, theta_star, sigma_star, rho_star, v0_star = project_params()

print("Calibrated parameters (θ*):")
print(f"kappa={kappa_star.item():.6f}")
print(f"theta={theta_star.item():.6f}")
print(f"sigma={sigma_star.item():.6f}")
print(f"rho  ={rho_star.item():.6f}")
print(f"v0   ={v0_star.item():.6f}")


[Calib] Epoch 001 | RMSE=89971.53125 | kappa=1.7014, theta=0.7134, sigma=0.8544, rho=-0.2913, v0=0.7133
[Calib] Epoch 010 | RMSE=81952.32031 | kappa=2.0843, theta=0.5078, sigma=0.6602, rho=0.1470, v0=0.9630
[Calib] Epoch 020 | RMSE=74717.96094 | kappa=2.5644, theta=0.3341, sigma=0.7720, rho=0.5568, v0=1.2485
[Calib] Epoch 030 | RMSE=67161.14844 | kappa=3.0829, theta=0.2143, sigma=1.1115, rho=0.7930, v0=1.4715
[Calib] Epoch 040 | RMSE=59602.70703 | kappa=3.5038, theta=0.1385, sigma=1.5912, rho=0.9018, v0=1.6163
[Calib] Epoch 050 | RMSE=52976.27734 | kappa=3.5710, theta=0.0947, sigma=2.1555, rho=0.9484, v0=1.7598
[Calib] Epoch 060 | RMSE=47779.66406 | kappa=3.2731, theta=0.0715, sigma=2.7269, rho=0.9689, v0=1.9805
[Calib] Epoch 070 | RMSE=44235.32812 | kappa=2.9545, theta=0.0592, sigma=3.2547, rho=0.9787, v0=2.2284
[Calib] Epoch 080 | RMSE=42131.41406 | kappa=3.1794, theta=0.0512, sigma=3.7021, rho=0.9838, v0=2.3949
[Calib] Epoch 090 | RMSE=40502.44141 | kappa=3.7005, theta=0.0458, sigma

In [5]:
# Stage 2: Baseline Heston surface and residuals
with torch.no_grad():
    C_heston_all = heston_call_price_batch(S0_all, K_all, T_all, r_all, q_all,
                                           kappa_star, theta_star, sigma_star, rho_star, v0_star)

residual_all = y_price_tensor - C_heston_all
baseline_rmse = torch.sqrt(torch.mean((C_heston_all - y_price_tensor) ** 2)).item()
print(f"Baseline Heston RMSE: {baseline_rmse:.6f}")

df["C_heston"] = C_heston_all.cpu().numpy()
df["residual"] = residual_all.cpu().numpy()


Baseline Heston RMSE: 24263.408203


In [6]:
# Stage 3: Neural network learning ΔC
class ResidualDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

residual_dataset = ResidualDataset(X_tensor, residual_all.unsqueeze(-1))
n_total = len(residual_dataset)
n_val = max(1, int(0.2 * n_total))
n_train = n_total - n_val

train_dataset, val_dataset = random_split(residual_dataset, [n_train, n_val])
batch_size = min(64, n_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


class ResidualNet(nn.Module):
    def __init__(self, input_dim=4):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
        )

    def forward(self, x):
        return self.net(x)

model = ResidualNet(input_dim=X_tensor.shape[1]).to(device)
optimizer_nn = optim.AdamW(model.parameters(), lr=3e-4, weight_decay=1e-4)


def rmse(pred, target):
    return torch.sqrt(torch.mean((pred - target) ** 2) + 1e-12)

num_epochs_nn = 5000
for epoch in range(1, num_epochs_nn + 1):
    model.train()
    train_losses = []
    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        optimizer_nn.zero_grad()
        preds = model(X_batch)
        loss = rmse(preds, y_batch)
        loss.backward()
        optimizer_nn.step()
        train_losses.append(loss.item())
    train_rmse = float(np.mean(train_losses)) if train_losses else float('nan')

    model.eval()
    val_losses = []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            preds = model(X_batch)
            val_losses.append(rmse(preds, y_batch).item())
    val_rmse = float(np.mean(val_losses)) if val_losses else float('nan')

    if epoch % 20 == 0 or epoch == 1:
        print(f"[NN] Epoch {epoch:03d} | Train RMSE(ΔC)={train_rmse:.6f} | Val RMSE(ΔC)={val_rmse:.6f}")


[NN] Epoch 001 | Train RMSE(ΔC)=24329.565717 | Val RMSE(ΔC)=22795.820312
[NN] Epoch 020 | Train RMSE(ΔC)=17682.176499 | Val RMSE(ΔC)=15744.543077
[NN] Epoch 040 | Train RMSE(ΔC)=11825.509105 | Val RMSE(ΔC)=11140.764431
[NN] Epoch 060 | Train RMSE(ΔC)=11226.797679 | Val RMSE(ΔC)=10576.873481
[NN] Epoch 080 | Train RMSE(ΔC)=10742.675465 | Val RMSE(ΔC)=10161.130534
[NN] Epoch 100 | Train RMSE(ΔC)=10382.998234 | Val RMSE(ΔC)=9775.043566
[NN] Epoch 120 | Train RMSE(ΔC)=10055.048053 | Val RMSE(ΔC)=9391.939833
[NN] Epoch 140 | Train RMSE(ΔC)=9575.043443 | Val RMSE(ΔC)=8973.908637
[NN] Epoch 160 | Train RMSE(ΔC)=9120.432071 | Val RMSE(ΔC)=8528.063802
[NN] Epoch 180 | Train RMSE(ΔC)=8690.441665 | Val RMSE(ΔC)=8087.319607
[NN] Epoch 200 | Train RMSE(ΔC)=8240.538215 | Val RMSE(ΔC)=7618.491699
[NN] Epoch 220 | Train RMSE(ΔC)=7770.850586 | Val RMSE(ΔC)=7114.085286
[NN] Epoch 240 | Train RMSE(ΔC)=7211.829877 | Val RMSE(ΔC)=6617.700738
[NN] Epoch 260 | Train RMSE(ΔC)=6821.890108 | Val RMSE(ΔC)=6102.3

KeyboardInterrupt: 

In [None]:
# Stage 4: Final performance with correction
model.eval()
with torch.no_grad():
    deltaC_pred = model(X_tensor).squeeze(-1)
    C_final = C_heston_all + deltaC_pred

rmse_baseline = torch.sqrt(torch.mean((C_heston_all - y_price_tensor) ** 2)).item()
rmse_final = torch.sqrt(torch.mean((C_final - y_price_tensor) ** 2)).item()
print(f"Baseline Heston RMSE : {rmse_baseline:.6f}")
print(f"Deep calibration RMSE: {rmse_final:.6f}")
print(f"Improvement          : {rmse_baseline - rmse_final:.6f}")

(df.assign(deltaC_pred=deltaC_pred.cpu().numpy(),
           C_final=C_final.cpu().numpy())
   [["K", "T", "C_mkt", "C_heston", "residual", "deltaC_pred", "C_final"]]
   .head())


Baseline Heston RMSE : 41136.265625
Deep calibration RMSE: 13826.534180
Improvement          : 27309.731445


Unnamed: 0,K,T,C_mkt,C_heston,residual,deltaC_pred,C_final
0,570.0,0.006484,93.78,183591.109375,-183497.328125,-92152.25,91438.859375
1,575.0,0.006484,102.28,177585.1875,-177482.90625,-89319.75,88265.4375
2,580.0,0.006484,85.62,171085.859375,-171000.234375,-86452.34375,84633.515625
3,590.0,0.006484,75.65,156645.140625,-156569.484375,-80597.265625,76047.875
4,600.0,0.006484,76.35,140397.390625,-140321.046875,-74776.679688,65620.710938
