In [1]:
# Deep Calibration of Heston Model with Neural Network Correction
import math
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import matplotlib.pyplot as plt

print("PyTorch version:", torch.__version__)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
plt.style.use("seaborn-v0_8")


PyTorch version: 2.5.1
Using device: cuda


In [2]:
# Stage 1: Data loading and preprocessing
CSV_PATH = "../Heston_call.csv"  # set to your dataset path

df = pd.read_csv(CSV_PATH)
print("Loaded shape:", df.shape)
display(df.head())

required_cols = ["S0", "K", "T", "C_mkt"]
for col in required_cols:
    if col not in df.columns:
        raise ValueError(f"Missing required column: {col}")

if "r" not in df.columns:
    df["r"] = 0.02
if "q" not in df.columns:
    df["q"] = 0.0

df = df.dropna(subset=["S0", "K", "T", "C_mkt", "r", "q"]).reset_index(drop=True)
df = df[df["T"] > 0.0].reset_index(drop=True)

df["log_moneyness"] = np.log(df["K"] / df["S0"])

features = ["log_moneyness", "T", "r", "q"]
X_raw = df[features].values.astype(np.float32)
y_prices = df["C_mkt"].values.astype(np.float32)

X_mean = X_raw.mean(axis=0, keepdims=True)
X_std = X_raw.std(axis=0, keepdims=True) + 1e-8
X_norm = (X_raw - X_mean) / X_std

X_tensor = torch.from_numpy(X_norm).to(torch.float32)
T_tensor = torch.from_numpy(df["T"].values.astype(np.float64))
K_tensor = torch.from_numpy(df["K"].values.astype(np.float64))
S0_tensor = torch.from_numpy(df["S0"].values.astype(np.float64))
r_tensor = torch.from_numpy(df["r"].values.astype(np.float64))
q_tensor = torch.from_numpy(df["q"].values.astype(np.float64))
y_tensor = torch.from_numpy(y_prices.astype(np.float64))

print(f"Dataset size after cleaning: {len(df)}")


FileNotFoundError: [Errno 2] No such file or directory: '../Heston_call.csv'

In [None]:
# Stage 1: Heston characteristic function and semi-analytical pricing
INTEGRAL_U_MAX = 60.0
INTEGRAL_STEPS = 600
DTYPE = torch.float64


def heston_cf(u, T, S0, r, q, kappa, theta, sigma, rho, v0):
    i = torch.complex(torch.tensor(0.0, dtype=DTYPE, device=u.device),
                      torch.tensor(1.0, dtype=DTYPE, device=u.device))
    x0 = torch.log(S0)
    a = kappa * theta
    b = kappa - rho * sigma * i * u
    d = torch.sqrt(b * b + (sigma ** 2) * (u * u + i * u))
    g = (b - d) / (b + d)
    exp_neg_dT = torch.exp(-d * T)
    one_minus_g_exp = 1.0 - g * exp_neg_dT
    one_minus_g = 1.0 - g
    log_term = torch.log(one_minus_g_exp / one_minus_g)
    C = (r - q) * i * u * T + (a / (sigma ** 2)) * ((b - d) * T - 2.0 * log_term)
    D = ((b - d) / (sigma ** 2)) * ((1.0 - exp_neg_dT) / one_minus_g_exp)
    return torch.exp(C + D * v0 + i * u * x0)


def heston_call_price(S0, K, T, r, q, kappa, theta, sigma, rho, v0,
                      u_max=INTEGRAL_U_MAX, n_u=INTEGRAL_STEPS):
    device = S0.device
    u = torch.linspace(1e-6, u_max, n_u, device=device, dtype=DTYPE)
    u = u.unsqueeze(-1)  # (n_u, 1)
    cf1 = heston_cf(u - 1j, T, S0, r, q, kappa, theta, sigma, rho, v0)
    cf2 = heston_cf(u, T, S0, r, q, kappa, theta, sigma, rho, v0)
    numer1 = torch.exp(-1j * u * torch.log(K)) * cf1
    numer2 = torch.exp(-1j * u * torch.log(K)) * cf2
    integrand1 = (numer1 / (1j * u)).real
    integrand2 = (numer2 / (1j * u)).real
    du = u[1] - u[0]
    P1 = 0.5 + (du / math.pi) * integrand1.sum(dim=0)
    P2 = 0.5 + (du / math.pi) * integrand2.sum(dim=0)
    discount_stock = torch.exp(-q * T)
    discount_strike = torch.exp(-r * T)
    call = S0 * discount_stock * P1 - K * discount_strike * P2
    return call.real


In [None]:
# Stage 1: Global calibration via learnable parameters
class HestonCalibrator(nn.Module):
    def __init__(self):
        super().__init__()
        self.kappa_raw = nn.Parameter(torch.tensor(0.5, dtype=DTYPE))
        self.theta_raw = nn.Parameter(torch.tensor(0.04, dtype=DTYPE))
        self.sigma_raw = nn.Parameter(torch.tensor(0.3, dtype=DTYPE))
        self.rho_raw = nn.Parameter(torch.tensor(-0.1, dtype=DTYPE))
        self.v0_raw = nn.Parameter(torch.tensor(0.04, dtype=DTYPE))

    def forward(self):
        kappa = torch.nn.functional.softplus(self.kappa_raw)
        theta = torch.nn.functional.softplus(self.theta_raw)
        sigma = torch.nn.functional.softplus(self.sigma_raw)
        rho = torch.tanh(self.rho_raw)
        v0 = torch.nn.functional.softplus(self.v0_raw)
        return kappa, theta, sigma, rho, v0


def rmse_loss(pred, target):
    return torch.sqrt(torch.mean((pred - target) ** 2) + 1e-8)

calibrator = HestonCalibrator().to(device)
optimizer = optim.Adam(calibrator.parameters(), lr=5e-2)
num_epochs = 200

T_batch = T_tensor.to(device)
K_batch = K_tensor.to(device)
S0_batch = S0_tensor.to(device)
r_batch = r_tensor.to(device)
q_batch = q_tensor.to(device)
y_batch = y_tensor.to(device)

for epoch in range(1, num_epochs + 1):
    optimizer.zero_grad()
    params = calibrator()
    price_pred = heston_call_price(S0_batch, K_batch, T_batch, r_batch, q_batch, *params)
    loss = rmse_loss(price_pred, y_batch)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0 or epoch == 1:
        print(f"Epoch {epoch:03d} | Calibration RMSE: {loss.item():.6f}")

kappa_star, theta_star, sigma_star, rho_star, v0_star = [p.detach().clone() for p in calibrator()]
print("Calibrated parameters:")
print(f"kappa={kappa_star.item():.4f}, theta={theta_star.item():.4f}, sigma={sigma_star.item():.4f}, rho={rho_star.item():.4f}, v0={v0_star.item():.4f}")


Epoch 001 | Calibration RMSE: 86447.610331
Epoch 010 | Calibration RMSE: 78579.250728
Epoch 020 | Calibration RMSE: 69684.098061
Epoch 030 | Calibration RMSE: 60744.664967
Epoch 040 | Calibration RMSE: 53399.520696
Epoch 050 | Calibration RMSE: 48090.888596
Epoch 060 | Calibration RMSE: 44658.174407
Epoch 070 | Calibration RMSE: 42277.060922
Epoch 080 | Calibration RMSE: 40453.579204
Epoch 090 | Calibration RMSE: 39038.485182
Epoch 100 | Calibration RMSE: 37880.988210
Epoch 110 | Calibration RMSE: 36910.970931
Epoch 120 | Calibration RMSE: 36080.710106
Epoch 130 | Calibration RMSE: 35357.997290
Epoch 140 | Calibration RMSE: 34720.454363
Epoch 150 | Calibration RMSE: 34151.679191
Epoch 160 | Calibration RMSE: 33639.514486
Epoch 170 | Calibration RMSE: 33174.620438
Epoch 180 | Calibration RMSE: 32749.705185
Epoch 190 | Calibration RMSE: 32358.963965
Epoch 200 | Calibration RMSE: 31997.712869
Calibrated parameters:
kappa=7.3171, theta=0.0210, sigma=6.8394, rho=0.9955, v0=4.8442


In [None]:
# Stage 2: Baseline Heston prices and residuals
with torch.no_grad():
    C_heston = heston_call_price(S0_batch, K_batch, T_batch, r_batch, q_batch,
                                 kappa_star, theta_star, sigma_star, rho_star, v0_star)

C_heston_np = C_heston.cpu().numpy()
residual = y_prices - C_heston_np

df["C_heston"] = C_heston_np
df["residual"] = residual

baseline_rmse = np.sqrt(np.mean((C_heston_np - y_prices) ** 2))
print(f"Baseline Heston RMSE: {baseline_rmse:.6f}")


Baseline Heston RMSE: 31963.055934


In [None]:
# Stage 3: Neural network learning the correction Î”C
class ResidualDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

X_residual = torch.from_numpy(X_norm).to(torch.float32)
y_residual = torch.from_numpy(residual.astype(np.float32)).unsqueeze(-1)

full_dataset = ResidualDataset(X_residual, y_residual)
dataset_size = len(full_dataset)
val_size = max(1, int(0.2 * dataset_size))
train_size = dataset_size - val_size

train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])
batch_size = min(64, train_size)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

class ResidualNet(nn.Module):
    def __init__(self, input_dim=4):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
        )

    def forward(self, x):
        return self.net(x)

residual_model = ResidualNet(input_dim=X_residual.shape[1]).to(device)
optimizer_res = optim.AdamW(residual_model.parameters(), lr=3e-4, weight_decay=1e-4)
num_epochs_nn = 5000

for epoch in range(1, num_epochs_nn + 1):
    residual_model.train()
    train_losses = []
    for batch_X, batch_y in train_loader:
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)
        optimizer_res.zero_grad()
        preds = residual_model(batch_X)
        loss = rmse_loss(preds, batch_y)
        loss.backward()
        optimizer_res.step()
        train_losses.append(loss.item())
    train_rmse = float(np.mean(train_losses))

    residual_model.eval()
    val_losses = []
    with torch.no_grad():
        for batch_X, batch_y in val_loader:
            batch_X = batch_X.to(device)
            batch_y = batch_y.to(device)
            preds = residual_model(batch_X)
            val_losses.append(rmse_loss(preds, batch_y).item())
    val_rmse = float(np.mean(val_losses))

    if epoch % 20 == 0 or epoch == 1:
        print(f"Epoch {epoch:03d} | Train RMSE: {train_rmse:.6f} | Val RMSE: {val_rmse:.6f}")


Epoch 001 | Train RMSE: 30854.336719 | Val RMSE: 32413.561415
Epoch 020 | Train RMSE: 24599.669029 | Val RMSE: 25173.185547
Epoch 040 | Train RMSE: 16510.442215 | Val RMSE: 17226.534397
Epoch 060 | Train RMSE: 15446.052902 | Val RMSE: 16444.409071
Epoch 080 | Train RMSE: 14940.844699 | Val RMSE: 15677.548503
Epoch 100 | Train RMSE: 13782.846624 | Val RMSE: 14971.727539
Epoch 120 | Train RMSE: 13150.771819 | Val RMSE: 14112.367296
Epoch 140 | Train RMSE: 12517.972545 | Val RMSE: 13436.052626
Epoch 160 | Train RMSE: 11868.493471 | Val RMSE: 12924.819010
Epoch 180 | Train RMSE: 11444.563588 | Val RMSE: 12486.921875
Epoch 200 | Train RMSE: 11136.032059 | Val RMSE: 12078.605035
Epoch 220 | Train RMSE: 10759.199477 | Val RMSE: 11704.506402
Epoch 240 | Train RMSE: 10429.088184 | Val RMSE: 11342.123589
Epoch 260 | Train RMSE: 10643.606027 | Val RMSE: 10995.494249
Epoch 280 | Train RMSE: 9859.470006 | Val RMSE: 10639.432617
Epoch 300 | Train RMSE: 9642.437612 | Val RMSE: 10271.061632
Epoch 320 

In [None]:
# Stage 4: Final pricing performance
residual_model.eval()
with torch.no_grad():
    X_residual_device = X_residual.to(device)
    deltaC_pred = residual_model(X_residual_device).cpu().numpy().flatten()

C_final = df["C_heston"].values + deltaC_pred
final_rmse = np.sqrt(np.mean((C_final - y_prices) ** 2))
improvement = baseline_rmse - final_rmse

print(f"Baseline Heston RMSE: {baseline_rmse:.6f}")
print(f"NN-corrected RMSE: {final_rmse:.6f}")
print(f"Improvement: {improvement:.6f}")

df["deltaC_pred"] = deltaC_pred
df["C_final"] = C_final

cols_to_show = ["K", "T", "C_mkt", "C_heston", "residual", "deltaC_pred", "C_final"]
display(df[cols_to_show].head())


Baseline Heston RMSE: 31963.055934
NN-corrected RMSE: 481.299399
Improvement: 31481.756535


Unnamed: 0,K,T,C_mkt,C_heston,residual,deltaC_pred,C_final
0,570.0,0.006484,93.78,150500.316295,-150406.536296,-138671.6875,11828.628795
1,575.0,0.006484,102.28,144125.83041,-144023.550411,-133091.265625,11034.564785
2,580.0,0.006484,85.62,137533.113171,-137447.493169,-126942.25,10590.863171
3,590.0,0.006484,75.65,123758.350983,-123682.700981,-114591.265625,9167.085358
4,600.0,0.006484,76.35,109314.236642,-109237.886644,-102600.625,6713.611642
