## Heston Neural Network Calibration from `options_data.csv`

Ce notebook étend la calibration Heston classique (optimisation directe de `u`) en ajoutant :

- la génération d'un jeu de données synthétique de prix d'options via le modèle de Heston,
- l'entraînement d'un réseau de neurones qui apprend à inverser Heston (prix → paramètres),
- l'application de ce réseau sur `options_data.csv` pour obtenir une estimation rapide des paramètres.

L'objectif est de réduire le RMSE sur les paramètres calibrés et d'accélérer la calibration sur de gros jeux de données.


In [7]:
import math
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

# Import du module Heston torch
import sys
root = Path.cwd()  # racine du projet
sys.path.append(str(root / "Heston" / "NN"))

from heston_torch import HestonParams, carr_madan_call_torch

# Configuration numérique
torch.set_default_dtype(torch.float64)
device = torch.device("cpu")

# Chargement des données de marché
CSV_PATH = root / "option_data" / "options_data.csv"
df = pd.read_csv(CSV_PATH)
required_cols = {"S0", "K", "T", "C_mkt"}
missing = required_cols - set(df.columns)
if missing:
    raise ValueError(f"Missing columns in options_data.csv: {missing}")

# On ne garde que les colonnes nécessaires et on supprime les NaN
df = df[list(required_cols)].dropna().copy()

n_total = len(df)
max_points = 300  # taille de grille utilisée pour la calibration et l'entraînement NN
if n_total > max_points:
    df = df.sort_values("T")
    idx = np.linspace(0, n_total - 1, max_points, dtype=int)
    df = df.iloc[idx]

df = df.reset_index(drop=True)
print(f"Utilisation de {len(df)} quotes (sur {n_total}) pour la grille de calibration / NN.")
df.head()


Utilisation de 300 quotes (sur 895) pour la grille de calibration / NN.


Unnamed: 0,K,C_mkt,S0,T
0,570.0,93.78,671.929993,0.004065
1,580.0,85.62,671.929993,0.004065
2,615.0,58.22,671.929993,0.004065
3,635.0,37.0,671.929993,0.004065
4,640.0,29.92,671.929993,0.004065


In [8]:
# Préparation des tenseurs PyTorch à partir de la grille extraite de options_data.csv
S0_t = torch.tensor(df["S0"].to_numpy(), dtype=torch.float64, device=device)
K_t = torch.tensor(df["K"].to_numpy(), dtype=torch.float64, device=device)
T_t = torch.tensor(df["T"].to_numpy(), dtype=torch.float64, device=device)
C_mkt_t = torch.tensor(df["C_mkt"].to_numpy(), dtype=torch.float64, device=device)

r = 0.02  # taux sans risque constant (hypothèse)
q = 0.0   # taux de dividende (hypothèse)

print("T range: ", float(T_t.min()), "→", float(T_t.max()))


def prices_from_params(params: HestonParams) -> torch.Tensor:
    """Calcule les prix Heston sur toute la grille (S0_t, K_t, T_t) pour un jeu de paramètres donné."""
    prices = []
    for S0_i, K_i, T_i in zip(S0_t, K_t, T_t):
        price_i = carr_madan_call_torch(S0_i, r, q, T_i, params, K_i)
        prices.append(price_i)
    return torch.stack(prices)


T range:  0.0040652934562721 → 0.8396817318124365


In [9]:
# Génération de données synthétiques (paramètres Heston aléatoires → prix Heston sur la même grille)

def sample_heston_params(n_samples: int) -> torch.Tensor:
    """Échantillonne des paramètres Heston réalistes.

    Retourne un tenseur shape (n_samples, 5) avec colonnes [kappa, theta, sigma, rho, v0].
    """
    kappa = torch.empty(n_samples, dtype=torch.float64).uniform_(0.5, 5.0)
    theta = torch.empty(n_samples, dtype=torch.float64).uniform_(0.02, 0.20)
    sigma = torch.empty(n_samples, dtype=torch.float64).uniform_(0.1, 0.8)
    rho = torch.empty(n_samples, dtype=torch.float64).uniform_(-0.9, 0.0)
    v0 = torch.empty(n_samples, dtype=torch.float64).uniform_(0.01, 0.20)
    return torch.stack([kappa, theta, sigma, rho, v0], dim=1)


def generate_synthetic_dataset(n_samples: int, verbose: bool = True) -> tuple[torch.Tensor, torch.Tensor]:
    """Génère un jeu (X, y) pour entraîner le NN inverse.

    - X : prix Heston shape (n_samples, n_quotes)
    - y : paramètres Heston shape (n_samples, 5)
    """
    n_quotes = S0_t.shape[0]
    params_mat = sample_heston_params(n_samples)
    X_list: list[torch.Tensor] = []

    for i in range(n_samples):
        if verbose and (i + 1) % max(1, n_samples // 10) == 0:
            print(f"Synthetic sample {i+1}/{n_samples}")
        kappa, theta, sigma, rho, v0 = params_mat[i]
        params = HestonParams(
            kappa=kappa,
            theta=theta,
            sigma=sigma,
            rho=rho,
            v0=v0,
        )
        with torch.no_grad():
            prices = prices_from_params(params)
        X_list.append(prices)

    X = torch.stack(X_list, dim=0)  # (n_samples, n_quotes)
    y = params_mat.to(dtype=torch.float64)  # (n_samples, 5)
    return X, y


# Hyperparamètres du dataset synthétique
n_train_samples = 2000  # augmenter si temps de calcul acceptable
X_all, y_all = generate_synthetic_dataset(n_train_samples, verbose=True)

print("X_all shape:", X_all.shape)
print("y_all shape:", y_all.shape)


Synthetic sample 200/2000
Synthetic sample 400/2000
Synthetic sample 600/2000
Synthetic sample 800/2000
Synthetic sample 1000/2000
Synthetic sample 1200/2000
Synthetic sample 1400/2000
Synthetic sample 1600/2000
Synthetic sample 1800/2000
Synthetic sample 2000/2000
X_all shape: torch.Size([2000, 300])
y_all shape: torch.Size([2000, 5])


In [10]:
# Normalisation des prix (feature-wise) et split train/val
feature_mean = X_all.mean(dim=0)
feature_std = X_all.std(dim=0) + 1e-6
X_all_norm = (X_all - feature_mean) / feature_std

n_samples = X_all_norm.shape[0]
idx = torch.randperm(n_samples)
X_all_norm = X_all_norm[idx]
y_all = y_all[idx]

val_ratio = 0.2
n_val = int(val_ratio * n_samples)
X_val = X_all_norm[:n_val]
y_val = y_all[:n_val]
X_train = X_all_norm[n_val:]
y_train = y_all[n_val:]

print(f"Train samples: {X_train.shape[0]}, Val samples: {X_val.shape[0]}")

batch_size = 64
train_ds = TensorDataset(X_train, y_train)
val_ds = TensorDataset(X_val, y_val)
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False)


Train samples: 1600, Val samples: 400


In [11]:
class HestonInverseNN(nn.Module):
    def __init__(self, n_quotes: int, hidden_sizes: list[int] | None = None):
        super().__init__()
        if hidden_sizes is None:
            hidden_sizes = [256, 256, 128]
        layers: list[nn.Module] = []
        in_dim = n_quotes
        for h in hidden_sizes:
            layers.append(nn.Linear(in_dim, h))
            layers.append(nn.ReLU())
            in_dim = h
        layers.append(nn.Linear(in_dim, 5))  # [kappa, theta, sigma, rho, v0]
        self.net = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.net(x)


n_quotes = X_all.shape[1]
model = HestonInverseNN(n_quotes=n_quotes)
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

print(model)


HestonInverseNN(
  (net): Sequential(
    (0): Linear(in_features=300, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=128, bias=True)
    (5): ReLU()
    (6): Linear(in_features=128, out_features=5, bias=True)
  )
)


In [12]:
n_epochs = 50
best_val_loss = float("inf")
best_state_dict = None

for epoch in range(1, n_epochs + 1):
    model.train()
    train_loss_sum = 0.0
    n_train_batches = 0
    for xb, yb in train_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()
        train_loss_sum += loss.item()
        n_train_batches += 1

    train_loss = train_loss_sum / max(1, n_train_batches)

    model.eval()
    val_loss_sum = 0.0
    n_val_batches = 0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb = xb.to(device)
            yb = yb.to(device)
            preds = model(xb)
            loss = criterion(preds, yb)
            val_loss_sum += loss.item()
            n_val_batches += 1
    val_loss = val_loss_sum / max(1, n_val_batches)

    print(f"Epoch {epoch:03d} | train MSE={train_loss:.4e} | val MSE={val_loss:.4e}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_state_dict = model.state_dict()

if best_state_dict is not None:
    model.load_state_dict(best_state_dict)
    print(f"Loaded best model (val MSE={best_val_loss:.4e})")


Epoch 001 | train MSE=7.7197e-01 | val MSE=3.6728e-01
Epoch 002 | train MSE=3.0669e-01 | val MSE=2.5851e-01
Epoch 003 | train MSE=2.5023e-01 | val MSE=2.2279e-01
Epoch 004 | train MSE=2.1637e-01 | val MSE=1.9342e-01
Epoch 005 | train MSE=1.8882e-01 | val MSE=1.8027e-01
Epoch 006 | train MSE=1.7878e-01 | val MSE=1.4682e-01
Epoch 007 | train MSE=1.5190e-01 | val MSE=1.2591e-01
Epoch 008 | train MSE=1.2806e-01 | val MSE=1.1384e-01
Epoch 009 | train MSE=1.2115e-01 | val MSE=1.1265e-01
Epoch 010 | train MSE=1.1545e-01 | val MSE=9.9284e-02
Epoch 011 | train MSE=1.0595e-01 | val MSE=1.4751e-01
Epoch 012 | train MSE=1.0534e-01 | val MSE=8.1049e-02
Epoch 013 | train MSE=9.4557e-02 | val MSE=8.3263e-02
Epoch 014 | train MSE=7.0109e-02 | val MSE=6.6195e-02
Epoch 015 | train MSE=7.9168e-02 | val MSE=1.0535e-01
Epoch 016 | train MSE=9.7506e-02 | val MSE=9.1041e-02
Epoch 017 | train MSE=6.9552e-02 | val MSE=8.6717e-02
Epoch 018 | train MSE=7.4135e-02 | val MSE=1.3002e-01
Epoch 019 | train MSE=7.2439

In [13]:
# Application du réseau sur les quotes réelles de options_data.csv

# On reconstruit le vecteur de prix de marché sur la même grille
y_quotes = C_mkt_t.to(dtype=torch.float64)
X_market = y_quotes.unsqueeze(0)  # shape (1, n_quotes)
X_market_norm = (X_market - feature_mean) / feature_std

model.eval()
with torch.no_grad():
    theta_hat = model(X_market_norm.to(device)).cpu().numpy()[0]

kappa_hat, theta_hat_v, sigma_hat, rho_hat, v0_hat = theta_hat
print("Paramètres Heston estimés par le NN:")
print(f"kappa ≈ {kappa_hat:.4f}")
print(f"theta ≈ {theta_hat_v:.4f}")
print(f"sigma ≈ {sigma_hat:.4f}")
print(f"rho   ≈ {rho_hat:.4f}")
print(f"v0    ≈ {v0_hat:.4f}")

# Construction d'un objet HestonParams à partir de ces estimations
params_nn = HestonParams(
    kappa=torch.tensor(kappa_hat, dtype=torch.float64, device=device),
    theta=torch.tensor(theta_hat_v, dtype=torch.float64, device=device),
    sigma=torch.tensor(sigma_hat, dtype=torch.float64, device=device),
    rho=torch.tensor(rho_hat, dtype=torch.float64, device=device),
    v0=torch.tensor(v0_hat, dtype=torch.float64, device=device),
)


Paramètres Heston estimés par le NN:
kappa ≈ 8452878.4924
theta ≈ 5152.2615
sigma ≈ 2574667.4631
rho   ≈ -184377.0146
v0    ≈ 118355.2833


In [14]:
# Comparaison rapide des prix NN-Heston vs marché sur la grille
with torch.no_grad():
    C_heston_nn = prices_from_params(params_nn).cpu().numpy()

cmp_df = df.copy()
cmp_df["C_heston_nn"] = C_heston_nn
print("Aperçu des prix (marché vs NN-Heston):")
from IPython.display import display

display(cmp_df[["S0", "K", "T", "C_mkt", "C_heston_nn"]].head(10))


Aperçu des prix (marché vs NN-Heston):


Unnamed: 0,S0,K,T,C_mkt,C_heston_nn
0,671.929993,570.0,0.004065,93.78,102.082465
1,671.929993,580.0,0.004065,85.62,92.085115
2,671.929993,615.0,0.004065,58.22,57.094323
3,671.929993,635.0,0.004065,37.0,37.099475
4,671.929993,640.0,0.004065,29.92,32.100734
5,671.929993,645.0,0.004065,28.15,27.101972
6,671.929993,654.0,0.004065,19.65,18.104145
7,671.929993,657.0,0.004065,15.83,15.104869
8,671.929993,660.0,0.004065,12.23,12.105625
9,671.929993,663.0,0.004065,9.91,9.106569
