In [37]:
# Neural network approximation of implied volatility surface
import math
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import plotly.graph_objects as go
from plotly.subplots import make_subplots

torch.manual_seed(42)
np.random.seed(42)


In [38]:
CSV_PATH = "../options_SPY_calls.csv"
df = pd.read_csv(CSV_PATH)
required_cols = {"S0", "K", "T", "C_mkt"}
missing = required_cols - set(df.columns)

if missing:
    raise ValueError(f"Missing required columns: {missing}")

S0 = float(df["S0"].iloc[0])
df = df[df["T"] > 0.0].reset_index(drop=True)
print(f"Loaded {CSV_PATH} with {len(df)} rows | Reference S0 = {S0:.4f}")
display(df.head())


Loaded ../options_SPY_calls.csv with 2713 rows | Reference S0 = 672.6500


Unnamed: 0,S0,K,C_mkt,T,type,iv
0,672.65,570,102.32,0.01,C,0.81
1,672.65,575,102.28,0.01,C,0.77
2,672.65,580,85.62,0.01,C,0.76
3,672.65,590,75.65,0.01,C,0.68
4,672.65,600,76.35,0.01,C,0.62


In [39]:
R_RATE = 0.4
Q_RATE = 0.0

def normal_cdf(x: float) -> float:
    return 0.5 * (1.0 + math.erf(x / math.sqrt(2.0)))

def bs_call_price(S0, K, T, r, q, vol):
    if T <= 0:
        return max(S0 * math.exp(-q * T) - K * math.exp(-r * T), 0.0)
    vol = max(vol, 1e-8)
    sqrt_T = math.sqrt(T)
    d1 = (math.log(S0 / K) + (r - q + 0.5 * vol * vol) * T) / (vol * sqrt_T)
    d2 = d1 - vol * sqrt_T
    discount_dom = math.exp(-r * T)
    discount_for = math.exp(-q * T)
    return S0 * discount_for * normal_cdf(d1) - K * discount_dom * normal_cdf(d2)


In [40]:
# Préparation des features : (T, K, S0, r).
# Le réseau apprendra à sortir une volatilité implicite qui sera réinjectée dans le pricer Black-Scholes.
feature_matrix = np.column_stack([
    df["T"].to_numpy(dtype=np.float32),
    df["K"].to_numpy(dtype=np.float32),
    df["S0"].to_numpy(dtype=np.float32),
    np.full(len(df), R_RATE, dtype=np.float32),
])
X = torch.from_numpy(feature_matrix)

# Les prix marché servent de cible : on compare le call BS(S0,K,T,r,iv_pred) à C_mkt.
price_targets = torch.from_numpy(df["C_mkt"].to_numpy(dtype=np.float32)).unsqueeze(1)

dataset = TensorDataset(X, price_targets)
batch_size = min(128, len(dataset))
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
print(f"Training samples: {len(dataset)} | Batch size: {batch_size}")


Training samples: 2713 | Batch size: 128


In [41]:
class ImpliedVolNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(4, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
            nn.Softplus(),
        )

    def forward(self, x):
        return self.net(x)

model = ImpliedVolNet()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
print(f"Using device: {device}")


Using device: cuda


In [42]:
def bs_call_price_tensor(S0, K, T, r, q, vol):
    eps = 1e-8
    vol_clamped = torch.clamp(vol, min=eps)
    T_clamped = torch.clamp(T, min=eps)
    S0_clamped = torch.clamp(S0, min=eps)
    K_clamped = torch.clamp(K, min=eps)
    sqrt_T = torch.sqrt(T_clamped)
    d1 = (torch.log(S0_clamped / K_clamped) + (r - q + 0.5 * vol_clamped ** 2) * T_clamped) / (vol_clamped * sqrt_T)
    d2 = d1 - vol_clamped * sqrt_T
    normal_cdf = lambda x: 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))
    discount_dom = torch.exp(-r * T_clamped)
    discount_for = torch.exp(-q * T_clamped)
    return S0_clamped * discount_for * normal_cdf(d1) - K_clamped * discount_dom * normal_cdf(d2)

epochs = 300
model.train()
for epoch in range(1, epochs + 1):
    epoch_loss = 0.0
    for batch_X, batch_price in train_loader:
        batch_X = batch_X.to(device)
        batch_price = batch_price.to(device)
        optimizer.zero_grad()
        iv_pred = model(batch_X)
        T_batch = batch_X[:, 0:1]
        K_batch = batch_X[:, 1:2]
        S0_batch = batch_X[:, 2:3]
        r_batch = batch_X[:, 3:4]
        q_batch = torch.zeros_like(r_batch)
        C_pred = bs_call_price_tensor(S0_batch, K_batch, T_batch, r_batch, q_batch, iv_pred)
        mse = torch.mean((C_pred - batch_price) ** 2)
        rmse = torch.sqrt(mse + 1e-12)
        rmse.backward()
        optimizer.step()
        epoch_loss += rmse.item()
    epoch_rmse = epoch_loss / len(train_loader)
    if epoch % 50 == 0 or epoch == 1:
        print(f"Epoch {epoch:04d} | Training RMSE = {epoch_rmse:.6f}")

model.eval()
with torch.no_grad():
    full_X = X.to(device)
    full_prices = price_targets.to(device)
    iv_pred_full = model(full_X)
    q_full = torch.zeros_like(full_X[:, 0:1])
    C_pred_full = bs_call_price_tensor(full_X[:, 2:3], full_X[:, 1:2], full_X[:, 0:1], full_X[:, 3:4], q_full, iv_pred_full)
    final_rmse = torch.sqrt(torch.mean((C_pred_full - full_prices) ** 2)).item()

print(f"Final training RMSE on entire set: {final_rmse:.6f}")
df["iv_nn"] = iv_pred_full.cpu().numpy().ravel()
display(df[["K", "T", "C_mkt", "iv_nn"]].head())


Epoch 0001 | Training RMSE = 93.891540
Epoch 0050 | Training RMSE = 94.755938
Epoch 0100 | Training RMSE = 92.975703
Epoch 0150 | Training RMSE = 95.344902
Epoch 0200 | Training RMSE = 94.259898
Epoch 0250 | Training RMSE = 94.543566
Epoch 0300 | Training RMSE = 95.232685
Final training RMSE on entire set: 94.976746


Unnamed: 0,K,T,C_mkt,iv_nn
0,570,0.01,102.32,1.439005e-32
1,575,0.01,102.28,1.433001e-32
2,580,0.01,85.62,1.503732e-32
3,590,0.01,75.65,1.763617e-32
4,600,0.01,76.35,2.068399e-32


In [43]:
K_grid = np.arange(np.floor((S0 - 100.0) / 10) * 10, np.floor((S0 + 100.0) / 10) * 10 + 1, 1.0)
T_grid = np.arange(0.1, 2.0, 0.01)
KK, TT = np.meshgrid(K_grid, T_grid)

grid_features = np.column_stack([
    TT.ravel().astype(np.float32),
    KK.ravel().astype(np.float32),
    np.full(TT.size, S0, dtype=np.float32),
    np.full(TT.size, R_RATE, dtype=np.float32),
])

model.eval()
with torch.no_grad():
    iv_surface = model(torch.from_numpy(grid_features).to(device)).cpu().numpy().reshape(TT.shape)

IV_surface_NN = iv_surface
print(f"IV surface shape: {IV_surface_NN.shape}")


IV surface shape: (190, 201)


In [44]:
fig = go.Figure(data=[go.Surface(x=KK, y=TT, z=IV_surface_NN, opacity=0.85)])
fig.update_layout(
    title="Neural Network Implied Volatility Surface (inputs: T, K, S0, r=0.4)",
    scene=dict(
        xaxis_title="Strike Price K",
        yaxis_title="Time to Maturity T (years)",
        zaxis_title="Implied Volatility (NN)",
    ),
    template="plotly_dark",
    width=900,
    height=600,
)
fig.show()


In [45]:
def invert_iv(price, S0, K, T, r=R_RATE, q=Q_RATE, tol=1e-6, max_iter=100):
    if T <= 0 or price <= 0:
        return float("nan")
    sigma = 0.3
    sqrt_T = math.sqrt(T)
    for _ in range(max_iter):
        model_price = bs_call_price(S0, K, T, r, q, sigma)
        diff = model_price - price
        if abs(diff) < tol:
            return sigma
        d1 = (math.log(S0 / K) + (r - q + 0.5 * sigma * sigma) * T) / (max(sigma, 1e-8) * sqrt_T)
        vega = S0 * math.exp(-q * T) * sqrt_T * (1.0 / math.sqrt(2 * math.pi)) * math.exp(-0.5 * d1 * d1)
        if vega < 1e-8:
            break
        sigma -= diff / vega
        sigma = max(sigma, 1e-6)
    return sigma

sample_rows = df.sample(min(5, len(df)), random_state=42).copy()
sample_rows["iv_market_bs"] = sample_rows.apply(lambda row: invert_iv(row["C_mkt"], row["S0"], row["K"], row["T"]), axis=1)
sample_rows["price_from_nn_iv"] = sample_rows.apply(lambda row: bs_call_price(row["S0"], row["K"], row["T"], R_RATE, Q_RATE, row["iv_nn"]), axis=1)
display(sample_rows[["K", "T", "C_mkt", "iv_market_bs", "iv_nn", "price_from_nn_iv"]])


Unnamed: 0,K,T,C_mkt,iv_market_bs,iv_nn,price_from_nn_iv
506,583,0.04,96.99,1e-06,1.577486e-32,98.903772
1535,590,0.27,101.07,1e-06,1.764437e-32,143.049718
1905,643,0.46,66.27,1e-06,5.037779e-32,137.715278
930,663,0.1,23.25,1e-06,7.034024e-32,35.646602
2676,640,2.19,119.9,1e-06,4.84937e-32,406.124966
