In [1]:
# Neural network approximation of implied volatility surface
import math
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import plotly.graph_objects as go
from plotly.subplots import make_subplots

torch.manual_seed(42)
np.random.seed(42)


In [2]:
CSV_PATH = "../options_SPY_calls.csv"
df = pd.read_csv(CSV_PATH)
required_cols = {"S0", "K", "T", "C_mkt"}
missing = required_cols - set(df.columns)
if missing:
    raise ValueError(f"Missing required columns: {missing}")

S0 = float(df["S0"].iloc[0])
df["T"] = (np.floor(df["T"] * 10) / 10).astype(float)
df = df.drop_duplicates(subset=["K", "T"]).reset_index(drop=True)
df = df[df["T"] > 0.0].reset_index(drop=True)
print(f"Loaded {CSV_PATH} with {len(df)} rows | Reference S0 = {S0:.4f}")
display(df.head())


Loaded ../options_SPY_calls.csv with 1294 rows | Reference S0 = 672.6500


Unnamed: 0,S0,K,C_mkt,T,type,iv
0,672.65,525,152.33,0.1,C,0.55
1,672.65,530,144.93,0.1,C,0.54
2,672.65,535,143.39,0.1,C,0.52
3,672.65,540,135.4,0.1,C,0.51
4,672.65,545,131.81,0.1,C,0.49


In [3]:
R_RATE = 0.4
Q_RATE = 0.0

def normal_cdf(x: float) -> float:
    return 0.5 * (1.0 + math.erf(x / math.sqrt(2.0)))

def bs_call_price(S0, K, T, r, q, vol):
    if T <= 0:
        return max(S0 * math.exp(-q * T) - K * math.exp(-r * T), 0.0)
    vol = max(vol, 1e-8)
    sqrt_T = math.sqrt(T)
    d1 = (math.log(S0 / K) + (r - q + 0.5 * vol * vol) * T) / (vol * sqrt_T)
    d2 = d1 - vol * sqrt_T
    discount_dom = math.exp(-r * T)
    discount_for = math.exp(-q * T)
    return S0 * discount_for * normal_cdf(d1) - K * discount_dom * normal_cdf(d2)


In [4]:
feature_matrix = np.column_stack([
    df["T"].to_numpy(dtype=np.float32),
    df["K"].to_numpy(dtype=np.float32),
    df["S0"].to_numpy(dtype=np.float32),
    np.full(len(df), R_RATE, dtype=np.float32),
])
X = torch.from_numpy(feature_matrix)
y_prices = torch.from_numpy(df["C_mkt"].to_numpy(dtype=np.float32)).unsqueeze(1)

dataset = TensorDataset(X, y_prices)
batch_size = min(128, len(dataset))
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
print(f"Training samples: {len(dataset)} | Batch size: {batch_size}")


Training samples: 1294 | Batch size: 128


In [5]:
class ImpliedVolNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(4, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
            nn.Softplus(),
        )

    def forward(self, x):
        return self.net(x)

model = ImpliedVolNet()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
print(f"Using device: {device}")


Using device: cuda


In [6]:
def bs_call_price_tensor(S0, K, T, r, q, vol):
    eps = 1e-8
    vol_clamped = torch.clamp(vol, min=eps)
    T_clamped = torch.clamp(T, min=eps)
    S0_clamped = torch.clamp(S0, min=eps)
    K_clamped = torch.clamp(K, min=eps)
    sqrt_T = torch.sqrt(T_clamped)
    d1 = (torch.log(S0_clamped / K_clamped) + (r - q + 0.5 * vol_clamped ** 2) * T_clamped) / (vol_clamped * sqrt_T)
    d2 = d1 - vol_clamped * sqrt_T
    normal_cdf = lambda x: 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))
    discount_dom = torch.exp(-r * T_clamped)
    discount_for = torch.exp(-q * T_clamped)
    return S0_clamped * discount_for * normal_cdf(d1) - K_clamped * discount_dom * normal_cdf(d2)

epochs = 300
model.train()
for epoch in range(1, epochs + 1):
    epoch_loss = 0.0
    for batch_X, batch_price in train_loader:
        batch_X = batch_X.to(device)
        batch_price = batch_price.to(device)
        optimizer.zero_grad()
        iv_pred = model(batch_X)
        T_batch = batch_X[:, 0:1]
        K_batch = batch_X[:, 1:2]
        S0_batch = batch_X[:, 2:3]
        r_batch = batch_X[:, 3:4]
        q_batch = torch.zeros_like(r_batch)
        C_pred = bs_call_price_tensor(S0_batch, K_batch, T_batch, r_batch, q_batch, iv_pred)
        mse = torch.mean((C_pred - batch_price) ** 2)
        rmse = torch.sqrt(mse + 1e-12)
        rmse.backward()
        optimizer.step()
        epoch_loss += rmse.item()
    epoch_rmse = epoch_loss / len(train_loader)
    if epoch % 50 == 0 or epoch == 1:
        print(f"Epoch {epoch:04d} | Training RMSE = {epoch_rmse:.6f}")

model.eval()
with torch.no_grad():
    full_X = X.to(device)
    full_prices = y_prices.to(device)
    iv_pred_full = model(full_X)
    q_full = torch.zeros_like(full_X[:, 0:1])
    C_pred_full = bs_call_price_tensor(full_X[:, 2:3], full_X[:, 1:2], full_X[:, 0:1], full_X[:, 3:4], q_full, iv_pred_full)
    final_rmse = torch.sqrt(torch.mean((C_pred_full - full_prices) ** 2)).item()

print(f"Final training RMSE on entire set: {final_rmse:.6f}")
df["iv_nn"] = iv_pred_full.cpu().numpy().ravel()
display(df[["K", "T", "C_mkt", "iv_nn"]].head())


Epoch 0001 | Training RMSE = 125.050407
Epoch 0050 | Training RMSE = 126.017339
Epoch 0100 | Training RMSE = 122.387817
Epoch 0150 | Training RMSE = 126.709369
Epoch 0200 | Training RMSE = 120.907985
Epoch 0250 | Training RMSE = 124.526894
Epoch 0300 | Training RMSE = 125.319553
Final training RMSE on entire set: 124.725708


Unnamed: 0,K,T,C_mkt,iv_nn
0,525,0.1,152.33,1.496157e-32
1,530,0.1,144.93,1.489915e-32
2,535,0.1,143.39,1.483698e-32
3,540,0.1,135.4,1.477497e-32
4,545,0.1,131.81,1.471355e-32


In [7]:
K_grid = np.arange(np.floor((S0 - 100.0) / 10) * 10, np.floor((S0 + 100.0) / 10) * 10 + 1, 1.0)
T_grid = np.arange(0.1, 2.0, 0.1)
KK, TT = np.meshgrid(K_grid, T_grid)

grid_features = np.column_stack([
    TT.ravel().astype(np.float32),
    KK.ravel().astype(np.float32),
    np.full(TT.size, S0, dtype=np.float32),
    np.full(TT.size, R_RATE, dtype=np.float32),
])

model.eval()
with torch.no_grad():
    iv_surface = model(torch.from_numpy(grid_features).to(device)).cpu().numpy().reshape(TT.shape)

IV_surface_NN = iv_surface
print(f"IV surface shape: {IV_surface_NN.shape}")


IV surface shape: (19, 201)


In [8]:
fig = go.Figure(data=[go.Surface(x=KK, y=TT, z=IV_surface_NN, opacity=0.85)])
fig.update_layout(
    title="Neural Network Implied Volatility Surface (inputs: T, K, S0, r=0.4)",
    scene=dict(
        xaxis_title="Strike Price K",
        yaxis_title="Time to Maturity T (years)",
        zaxis_title="Implied Volatility (NN)",
    ),
    template="plotly_dark",
    width=900,
    height=600,
)
fig.show()


In [9]:
def invert_iv(price, S0, K, T, r=R_RATE, q=Q_RATE, tol=1e-6, max_iter=100):
    if T <= 0 or price <= 0:
        return float("nan")
    sigma = 0.3
    sqrt_T = math.sqrt(T)
    for _ in range(max_iter):
        model_price = bs_call_price(S0, K, T, r, q, sigma)
        diff = model_price - price
        if abs(diff) < tol:
            return sigma
        d1 = (math.log(S0 / K) + (r - q + 0.5 * sigma * sigma) * T) / (max(sigma, 1e-8) * sqrt_T)
        vega = S0 * math.exp(-q * T) * sqrt_T * (1.0 / math.sqrt(2 * math.pi)) * math.exp(-0.5 * d1 * d1)
        if vega < 1e-8:
            break
        sigma -= diff / vega
        sigma = max(sigma, 1e-6)
    return sigma

sample_rows = df.sample(min(5, len(df)), random_state=42).copy()
sample_rows["iv_market_bs"] = sample_rows.apply(lambda row: invert_iv(row["C_mkt"], row["S0"], row["K"], row["T"]), axis=1)
sample_rows["price_from_nn_iv"] = sample_rows.apply(lambda row: bs_call_price(row["S0"], row["K"], row["T"], R_RATE, Q_RATE, row["iv_nn"]), axis=1)
display(sample_rows[["K", "T", "C_mkt", "iv_market_bs", "iv_nn", "price_from_nn_iv"]])


Unnamed: 0,K,T,C_mkt,iv_market_bs,iv_nn,price_from_nn_iv
351,710,0.2,6.28,1e-06,1.305266e-31,17.237394
486,549,0.3,101.58,1e-06,1.470749e-32,185.73068
599,698,0.4,23.16,1e-06,1.1481640000000002e-31,77.853635
442,710,0.3,12.85,1e-06,1.30687e-31,42.93649
270,629,0.2,62.54,1e-06,3.791446e-32,92.009818
