In [1]:
"""
QML Model — Level 2: Swaption Surface Missing Data Reconstruction
Architecture: Photonic Quantum Circuit with MerLin CircuitBuilder

Problem:
    The test set will have certain maturity rows hidden (from our EDA: 1yr and 1.5yr).
    The training data is fully observed — so we artificially mask those rows during
    training to teach the model to reconstruct them from the remaining surface.

Pipeline:
    1. Load Level 2 training data (fully observed)
    2. Separate into:
         - Input  X: all columns EXCEPT the target maturities (196 values)
         - Target Y: the target maturity columns (28 values = 14 tenors × 2 maturities)
    3. Preprocess: MinMaxScaler → PCA (196 → 16) → MinMaxScaler
    4. Quantum circuit (CircuitBuilder, 16 modes, 3 photons):
         - entangling layer (trainable)
         - angle encoding (scale=π)
         - rotations (trainable)
         - superpositions (trainable)
    5. MeasurementStrategy.mode_expectations() → 16 compact features
       (one per mode — ideal for small regression targets, no LexGrouping needed)
    6. Classical readout → 28 reconstructed volatilities

Key difference from Level 1:
    - No LOOKBACK / time series component — purely spatial interpolation
    - Smaller output (28 vs 224 values)
    - mode_expectations() instead of probs() — more compact, faster
    - Input is a subset of the surface (not a time window)
"""


'\nQML Model — Level 2: Swaption Surface Missing Data Reconstruction\nArchitecture: Photonic Quantum Circuit with MerLin CircuitBuilder\n\nProblem:\n    The test set will have certain maturity rows hidden (from our EDA: 1yr and 1.5yr).\n    The training data is fully observed — so we artificially mask those rows during\n    training to teach the model to reconstruct them from the remaining surface.\n\nPipeline:\n    1. Load Level 2 training data (fully observed)\n    2. Separate into:\n         - Input  X: all columns EXCEPT the target maturities (196 values)\n         - Target Y: the target maturity columns (28 values = 14 tenors × 2 maturities)\n    3. Preprocess: MinMaxScaler → PCA (196 → 16) → MinMaxScaler\n    4. Quantum circuit (CircuitBuilder, 16 modes, 3 photons):\n         - entangling layer (trainable)\n         - angle encoding (scale=π)\n         - rotations (trainable)\n         - superpositions (trainable)\n    5. MeasurementStrategy.mode_expectations() → 16 compact featu

In [17]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler, StandardScaler 
from sklearn.metrics import mean_squared_error
from datasets import load_dataset

import merlin as ML
from merlin import MeasurementStrategy
from merlin.builder import CircuitBuilder


In [3]:
# ─────────────────────────────────────────────
# CONFIG
# ─────────────────────────────────────────────

# Target maturities to reconstruct (from EDA: these are the hidden ones)
TARGET_MATURITIES = [1.0, 1.5]

N_PCA_COMPONENTS  = 16    # PCA compress observed features → quantum-compatible size
N_MODES           = 16    # Quantum circuit modes (≤ 20 QPU hard limit)
N_PHOTONS         = 3     # Fewer photons than Level 1: smaller task, faster simulation
TRAIN_SPLIT       = 0.85
EPOCHS            = 100   # More epochs: smaller dataset (489 rows) and simpler task
LR                = 5e-4
BATCH_SIZE        = 16
DEVICE            = torch.device("cpu")


In [4]:
# ─────────────────────────────────────────────
# 1. LOAD DATA
# ─────────────────────────────────────────────

print("Loading Level 2 dataset...")
ds = load_dataset(
    "Quandela/Challenge_Swaptions",
    data_files="level-2_Missing_data_prediction/train_level2.csv",
    split="train",
)
df = ds.to_pandas()
df["Date"] = pd.to_datetime(df["Date"], dayfirst=True)
df = df.sort_values("Date").reset_index(drop=True)

all_feat_cols = [c for c in df.columns if c != "Date"]
print(f"Total columns: {len(all_feat_cols)}")


Loading Level 2 dataset...


Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


Total columns: 224


In [5]:
# ─────────────────────────────────────────────
# 2. SPLIT COLUMNS: OBSERVED vs TARGET
# ─────────────────────────────────────────────
#
# Target columns = maturities we need to reconstruct (1yr and 1.5yr)
# Input columns  = everything else (used to predict the targets)

target_cols = [
    c for c in all_feat_cols
    if float(c.split("Maturity : ")[1]) in TARGET_MATURITIES
]
input_cols = [c for c in all_feat_cols if c not in target_cols]

print(f"\nInput columns  (observed surface): {len(input_cols)}")
print(f"Target columns (to reconstruct)  : {len(target_cols)}")
print(f"Target maturities: {TARGET_MATURITIES}")
print(f"Tenors covered   : {len(target_cols) // len(TARGET_MATURITIES)} "
      f"({len(target_cols)} total target cells)")



Input columns  (observed surface): 196
Target columns (to reconstruct)  : 28
Target maturities: [1.0, 1.5]
Tenors covered   : 14 (28 total target cells)


In [18]:
# ─────────────────────────────────────────────
# 3. PREPROCESS
# ─────────────────────────────────────────────

X_raw = df[input_cols].values.astype(np.float32)   # (489, 196)
Y_raw = df[target_cols].values.astype(np.float32)  # (489, 28)

# Scale inputs to [0,1]
x_scaler = MinMaxScaler()
X_scaled  = x_scaler.fit_transform(X_raw).astype(np.float32)

# Scale targets to [0,1] — stored separately for inverse transform at eval time
y_scaler = StandardScaler ()
Y_scaled  = y_scaler.fit_transform(Y_raw).astype(np.float32)

# PCA: 196 → 16 (to fit quantum mode limit)
pca = PCA(n_components=N_PCA_COMPONENTS)
X_pca = pca.fit_transform(X_scaled).astype(np.float32)

# Re-scale PCA outputs to [0,1] for stable angle encoding
pca_scaler = MinMaxScaler()
X_pca = pca_scaler.fit_transform(X_pca).astype(np.float32)

print(f"\nPCA explained variance: {pca.explained_variance_ratio_.sum()*100:.1f}%")
print(f"X after PCA: {X_pca.shape}")
print(f"Y shape    : {Y_scaled.shape}")



PCA explained variance: 100.0%
X after PCA: (489, 16)
Y shape    : (489, 28)


In [19]:
# ─────────────────────────────────────────────
# 4. TRAIN / VAL SPLIT  (chronological)
# ─────────────────────────────────────────────

split   = int(len(X_pca) * TRAIN_SPLIT)
X_train = torch.tensor(X_pca[:split],    device=DEVICE)
Y_train = torch.tensor(Y_scaled[:split], device=DEVICE)
X_val   = torch.tensor(X_pca[split:],    device=DEVICE)
Y_val   = torch.tensor(Y_scaled[split:], device=DEVICE)

train_loader = DataLoader(
    TensorDataset(X_train, Y_train), batch_size=BATCH_SIZE, shuffle=False
)

print(f"\nTrain: {len(X_train)} samples | Val: {len(X_val)} samples")



Train: 415 samples | Val: 74 samples


In [20]:
builder = CircuitBuilder(n_modes=N_MODES)
builder.add_entangling_layer(trainable=True, name="U1")
builder.add_angle_encoding(
    modes=list(range(N_MODES)),
    name="input",
    scale=np.pi,
)
builder.add_rotations(trainable=True, name="theta")
builder.add_superpositions(depth=2, trainable=True)   # depth=2 for richer interference

quantum_core = ML.QuantumLayer(
    input_size=N_MODES,
    builder=builder,
    n_photons=N_PHOTONS,
    measurement_strategy=MeasurementStrategy.mode_expectations(),
    # mode_expectations: output_size = n_modes = 16
    # One compact value per mode — perfect bridge to our 28-output readout
)

print(f"\nQuantum layer output size (mode expectations): {quantum_core.output_size}")



Quantum layer output size (mode expectations): 16


In [21]:
class QRCMissingData(nn.Module):
    def __init__(self, output_size: int):
        super().__init__()

        # Quantum feature extraction — no pre-compression needed here
        # PCA already outputs exactly N_MODES=16 features
        self.quantum = quantum_core

        # Classical readout: 16 quantum features → 28 missing values
        # Smaller network than Level 1 (task is simpler: 28 outputs vs 224)
        self.readout = nn.Sequential(
            nn.Linear(quantum_core.output_size, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(p=0.3),

            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(p=0.3),

            nn.Linear(64, output_size),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.quantum(x)    # (B, 16) → (B, 16) mode expectations
        return self.readout(x) # (B, 16) → (B, 28)


model = QRCMissingData(output_size=len(target_cols)).to(DEVICE)

total_params     = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"\nTotal params     : {total_params:,}")
print(f"Trainable params : {trainable_params:,}")



Total params     : 12,952
Trainable params : 12,952


In [22]:
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=10, 
)
loss_fn = nn.MSELoss()

best_val_loss = float("inf")
best_state    = None

print("\nTraining...")
for epoch in range(1, EPOCHS + 1):
    model.train()
    epoch_loss = 0.0
    for xb, yb in train_loader:
        optimizer.zero_grad()
        pred = model(xb)
        loss = loss_fn(pred, yb)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item() * len(xb)

    epoch_loss /= len(X_train)

    model.eval()
    with torch.no_grad():
        val_loss = loss_fn(model(X_val), Y_val).item()

    scheduler.step(val_loss)

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_state = {k: v.clone() for k, v in model.state_dict().items()}

    
    print(f"  Epoch {epoch:3d}/{EPOCHS} | "
              f"train MSE: {epoch_loss:.6f} | "
              f"val MSE: {val_loss:.6f} | "
              f"best val: {best_val_loss:.6f}")

model.load_state_dict(best_state)
print(f"\nRestored best model (val MSE: {best_val_loss:.6f})")



Training...
  Epoch   1/100 | train MSE: 1.287937 | val MSE: 0.919370 | best val: 0.919370
  Epoch   2/100 | train MSE: 1.177521 | val MSE: 0.952854 | best val: 0.919370
  Epoch   3/100 | train MSE: 1.131689 | val MSE: 1.024751 | best val: 0.919370
  Epoch   4/100 | train MSE: 1.115083 | val MSE: 1.074739 | best val: 0.919370
  Epoch   5/100 | train MSE: 1.102538 | val MSE: 1.111048 | best val: 0.919370
  Epoch   6/100 | train MSE: 1.101274 | val MSE: 1.120526 | best val: 0.919370
  Epoch   7/100 | train MSE: 1.076132 | val MSE: 1.131563 | best val: 0.919370
  Epoch   8/100 | train MSE: 1.078146 | val MSE: 1.156233 | best val: 0.919370
  Epoch   9/100 | train MSE: 1.073286 | val MSE: 1.147725 | best val: 0.919370
  Epoch  10/100 | train MSE: 1.061164 | val MSE: 1.155216 | best val: 0.919370
  Epoch  11/100 | train MSE: 1.055896 | val MSE: 1.157140 | best val: 0.919370
  Epoch  12/100 | train MSE: 1.050262 | val MSE: 1.161992 | best val: 0.919370
  Epoch  13/100 | train MSE: 1.048559 |

In [23]:
# ─────────────────────────────────────────────
# 8. EVALUATE
# ─────────────────────────────────────────────

model.eval()
with torch.no_grad():
    val_pred_np = model(X_val).numpy()

val_true_np = Y_val.numpy()

# Inverse transform back to original volatility scale
val_pred_original = y_scaler.inverse_transform(val_pred_np)
val_true_original = y_scaler.inverse_transform(val_true_np)

rmse = np.sqrt(mean_squared_error(val_true_original, val_pred_original))
mae  = np.mean(np.abs(val_true_original - val_pred_original))

print(f"\n{'='*55}")
print(f"VALIDATION RESULTS (original volatility scale)")
print(f"{'='*55}")
print(f"  Overall RMSE : {rmse:.6f}")
print(f"  Overall MAE  : {mae:.6f}")
print(f"  (Volatility range ≈ 0.02 – 0.45)")

# Per-maturity breakdown
print(f"\n  Per-maturity breakdown:")
tenors_list = sorted(set(
    int(c.split("Tenor : ")[1].split(";")[0]) for c in target_cols
))
for mat in TARGET_MATURITIES:
    mat_cols_idx = [
        i for i, c in enumerate(target_cols)
        if float(c.split("Maturity : ")[1]) == mat
    ]
    mat_pred = val_pred_original[:, mat_cols_idx]
    mat_true = val_true_original[:, mat_cols_idx]
    mat_rmse = np.sqrt(mean_squared_error(mat_true, mat_pred))
    mat_mae  = np.mean(np.abs(mat_true - mat_pred))
    print(f"    Maturity {mat}yr → RMSE: {mat_rmse:.6f} | MAE: {mat_mae:.6f}")



VALIDATION RESULTS (original volatility scale)
  Overall RMSE : 0.017870
  Overall MAE  : 0.013245
  (Volatility range ≈ 0.02 – 0.45)

  Per-maturity breakdown:
    Maturity 1.0yr → RMSE: 0.016318 | MAE: 0.012519
    Maturity 1.5yr → RMSE: 0.019298 | MAE: 0.013971


In [26]:
# ─────────────────────────────────────────────
# 9. RECONSTRUCT A FULL ROW  (ready for test set)
# ─────────────────────────────────────────────
#
# At test time: you receive a row with 1yr and 1.5yr columns missing.
# Feed the observed columns through the pipeline to get predictions.

sample_row_raw    = X_raw[-1:]                                          # (1, 196)
sample_row_scaled = x_scaler.transform(sample_row_raw)
sample_row_pca    = pca.transform(sample_row_scaled)
sample_row_pca    = pca_scaler.transform(sample_row_pca).astype(np.float32)
sample_tensor     = torch.tensor(sample_row_pca, device=DEVICE)

model.eval()
with torch.no_grad():
    pred_scaled = model(sample_tensor).numpy()

pred_original = y_scaler.inverse_transform(pred_scaled)

print(f"\nExample reconstruction (last training row):")
for i, col in enumerate(target_cols):
    short = col.replace("Tenor : ", "T").replace("; Maturity : ", "/M")
    print(f"  {short:<12} → predicted: {pred_original[0, i]:.6f} "
          f"| true: {Y_raw[-1, i]:.6f}")
print(f"  ... (showing 5 of {len(target_cols)})")



Example reconstruction (last training row):
  T1/M1        → predicted: 0.158804 | true: 0.132854
  T2/M1        → predicted: 0.170556 | true: 0.135154
  T3/M1        → predicted: 0.165560 | true: 0.135435
  T4/M1        → predicted: 0.162083 | true: 0.132703
  T5/M1        → predicted: 0.169149 | true: 0.132891
  T6/M1        → predicted: 0.155651 | true: 0.127393
  T7/M1        → predicted: 0.153856 | true: 0.127258
  T8/M1        → predicted: 0.146991 | true: 0.124846
  T9/M1        → predicted: 0.144825 | true: 0.123276
  T10/M1       → predicted: 0.148325 | true: 0.124676
  T15/M1       → predicted: 0.144887 | true: 0.116959
  T20/M1       → predicted: 0.135959 | true: 0.116219
  T25/M1       → predicted: 0.132698 | true: 0.115657
  T30/M1       → predicted: 0.134548 | true: 0.115668
  T1/M1.5      → predicted: 0.208766 | true: 0.165595
  T2/M1.5      → predicted: 0.207043 | true: 0.164118
  T3/M1.5      → predicted: 0.204665 | true: 0.161617
  T4/M1.5      → predicted: 0.214113 

In [15]:
torch.save({
    "model_state" : model.state_dict(),
    "pca"         : pca,
    "pca_scaler"  : pca_scaler,
    "x_scaler"    : x_scaler,
    "y_scaler"    : y_scaler,
    "input_cols"  : input_cols,
    "target_cols" : target_cols,
    "config": {
        "TARGET_MATURITIES" : TARGET_MATURITIES,
        "N_PCA_COMPONENTS"  : N_PCA_COMPONENTS,
        "N_MODES"           : N_MODES,
        "N_PHOTONS"         : N_PHOTONS,
    }
}, "qrc_level2_model.pt")

print("\nModel saved → qrc_level2_model.pt")
print("Done!")







Model saved → qrc_level2_model.pt
Done!
