# In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import os

# ---------------------
# Hyperparameters
# ---------------------
# Defined before the data is loaded so the expected feature count below comes
# from the same `input_dim` that sizes the model.
input_dim = 21
latent_dim = 4     # dimension of the latent space
hidden_dim = 64    # hidden dimension for encoder/decoder
batch_size = 128
lr = 1e-3
epochs = 100
patience = 10

# ---------------------
# Load and preprocess data
# ---------------------
data = pd.read_csv('data.csv')  # Replace with your actual data file
# Raise explicitly rather than `assert`: assert statements are removed when
# Python runs with -O, which would let a malformed file reach the model.
if data.shape[1] != input_dim:
    raise ValueError(
        f"Data must have {input_dim} features, got {data.shape[1]}."
    )

X = data.values.astype(np.float32)

# Train/validation split
X_train, X_val = train_test_split(X, test_size=0.2, random_state=42)

# Standardize the data; the scaler is fitted on the training split only and
# then reused for the validation split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Convert to torch tensors
train_data = torch.tensor(X_train, dtype=torch.float32)
val_data = torch.tensor(X_val, dtype=torch.float32)

train_dataset = TensorDataset(train_data)
val_dataset = TensorDataset(val_data)

# DataLoaders: only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# ---------------------
# Define the VAE model with Gaussian decoder
# ---------------------
class VAE(nn.Module):
    """Variational autoencoder whose decoder parameterizes a diagonal Gaussian.

    The encoder maps an input row to the mean and log-variance of the latent
    posterior; the decoder maps a latent vector to a per-feature mean and
    log-variance of the reconstruction.

    Args:
        input_dim: Number of features in each input row.
        hidden_dim: Width of the hidden layers in encoder and decoder.
        latent_dim: Dimension of the latent space.
    """

    def __init__(self, input_dim, hidden_dim, latent_dim):
        super().__init__()
        # Kept on the instance so decode() splits the decoder output using the
        # size this model was built with, not a module-level global.
        self.input_dim = input_dim

        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU()
        )

        self.mu_layer = nn.Linear(hidden_dim, latent_dim)
        self.logvar_layer = nn.Linear(hidden_dim, latent_dim)

        # Decoder: outputs mean and logvar for each input dimension
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            # Output: mean and logvar for each dimension => 2 * input_dim
            nn.Linear(hidden_dim, 2 * input_dim)
        )

    def encode(self, x):
        """Return the latent posterior parameters ``(mu, logvar)`` for ``x``."""
        h = self.encoder(x)
        mu = self.mu_layer(h)
        logvar = self.logvar_layer(h)
        return mu, logvar

    def reparameterize(self, mu, logvar):
        """Sample ``z = mu + eps * std`` with ``eps ~ N(0, I)``.

        Writing the sample this way keeps it differentiable with respect to
        ``mu`` and ``logvar``.
        """
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        """Return the reconstruction parameters ``(mean, logvar)`` for ``z``.

        The decoder emits ``2 * input_dim`` values per row: the first
        ``input_dim`` are the means, the remainder the log-variances.
        """
        params = self.decoder(z)
        # Split on the last axis so any number of leading batch axes works.
        mean = params[..., :self.input_dim]
        logvar = params[..., self.input_dim:]
        return mean, logvar

    def forward(self, x):
        """Encode ``x``, sample a latent vector and decode it.

        Returns:
            Tuple ``(mean, logvar_out, mu, logvar)``: decoder mean and
            log-variance, then encoder mean and log-variance.
        """
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        mean, logvar_out = self.decode(z)
        return mean, logvar_out, mu, logvar

# ---------------------
# Loss function (Gaussian decoder)
# ---------------------
def gaussian_nll(x, mean, logvar):
    """Total negative log-likelihood of ``x`` under N(mean, exp(logvar)).

    Per feature the NLL is
    ``0.5 * (log(2*pi) + logvar + (x - mean)^2 / exp(logvar))``.
    The terms are summed over dim 1 (features) for each row, and the row
    totals are then summed over the batch.

    Args:
        x: Observed values, with features along dim 1.
        mean: Predicted means, same shape as ``x``.
        logvar: Predicted log-variances, same shape as ``x``.

    Returns:
        Scalar tensor holding the summed NLL of the whole batch.
    """
    n_features = x.size(1)

    # Squared residuals weighted by the precision 1/var = exp(-logvar).
    residual_sq = (x - mean)**2
    precision = torch.exp(-logvar)
    weighted_error = 0.5 * torch.sum(residual_sq * precision, dim=1)

    # Log-determinant contribution of the diagonal covariance.
    half_logvar = 0.5 * torch.sum(logvar, dim=1)

    # Normalization constant, identical for every row.
    norm_const = n_features * 0.5 * np.log(2 * np.pi)

    per_row_nll = norm_const + half_logvar + weighted_error
    return torch.sum(per_row_nll)  # sum over batch

def loss_function(mean, logvar_out, x, mu, logvar_enc):
    """Negative ELBO for the Gaussian-decoder VAE, summed over the batch.

    Args:
        mean: Decoder means for ``x``.
        logvar_out: Decoder log-variances for ``x``.
        x: Observed input batch.
        mu: Encoder (latent posterior) means.
        logvar_enc: Encoder (latent posterior) log-variances.

    Returns:
        Scalar tensor: reconstruction NLL plus KL divergence.
    """
    # Closed-form KL(N(mu, exp(logvar_enc)) || N(0, I)), summed over all elements.
    kl_terms = 1 + logvar_enc - mu.pow(2) - logvar_enc.exp()
    kl_divergence = -0.5 * torch.sum(kl_terms)

    # Reconstruction term: Gaussian decoder NLL.
    reconstruction = gaussian_nll(x, mean, logvar_out)

    return reconstruction + kl_divergence

# ---------------------
# Training setup
# ---------------------
model = VAE(input_dim, hidden_dim, latent_dim)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)

# Scheduler: multiplies the learning rate by 0.5 when the validation loss
# plateaus. `verbose=True` is intentionally not passed: it is deprecated since
# PyTorch 2.2, so learning-rate reductions are reported from the loop below.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=5)

# Early stopping
best_val_loss = np.inf
waiting = 0
ckpt_path = 'vae_gaussian_decoder_checkpoint.pt'
# Tracks whether this run wrote a checkpoint, so a stale file left behind by
# an earlier run is never loaded by mistake.
checkpoint_saved = False

# ---------------------
# Training loop
# ---------------------
for epoch in range(epochs):
    # --- training pass ---
    model.train()
    train_loss = 0.0
    for batch in train_loader:
        x = batch[0].to(device)
        optimizer.zero_grad()
        mean, logvar_out, mu, logvar_enc = model(x)
        loss = loss_function(mean, logvar_out, x, mu, logvar_enc)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    # The loss is summed over each batch, so dividing by the dataset size
    # gives the mean per-sample loss.
    train_loss /= len(train_loader.dataset)

    # --- validation pass ---
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch in val_loader:
            x = batch[0].to(device)
            mean, logvar_out, mu, logvar_enc = model(x)
            loss = loss_function(mean, logvar_out, x, mu, logvar_enc)
            val_loss += loss.item()
    val_loss /= len(val_loader.dataset)

    # Step the scheduler and report any learning-rate reduction ourselves.
    lr_before = optimizer.param_groups[0]['lr']
    scheduler.step(val_loss)
    lr_after = optimizer.param_groups[0]['lr']
    if lr_after < lr_before:
        print(f"Epoch {epoch+1}: reducing learning rate from {lr_before:.4e} to {lr_after:.4e}.")

    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

    # Early stopping: checkpoint on improvement, otherwise count the stall.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        waiting = 0
        torch.save(model.state_dict(), ckpt_path)
        checkpoint_saved = True
    else:
        waiting += 1
        if waiting > patience:
            print("Early stopping triggered.")
            break

# Load best model. Only reload when this run actually saved one: if no epoch
# improved (e.g. the loss was NaN throughout, or `epochs` is 0) the file is
# either missing or belongs to a previous run.
if checkpoint_saved:
    # map_location keeps the reload working on whichever device is active.
    model.load_state_dict(torch.load(ckpt_path, map_location=device))
else:
    print(f"No checkpoint was saved during this run; keeping current weights instead of loading '{ckpt_path}'.")
model.eval()

# ---------------------
# Generate samples from VAE
# ---------------------
def sample_from_vae(model, num_samples=1000):
    """Generate rows by decoding latent draws from the standard-normal prior.

    The latent size and device are read from ``model`` itself (its
    ``mu_layer`` and parameters), so the function works for any VAE instance
    regardless of module-level settings.

    Args:
        model: VAE exposing ``mu_layer`` and ``decode(z) -> (mean, logvar)``.
        num_samples: Number of rows to generate.

    Returns:
        NumPy array with one generated row per sample, in the model's
        (standardized) input space.
    """
    latent_size = model.mu_layer.out_features
    model_device = next(model.parameters()).device

    # No gradients are needed anywhere in sampling, including the final draw.
    with torch.no_grad():
        z = torch.randn(num_samples, latent_size, device=model_device)
        mean, logvar_out = model.decode(z)
        # Sample from N(mean, exp(logvar_out))
        std = torch.exp(0.5 * logvar_out)
        eps = torch.randn_like(std)
        samples = mean + eps * std
    return samples.cpu().numpy()

# Generate as many rows as the real data set has, then undo the
# standardization so both sets are compared in the original units.
gen_samples = sample_from_vae(model, num_samples=len(data))
gen_samples_orig = scaler.inverse_transform(gen_samples)

real_data = data.values

# ---------------------
# Visualization
# ---------------------
def _plot_real_vs_generated(ax, column, title, real_label='Real', generated_label='Generated'):
    """Overlay KDEs of one column of the real and generated data on ``ax``.

    Uses ``fill=True``; the older ``shade=True`` spelling is deprecated in
    seaborn.
    """
    sns.kdeplot(real_data[:, column], label=real_label, ax=ax, fill=True)
    sns.kdeplot(gen_samples_orig[:, column], label=generated_label, ax=ax, fill=True)
    ax.set_title(title)
    ax.legend()


fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# Plot expiry distribution
_plot_real_vs_generated(axes[0], 0, 'Expiry Distribution')

# Plot forward distribution
_plot_real_vs_generated(axes[1], 2, 'Forward Distribution')

plt.tight_layout()
plt.show()

# Plot one strike and one vol dimension
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
strike_index = 3  # strikes start at index 3
vol_index = 3 + 9 # after strikes come the vols

_plot_real_vs_generated(
    axes[0], strike_index, f'Strike {strike_index - 2} Distribution',
    real_label='Real Strike', generated_label='Generated Strike',
)

_plot_real_vs_generated(
    axes[1], vol_index, f'Vol {vol_index - 11} Distribution',
    real_label='Real Vol', generated_label='Generated Vol',
)

plt.tight_layout()
plt.show()
