In [None]:
# imports
import pandas as pd
import numpy as np
import wandb

In [None]:
from data_handler import preprocessData, fitSurface

In [None]:
# Training inputs: one predicted-IV CSV per file number 16..22
# (presumably years — confirm).
paths = [
    f"../data/processed/pca/predicted_iv{file_no}.csv" for file_no in range(16, 23)
]
# Read every file, then stack the frames row-wise. Each frame keeps its own
# index, so index labels repeat across files in the concatenated result.
yearly_frames = [pd.read_csv(csv_path) for csv_path in paths]
df = pd.concat(yearly_frames)
# File 23 is loaded on its own as the validation frame.
val_df = pd.read_csv("../data/processed/pca/predicted_iv23.csv")

In [None]:
from models.vae_master import IVSFeatureExtractor

In [None]:
# Build the feature extractor (imported from models.vae_master) with its
# hyperparameters.
extractor = IVSFeatureExtractor(
        hidden_dim=4096,
        latent_dim=16,
        beta=1.0,
        learning_rate=0.001
)
# Run tag: 4096_64_1_001
# NOTE(review): the tag reads like hidden_latent_beta_lr, but it says 64 while
# latent_dim above is 16 — looks stale; confirm which value is intended.

In [None]:
# Convert the concatenated training frame into the extractor's input format
# and report the resulting shape.
processed_data = extractor.prepare_data(df)
print(f"Processed data shape: {processed_data.shape}")

In [None]:
# Train the extractor for 1000 epochs with batches of 256.
# NOTE(review): the training input went through extractor.prepare_data, but
# val_df is passed exactly as read from CSV. Confirm that train() prepares the
# validation frame itself; otherwise it likely needs prepare_data first.
extractor.train(processed_data, val_df, batch_size=256, n_epochs=1000)

In [None]:
# Run the trained extractor over the prepared training data to get the features.
features = extractor.extract_features(processed_data)

In [None]:
# Sanity check: display the shape of the extracted feature array.
features.shape

In [None]:
# Wrap the features in a DataFrame with one column per feature dimension,
# named feature_0 .. feature_{n-1}.
df_features = pd.DataFrame(features, columns=[f'feature_{i}' for i in range(features.shape[1])])

In [None]:
# Tag every feature row with its date.
# Assumes extract_features returned exactly one row per unique date, in the
# order the dates first appear in df — TODO confirm against IVSFeatureExtractor.
unique_dates = df['date'].unique()
# Fail with a readable message if the one-row-per-date assumption is violated.
if len(unique_dates) != len(df_features):
    raise ValueError(
        f"{len(df_features)} feature rows but {len(unique_dates)} unique dates"
    )
df_features['date'] = unique_dates

In [None]:
# Preview the feature table, now including the date column.
df_features.head()

In [None]:
# Persist the features (without the index) for the LSTM and DNN stages below,
# which read this same file back.
df_features.to_csv('../data/processed/vae/features_vae_iv16_22_16.csv', index=False)

## LSTM

In [None]:
from models.lstm import CustomLSTMCell, CustomLSTMModel, ModelManager, DatasetManager

In [None]:
# Names of the 16 feature columns (feature_0 .. feature_15); 16 matches the
# latent_dim the extractor was built with above.
feature_cols = [f'feature_{i}' for i in range(16)]

In [None]:
import pandas as pd
# Reload the saved feature table for inspection.
# Note: this rebinds `df`, which previously held the concatenated IV frame.
vae_path = '../data/processed/vae/features_vae_iv16_22_16.csv'
df = pd.read_csv(vae_path)

In [None]:
# Preview the reloaded feature table.
df.head()

In [None]:
# Build the (input, target) training pairs for the LSTM from the saved
# feature file.
vae_path = '../data/processed/vae/features_vae_iv16_22_16.csv'
dataset = DatasetManager(vae_path)
# NOTE(review): how inputs and targets are windowed is defined inside
# DatasetManager.make_train_target_pairs and is not visible here.
features, targets = dataset.make_train_target_pairs(feature_cols)
print('Features shape:', features.shape)
print('Targets shape:', targets.shape)

In [None]:
# An 80/20 split by position is kept here for reference but is currently
# disabled:
# split = int(0.8 * len(features))
# train_features, val_features = features[:split], features[split:]
# train_targets, val_targets = targets[:split], targets[split:]

# No split: every (feature, target) pair is used for training, so nothing is
# held out for validation.
train_features, train_targets = features, targets

In [None]:
# Train the LSTM on all pairs and save the checkpoint to model_path.
print('Initializing model...')
model_path = './ckpts/lstm_vae_1622_512.pth'
# NOTE(review): input_dim=48 presumably equals 3 lagged steps x 16 features and
# output_dim=16 one predicted feature vector — confirm against
# DatasetManager.make_train_target_pairs.
model = ModelManager(input_dim=48, hidden_dim=512, output_dim=16, model_path=model_path, learning_rate=0.01, project='LSTM_VAE')
model.train(train_features, train_targets, epochs=50000)

# Validation is disabled: the split that would define val_features and
# val_targets is commented out in the cell above.
#val_loader = DataLoader(TensorDataset(val_features, val_targets), batch_size=1, shuffle=False)
#model.validate(val_loader)

model.save_model()

## DNN

In [None]:
import pandas as pd
import numpy as np
import torch
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from models.lstm import CustomLSTMCell, CustomLSTMModel, ModelManager, DatasetManager

In [None]:
# Rebuild the LSTM with the same dimensions used when it was trained above
# (48 / 512 / 16) and load the checkpoint saved by that training cell.
lstm_model_path = "./ckpts/lstm_vae_1622_512.pth"
#lstm_model_path = './ckpts/test_bilstm256.pth'
lstm_model = CustomLSTMModel(input_dim=48, hidden_dim=512, output_dim=16)
# NOTE(review): the model is not explicitly moved to `device` or put in eval
# mode here — confirm load_model / predict take care of that.
lstm_model.load_model(model_path=lstm_model_path)

In [None]:
# Load the saved feature table (feature_* columns plus date).
# Note: `features` is now a DataFrame; earlier in the notebook the same name
# held the extractor output and then the LSTM training inputs.
features = pd.read_csv('../data/processed/vae/features_vae_iv16_22_16.csv')
features.head()

In [None]:
import torch
# Use the GPU when CUDA is available, otherwise the CPU (same selection as in
# the imports cell at the top of this section).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Replace every feature row from ROLLOUT_START onward with the LSTM's
# prediction from the N_LAGS preceding rows.
#
# The frame is overwritten in place, so from the second iteration on the
# lagged inputs include earlier predictions (an autoregressive rollout).
# Re-running this cell without reloading `features` would therefore start
# from already-predicted values.
# NOTE(review): the reason for starting at row 22 is not visible here — confirm.
feature_cols = [f'feature_{i}' for i in range(16)]
ROLLOUT_START = 22  # rows before this keep the values loaded from the CSV
N_LAGS = 3          # the model input is the previous 3 rows, newest first

for i in range(ROLLOUT_START, len(features)):
    # One float32 vector per lag, ordered t-1, t-2, ..., t-N_LAGS.
    lagged = [
        torch.tensor(
            features.iloc[i - lag][feature_cols].astype(float).values,
            dtype=torch.float32,
        )
        for lag in range(1, N_LAGS + 1)
    ]
    # Concatenate into a single flat input vector and move it to the device.
    feature = torch.cat(lagged, dim=0).to(device)
    out = lstm_model.predict(feature)
    # Write the prediction back column by column. out[0] is read as the
    # predicted feature vector, exactly as in the original indexing.
    for j, col in enumerate(feature_cols):
        features.at[i, col] = out[0][j].item()

In [None]:
# Drop the first 22 rows — the ones the rollout loop left untouched — so only
# model-predicted rows remain, then renumber the index from 0.
features = features[22:].reset_index(drop=True)
features.head()

In [None]:
# Predicted-IV CSVs to merge with the features: files 16 through 22, the same
# set that was used to train the feature extractor at the top of the notebook.
df_iv_path_list = [
    "../data/processed/pca/predicted_iv16.csv",
    "../data/processed/pca/predicted_iv17.csv",
    "../data/processed/pca/predicted_iv18.csv",
    "../data/processed/pca/predicted_iv19.csv",
    "../data/processed/pca/predicted_iv20.csv",
    "../data/processed/pca/predicted_iv21.csv",
    "../data/processed/pca/predicted_iv22.csv"
]

In [None]:
# Read every IV file, then concatenate once. Growing a DataFrame with
# pd.concat inside the loop re-copies all previous rows on each iteration and
# starts from an empty frame; a single concat over the list avoids both.
iv_frames = [pd.read_csv(path) for path in df_iv_path_list]
# ignore_index=True renumbers rows 0..n-1, matching reset_index(drop=True).
merged_df = pd.concat(iv_frames, axis=0, ignore_index=True)
print(len(merged_df))
merged_df.head()

In [None]:
# Attach the predicted features to every IV row that shares the same date.
# pd.merge defaults to an inner join, so IV rows whose date has no feature
# row (including the dates dropped above) are discarded.
df = pd.merge(merged_df, features, on='date')
df.head()

In [None]:
# Keep only the first 30000 merged rows (positional slice).
# NOTE(review): the cutoff is not explained here — presumably to bound training
# time; confirm that truncating by position rather than sampling is intended.
df = df[:30000]

In [None]:
from models.dnn import IVDataset, IVSDNN, train_model, large_moneyness_penalty, butterfly_arbitrage_penalty, calendar_spread_penalty, safe_divide

In [None]:
# Wrap the merged frame in the DNN dataset, passing the feature column names.
# Note: this rebinds `dataset`, which held the LSTM DatasetManager earlier.
dataset = IVDataset(df, feature_cols)

In [None]:
# Show the input width the dataset reports; the DNN below is sized from it.
print(dataset.get_input_size())

In [None]:
from torch.utils.data import DataLoader
# Shuffled mini-batches of 256 samples drawn from the IV dataset.
train_loader = DataLoader(dataset, batch_size=256, shuffle=True)
# DNN sized to the dataset's reported input width, with hidden_size=512.
dnn = IVSDNN(input_size=dataset.get_input_size(), hidden_size=512)

In [None]:
# NOTE(review): neither name is referenced in the cells visible here — the
# train_model call below passes the literals 100, 0.001 and 1 instead. Confirm
# whether num_epochs=500 or the literal 100 is the intended epoch count.
lambda_penalty=1
num_epochs=500

In [None]:
import wandb
# Start a Weights & Biases run, then train the DNN; the wandb module itself is
# handed to train_model.
wandb.init(project="vae-dnn")
# NOTE(review): the positional arguments are presumably (model, loader,
# num_epochs, learning_rate, lambda_penalty, logger) — confirm against
# models.dnn.train_model.
train_model(dnn, train_loader, 100, 0.001, 1, wandb)

In [None]:
def verify_data_ranges(train_loader):
    """Print the observed min/max of moneyness, tau and IV over a loader.

    Iterates once over ``train_loader``, which must yield
    ``(batch_inputs, batch_targets)`` pairs. Moneyness is read from the
    second-to-last input column, tau from the last input column, and IV from
    the targets. Nothing is returned; the ranges are printed with three
    decimals. An empty loader prints ``[inf, -inf]`` for every quantity.
    """
    # Each entry is [running minimum, running maximum], seeded so that the
    # first batch always replaces both ends.
    bounds = {
        key: [float('inf'), -float('inf')]
        for key in ("m", "tau", "iv")
    }

    def widen(key, values):
        # Fold one batch's extremes into the running bounds for `key`.
        lo, hi = bounds[key]
        lo = min(lo, values.min().item())
        hi = max(hi, values.max().item())
        bounds[key] = [lo, hi]

    for inputs, targets in train_loader:
        widen("m", inputs[:, -2])
        widen("tau", inputs[:, -1])
        widen("iv", targets)

    m_min, m_max = bounds["m"]
    tau_min, tau_max = bounds["tau"]
    iv_min, iv_max = bounds["iv"]

    print("Data ranges:")
    print(f"Moneyness: [{m_min:.3f}, {m_max:.3f}]")
    print(f"Tau: [{tau_min:.3f}, {tau_max:.3f}]")
    print(f"IV: [{iv_min:.3f}, {iv_max:.3f}]")

# Report the moneyness / tau / IV ranges actually seen in the training loader.
verify_data_ranges(train_loader)