In [None]:
# imports
import pandas as pd
import numpy as np
import wandb

In [None]:
from data_handler import preprocessData, fitSurface

In [None]:
paths = [
    "../data/processed/pca/predicted_iv16.csv",
    "../data/processed/pca/predicted_iv17.csv",
    "../data/processed/pca/predicted_iv18.csv",
    "../data/processed/pca/predicted_iv19.csv",
    "../data/processed/pca/predicted_iv20.csv",
    "../data/processed/pca/predicted_iv21.csv",
    "../data/processed/pca/predicted_iv22.csv"
]
df = pd.concat([pd.read_csv(path) for path in paths])
val_df = pd.read_csv("../data/processed/pca/predicted_iv23.csv")

In [None]:
from models.vae_master import IVSFeatureExtractor

In [None]:
extractor = IVSFeatureExtractor(
        hidden_dim=4096,
        latent_dim=16,
        beta=1.0,
        learning_rate=0.001
)
# 4096_64_1_001

In [None]:
processed_data = extractor.prepare_data(df)
print(f"Processed data shape: {processed_data.shape}")

In [None]:
extractor.train(processed_data, val_df, batch_size=256, n_epochs=1000)

In [None]:
features = extractor.extract_features(processed_data)

In [None]:
features.shape

In [None]:
df_features = pd.DataFrame(features, columns=[f'feature_{i}' for i in range(features.shape[1])])

In [None]:
# for each date in df, append the features

unique_dates = df['date'].unique()
len(unique_dates)
df_features['date'] = df['date'].unique()

In [None]:
df_features.head()

In [None]:
df_features.to_csv('../data/processed/vae/features_vae_iv16_22_16.csv', index=False)

## LSTM

In [None]:
from models.lstm import CustomLSTMCell, CustomLSTMModel, ModelManager, DatasetManager

In [None]:
feature_cols = [f'feature_{i}' for i in range(16)]

In [None]:
import pandas as pd
vae_path = '../data/processed/vae/features_vae_iv16_22_16.csv'
df = pd.read_csv(vae_path)

In [None]:
df.head()

In [None]:
vae_path = '../data/processed/vae/features_vae_iv16_22_16.csv'
dataset = DatasetManager(vae_path)
features, targets = dataset.make_train_target_pairs(feature_cols)
print('Features shape:', features.shape)
print('Targets shape:', targets.shape)

In [None]:
#Split the dataset into training and validation sets
# split = int(0.8 * len(features))
# train_features, val_features = features[:split], features[split:]
# train_targets, val_targets = targets[:split], targets[split:]

# no split
train_features, train_targets = features, targets

In [None]:
print('Initializing model...')
model_path = './ckpts/lstm_vae_1622_512.pth'
model = ModelManager(input_dim=48, hidden_dim=512, output_dim=16, model_path=model_path, learning_rate=0.01, project='LSTM_VAE')
model.train(train_features, train_targets, epochs=50000)

#val_loader = DataLoader(TensorDataset(val_features, val_targets), batch_size=1, shuffle=False)
#model.validate(val_loader)

model.save_model()

## DNN

In [1]:
import pandas as pd
import numpy as np
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from models.lstm import CustomLSTMCell, CustomLSTMModel, ModelManager, DatasetManager

In [2]:
lstm_model_path = "./ckpts/lstm_vae_1622_512.pth"
#lstm_model_path = './ckpts/test_bilstm256.pth'
lstm_model = CustomLSTMModel(input_dim=48, hidden_dim=512, output_dim=16)
lstm_model.load_model(model_path=lstm_model_path)

Model loaded from ./ckpts/lstm_vae_1622_512.pth


  self.load_state_dict(torch.load(model_path))


In [3]:
features = pd.read_csv('../data/processed/vae/features_vae_iv16_22_16.csv')
features.head()

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,date
0,0.01349,0.016726,0.001015,0.006635,0.355671,0.26843,0.000592,-0.002008,-0.01745,0.014705,-0.000923,0.006342,0.012619,-0.003463,-0.007455,0.00114,2016-01-04
1,0.016278,0.016848,0.01175,0.01699,0.366535,0.197472,0.000607,-0.000559,-0.015649,0.017074,0.003308,0.009717,-0.080449,-0.001823,-0.003148,-0.00442,2016-01-05
2,0.022743,0.018066,0.015866,0.019294,0.463473,0.262002,-0.003702,0.005221,-0.015472,0.018113,-0.003555,0.007367,-0.235385,-0.001429,-0.010315,-0.0025,2016-01-06
3,-0.002543,0.029971,-0.040378,-0.018382,0.127714,0.492194,0.015958,-0.003395,0.023379,0.03776,-0.003421,-0.008119,0.672027,0.011833,0.008432,-0.028344,2016-01-07
4,0.001311,0.02665,-0.023601,-0.021134,0.192549,0.603569,0.00492,0.000244,0.019321,0.034228,-0.00384,-0.013453,0.603969,0.013828,-0.002908,-0.01929,2016-01-08


In [4]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
feature_cols = [f'feature_{i}' for i in range(16)]
for i in range(22,len(features)):
    ma1 = torch.tensor(features.iloc[i-1][feature_cols].astype(float).values, dtype=torch.float32).to(device)
    ma2 = torch.tensor(features.iloc[i-2][feature_cols].astype(float).values, dtype=torch.float32).to(device)
    ma3 = torch.tensor(features.iloc[i-3][feature_cols].astype(float).values, dtype=torch.float32).to(device)
    feature = torch.cat((ma1, ma2, ma3), dim=0).to(device)
    out = lstm_model.predict(feature)
    for j in range(16):
        features.at[i, f'feature_{j}'] = out[0][j].item()

In [6]:
features = features[22:].reset_index(drop=True)
features.head()

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,date
0,-0.008144,-0.004826,0.000819,-0.013507,0.615582,0.646299,0.006843,0.053013,-0.001125,0.027941,-0.031726,-0.03257,0.37632,0.027101,0.008045,0.028158,2016-02-04
1,-0.030909,-0.083262,-0.090908,-0.090189,0.114601,1.687865,0.11573,0.030573,-0.022653,0.055866,-0.122147,0.042756,1.085049,-0.040849,0.012105,0.042296,2016-02-05
2,-0.518011,-1.111243,-1.099482,-0.40283,-0.3982,8.175017,-0.050984,0.45056,-0.083833,0.556023,-0.937338,-0.606066,6.774503,0.271807,-0.161398,1.075073,2016-02-08
3,-15.487729,-19.178459,-1.488301,-23.767061,40.833786,85.244415,50.424183,-16.558826,-30.011923,7.000886,-40.644005,20.819012,28.950424,-23.403748,-9.569206,5.922505,2016-02-09
4,3.130387,-2.605597,-18.109007,21.915497,11.640251,40.706768,-5.848877,9.805226,25.235212,45.495163,-20.516996,-46.011906,10.775976,-16.367229,5.316827,-10.334608,2016-02-10


In [7]:
df_iv_path_list = [
    "../data/processed/pca/predicted_iv16.csv",
    "../data/processed/pca/predicted_iv17.csv",
    "../data/processed/pca/predicted_iv18.csv",
    "../data/processed/pca/predicted_iv19.csv",
    "../data/processed/pca/predicted_iv20.csv",
    "../data/processed/pca/predicted_iv21.csv",
    "../data/processed/pca/predicted_iv22.csv"
]

In [8]:
merged_df = pd.DataFrame()
for path in df_iv_path_list:
    df = pd.read_csv(path)
    merged_df = pd.concat([merged_df, df], axis=0)

merged_df = merged_df.reset_index(drop=True)
print(len(merged_df))
merged_df.head()

270886


Unnamed: 0,date,tau,m,IV
0,2016-01-04,0.027397,-0.510826,0.326153
1,2016-01-04,0.027397,-0.223144,0.291228
2,2016-01-04,0.027397,-0.105361,0.286565
3,2016-01-04,0.027397,-0.051293,0.286299
4,2016-01-04,0.027397,-0.025318,0.286591


In [9]:
df = pd.merge(merged_df, features, on='date')
df.head()

Unnamed: 0,date,tau,m,IV,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15
0,2016-02-04,0.027397,-0.510826,0.391197,-0.008144,-0.004826,0.000819,-0.013507,0.615582,0.646299,0.006843,0.053013,-0.001125,0.027941,-0.031726,-0.03257,0.37632,0.027101,0.008045,0.028158
1,2016-02-04,0.027397,-0.223144,0.329391,-0.008144,-0.004826,0.000819,-0.013507,0.615582,0.646299,0.006843,0.053013,-0.001125,0.027941,-0.031726,-0.03257,0.37632,0.027101,0.008045,0.028158
2,2016-02-04,0.027397,-0.105361,0.312298,-0.008144,-0.004826,0.000819,-0.013507,0.615582,0.646299,0.006843,0.053013,-0.001125,0.027941,-0.031726,-0.03257,0.37632,0.027101,0.008045,0.028158
3,2016-02-04,0.027397,-0.051293,0.306049,-0.008144,-0.004826,0.000819,-0.013507,0.615582,0.646299,0.006843,0.053013,-0.001125,0.027941,-0.031726,-0.03257,0.37632,0.027101,0.008045,0.028158
4,2016-02-04,0.027397,-0.025318,0.303404,-0.008144,-0.004826,0.000819,-0.013507,0.615582,0.646299,0.006843,0.053013,-0.001125,0.027941,-0.031726,-0.03257,0.37632,0.027101,0.008045,0.028158


In [10]:
from models.dnn import IVDataset, IVSDNN, train_model, large_moneyness_penalty, butterfly_arbitrage_penalty, calendar_spread_penalty, safe_divide

In [11]:
dataset = IVDataset(df, feature_cols)


Tensor shapes:
Features: torch.Size([267498, 16])
m: torch.Size([267498, 1])
tau: torch.Size([267498, 1])
iv: torch.Size([267498, 1])

Checking for NaN values:
Features NaN: False
m NaN: False
tau NaN: False
iv NaN: False


In [12]:
print(dataset.get_input_size())

18


In [13]:
from torch.utils.data import DataLoader
train_loader = DataLoader(dataset, batch_size=16, shuffle=True)
dnn = IVSDNN(input_size=dataset.get_input_size(), hidden_size=512)

In [14]:
lambda_penalty=1
num_epochs=500

In [15]:
import wandb
wandb.init(project="ivs-dnn")
train_model(dnn, train_loader, 300, 0.001, 1, wandb)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mabhigyanshanker[0m ([33mabx-group[0m). Use [1m`wandb login --relogin`[0m to force relogin




Epoch 1 || Loss = 0.313731 || Penalty = 0.248671 || Calendar Penalty = 0.315644 || Butterfly Penalty = 0.214365 || Large Moneyness Penalty = 0.713345 || MAPE = 99.901703
Epoch 2 || Loss = 0.065056 || Penalty = 0.000011 || Calendar Penalty = 0.000026 || Butterfly Penalty = 0.000000 || Large Moneyness Penalty = 0.000000 || MAPE = 100.000000
Epoch 3 || Loss = 0.065056 || Penalty = 0.000011 || Calendar Penalty = 0.000018 || Butterfly Penalty = 0.000000 || Large Moneyness Penalty = 0.000000 || MAPE = 100.000000
Epoch 4 || Loss = 0.065056 || Penalty = 0.000011 || Calendar Penalty = 0.000013 || Butterfly Penalty = 0.000000 || Large Moneyness Penalty = 0.000000 || MAPE = 100.000000
Epoch 5 || Loss = 0.065055 || Penalty = 0.000011 || Calendar Penalty = 0.000011 || Butterfly Penalty = 0.000000 || Large Moneyness Penalty = 0.000000 || MAPE = 100.000000
Epoch 6 || Loss = 0.065055 || Penalty = 0.000011 || Calendar Penalty = 0.000011 || Butterfly Penalty = 0.000000 || Large Moneyness Penalty = 0.000

KeyboardInterrupt: 

In [None]:
def verify_data_ranges(train_loader):
    m_min, m_max = float('inf'), -float('inf')
    tau_min, tau_max = float('inf'), -float('inf')
    iv_min, iv_max = float('inf'), -float('inf')
    
    for batch_inputs, batch_targets in train_loader:
        m = batch_inputs[:, -2]
        tau = batch_inputs[:, -1]
        
        m_min = min(m_min, m.min().item())
        m_max = max(m_max, m.max().item())
        tau_min = min(tau_min, tau.min().item())
        tau_max = max(tau_max, tau.max().item())
        iv_min = min(iv_min, batch_targets.min().item())
        iv_max = max(iv_max, batch_targets.max().item())
    
    print(f"Data ranges:")
    print(f"Moneyness: [{m_min:.3f}, {m_max:.3f}]")
    print(f"Tau: [{tau_min:.3f}, {tau_max:.3f}]")
    print(f"IV: [{iv_min:.3f}, {iv_max:.3f}]")

verify_data_ranges(train_loader)