# Bi-LSTM for Feature Prediction

In [None]:
# imports
import pandas as pd
import numpy as np
import wandb

In [None]:
from models.bilstm import BiLSTMDatasetManager, BiLSTMModelManager

In [None]:
# Processed feature CSV that the next cell hands to BiLSTMDatasetManager.
data_path = "../data/processed/features_pca_iv16-20.csv"

In [None]:
# Wrap the CSV at `data_path` in the project's dataset manager and ask it for
# the (features, targets) pairs that the training cell below consumes.
dataset = BiLSTMDatasetManager(data_path)
features, targets = dataset.make_train_target_pairs()
# Print both shapes as a quick sanity check before training.
print('Features shape:', features.shape)
print('Targets shape:', targets.shape)

In [None]:
# Checkpoint path handed to the model manager as `model_path`.
model_path = './ckpts/test_bilstm256.pth'

print('Initializing BiLSTM model...')
model = BiLSTMModelManager(
    input_dim=9,
    hidden_dim=256,
    output_dim=3,
    learning_rate=0.01,
    model_path=model_path,
)
# NOTE(review): 250000 epochs makes this cell expensive to re-run; consider
# guarding it behind a check for an existing checkpoint.
model.train(features, targets, epochs=250000)

## DNN

In [1]:
# Third-party imports first, then the project-local model class.
import numpy as np
import pandas as pd
import torch

from models.bilstm import CustomBiLSTMModel

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
# Rebuild the network with the same dimensions used in the training cell and
# restore its weights from the saved checkpoint.
model_path = './ckpts/test_bilstm256.pth'
bilstm_model = CustomBiLSTMModel(input_dim=9, hidden_dim=256, output_dim=3)

# The prediction loop moves its inputs to `device`, so the model must live on
# the same device; eval mode disables any training-only layer behaviour.
bilstm_model.to(device)
bilstm_model.eval()

# map_location lets a checkpoint saved on GPU load on a CPU-only machine.
# weights_only=True restricts unpickling to tensors/containers, which is all a
# state_dict needs, and avoids the torch.load FutureWarning.
state_dict = torch.load(model_path, map_location=device, weights_only=True)

# Kept as the last expression so the cell still displays the key-matching result.
bilstm_model.load_state_dict(state_dict)

  bilstm_model.load_state_dict(torch.load(model_path))


<All keys matched successfully>

In [3]:
# Load the processed feature table as a DataFrame and preview its first rows.
# NOTE: this rebinds `features`, which the Bi-LSTM training section above uses
# for the training inputs.
features = pd.read_csv("../data/processed/features_pca_iv16-20.csv")
features.head()

Unnamed: 0,date,feature1,feature2,feature3
0,2016-01-05,-2.946036,0.399305,2.333101
1,2016-01-06,-2.890993,0.153763,2.388737
2,2016-01-07,-4.476846,2.50646,2.701483
3,2016-01-08,-3.925725,1.810529,2.540009
4,2016-01-11,-4.277851,2.453446,2.562114


In [4]:
# For every row i >= 22, build the 9-value model input from rows strictly
# before i: the previous row (ma1), the mean of the previous 5 rows (ma5) and
# the mean of the previous 22 rows (ma22). The prediction is stored on row i
# in the new columns F1..F3. Rows 0..21 get no prediction and stay NaN there.
input_cols = ['feature1', 'feature2', 'feature3']

# Project and cast the input columns once instead of on every iteration. The
# loop only adds F1..F3 to `features`, so this snapshot stays valid.
history = features[input_cols].astype(float)

# Inference only: skip autograd bookkeeping for the whole loop.
with torch.no_grad():
    for i in range(22, len(features)):
        ma1 = torch.tensor(history.iloc[i - 1].values, dtype=torch.float32)
        ma5 = torch.tensor(history.iloc[i - 5:i].mean(axis=0).values, dtype=torch.float32)
        ma22 = torch.tensor(history.iloc[i - 22:i].mean(axis=0).values, dtype=torch.float32)
        feature = torch.cat((ma1, ma5, ma22), dim=0).to(device)
        out = bilstm_model.predict(feature)
        # assumes `out` is iterable over rows of 3 values — TODO confirm against
        # CustomBiLSTMModel.predict; with several rows the last one wins.
        for obj in out:
            features.at[i, "F1"] = obj[0].item()
            features.at[i, "F2"] = obj[1].item()
            features.at[i, "F3"] = obj[2].item()

In [5]:
# Drop every row that contains a NaN (the prediction loop starts at row 22, so
# earlier rows have no F1..F3 values) and renumber the index from 0.
features = features.dropna().reset_index(drop=True)
features.head()

Unnamed: 0,date,feature1,feature2,feature3,F1,F2,F3
0,2016-02-05,-2.78585,0.208685,2.273442,-2.866897,0.1228,2.436066
1,2016-02-08,-2.426245,-0.018148,2.072642,-2.711939,0.211495,2.214754
2,2016-02-09,-2.978782,0.438694,2.333295,-3.057659,0.473622,2.400317
3,2016-02-10,-3.273419,0.806418,2.418158,-3.468703,1.069636,2.478444
4,2016-02-11,-2.607269,0.123359,2.159899,-2.746451,0.177474,2.30525


In [6]:
# One predicted-IV CSV per numeric suffix, 16 through 20 inclusive.
df_iv_path_list = [
    f"../data/processed/pca/predicted_iv{suffix}.csv"
    for suffix in range(16, 21)
]

In [7]:
# Read every predicted-IV CSV and stack them row-wise with a single concat.
# Concatenating once avoids re-copying the accumulated frame on each iteration
# (growing a DataFrame inside a loop is quadratic), and ignore_index=True gives
# the same fresh 0..n-1 index that reset_index(drop=True) produced.
merged_df = pd.concat(
    [pd.read_csv(path) for path in df_iv_path_list],
    axis=0,
    ignore_index=True,
)

print(len(merged_df))
merged_df.head()

193424


Unnamed: 0,date,tau,m,IV
0,2016-01-04,0.027397,-0.510826,0.326153
1,2016-01-04,0.027397,-0.223144,0.291228
2,2016-01-04,0.027397,-0.105361,0.286565
3,2016-01-04,0.027397,-0.051293,0.286299
4,2016-01-04,0.027397,-0.025318,0.286591


In [8]:
# Attach the per-date feature1..3 and F1..3 columns to every IV observation by
# joining on the shared `date` column. Only dates present in both frames are
# kept (inner join, which is also pandas' default).
df = merged_df.merge(features, on='date', how='inner')
df.head()

Unnamed: 0,date,tau,m,IV,feature1,feature2,feature3,F1,F2,F3
0,2016-02-05,0.027397,-0.510826,0.346719,-2.78585,0.208685,2.273442,-2.866897,0.1228,2.436066
1,2016-02-05,0.027397,-0.223144,0.309534,-2.78585,0.208685,2.273442,-2.866897,0.1228,2.436066
2,2016-02-05,0.027397,-0.105361,0.304773,-2.78585,0.208685,2.273442,-2.866897,0.1228,2.436066
3,2016-02-05,0.027397,-0.051293,0.304623,-2.78585,0.208685,2.273442,-2.866897,0.1228,2.436066
4,2016-02-05,0.027397,-0.025318,0.305006,-2.78585,0.208685,2.273442,-2.866897,0.1228,2.436066


In [9]:
from models.dnn import (
    IVDataset,
    IVSDNN,
    train_model,
    large_moneyness_penalty,
    butterfly_arbitrage_penalty,
    calendar_spread_penalty,
    safe_divide,
)

# Columns of `df` that are passed to IVDataset as its feature columns.
feature_cols = ['F1', 'F2', 'F3']

In [10]:
# Build the dataset from the merged frame, passing F1..F3 as `feature_cols`.
# NOTE: this rebinds `dataset`, which the Bi-LSTM section above also uses.
dataset = IVDataset(df, feature_cols)


Tensor shapes:
Features: torch.Size([189882, 3])
m: torch.Size([189882, 1])
tau: torch.Size([189882, 1])
iv: torch.Size([189882, 1])

Checking for NaN values:
Features NaN: False
m NaN: False
tau NaN: False
iv NaN: False


In [11]:
# Input width reported by the dataset; the same call sizes IVSDNN below.
print(dataset.get_input_size())

5


In [12]:
from torch.utils.data import DataLoader
# Shuffled mini-batches of 512 samples drawn from the IV dataset.
train_loader = DataLoader(dataset, batch_size=512, shuffle=True)
# Take the input width from the dataset itself rather than hard-coding it.
dnn = IVSDNN(input_size=dataset.get_input_size(), hidden_size=512)

In [13]:
# Training hyper-parameters.
# NOTE(review): the training call in the next cell passes literal values and
# does not read these names — confirm which values are intended.
lambda_penalty = 1
num_epochs = 500

In [14]:
import wandb
# Start a Weights & Biases run in the "ivs-dnn" project; the wandb module
# itself is passed to train_model as its last argument.
wandb.init(project="ivs-dnn")
# NOTE(review): the positional literals (200, 0.001, 1) are presumably epochs,
# learning rate and penalty weight — verify against train_model's signature.
# The previous cell defines num_epochs=500 and lambda_penalty=1, but they are
# not used here, so this run uses 200 rather than 500.
train_model(dnn, train_loader, 200, 0.001, 1, wandb)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mabhigyanshanker[0m ([33mabx-group[0m). Use [1m`wandb login --relogin`[0m to force relogin




Epoch 1 || Loss = 11.582334 || Penalty = 11.503100 || Calendar Penalty = 14.281682 || Butterfly Penalty = 12.526200 || Large Moneyness Penalty = 30.707616
Epoch 2 || Loss = 0.057450 || Penalty = 0.000158 || Calendar Penalty = 0.000384 || Butterfly Penalty = 0.000000 || Large Moneyness Penalty = 0.000011
Epoch 3 || Loss = 0.057395 || Penalty = 0.000103 || Calendar Penalty = 0.000170 || Butterfly Penalty = 0.000000 || Large Moneyness Penalty = 0.000002
Epoch 4 || Loss = 0.057370 || Penalty = 0.000077 || Calendar Penalty = 0.000096 || Butterfly Penalty = 0.000000 || Large Moneyness Penalty = 0.000001
Epoch 5 || Loss = 0.057353 || Penalty = 0.000060 || Calendar Penalty = 0.000060 || Butterfly Penalty = 0.000000 || Large Moneyness Penalty = 0.000000
Epoch 6 || Loss = 0.057338 || Penalty = 0.000044 || Calendar Penalty = 0.000044 || Butterfly Penalty = 0.000000 || Large Moneyness Penalty = 0.000000
Epoch 7 || Loss = 0.057331 || Penalty = 0.000038 || Calendar Penalty = 0.000038 || Butterfly Pe

KeyboardInterrupt: 