# Bi-LSTM for Feature Prediction

In [None]:
# imports
import pandas as pd
import numpy as np
import wandb

In [None]:
from models.bilstm import BiLSTMDatasetManager, BiLSTMModelManager

In [None]:
# CSV handed to BiLSTMDatasetManager in the next cell. The same file is read
# again in the DNN section, where it shows `date`, `feature1`..`feature3` columns.
data_path = "../data/processed/features_pca_iv16-20.csv"

In [None]:
# Build the BiLSTM training inputs and targets from the feature CSV.
# NOTE(review): how the pairs are windowed is decided inside
# BiLSTMDatasetManager.make_train_target_pairs and is not visible here.
dataset = BiLSTMDatasetManager(data_path)
features, targets = dataset.make_train_target_pairs()
# Quick shape sanity check before the (long) training run below.
print('Features shape:', features.shape)
print('Targets shape:', targets.shape)

In [None]:
# Checkpoint location passed to the model manager; the DNN section later loads
# weights from this same path.
model_path = './ckpts/test_bilstm256.pth'

print('Initializing BiLSTM model...')
model = BiLSTMModelManager(
    # NOTE(review): 9 presumably = 3 features x 3 look-back windows, matching
    # the 9-element input built in the inference cell -- confirm against
    # BiLSTMDatasetManager.
    input_dim=9,
    hidden_dim=256,
    output_dim=3,
    learning_rate=0.01,
    model_path=model_path,
)

# Long-running step: 250000 epochs over the full (features, targets) set.
model.train(features, targets, epochs=250000)

## DNN

In [1]:
import pandas as pd
import numpy as np
import torch
# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from models.bilstm import CustomBiLSTMModel

In [2]:
# Restore the trained BiLSTM for inference.
# The constructor arguments must match the architecture that produced the
# checkpoint (same values as in the training cell above).
model_path = './ckpts/test_bilstm256.pth'
bilstm_model = CustomBiLSTMModel(input_dim=9, hidden_dim=256, output_dim=3).to(device)
# Inference only: switch off training-time behaviour such as dropout.
bilstm_model.eval()
# map_location lets a checkpoint saved on GPU load on a CPU-only machine (and
# vice versa); weights_only=True restricts unpickling to tensors/containers,
# which is all a state_dict needs and avoids executing arbitrary pickled code.
state_dict = torch.load(model_path, map_location=device, weights_only=True)
# Kept as the last expression so the cell still displays the key-matching report.
bilstm_model.load_state_dict(state_dict)

  bilstm_model.load_state_dict(torch.load(model_path))


<All keys matched successfully>

In [3]:
# Load the daily feature table (one row per date with feature1..feature3).
# Note: from here on `features` is a DataFrame; the training section above
# uses the same name for the BiLSTM input returned by the dataset manager.
features = pd.read_csv("../data/processed/features_pca_iv16-20.csv")
features.head()

Unnamed: 0,date,feature1,feature2,feature3
0,2016-01-05,-2.946036,0.399305,2.333101
1,2016-01-06,-2.890993,0.153763,2.388737
2,2016-01-07,-4.476846,2.50646,2.701483
3,2016-01-08,-3.925725,1.810529,2.540009
4,2016-01-11,-4.277851,2.453446,2.562114


In [4]:
# Roll the trained BiLSTM over the daily feature table.
# For every row i >= 22 the model input is the concatenation of
#   - the previous day's features            (rows i-1)
#   - the mean of the previous 5 days         (rows i-5 .. i-1)
#   - the mean of the previous 22 days        (rows i-22 .. i-1)
# giving a 9-element vector; the prediction is stored in columns F1..F3 of
# row i. Rows 0..21 have no full 22-day history and are left without F1..F3.
pca_cols = ['feature1', 'feature2', 'feature3']

# Select and cast the numeric inputs once instead of on every iteration.
pca_values = features[pca_cols].astype(float)

# Send inputs to wherever the model's weights actually live, so the loop does
# not hit a CPU/CUDA mismatch if the model was not moved to the global `device`.
model_device = next(bilstm_model.parameters()).device

with torch.no_grad():  # pure inference: no autograd bookkeeping needed
    for i in range(22, len(features)):
        ma1 = torch.tensor(pca_values.iloc[i-1].values, dtype=torch.float32)
        ma5 = torch.tensor(pca_values.iloc[i-5:i].mean(axis=0).values, dtype=torch.float32)
        ma22 = torch.tensor(pca_values.iloc[i-22:i].mean(axis=0).values, dtype=torch.float32)
        feature = torch.cat((ma1, ma5, ma22), dim=0).to(model_device)
        out = bilstm_model.predict(feature)
        # NOTE(review): `predict` presumably returns a single row of 3 values;
        # if it ever returns several rows, the last one wins -- confirm.
        for obj in out:
            features.at[i, "F1"] = obj[0].item()
            features.at[i, "F2"] = obj[1].item()
            features.at[i, "F3"] = obj[2].item()

In [5]:
# Drop every row containing a NaN in any column and renumber from 0.
# The first 22 rows never received F1..F3 in the loop above, so they go here.
# This rebinds `features`: the unfiltered table is no longer available afterwards.
features = features.dropna().reset_index(drop=True)
features.head()

Unnamed: 0,date,feature1,feature2,feature3,F1,F2,F3
0,2016-02-05,-2.78585,0.208685,2.273442,-2.866897,0.1228,2.436066
1,2016-02-08,-2.426245,-0.018148,2.072642,-2.711939,0.211495,2.214754
2,2016-02-09,-2.978782,0.438694,2.333295,-3.057659,0.473622,2.400317
3,2016-02-10,-3.273419,0.806418,2.418158,-3.468703,1.069636,2.478444
4,2016-02-11,-2.607269,0.123359,2.159899,-2.746451,0.177474,2.30525


In [6]:
# One predicted-IV CSV per year suffix 16..20, listed in ascending order.
df_iv_path_list = [
    f"../data/processed/pca/predicted_iv{year_suffix}.csv"
    for year_suffix in range(16, 21)
]

In [7]:
# Read every yearly file, then stack them with a single concat.
# Concatenating inside the loop re-copies the accumulated frame on each pass
# and seeds the result with an empty DataFrame; one concat over a list avoids
# both. ignore_index=True gives the same fresh 0..n-1 index that
# reset_index(drop=True) produced before.
yearly_frames = [pd.read_csv(path) for path in df_iv_path_list]
merged_df = pd.concat(yearly_frames, axis=0, ignore_index=True)

print(len(merged_df))
merged_df.head()

193424


Unnamed: 0,date,tau,m,IV
0,2016-01-04,0.027397,-0.510826,0.326153
1,2016-01-04,0.027397,-0.223144,0.291228
2,2016-01-04,0.027397,-0.105361,0.286565
3,2016-01-04,0.027397,-0.051293,0.286299
4,2016-01-04,0.027397,-0.025318,0.286591


In [8]:
# Attach each date's feature1..feature3 and predicted F1..F3 to every
# (tau, m, IV) row of that date. The join key is `date`; merge defaults to an
# inner join, so dates present in only one of the two frames are dropped.
df = merged_df.merge(features, on='date')
df.head()

Unnamed: 0,date,tau,m,IV,feature1,feature2,feature3,F1,F2,F3
0,2016-02-05,0.027397,-0.510826,0.346719,-2.78585,0.208685,2.273442,-2.866897,0.1228,2.436066
1,2016-02-05,0.027397,-0.223144,0.309534,-2.78585,0.208685,2.273442,-2.866897,0.1228,2.436066
2,2016-02-05,0.027397,-0.105361,0.304773,-2.78585,0.208685,2.273442,-2.866897,0.1228,2.436066
3,2016-02-05,0.027397,-0.051293,0.304623,-2.78585,0.208685,2.273442,-2.866897,0.1228,2.436066
4,2016-02-05,0.027397,-0.025318,0.305006,-2.78585,0.208685,2.273442,-2.866897,0.1228,2.436066


In [9]:
# Row count after the join (dates without F1..F3 no longer appear).
len(df)

189882

In [11]:
# Positional head/tail split: the first 10000 joined rows train the DNN and
# the last 2000 rows validate it; everything in between is unused.
# NOTE(review): rows follow the order of the concatenated yearly files, so the
# two subsets presumably come from opposite ends of the date range -- confirm
# this is the intended split.
df_train = df.iloc[:10000]
df_val = df.iloc[-2000:]

In [12]:
# Columns holding the BiLSTM predictions; passed to IVDataset as the feature columns.
feature_cols = ['F1', 'F2', 'F3']
from models.dnn import IVDataset, IVSDNN, train_model, large_moneyness_penalty, butterfly_arbitrage_penalty, calendar_spread_penalty, safe_divide

In [13]:
# Wrap the train/validation frames for the DNN using F1..F3 as feature columns.
# As the cell output shows, IVDataset reports tensor shapes and a NaN check on
# construction.
dataset_train = IVDataset(df_train, feature_cols)
dataset_val = IVDataset(df_val, feature_cols)


Tensor shapes:
Features: torch.Size([10000, 3])
m: torch.Size([10000, 1])
tau: torch.Size([10000, 1])
iv: torch.Size([10000, 1])

Checking for NaN values:
Features NaN: False
m NaN: False
tau NaN: False
iv NaN: False

Tensor shapes:
Features: torch.Size([2000, 3])
m: torch.Size([2000, 1])
tau: torch.Size([2000, 1])
iv: torch.Size([2000, 1])

Checking for NaN values:
Features NaN: False
m NaN: False
tau NaN: False
iv NaN: False


In [14]:
# Both datasets must report the same input width, since one network serves both.
# NOTE(review): the recorded value is 5, presumably the 3 features plus m and
# tau -- confirm in IVDataset.get_input_size.
print(dataset_train.get_input_size())
print(dataset_val.get_input_size())

5
5


In [15]:
from torch.utils.data import DataLoader

# Seed torch's global RNG before anything random happens in this cell, so the
# network's initial weights and the training shuffle order are repeatable
# across kernel restarts.
SEED = 42
torch.manual_seed(SEED)

# Training batches are reshuffled every epoch; validation keeps a fixed order.
train_loader = DataLoader(dataset_train, batch_size=64, shuffle=True)
val_loader = DataLoader(dataset_val, batch_size=64, shuffle=False)

# Input width comes from the dataset so the network always matches the data.
dnn = IVSDNN(input_size=dataset_train.get_input_size(), hidden_size=512)

In [16]:
import wandb

# Start a Weights & Biases run for this training session.
wandb.init(project="ivs-dnn")
try:
    # Positional arguments after the loaders:
    #   200   -> number of epochs (the log prints "Epoch k/200")
    #   0.001 -> initial learning rate (the log prints 0.001000 for early epochs)
    #   1     -> NOTE(review): meaning not visible from this notebook; check
    #            train_model's signature in models/dnn.py
    # The wandb module itself is handed over as the logger.
    train_model(dnn, train_loader, val_loader, 200, 0.001, 1, wandb)
finally:
    # Always close the run, including when training is stopped by hand
    # (KeyboardInterrupt), so the W&B backend is shut down cleanly instead of
    # being left attached to the notebook.
    wandb.finish()

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mabhigyanshanker[0m ([33mabx-group[0m). Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1/200 - Training: 100%|██████████| 157/157 [08:50<00:00,  3.38s/it]
Epoch 1/200 - Validation: 100%|██████████| 32/32 [00:34<00:00,  1.07s/it]



Epoch 1 Results:
Training || Loss: 5467199.631508 | MSE: 0.063333 | MAPE: 99.532249 | Penalty: 27335995.961902
Validation || Loss: 0.431532 | MSE: 0.078837 | MAPE: 99.999380 | Penalty: 0.000000
Learning Rate: 0.001000


Epoch 2/200 - Training: 100%|██████████| 157/157 [08:41<00:00,  3.32s/it]
Epoch 2/200 - Validation: 100%|██████████| 32/32 [00:33<00:00,  1.05s/it]



Epoch 2 Results:
Training || Loss: 0.423771 | MSE: 0.059184 | MAPE: 99.995059 | Penalty: 0.000585
Validation || Loss: 0.439814 | MSE: 0.078821 | MAPE: 99.984962 | Penalty: 0.041727
Learning Rate: 0.001000


Epoch 3/200 - Training: 100%|██████████| 157/157 [08:44<00:00,  3.34s/it]
Epoch 3/200 - Validation: 100%|██████████| 32/32 [00:34<00:00,  1.09s/it]



Epoch 3 Results:
Training || Loss: 0.263448 | MSE: 0.034971 | MAPE: 59.735072 | Penalty: 0.052597
Validation || Loss: 0.547008 | MSE: 0.078564 | MAPE: 99.804328 | Penalty: 0.581826
Learning Rate: 0.001000


Epoch 4/200 - Training: 100%|██████████| 157/157 [08:45<00:00,  3.35s/it]
Epoch 4/200 - Validation: 100%|██████████| 32/32 [00:35<00:00,  1.10s/it]



Epoch 4 Results:
Training || Loss: 0.134098 | MSE: 0.012658 | MAPE: 31.612752 | Penalty: 0.012916
Validation || Loss: 0.491882 | MSE: 0.078832 | MAPE: 99.995416 | Penalty: 0.301835
Learning Rate: 0.001000


Epoch 5/200 - Training: 100%|██████████| 157/157 [08:43<00:00,  3.33s/it]
Epoch 5/200 - Validation: 100%|██████████| 32/32 [00:32<00:00,  1.02s/it]



Epoch 5 Results:
Training || Loss: 0.115206 | MSE: 0.010236 | MAPE: 27.339693 | Penalty: 0.008765
Validation || Loss: 1.505898 | MSE: 0.078664 | MAPE: 99.797431 | Penalty: 5.376211
Learning Rate: 0.001000


Epoch 6/200 - Training: 100%|██████████| 157/157 [08:19<00:00,  3.18s/it]
Epoch 6/200 - Validation: 100%|██████████| 32/32 [00:31<00:00,  1.01it/s]



Epoch 6 Results:
Training || Loss: 0.096580 | MSE: 0.007726 | MAPE: 23.141202 | Penalty: 0.004621
Validation || Loss: 9.748625 | MSE: 0.038326 | MAPE: 64.748475 | Penalty: 47.371503
Learning Rate: 0.001000


Epoch 7/200 - Training: 100%|██████████| 157/157 [08:09<00:00,  3.12s/it]
Epoch 7/200 - Validation: 100%|██████████| 32/32 [00:31<00:00,  1.00it/s]



Epoch 7 Results:
Training || Loss: 0.087317 | MSE: 0.006705 | MAPE: 21.059622 | Penalty: 0.001982
Validation || Loss: 38.748662 | MSE: 0.082727 | MAPE: 94.767582 | Penalty: 191.682498
Learning Rate: 0.001000


Epoch 8/200 - Training: 100%|██████████| 157/157 [08:05<00:00,  3.10s/it]
Epoch 8/200 - Validation: 100%|██████████| 32/32 [00:31<00:00,  1.01it/s]



Epoch 8 Results:
Training || Loss: 0.080935 | MSE: 0.005851 | MAPE: 19.544406 | Penalty: 0.002084
Validation || Loss: 0.438671 | MSE: 0.078837 | MAPE: 100.000000 | Penalty: 0.035683
Learning Rate: 0.001000


Epoch 9/200 - Training: 100%|██████████| 157/157 [08:09<00:00,  3.12s/it]
Epoch 9/200 - Validation: 100%|██████████| 32/32 [00:31<00:00,  1.03it/s]



Epoch 9 Results:
Training || Loss: 0.086328 | MSE: 0.005485 | MAPE: 18.110702 | Penalty: 0.058455
Validation || Loss: 9.673665 | MSE: 0.107410 | MAPE: 108.317399 | Penalty: 45.987155
Learning Rate: 0.001000


Epoch 10/200 - Training: 100%|██████████| 157/157 [08:05<00:00,  3.09s/it]
Epoch 10/200 - Validation: 100%|██████████| 32/32 [00:31<00:00,  1.03it/s]



Epoch 10 Results:
Training || Loss: 0.072571 | MSE: 0.004817 | MAPE: 17.591181 | Penalty: 0.001395
Validation || Loss: 20.947423 | MSE: 0.078837 | MAPE: 99.999996 | Penalty: 102.579437
Learning Rate: 0.000300


Epoch 11/200 - Training: 100%|██████████| 157/157 [08:10<00:00,  3.13s/it]
Epoch 11/200 - Validation: 100%|██████████| 32/32 [00:31<00:00,  1.00it/s]



Epoch 11 Results:
Training || Loss: 0.064088 | MSE: 0.004097 | MAPE: 15.574320 | Penalty: 0.000760
Validation || Loss: 75.342328 | MSE: 0.459203 | MAPE: 152.053284 | Penalty: 372.752156
Learning Rate: 0.000300


Epoch 12/200 - Training: 100%|██████████| 157/157 [08:09<00:00,  3.12s/it]
Epoch 12/200 - Validation: 100%|██████████| 32/32 [00:31<00:00,  1.00it/s]



Epoch 12 Results:
Training || Loss: 0.062577 | MSE: 0.003851 | MAPE: 15.211253 | Penalty: 0.000956
Validation || Loss: 240.901704 | MSE: 0.094408 | MAPE: 103.787280 | Penalty: 1202.243944
Learning Rate: 0.000300


Epoch 13/200 - Training: 100%|██████████| 157/157 [08:35<00:00,  3.28s/it]
Epoch 13/200 - Validation: 100%|██████████| 32/32 [00:33<00:00,  1.05s/it]



Epoch 13 Results:
Training || Loss: 0.062981 | MSE: 0.003825 | MAPE: 15.334997 | Penalty: 0.000557
Validation || Loss: 328.187800 | MSE: 0.078837 | MAPE: 100.000000 | Penalty: 1638.781304
Learning Rate: 0.000300


Epoch 14/200 - Training: 100%|██████████| 157/157 [08:36<00:00,  3.29s/it]
Epoch 14/200 - Validation: 100%|██████████| 32/32 [00:33<00:00,  1.06s/it]



Epoch 14 Results:
Training || Loss: 0.061112 | MSE: 0.003606 | MAPE: 14.874307 | Penalty: 0.000862
Validation || Loss: 739.240942 | MSE: 0.078837 | MAPE: 100.000000 | Penalty: 3694.046949
Learning Rate: 0.000090


Epoch 15/200 - Training: 100%|██████████| 157/157 [08:33<00:00,  3.27s/it]
Epoch 15/200 - Validation: 100%|██████████| 32/32 [00:33<00:00,  1.03s/it]



Epoch 15 Results:
Training || Loss: 0.058640 | MSE: 0.003395 | MAPE: 14.291230 | Penalty: 0.000588
Validation || Loss: 1154.604391 | MSE: 0.198981 | MAPE: 110.058356 | Penalty: 5770.422707
Learning Rate: 0.000090


Epoch 16/200 - Training: 100%|██████████| 157/157 [08:35<00:00,  3.28s/it]
Epoch 16/200 - Validation: 100%|██████████| 32/32 [00:34<00:00,  1.08s/it]



Epoch 16 Results:
Training || Loss: 0.058680 | MSE: 0.003369 | MAPE: 14.312030 | Penalty: 0.000423
Validation || Loss: 995.923870 | MSE: 0.078837 | MAPE: 100.000000 | Penalty: 4977.461597
Learning Rate: 0.000090


Epoch 17/200 - Training: 100%|██████████| 157/157 [08:36<00:00,  3.29s/it]
Epoch 17/200 - Validation: 100%|██████████| 32/32 [00:33<00:00,  1.06s/it]



Epoch 17 Results:
Training || Loss: 0.058411 | MSE: 0.003327 | MAPE: 14.247517 | Penalty: 0.000452
Validation || Loss: 1245.095800 | MSE: 0.078837 | MAPE: 100.000000 | Penalty: 6223.321190
Learning Rate: 0.000090


Epoch 18/200 - Training:  93%|█████████▎| 146/157 [08:04<00:36,  3.32s/it]


KeyboardInterrupt: 

Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7972869b1b50>> (for post_run_cell), with arguments args (<ExecutionResult object at 7972869b3510, execution_count=16 error_before_exec=None error_in_exec= info=<ExecutionInfo object at 797293521510, raw_cell="import wandb
wandb.init(project="ivs-dnn")
train_m.." store_history=True silent=False shell_futures=True cell_id=vscode-notebook-cell:/home/shankerabhigyan/code/thesis/thesis-IVS/baseline/pipeline_bilstm.ipynb#X31sZmlsZQ%3D%3D> result=None>,),kwargs {}:


BrokenPipeError: [Errno 32] Broken pipe