In [1]:
import os
import sys
import random
from pathlib import Path

from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler
import statsmodels.api as sm
from statsmodels.tsa.vector_ar.var_model import VAR

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import xgboost as xgb

SEED = 2103
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

from gluonts.dataset.multivariate_grouper import MultivariateGrouper
from gluonts.dataset.common import ListDataset
from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.util import to_pandas

from pts.model.time_grad import TimeGradEstimator
from diffusers import DEISMultistepScheduler

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def eval_fn(y_pred, y_target):
    return mean_absolute_error(y_pred, y_target)

# Load Train/Test Data

In [2]:
train_df = pd.read_pickle('./data_postprocessing/train_df.pkl')
test_df = pd.read_pickle('./data_postprocessing/test_df.pkl')

In [3]:
train_df.head(2)

Unnamed: 0,window_id,ask_price_0,ask_size_0,bid_price_0,bid_size_0,far_price_0,imbalance_buy_sell_flag_0,imbalance_size_0,matched_size_0,near_price_0,...,bid_size_198,far_price_198,imbalance_buy_sell_flag_198,imbalance_size_198,matched_size_198,near_price_198,reference_price_198,target_198,wap_198,last_5_mins
0,0,1.000026,8493.03,0.999812,60651.5,0.0,1,3180602.69,13380276.64,0.0,...,54300.05,0.0,1,15249373.9,26134518.94,0.0,0.999775,-0.510216,1.0,0
1,0,1.000026,23519.16,0.999812,13996.5,0.0,1,1299772.7,15261106.63,0.0,...,153691.34,0.0,1,13496480.93,27604966.3,0.0,1.000288,-1.419783,1.000222,0


In [3]:
test_df.head(2)

Unnamed: 0,window_id,ask_price_0,ask_size_0,bid_price_0,bid_size_0,far_price_0,imbalance_buy_sell_flag_0,imbalance_size_0,matched_size_0,near_price_0,...,bid_size_198,far_price_198,imbalance_buy_sell_flag_198,imbalance_size_198,matched_size_198,near_price_198,reference_price_198,target_198,wap_198,last_5_mins
0,433,1.000066,2765.73,0.999697,12685.14,0.0,-1,5128680.68,10543243.05,0.0,...,5753.28,0.0,-1,26806621.39,83364970.7,0.0,1.000486,5.459785,1.0,0
1,433,0.999882,78944.32,0.999697,2331.03,0.0,-1,3901807.9,11733675.67,0.0,...,77927.44,0.0,-1,26782684.93,83388907.16,0.0,0.999944,3.089905,1.000051,0


In [4]:
feature_cols = [col for col in train_df if not col.startswith("target") and not col.startswith("window_id")]
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(train_df[feature_cols].values)

In [5]:
class StockDataset(Dataset):
    """
    To preserve original window structure, do not shuffle, and set batch size to be 55.
    """
    def __init__ (self, df):
        self.target_cols = [col for col in df if col.startswith("target")] # we have to change this when we add lags
        self.feature_cols = [col for col in df if not col.startswith("target") and not col.startswith("window_id")]
        self.labels = df[self.target_cols] # labels for all stocks at all times
        self.features = df[self.feature_cols]
    def __len__(self):
        return len(self.labels)
    def __getitem__(self, idx):
        return self.features.iloc[idx].values, self.labels.iloc[idx].values

In [6]:
train_dataset = torch.load('./data_postprocessing/train.pt')
test_dataset = torch.load('./data_postprocessing/test.pt')
train_dataloader = DataLoader(train_dataset, batch_size=55, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=55, shuffle=False)

In [7]:
x, y = train_dataset[0]
print(x.shape, y.shape)

(2036,) (185,)


# XGBoost

In [8]:
temp_train_loader = DataLoader(train_dataset, batch_size=len(train_dataset))
X, y = next(iter(temp_train_loader))
X, y = X.numpy(), y.numpy()

In [9]:
print(X.shape, y.shape)

(23815, 2036) (23815, 185)


In [10]:
temp_test_loader = DataLoader(test_dataset, batch_size=len(test_dataset))
test_X, test_y = next(iter(temp_test_loader))
test_X, test_y = test_X.numpy(), test_y.numpy()

In [11]:
print(test_X.shape, test_y.shape)

(2640, 2036) (2640, 185)


In [12]:
xgb_model = xgb.XGBRegressor(device="cuda")
xgb_model.fit(X, y)

In [13]:
train_y_hat = xgb_model.predict(X)
print(f'Train MAE: {eval_fn(train_y_hat, y)})')

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.




Train MAE: 2.33017348998886)


In [14]:
test_y_hat = xgb_model.predict(test_X)
print(f'Test MAE: {eval_fn(test_y_hat, test_y)})')

Test MAE: 6.913688415179943)


# VARMAX

In this part, we do a rolling prediction using VARMAX as follows (the `|` operator is a concatenation along the 0th axis, and each `train/test[i]` refers to a window of 55 samples): \
`(WAP | Target)_train1 | .. | (WAP | Target)_trainN | WAP_test1 -> Pred_Target_test1` \
`(WAP | Target)_train1 | .. | (WAP | Target)_trainN | (WAP | Pred_Target)_test1 | WAP_test2 -> Pred_Target_test2` \
and so on... \
\
We then just use the last 100 pairs of (WAP | Target) from this big array, plus the current WAP_test appended at the end, to predict the current Target_test \
\
This may perform poorly, but serves as a good baseline for other models to beat

In [8]:
wap_cols = [col for col in train_df if col.startswith("wap")]
target_cols = [col for col in train_df if col.startswith("target")]
exo_cols = [col for col in train_df if not col.startswith("wap") and not col.startswith("target")]

num_train_samples = len(train_df)
wap_indices = [i * 55 + j for i in range(0, (num_train_samples * 2) // 55, 2) for j in range(55)]
target_indices = [55 + i * 55 + j for i in range(0, (num_train_samples * 2) // 55, 2) for j in range(55)]
concat_train_arr = np.empty((num_train_samples * 2, len(target_cols)))
concat_train_arr[wap_indices, :] = train_df[wap_cols].values
concat_train_arr[target_indices, :] = train_df[target_cols].values

# exogenous variables are just duplicated
exo_train_arr = np.empty((num_train_samples * 2, len(exo_cols)))
exo_train_arr[wap_indices, :] = train_df[exo_cols].values
exo_train_arr[target_indices, :] = train_df[exo_cols].values

In [13]:
num_test_samples = len(test_df)

curr_test_wap_and_target_arr = test_df[wap_cols][:55]
curr_test_exo_arr = test_df[exo_cols][:55]

total_MAE = 0.0

for i in tqdm(range(num_test_samples // 55)):
    # train inputs
    curr_concat_train_arr = np.concatenate([concat_train_arr, curr_test_wap_and_target_arr])
    curr_exo_train_arr = np.concatenate([exo_train_arr, curr_test_exo_arr])

    # scale inputs
    curr_scaler = MinMaxScaler()
    curr_scaler.fit(curr_concat_train_arr)
    curr_concat_train_arr = curr_scaler.transform(curr_concat_train_arr)

    # Fit the VAR model
    model = VAR(endog=curr_concat_train_arr[-55*100-55*101:])
    model_fit = model.fit()

    # Make prediction on validation
    curr_pred = model_fit.forecast(model.endog, steps=55)

    # evaluate
    curr_true_target = test_df[target_cols][i*55:(i+1)*55].values
    curr_MAE = mean_absolute_error(curr_pred, curr_true_target)
    total_MAE += curr_MAE

    # concat new inputs
    if i != num_test_samples // 55 - 1:
        curr_test_wap_and_target_arr = np.concatenate([
            curr_test_wap_and_target_arr,
            curr_pred,
            test_df[wap_cols][(i+1)*55:(i+2)*55]
        ])
        curr_test_exo_arr = np.concatenate([curr_test_exo_arr, test_df[exo_cols][(i+1)*55:(i+2)*55], test_df[exo_cols][(i+1)*55:(i+2)*55]])

    print(curr_MAE)

print(f'Test MAE:', total_MAE / (num_test_samples // 55))

  2%|▏         | 1/48 [00:00<00:12,  3.67it/s]

5.626262182701441


  4%|▍         | 2/48 [00:00<00:12,  3.75it/s]

6.199536437138377


  6%|▋         | 3/48 [00:00<00:12,  3.71it/s]

6.017054281416677


  8%|▊         | 4/48 [00:01<00:11,  3.73it/s]

6.1177923941989025


 10%|█         | 5/48 [00:01<00:11,  3.72it/s]

6.028102483441858


 12%|█▎        | 6/48 [00:01<00:11,  3.73it/s]

6.229354085890746


 15%|█▍        | 7/48 [00:01<00:11,  3.67it/s]

7.416029570724428


 17%|█▋        | 8/48 [00:02<00:11,  3.62it/s]

6.213474132797025


 19%|█▉        | 9/48 [00:02<00:10,  3.65it/s]

5.919268747283808


 21%|██        | 10/48 [00:02<00:10,  3.68it/s]

5.649721362770189


 23%|██▎       | 11/48 [00:02<00:10,  3.70it/s]

5.883322399699134


 25%|██▌       | 12/48 [00:03<00:09,  3.72it/s]

6.248745724406667


 27%|██▋       | 13/48 [00:03<00:09,  3.75it/s]

5.685401908647022


 29%|██▉       | 14/48 [00:03<00:08,  3.78it/s]

5.318636627176094


 31%|███▏      | 15/48 [00:04<00:08,  3.75it/s]

5.372311657808444


 33%|███▎      | 16/48 [00:04<00:08,  3.75it/s]

5.258559826693388


 35%|███▌      | 17/48 [00:04<00:08,  3.75it/s]

5.590861889870388


 38%|███▊      | 18/48 [00:04<00:08,  3.66it/s]

5.754411445697447


 40%|███▉      | 19/48 [00:05<00:07,  3.67it/s]

5.262738301294624


 42%|████▏     | 20/48 [00:05<00:07,  3.68it/s]

5.2854818312367335


 44%|████▍     | 21/48 [00:05<00:07,  3.71it/s]

5.493222665531559


 46%|████▌     | 22/48 [00:05<00:07,  3.62it/s]

5.934616690198129


 48%|████▊     | 23/48 [00:06<00:07,  3.39it/s]

6.4125353092475965


 50%|█████     | 24/48 [00:06<00:06,  3.45it/s]

5.757635257674721


 52%|█████▏    | 25/48 [00:06<00:06,  3.50it/s]

5.923644343597231


 54%|█████▍    | 26/48 [00:07<00:06,  3.54it/s]

9.359717009623


 56%|█████▋    | 27/48 [00:07<00:05,  3.57it/s]

6.269975450432604


 58%|█████▊    | 28/48 [00:07<00:05,  3.59it/s]

5.672876660039966


 60%|██████    | 29/48 [00:07<00:05,  3.57it/s]

5.828086856501665


 62%|██████▎   | 30/48 [00:08<00:05,  3.57it/s]

5.829092129637158


 65%|██████▍   | 31/48 [00:08<00:04,  3.58it/s]

6.025820050863868


 67%|██████▋   | 32/48 [00:08<00:04,  3.55it/s]

5.581841985011598


 69%|██████▉   | 33/48 [00:09<00:04,  3.55it/s]

5.129937306706472


 71%|███████   | 34/48 [00:09<00:03,  3.54it/s]

5.004912432849947


 73%|███████▎  | 35/48 [00:09<00:03,  3.54it/s]

5.56809162807789


 75%|███████▌  | 36/48 [00:09<00:03,  3.53it/s]

6.35659884898354


 77%|███████▋  | 37/48 [00:10<00:03,  3.52it/s]

5.683609448930777


 79%|███████▉  | 38/48 [00:10<00:02,  3.51it/s]

8.172022366636876


 81%|████████▏ | 39/48 [00:10<00:02,  3.49it/s]

5.1809255557209255


 83%|████████▎ | 40/48 [00:11<00:02,  3.49it/s]

5.567672432617637


 85%|████████▌ | 41/48 [00:11<00:02,  3.48it/s]

4.855197323266491


 88%|████████▊ | 42/48 [00:11<00:01,  3.47it/s]

8.135826620625844


 90%|████████▉ | 43/48 [00:11<00:01,  3.44it/s]

5.703407243200001


 92%|█████████▏| 44/48 [00:12<00:01,  3.42it/s]

5.268876048371801


 94%|█████████▍| 45/48 [00:12<00:00,  3.42it/s]

5.203573569168554


 96%|█████████▌| 46/48 [00:12<00:00,  3.41it/s]

5.117107175166288


 98%|█████████▊| 47/48 [00:13<00:00,  3.39it/s]

5.592916167068363


100%|██████████| 48/48 [00:13<00:00,  3.58it/s]

4.798367229914929
Test MAE: 5.8855244395116415





# LSTM
In this part we train a simple bi-directional LSTM

In [8]:
class LSTM(nn.Module):
    def __init__(self, hidden=256, out_dim=185):
        super().__init__()
        self.hidden = hidden
        self.out_dim = out_dim
        self.lstm = nn.LSTM(2036, hidden_size=self.hidden, num_layers=2, bidirectional=True, batch_first=True)
        self.linear = nn.Linear(self.hidden * 2, self.out_dim)
    
    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.linear(x)
        return x

In [9]:
lstm_model = LSTM().to(device)
lstm_optimizer = torch.optim.Adam(lstm_model.parameters())
loss_fn = nn.L1Loss()

for epoch in range(30):
    print(f"--- Epoch [{epoch + 1}/{30}] ---")
    total_epoch_loss = 0.
    for step, batch in enumerate(train_dataloader):
        features, targets = batch
        features = torch.from_numpy(scaler.transform(features))
        features, targets = features.float(), targets.float()
        features, targets = features.to(device), targets.to(device)
        targets = targets.unsqueeze(0)
        features = features.unsqueeze(0)

        preds = lstm_model(features)
        loss = loss_fn(targets, preds)
        total_epoch_loss += loss.detach().cpu()

        if step % 100 == 0:
            print("Loss:", loss.item())

        lstm_optimizer.zero_grad()
        loss.backward()
        lstm_optimizer.step()
    print(f"Epoch Loss: {total_epoch_loss / len(train_dataloader)}")

--- Epoch [1/30] ---
Loss: 5.549416542053223
Loss: 5.237961292266846
Loss: 8.095149040222168
Loss: 6.798412799835205
Loss: 5.570059776306152
Epoch Loss: 6.318953037261963
--- Epoch [2/30] ---
Loss: 5.544959545135498
Loss: 5.238502025604248
Loss: 8.09648609161377
Loss: 6.800079345703125
Loss: 5.575272560119629
Epoch Loss: 6.317306041717529
--- Epoch [3/30] ---
Loss: 5.541306495666504
Loss: 5.239346504211426
Loss: 8.089181900024414
Loss: 6.8014750480651855
Loss: 5.572729587554932
Epoch Loss: 6.3166728019714355
--- Epoch [4/30] ---
Loss: 5.542489528656006
Loss: 5.236428737640381
Loss: 8.09494400024414
Loss: 6.800201416015625
Loss: 5.568076133728027
Epoch Loss: 6.315825462341309
--- Epoch [5/30] ---
Loss: 5.5440497398376465
Loss: 5.2335333824157715
Loss: 8.132625579833984
Loss: 6.76423978805542
Loss: 5.565145492553711
Epoch Loss: 6.310324668884277
--- Epoch [6/30] ---
Loss: 5.535722732543945
Loss: 5.228562355041504
Loss: 8.079418182373047
Loss: 6.7209882736206055
Loss: 5.56702184677124
Epo

In [10]:
lstm_model.eval()
total_loss = 0.
total_elems = 0
for step, batch in enumerate(test_dataloader):
    features, targets = batch
    features = torch.from_numpy(scaler.transform(features))
    features, targets = features.float(), targets.float()
    features, targets = features.to(device), targets.to(device)
    targets = targets.unsqueeze(0)
    features = features.unsqueeze(0)
    with torch.inference_mode():
        preds = lstm_model(features)
        total_loss += torch.sum(torch.abs(preds - targets)).item()
        total_elems += torch.numel(targets)
print(f"MAE: {total_loss / total_elems}")

MAE: 5.760513931011978


# Conditional DDPM
In this part we treat the inputs and outputs as 1-channel images and apply conditional DDPM

In [10]:
from diffusion_model import Unet, p_losses, sample
U = Unet(dim=64, channels=1, dim_mults=(1, 2, 4), self_condition=True)
if torch.cuda.is_available():
    U.cuda()
    print('Models moved to GPU.')
u_optimizer = torch.optim.Adam(U.parameters(), 0.0002, [0.5, 0.999])

Models moved to GPU.


In [11]:
num_epc = 5
for epoch in range(num_epc):
    print(f"--- Epoch [{epoch+1}/{num_epc}] ---")

    total_epoch_loss = 0.

    for step, batch in enumerate(train_dataloader):
        cond, targets = batch
        cond = torch.from_numpy(scaler.transform(cond))
        cond, targets = cond.float(), targets.float()
        cond, targets = cond.to(device), targets.to(device)
        targets = targets.unsqueeze(0).unsqueeze(0)
        cond = cond.unsqueeze(0)
        targets = torch.nn.functional.pad(targets, (0, 71, 0, 9), "constant", 0)

        # 1. Sample t uniformally for every example in the batch
        t = torch.randint(low=0, high=500, size=(1,), device=device).long()

        # 2. Get l1 loss
        loss = p_losses(U, targets, t, loss_type='l1', time_cond=cond)

        if step % 100 == 0:
            print("Loss:", loss.item())
    
        total_epoch_loss += loss.detach().item()

        u_optimizer.zero_grad()
        loss.backward()
        u_optimizer.step()
    
    print("Epoch Loss:", total_epoch_loss / len(train_dataloader))

    # Save every epoch
    print("Saving...")
    torch.save(U.state_dict(), "diffusion.pth")

--- Epoch [1/5] ---
Loss: 0.8664166331291199
Loss: 0.342109739780426
Loss: 0.5751746892929077
Loss: 0.36596542596817017
Loss: 0.6054816246032715
Epoch Loss: 0.5488351859479248
Saving...
--- Epoch [2/5] ---
Loss: 0.48535293340682983
Loss: 0.5322597026824951
Loss: 0.36875081062316895
Loss: 0.5387202501296997
Loss: 0.27861088514328003
Epoch Loss: 0.4736612992528955
Saving...
--- Epoch [3/5] ---
Loss: 0.2649272382259369
Loss: 0.243866428732872
Loss: 0.5191173553466797
Loss: 0.293118953704834
Loss: 0.5198401212692261
Epoch Loss: 0.4562738733946864
Saving...
--- Epoch [4/5] ---
Loss: 0.7090773582458496
Loss: 0.5887994766235352
Loss: 0.47108373045921326
Loss: 0.5159890055656433
Loss: 0.32491758465766907
Epoch Loss: 0.44573564989186876
Saving...
--- Epoch [5/5] ---
Loss: 0.4543801248073578
Loss: 0.4828573763370514
Loss: 0.42861083149909973
Loss: 0.48692965507507324
Loss: 0.3912566900253296
Epoch Loss: 0.44426095021101397
Saving...


In [7]:
# sampling loop
U = Unet(dim=64, channels=1, dim_mults=(1, 2, 4), self_condition=True)
if torch.cuda.is_available():
    U.cuda()
U.load_state_dict(torch.load('diffusion.pth'))

U.eval()
total_loss = 0.
total_elems = 0
for step, batch in tqdm(enumerate(test_dataloader), total=len(test_dataloader)):
    cond, targets = batch
    cond = torch.from_numpy(scaler.transform(cond))
    cond, targets = cond.float(), targets.float()
    cond, targets = cond.to(device), targets.to(device)
    targets = targets.unsqueeze(0).unsqueeze(0)
    cond = cond.unsqueeze(0)
    targets = torch.nn.functional.pad(targets, (0, 71, 0, 9), "constant", 0)
    # torch inference_mode already annotated for sample() function
    samples = sample(U, (64, 256), batch_size=1, channels=1, time_cond=cond)
    total_loss += torch.sum(torch.abs(torch.from_numpy(samples[-1]) - targets.detach().cpu())).item()
    total_elems += torch.numel(targets)
    print(total_loss / total_elems)
print(f"MAE: {total_loss / total_elems}")

  0%|          | 0/48 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

  2%|▏         | 1/48 [01:11<56:12, 71.76s/it]

4.201510429382324


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

  4%|▍         | 2/48 [02:24<55:18, 72.13s/it]

4.367911338806152


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

  6%|▋         | 3/48 [03:36<54:11, 72.25s/it]

4.455802122751872


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

  8%|▊         | 4/48 [04:48<52:54, 72.14s/it]

4.494023323059082


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 10%|█         | 5/48 [06:00<51:40, 72.10s/it]

4.475032806396484


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 12%|█▎        | 6/48 [07:12<50:25, 72.03s/it]

4.495413939158122


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 15%|█▍        | 7/48 [08:24<49:11, 72.00s/it]

4.604225022452218


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 17%|█▋        | 8/48 [09:36<47:58, 71.97s/it]

4.6070767641067505


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 19%|█▉        | 9/48 [10:48<46:45, 71.95s/it]

4.579028765360515


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 21%|██        | 10/48 [12:00<45:33, 71.95s/it]

4.528861236572266


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 23%|██▎       | 11/48 [13:12<44:23, 71.99s/it]

4.523338491266424


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 25%|██▌       | 12/48 [14:24<43:10, 71.97s/it]

4.542020678520203


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 27%|██▋       | 13/48 [15:36<41:58, 71.95s/it]

4.533690856053279


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 29%|██▉       | 14/48 [16:47<40:45, 71.94s/it]

4.5020498888833185


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 31%|███▏      | 15/48 [17:59<39:33, 71.93s/it]

4.477288659413656


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 33%|███▎      | 16/48 [19:11<38:22, 71.94s/it]

4.447746425867081


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 35%|███▌      | 17/48 [20:23<37:10, 71.94s/it]

4.434576567481546


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 38%|███▊      | 18/48 [21:35<35:58, 71.94s/it]

4.418250693215264


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 40%|███▉      | 19/48 [22:47<34:47, 71.97s/it]

4.397729823463841


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 42%|████▏     | 20/48 [23:59<33:34, 71.95s/it]

4.382056307792664


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 44%|████▍     | 21/48 [25:11<32:22, 71.95s/it]

4.371080920809791


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 46%|████▌     | 22/48 [26:23<31:10, 71.94s/it]

4.367576534097845


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 48%|████▊     | 23/48 [27:35<29:58, 71.93s/it]

4.389462533204452


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 50%|█████     | 24/48 [28:47<28:46, 71.93s/it]

4.385650336742401


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 52%|█████▏    | 25/48 [29:59<27:34, 71.95s/it]

4.382399520874023


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 54%|█████▍    | 26/48 [31:11<26:22, 71.93s/it]

4.449407980992244


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 56%|█████▋    | 27/48 [32:23<25:10, 71.93s/it]

4.458234769326669


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 58%|█████▊    | 28/48 [33:35<23:58, 71.93s/it]

4.451662080628531


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 60%|██████    | 29/48 [34:47<22:46, 71.93s/it]

4.447116868249301


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 62%|██████▎   | 30/48 [35:58<21:34, 71.93s/it]

4.448305098215739


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 65%|██████▍   | 31/48 [37:10<20:22, 71.93s/it]

4.445637072286298


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 67%|██████▋   | 32/48 [38:22<19:10, 71.93s/it]

4.434021577239037


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 69%|██████▉   | 33/48 [39:34<17:58, 71.93s/it]

4.41458933281176


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 71%|███████   | 34/48 [40:46<16:47, 71.93s/it]

4.402802467346191


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 73%|███████▎  | 35/48 [41:58<15:35, 71.93s/it]

4.392192963191441


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 75%|███████▌  | 36/48 [43:10<14:23, 71.94s/it]

4.40048282676273


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 77%|███████▋  | 37/48 [44:22<13:11, 71.95s/it]

4.390925587834539


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 79%|███████▉  | 38/48 [45:34<11:59, 71.95s/it]

4.423069376694529


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 81%|████████▏ | 39/48 [46:46<10:47, 71.96s/it]

4.408555727738601


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 83%|████████▎ | 40/48 [47:58<09:35, 71.94s/it]

4.403669440746308


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 85%|████████▌ | 41/48 [49:10<08:23, 71.95s/it]

4.387494308192555


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 88%|████████▊ | 42/48 [50:22<07:11, 71.94s/it]

4.414678380602882


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 90%|████████▉ | 43/48 [51:34<05:59, 71.94s/it]

4.416277574938397


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 92%|█████████▏| 44/48 [52:46<04:47, 71.94s/it]

4.409924279559743


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 94%|█████████▍| 45/48 [53:58<03:35, 71.94s/it]

4.399718639585707


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 96%|█████████▌| 46/48 [55:09<02:23, 71.93s/it]

4.390979839407879


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

 98%|█████████▊| 47/48 [56:21<01:11, 71.94s/it]

4.384790430677698


sampling loop time step:   0%|          | 0/500 [00:00<?, ?it/s]

100%|██████████| 48/48 [57:33<00:00, 71.96s/it]

4.376183301210403
MAE: 4.376183301210403





# TimeGrad (Not working)

In this part we apply time grad to our time series, trying to transform the WAP from our data into the target

(Could not for the life of me get this to work, the repo isn't well documented and has many reproducibility bugs)

In [36]:
arb_time_series = pd.date_range("2024-01-01", "2024-01-20", freq='min')
train_df_w_timestamp = train_df.copy()
train_df_w_timestamp['timestamp'] = arb_time_series[:len(train_df)]
train_df_w_timestamp = train_df_w_timestamp.set_index('timestamp')
test_df_w_timestamp = test_df.copy()
test_df_w_timestamp['timestamp'] = arb_time_series[len(train_df):len(train_df) + len(test_df)]
test_df_w_timestamp = test_df_w_timestamp.set_index('timestamp')

In [38]:
data_list = [{"start": train_df_w_timestamp.index[0], "target": train_df_w_timestamp[c].values[:100]} for c in target_cols]

train_dataset = ListDataset(
    [data_list[0]],
    freq = "1min"
)
print(train_dataset[0]['target'].shape)

(100,)


In [39]:
train_grouper = MultivariateGrouper(max_target_dim=1)
train_dataset = train_grouper(train_dataset)

In [23]:
multiple_ts = [train_df_w_timestamp[['timestamp', t]].rename(columns={t: "target"}).set_index('timestamp') for w, t in zip(wap_cols, target_cols)]

In [24]:
ds = PandasDataset(multiple_ts, target="target", freq='1min')

In [40]:
scheduler = DEISMultistepScheduler(
    num_train_timesteps=150,
    beta_end=0.1,
)
estimator = TimeGradEstimator(
    input_size=100,
    hidden_size=40,
    num_layers=2,
    dropout_rate=0.1,
    # lags_seq=[1],
    scheduler=scheduler,
    num_inference_steps=150,
    prediction_length=55,
    freq="min",
    scaling="mean",
    trainer_kwargs=dict(max_epochs=1, accelerator="gpu", devices="1"),
)
predictor = estimator.train(train_dataset) 

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


/home/mingchonglim/anaconda3/envs/mlip/lib/python3.12/site-packages/pytorch_lightning/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]

  | Name  | Type          | Params | In sizes                                                                | Out sizes        
--------------------------------------------------------------------------------------------------------------------------------------
0 | model | TimeGradModel | 427 K  | [[1, 1], [1, 1], [1, 236, 6], [1, 236, 100], [1, 236, 100], [1, 55, 6]] | [1, 100, 55, 100]
--------------------------------------------------------------------------------------------------------------------------------------
427 K     Trainable params
0         Non-trainable params
427 K     Total params
1.710     Total estimated model params size (MB)


Training: |          | 0/? [00:00<?, ?it/s]

  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


RuntimeError: shape '[5120, 1]' is invalid for input of size 385280