In [1]:
import numpy as np
import polars as pl
from pathlib import Path
import gc
import os
from typing import List, Union, Dict, Any

from prj.data.data_loader import DataLoader as BaseDataLoader
import torch
from torch import Tensor
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from prj.model.torch.datasets.base import JaneStreetBaseDataset
from prj.model.torch.losses import WeightedMSELoss
from prj.model.torch.models.mlp import Mlp
from prj.model.torch.wrappers.base import JaneStreetModelWrapper
from prj.model.torch.utils import train
from prj.config import DATA_DIR

2024-12-14 02:23:18.156318: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-14 02:23:18.156352: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-14 02:23:18.157817: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-14 02:23:18.164628: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Training data: scan every partition in the half-open range [8, 9),
# which currently selects partition 8 only, and stack the scans.
train_scans = [
    pl.scan_parquet(DATA_DIR / f'partition_id={i}' / 'part-0.parquet')
    for i in range(8, 9)
]
train_ds = pl.concat(train_scans)

# Validation data: partition 9.
val_partition_path = DATA_DIR / 'partition_id=9' / 'part-0.parquet'
val_ds = pl.scan_parquet(val_partition_path)

In [3]:
# Both datasets receive the same ffill / num_days_batch settings; only the
# validation dataset is explicitly built with shuffle=False.
dataset_kwargs = dict(ffill=True, num_days_batch=250)
train_dataset = JaneStreetBaseDataset(train_ds, **dataset_kwargs)
val_dataset = JaneStreetBaseDataset(val_ds, shuffle=False, **dataset_kwargs)

# The loaders themselves never shuffle: per the original note, shuffling is
# already handled inside the dataset.
batch_size = 256
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=False,
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [4]:
import time
from prj.config import EXP_DIR


# Learning-rate scheduler name and its keyword arguments, forwarded to the wrapper.
scheduler = 'ReduceLROnPlateau'
scheduler_cfg = {
    'mode': 'min',
    'factor': 0.1,
    'patience': 3,
    'verbose': True,
    'min_lr': 1e-8,
}

# Plain MLP (no dropout, no batch norm) wrapped with a single weighted-MSE loss of weight 1.
base_model3 = Mlp(
    input_dim=(79,),
    hidden_dims=[256, 128],
    use_dropout=False,
    use_bn=False,
)
model_no_bn_and_dropout = JaneStreetModelWrapper(
    base_model3,
    [WeightedMSELoss()],
    [1],
    scheduler=scheduler,
    scheduler_cfg=scheduler_cfg,
)


# Optional checkpointing setup (currently disabled, see use_model_ckpt=False below):
# dir_path = str(EXP_DIR / 'tmp' / f'model3_{time.time()}')
# os.makedirs(dir_path, exist_ok=True)
# ckpt_config = {'dirpath': dir_path, 'filename': 'baseline_no_all', 'save_top_k': 1,
#                'monitor': 'val_wr2', 'verbose': True, 'mode': 'max'}

# Early-stopping settings are still passed to train(), but use_early_stopping=False.
early_stopping = dict(
    monitor='val_wr2',
    min_delta=0.00,
    patience=5,
    verbose=True,
    mode='max',
)
model_no_bn_and_dropout = train(
    model_no_bn_and_dropout,
    train_dataloader,
    val_dataloader,
    accelerator='auto',
    max_epochs=1,
    precision='32-true',
    use_model_ckpt=False,
    gradient_clip_val=20,
    use_early_stopping=False,
    early_stopping_cfg=early_stopping,
    compile=False,
)

# Restoring the best checkpoint (only relevant when checkpointing is enabled above):
# base_model3 = Mlp(79, hidden_dims=[256, 128], use_dropout=False, use_bn=False)
# model_no_bn_and_dropout = JaneStreetModelWrapper.load_from_checkpoint(
#     f"{dir_path}/baseline_no_all.ckpt",
#     model=base_model3,
#     losses=[WeightedMSELoss()],
#     loss_weights=[1])

# Display the trained wrapper's module tree.
model_no_bn_and_dropout

Seed set to 42


Trainer already configured with model summary callbacks: [<class 'lightning.pytorch.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type            | Params | Mode 
----------------------------------------------------------
0 | model         | Mlp             | 53.5 K | train
1 | model.model   | Sequential      | 53.5 K | train
2 | model.model.0 | Linear          | 20.5 K | train
3 | model.model.1 | LeakyReLU       | 0      | train
4 | model.model.2 | Linear          | 32.9 K | train
5 | model.model.3 | LeakyReLU       | 0      | train
6 | model.model.4 | Linear          | 129    | train
7 | losses        | ModuleList      | 0      | train
8 | losses.0      | WeightedMSELoss | 0      | train
----------------------------------------------------------
53.5 K    Traina

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/lorecampa/projects/jane_street_forecasting/.venv/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.



[Epoch 0 - Validation]
val_wmse: 7.3915
val_wmse_epoch: 7.3915
val_wmae: 2.1831
val_wmae_epoch: 2.1831
val_wr2: -2.7058
val_wr2_epoch: -2.7058
val_loss: 7.3915
val_loss_epoch: 7.3915


/home/lorecampa/projects/jane_street_forecasting/.venv/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.



[Epoch 0 - Validation]
val_wmse: 0.6201
val_wmse_epoch: 0.6201
val_wmae: 0.5167
val_wmae_epoch: 0.5167
val_wr2: 0.0004
val_wr2_epoch: 0.0004
val_loss: 0.6201
val_loss_epoch: 0.6201

[Epoch 0 - Training]
train_wmse: 0.6868
train_wmse_step: 0.5372
train_wmae: 0.5339
train_wmae_step: 0.5255
train_wr2: 0.0031
train_wr2_step: -0.0177
train_loss: 0.6868
train_loss_step: 0.5372
train_wmse_epoch: 0.6868
train_wmae_epoch: 0.5339
train_wr2_epoch: 0.0031
train_loss_epoch: 0.6868


JaneStreetModelWrapper(
  (model): Mlp(
    (model): Sequential(
      (0): Linear(in_features=79, out_features=256, bias=True)
      (1): LeakyReLU(negative_slope=0.01)
      (2): Linear(in_features=256, out_features=128, bias=True)
      (3): LeakyReLU(negative_slope=0.01)
      (4): Linear(in_features=128, out_features=1, bias=True)
    )
  )
  (losses): ModuleList(
    (0): WeightedMSELoss()
  )
)

In [8]:
from sklearn.metrics import r2_score
from tqdm import tqdm

from prj.model.torch.metrics import weighted_r2_score

# Rebuild the validation pipeline with a larger batch size for inference.
# shuffle=False matches how the validation dataset was built for training and
# keeps the predictions in the dataset's own row order, so `y_hat_all` can be
# compared element-wise with val_dataset.X / .y / .weights afterwards.
# NOTE(review): this assumes JaneStreetBaseDataset shuffles unless told
# otherwise (the training dataset relies on that) -- confirm the default.
eval_batch_size = 2048
val_dataset = JaneStreetBaseDataset(val_ds, shuffle=False, ffill=True, num_days_batch=250)
val_dataloader = DataLoader(val_dataset, batch_size=eval_batch_size, shuffle=False)

model = model_no_bn_and_dropout
model.eval()

# Collect per-batch results in dedicated lists so that `y`, `y_hat_all` and
# `weights` only ever hold the final concatenated arrays.
pred_chunks, target_chunks, weight_chunks = [], [], []
with torch.no_grad():  # inference only: no autograd graph for any batch
    for x, targets, w in tqdm(val_dataloader):
        preds_all = model(x)
        pred_chunks.append(preds_all.numpy().flatten())
        target_chunks.append(targets.numpy().flatten())
        weight_chunks.append(w.numpy().flatten())

y = np.concatenate(target_chunks)
y_hat_all = np.concatenate(pred_chunks)
weights = np.concatenate(weight_chunks)

# Weighted R^2 over the whole validation partition (predictions passed first,
# as in the original call).
weighted_r2_score(y_hat_all, y, weights)

100%|██████████| 3064/3064 [03:13<00:00, 15.87it/s]


0.001715540885925293

In [9]:
# Calls the dataset's private batch loader directly.
# NOTE(review): presumably this (re)populates val_dataset.X / .y / .weights,
# which the following cell reads -- confirm against JaneStreetBaseDataset.
val_dataset._load_batch()

In [12]:
# Pull the arrays currently held by the validation dataset.
# Note: this rebinds `y`, which previously held the targets concatenated
# from the dataloader loop.
X = val_dataset.X
y = val_dataset.y
w = val_dataset.weights

# Sanity check: all row counts should agree with the earlier predictions.
X.shape, y.shape, w.shape, y_hat_all.shape

((6274576, 79), (6274576,), (6274576,), (6274576,))

In [None]:
# Single forward pass over the full validation array.
# Run under no_grad so that no autograd graph is built for millions of rows,
# and flatten to a 1-D NumPy array, the same form the batched evaluation
# passes to weighted_r2_score.
with torch.no_grad():
    y_hat_x = model(torch.from_numpy(X)).numpy().flatten()
y_hat_x

In [None]:
# Score the full-array predictions against the targets and weights taken from
# the same dataset snapshot as X (`y`, `w`). The `weights` array concatenated
# from the dataloader loop is not guaranteed to share that row order.
# y_hat_x is normalised to a flat NumPy array whether it is still a torch
# tensor (possibly tracking gradients) or already an ndarray.
y_hat_x_flat = torch.as_tensor(y_hat_x).detach().cpu().numpy().ravel()
weighted_r2_score(y_hat_x_flat, y, w)