In [1]:
import os
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torch.utils.data import DataLoader
from utils.fi2010_loader import Dataset_fi2010
from utils.crypto_loader import load_crypto
from models.deepLOB.deepLOB_model import Deeplob
from models.transLOB.transLOB_model import TransLOB

from utils.fi2010_loader import Dataset_fi2010

%load_ext autoreload
%autoreload 2

In [20]:
normalizations = ["Zscore", "DecPre", "MinMax"]
horizons = [0, 1, 2, 3, 4]
dataset_type = "fi2010"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
crypto_path = './data/crypto/BTC_1sec.csv'

BATCH_SIZE = 128
WINDOW_SIZE = 100
LEVELS = 10
HORIZONS=[10, 20, 30, 50, 100]
TARGET_HORIZON = 20
LABEL_ALPHA = 5e-5

LEARNING_RATE = 1e-4
ADAM_B1 = 0.9
ADAM_B2 = 0.999
WEIGHT_DECAY= 1e-5

normalization = "DecPre"
test_ratio = 0.3
mode = False



Train shape: (280000, 100, 40), (280000,)
Test shape: (120000, 100, 40), (120000,)


In [4]:

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [14]:
results_crypto = []
data_type = "crypto"
for model_name in ["deeplob", "translob"]:
    mode = model_name == "deeplob"
    for normalization in ['DecPre','Zscore']:
        print(f"Evaluating {model_name} | {normalization}")
        train_dataset, test_dataset = load_crypto(crypto_path, LEVELS, HORIZONS, TARGET_HORIZON, LABEL_ALPHA, 
                                     normalization, test_ratio, 400000, WINDOW_SIZE, mode, False) # run fro 300k
        test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
        print('Test Shape: ', print(next(iter(train_loader))[0].shape))
        
        model_class = get_model_class(model_name)
        metrics = evaluate_model(model_class, model_name, data_type, normalization, test_loader)

        results_crypto.append({
            "model": model_name,
            "normalization": normalization,
            **metrics
        })

True
Evaluating deeplob | DecPre
Train shape: (280000, 100, 40), (280000,)
Test shape: (120000, 100, 40), (120000,)
torch.Size([128, 1, 100, 40])
./outputs/crypto/deeplob_DecPre.pth
Evaluating deeplob | Zscore
Train shape: (280000, 100, 40), (280000,)
Test shape: (120000, 100, 40), (120000,)
torch.Size([128, 1, 100, 40])
./outputs/crypto/deeplob_Zscore.pth
False
Evaluating translob | DecPre
Train shape: (280000, 100, 40), (280000,)
Test shape: (120000, 100, 40), (120000,)
torch.Size([128, 100, 40])
./outputs/crypto/translob_DecPre.pth
Evaluating translob | Zscore
Train shape: (280000, 100, 40), (280000,)
Test shape: (120000, 100, 40), (120000,)
torch.Size([128, 100, 40])
./outputs/crypto/translob_Zscore.pth


In [16]:
results_crypto = pd.DataFrame(results_crypto)
results_crypto

Unnamed: 0,model,normalization,accuracy,f1,precision,recall
0,deeplob,DecPre,0.6655,0.653012,0.666453,0.651511
1,deeplob,Zscore,0.662042,0.650448,0.665461,0.648428
2,translob,DecPre,0.460442,0.410492,0.478884,0.434053
3,translob,Zscore,0.411858,0.321025,0.501102,0.380747


In [9]:
# Dummy mapping from model name to constructor
def get_model_class(name):
    if name == "deeplob":
        return Deeplob
    elif name == "translob":
        return TransLOB
    else:
        raise ValueError("Unknown model name")

def create_test_loader(model_name, normalization):
    stock = [0, 1, 2, 3, 4]
    days = [8, 9, 10]
    T = 100
    unsqueeze = model_name=="deeplob"
    dataset_test = Dataset_fi2010(False, normalization, stock, days, T, 4, unsqueeze, False)
    return DataLoader(dataset=dataset_test, batch_size=128, shuffle=False, num_workers=4)

def evaluate_model(model_class, model_name, data_type, normalization, test_loader):
    path = f"./outputs/{data_type}/{model_name}_{normalization}.pth"
    print(path)
    if not os.path.exists(path):
        print(f"Model not found: {path}, skipping...")
        return {
            "accuracy": np.nan,
            "f1": np.nan,
            "precision": np.nan,
            "recall": np.nan
        }
    model = model_class()
    model.load_state_dict(torch.load(path, map_location=device))
    model.to(device)
    model.eval()

    all_y_true, all_y_pred = [], []
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            print(X_batch.shape)
            outputs = model(X_batch)
            preds = outputs.argmax(dim=1)
            all_y_true.append(y_batch.cpu().numpy())
            all_y_pred.append(preds.cpu().numpy())

    y_true = np.concatenate(all_y_true)
    y_pred = np.concatenate(all_y_pred)

    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "f1": f1_score(y_true, y_pred, average="macro"),
        "precision": precision_score(y_true, y_pred, average="macro"),
        "recall": recall_score(y_true, y_pred, average="macro"),
    }

In [21]:
results_fi2010 = []
data_type = "fi2010"
model_names = ["deeplob", "translob"]
for model_name in model_names:
    for normalization in normalizations:
        print(f"Evaluating {model_name} | {normalization}")
        test_loader = create_test_loader(model_name, normalization)
        model_class = get_model_class(model_name)
        metrics = evaluate_model(model_class, model_name, data_type, normalization, test_loader)

        results_fi2010.append({
            "model": model_name,
            "normalization": normalization,
            **metrics
        })

Evaluating deeplob | Zscore
./outputs/fi2010/deeplob_Zscore.pth
Evaluating deeplob | DecPre
./outputs/fi2010/deeplob_DecPre.pth
Evaluating deeplob | MinMax
./outputs/fi2010/deeplob_MinMax.pth
Evaluating translob | Zscore
./outputs/fi2010/translob_Zscore.pth
Evaluating translob | DecPre
./outputs/fi2010/translob_DecPre.pth
Evaluating translob | MinMax
./outputs/fi2010/translob_MinMax.pth
Model not found: ./outputs/fi2010/translob_MinMax.pth, skipping...


In [23]:
results_fi2010 = pd.DataFrame(results_fi2010)
results_fi2010

Unnamed: 0,model,normalization,accuracy,f1,precision,recall
0,deeplob,Zscore,0.737795,0.736996,0.737827,0.736496
1,deeplob,DecPre,0.748114,0.746521,0.746699,0.746598
2,deeplob,MinMax,0.736492,0.735622,0.736583,0.735113
3,translob,Zscore,0.665964,0.666651,0.670932,0.665874
4,translob,DecPre,0.634632,0.634573,0.650233,0.632358
5,translob,MinMax,,,,


In [None]:
results = []

def crypto_test_loader(normalization):
    DATA_PATH = './data/crypto/BTC_1sec.csv'

    
    data_df = pd.read_csv(DATA_PATH, index_col=0)
    
    stock = [0, 1, 2, 3, 4]
    days = [8, 9, 10]
    T = 100
    unsqueeze = model_name=="deeplob"
    dataset_test = Dataset_fi2010(False, normalization, stock, days, T, 4, unsqueeze, False)
    return DataLoader(dataset=dataset_test, batch_size=128, shuffle=False, num_workers=4)


for data_types in ["crypto","fi2010"]:
    if data_types == "crypto":

        
    for model_name in model_names:
        for normalization in normalizations:
            print(f"Evaluating {model_name} | {normalization}")
            test_loader = create_test_loader(model_name, normalization)
            model_class = get_model_class(model_name)
            metrics = evaluate_model(model_class, model_name, normalization, test_loader)
    
            results.append({
                "model": model_name,
                "normalization": normalization,
                **metrics
            })
            # print(results)

In [6]:

# results_df.to_csv("./outputs/fi2010_model_results.csv", index=False)

# Plot example: Accuracy
plt.figure(figsize=(12, 6))
sns.lineplot(data=results_df, x="horizon_k", y="accuracy", hue="model", style="normalization", markers=True)
plt.title("Accuracy across Horizons (FI-2010)")
plt.grid(True)
plt.tight_layout()
plt.show()


ValueError: Could not interpret value `horizon_k` for `x`. An entry with this name does not appear in `data`.

<Figure size 1200x600 with 0 Axes>