In [24]:
import os, glob, math, argparse
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from tqdm import tqdm

In [25]:
class CausalCNN(nn.Module):
    def __init__(self, input_size, embed_size):
        super().__init__()
        self.cnn = nn.Sequential(
            nn.Conv1d(input_size, 64, kernel_size=3, padding=1),
            nn.GELU(),
            nn.Conv1d(64, 64, kernel_size=3, padding=2, dilation=2),
            nn.GELU(),
            nn.Conv1d(64, 64, kernel_size=3, padding=4, dilation=4),
            nn.GELU(),
            nn.Conv1d(64, embed_size, kernel_size=1)
        )
        
    def forward(self, x):
        return self.cnn(x)

def WindowDataset(metrics_npy, price_npy, seq_len=168):
    seq_len = seq_len
    X, y = [], []
    
    metrics = np.load(metrics_npy).astype(np.float32)
    prices = np.load(price_npy).astype(np.float32)
    returns = np.diff(prices, axis=0) / prices[:-1]
    
    for t in range(len(metrics) - seq_len):
        X.append(metrics[t:t+seq_len])
        y.append(returns[t+seq_len-1])
    
    X = np.array(X) # (N, T-1, F)
    y = np.array(y) # (N, 1)
    return X, y            

def metrics_table(labels, preds):
    mse  = mean_squared_error(labels, preds)
    rmse = np.sqrt(mse)
    mae  = mean_absolute_error(labels, preds)
    r2   = r2_score(labels, preds)
    dir_acc = (np.sign(labels) == np.sign(preds)).mean()
    
    metrics = pd.DataFrame(
        {
            "MSE":  [mse],
            "RMSE": [rmse],
            "MAE":  [mae],
            "R2":   [r2],
            "Dir-Acc": [dir_acc],
        }
    )

    print("\nValidation metrics:")
    print(metrics.to_string(index=False, float_format="%.4f"))

In [40]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} (device)")
    
# load and split data
metrics_path = os.path.join("data/dataset", "metrics_outfile.npy")
price_path = os.path.join("data/dataset", "price_outfile.npy")
X, y = WindowDataset(metrics_npy=metrics_path, price_npy=price_path)
N = len(X)
split = int(0.9*N)
    
# compute normalizers and normalize data
mean = X.mean(axis=1, keepdims=True)
std= X.std(axis=1, keepdims=True) + 1e-8 # avoid zero division
    
X_norm = (X - mean) / std
y_scaled = y*100
    
# create tensors and dataloader
X_tensor = torch.from_numpy(X_norm)
y_tensor = torch.from_numpy(y_scaled)
    
train_ds = TensorDataset(X_tensor[:split], y_tensor[:split])
val_ds = TensorDataset(X_tensor[split:], y_tensor[split:])
    
train_dl = DataLoader(train_ds, batch_size=256, shuffle=True, num_workers=4)
val_dl = DataLoader(val_ds, batch_size=256, shuffle=False, num_workers=2)

Using cuda (device)


In [42]:
print(X_norm)

[[[-0.5        -1.6613247  -2.6390338  ... -1.7537822  -1.2867182
   -1.9957759 ]
  [-0.5        -1.5168617  -2.5205042  ... -1.7537822  -1.2867182
   -1.9957759 ]
  [-0.5        -1.3723986  -2.658102   ... -1.7537822  -1.2867182
   -1.9957759 ]
  ...
  [-1.          1.3723986   1.8928024  ...  0.5409217   1.580523
   -0.5013846 ]
  [-1.          1.5168617   1.7776009  ...  0.5409217   1.580523
   -0.5013846 ]
  [-1.          1.6613247   1.7390019  ...  0.5409217   1.580523
   -0.5013846 ]]

 [[-0.5        -1.5168617  -2.5773244  ... -1.7822441  -1.2962015
   -2.0335119 ]
  [-0.5        -1.3723986  -2.7165759  ... -1.7822441  -1.2962015
   -2.0335119 ]
  [-0.5        -1.2279357  -2.5605347  ... -1.7822441  -1.2962015
   -2.0335119 ]
  ...
  [-1.          1.5168617   1.7724386  ...  0.5299629   1.547163
   -0.5216466 ]
  [-1.          1.6613247   1.7333757  ...  0.5299629   1.547163
   -0.5216466 ]
  [-0.5        -1.6613247   1.7591015  ...  0.75199634  2.0971296
    0.3857139 ]]

 [[-0

In [36]:
print(X_norm.mean(axis=(0,1 )))

[ 1.18574565e-02  5.05526438e-02  2.43383154e-01  2.43127331e-01
  2.42502630e-01  2.43417189e-01 -3.96566093e-02 -1.20875090e-02
  4.74527973e-04 -4.24364628e-03  1.13949172e-01  2.45431781e-01
  2.42013186e-01  2.41761446e-01 -2.46664807e-02 -4.49222134e-04
  1.29094347e-02 -7.16643641e-04  1.39448997e-02 -1.43379811e-02
  2.43015260e-01  2.41779119e-01  2.47037336e-01  2.43980393e-01
 -6.46462813e-02 -2.10039765e-02  2.30407435e-02 -1.70792919e-02
  1.42090335e-01  2.89067984e-01  2.29168653e-01  2.58068591e-01
 -5.11922240e-02  1.85156520e-02  8.04387704e-02 -6.97675743e-04
  8.47828761e-02  1.50678158e-02  7.98721835e-02  7.82134533e-02
  7.99726769e-02  7.98504725e-02 -1.07794127e-03 -3.60800093e-03
 -5.19889174e-03  1.25883974e-03  3.99495512e-02  8.24708045e-02
  8.01139474e-02  7.80687556e-02 -1.86742246e-02 -1.73857505e-03
 -2.92938901e-03 -2.79082771e-04 -2.97847018e-03 -1.34687508e-02
  7.94012174e-02  7.73113817e-02  8.30916911e-02  7.91589990e-02
 -8.96650134e-04 -3.93067