In [1]:
# if "preprocessing" folder in current folders -> cd back to original folder
%cd /content
import os
if os.path.exists("bsc-thesis"):
  # if bsc-thesis folder already exists; completely remove
  !rm -rf bsc-thesis

# this makes sure cached files are readily available (for calling e.g. `gather_data_cached`)
!apt-get install git-lfs
!git lfs install

# cloning repo
branch = "main"
!git clone --branch $branch https://github.com/maviddoerdijk/bsc-thesis.git

# moving into project dir
%cd bsc-thesis/src
%ls

/content
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git-lfs is already the newest version (3.0.2-1ubuntu0.3).
0 upgraded, 0 newly installed, 0 to remove and 34 not upgraded.
Git LFS initialized.
Cloning into 'bsc-thesis'...
remote: Enumerating objects: 893, done.[K
remote: Counting objects: 100% (33/33), done.[K
remote: Compressing objects: 100% (8/8), done.[K
remote: Total 893 (delta 26), reused 25 (delta 25), pack-reused 860 (from 2)[K
Receiving objects: 100% (893/893), 28.04 MiB | 37.63 MiB/s, done.
Resolving deltas: 100% (510/510), done.
Filtering content: 100% (32/32), 1.75 GiB | 104.52 MiB/s, done.
/content/bsc-thesis/src
[0m[01;34mbacktesting[0m/  [01;34mdata[0m/      main.ipynb  [01;34mmodels[0m/         [01;34mutils[0m/
[01;34mconfig[0m/       [01;34mexternal[0m/  main.py     [01;34mpreprocessing[0m/


In [2]:
!pip install numpy==1.26.3 # necessary for bug fix
!pip install peft==0.10.0
!pip install ta
!pip install pykalman
!pip install PyWavelets
!pip install curl-cffi

## specific packages for time moe
# need a different version of accelerate because of bug "ImportError: cannot import name 'clear_device_cache' from 'accelerate.utils.memory'"
!pip install -U accelerate==0.32.0 # standard google colab version is 1.6.0 (apr 1, 2025), but for stability, we use time moe's 0.28.0 (mar 12, 2024)
!pip install transformers==4.40.1 # standard google colab version is 4.51.3, but time moe repo requirements mention/prefer 4.40.1 for stability
!pip install datasets==2.18.0
FLASH_ATTN = False # set to true if using this
if FLASH_ATTN:
  !pip install flash-attn==2.6.3 # optional but recommended by the repo



In [3]:
# Module imports
import pandas as pd
import numpy as np
from typing import Optional, Callable, Dict, Any
from sklearn.preprocessing import MinMaxScaler
from matplotlib import pyplot as plt
import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from torch.utils.data import DataLoader
from tqdm.auto import tqdm # note: using tqdm.auto usually automatically chooses the right import based on whether you're in CLI, notebook or somewhere else
import torch.nn as nn
import itertools
from pykalman import KalmanFilter
import ast
import re
from tabulate import tabulate
from datetime import datetime

# Custom Imports
from models.statistical_models import create_dataset, default_normalize, rmse_metric, acc_metric, kalman_filter_average, kalman_filter_regression, kalman_filter_regression_multivariate
from preprocessing.cointegration import find_cointegrated_pairs
from preprocessing.data_preprocessing import filter_pairs_data
from preprocessing.technical_indicators import combine_pairs_data
from preprocessing.wavelet_denoising import wav_den
from preprocessing.filters import step_1_filter_remove_nans, step_2_filter_liquidity
from backtesting.trading_strategy import trade, get_gt_yoy_returns_test_dev
from backtesting.utils import calculate_return_uncertainty
from utils.visualization import plot_return_uncertainty, plot_comparison
from utils.helpers import _get_train_dev_frac

# important for time moe
import wandb
wandb.login()

## workflow imports
from models.statistical_models import execute_kalman_workflow
from models.transformer_model import execute_transformer_workflow
# from models.time_moe_model import execute_timemoe_workflow

## specific caching imports (should be changed in case you want to gather data live)
from data.scraper import load_cached_etf_tickers
from data.data_collection_cache import gather_data_cached, _get_filename, gather_pairs_data_cached, gather_data_cached_using_truncate

# Any other changes to be made throughout the entire notebook
plt.style.use('seaborn-v0_8')

inspect_func = False
if inspect_func:
  import inspect
  print(inspect.getsource(trade)) # in this case, check whether the new trade function  is imported

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mdavidmoerdijk[0m ([33mdavidmoerdijk-smart-backoffice[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
startDateStr = '2008-01-01'
end_year = 2024
endDateStr = f'{end_year}-12-31'
startDateStrTest = f'{end_year}-01-01' # possibly change to 07-01 (option 1; dev data in end_year - 1 (e.g. 2023), test data in end_year (e.g. 2024) // option 2; dev data 1st half end_year, test data 2nd half end_year)
endDateStrTest = f'{end_year}-12-31'
train_frac, dev_frac = _get_train_dev_frac(startDateStr, endDateStr, startDateStrTest, endDateStrTest)

instrumentIdsNASDAQandNYSE = load_cached_etf_tickers()
data = gather_data_cached_using_truncate(startDateStr, endDateStr, instrumentIdsNASDAQandNYSE, cache_dir='../src/data/cache')
data_close_filtered_1, data_open_filtered_1, data_high_filtered_1, data_low_filtered_1, data_vol_filtered_1, data_original_format_filtered_1 = step_1_filter_remove_nans(data['close'], data['open'], data['high'], data['low'], data['vol'], data)
data_close_filtered_2, data_open_filtered_2, data_high_filtered_2, data_low_filtered_2, data_vol_filtered_2, data_original_format_filtered_2 = step_2_filter_liquidity(data_close_filtered_1, data_open_filtered_1, data_high_filtered_1, data_low_filtered_1, data_vol_filtered_1, data_original_format_filtered_1)

pairs_data_filtered = gather_pairs_data_cached(startDateStr, endDateStr, instrumentIdsNASDAQandNYSE, cache_dir='../src/data/cache')
if pairs_data_filtered is None:
  scores, pvalues, pairs = find_cointegrated_pairs(data_original_format_filtered_2)
  pairs_data = {key:value[1]  for (key, value) in pairs.items()}
  pairs_data = sorted(pairs_data.items(), key=lambda x: x[1])
  pairs_data_filtered = filter_pairs_data(pairs_data) # filter based on cointegration in such a way that we can simply pick the highest pair of stocks in the list.


In [8]:
import pandas as pd
import numpy as np
import os
import json
from typing import Dict, Any
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error
from transformers import AutoModelForCausalLM, AutoConfig
import random

# custom imports
from utils.visualization import plot_return_uncertainty, plot_comparison
from external.time_moe_repo.training_wrapper import train_time_moe
from backtesting.trading_strategy import trade
from backtesting.utils import calculate_return_uncertainty

## semi-custom
from external.time_moe_repo.time_moe.models.modeling_time_moe import TimeMoeForPrediction

In [17]:
### Time MoE Workflow Code ###


def execute_timemoe_workflow(
  pairs_timeseries: pd.DataFrame,
  target_col: str = "Spread_Close",
  burn_in: int = 30, # we remove the first 30 elements, because the largest window used for technical indicators is
  train_frac: float = 0.90,
  dev_frac: float = 0.05,   # remaining part is test
  seed: int = 3178749, # for reproducibility, my student number
  look_back: int = 20,
  batch_size: int = 8,
  verbose: bool = True,
  load_finetuned = True,
  result_parent_dir: str = "data/results",
  filename_base: str = "data_begindate_enddate_hash.pkl",
  pair_tup_str: str = "(?,?)", # Used for showing which tuple was used in plots, example: "(QQQ, SPY)"
  return_predicted_spread: bool = False
):
  # Set seeds
  torch.manual_seed(seed)
  np.random.seed(seed)
  random.seed(seed)

  # For GPU (if used)
  if torch.cuda.is_available():
      torch.cuda.manual_seed(seed)
      torch.cuda.manual_seed_all(seed)
      torch.backends.cudnn.deterministic = True
      torch.backends.cudnn.benchmark = False  # Might slow down, but ensures determinism

  if not target_col in pairs_timeseries.columns:
    raise KeyError(f"pairs_timeseries must contain {target_col}")
  FLASH_ATTN = False

  # burn the first 30 elements
  pairs_timeseries_burned = pairs_timeseries.iloc[burn_in:].copy()

  total_len = len(pairs_timeseries_burned)
  train_size = int(total_len * train_frac)
  dev_size   = int(total_len * dev_frac)
  test_size  = total_len - train_size - dev_size # not used, but for clarity

  # Standard version of the Time-MoE model can only take in univariate time series. Therefore, we will train only on the target_col
  # TODO: Convert to using multivariate again, a certain type of "multivariate" processing is possible according to the original time-moe paper, but not the version we would want to use. It is not possible to use many different features to enhance the prediction of the target column
  pairs_timeseries_burned_univariate = pairs_timeseries_burned[target_col]

  train = pairs_timeseries_burned_univariate[:train_size]
  dev   = pairs_timeseries_burned_univariate[train_size:train_size+dev_size] # aka validation
  test  = pairs_timeseries_burned_univariate[train_size+dev_size:]

  train_multivariate = pairs_timeseries_burned.iloc[:train_size]
  dev_multivariate = pairs_timeseries_burned.iloc[train_size:train_size+dev_size]
  test_multivariate = pairs_timeseries_burned.iloc[train_size+dev_size:]


  if verbose:
      print(f"Split sizes — train: {len(train)}, dev: {len(dev)}, test: {len(test)}")

  # def create_sequences(series, look_back):
  #     X_raw = series[:batch_size * look_back].to_numpy() # .reshape(batch_size, look_back)
  #     X_raw = torch.tensor(X_raw, dtype=torch.float32)

  #     # normalize devX_raw
  #     mean, std = devX_raw.mean(dim=-1, keepdim=True), devX_raw.std(dim=-1, keepdim=True)
  #     X_scaled = (devX_raw - mean) / std
  #     return X_raw, X_scaled, None, None, mean, std
  DEVICE = "cpu" #  "cuda" if torch.cuda.is_available() else "cpu"


  def create_sequences_rolling(series, look_back):
      X = []
      y = []
      for i in range(len(series) - look_back):
          seq = series.iloc[i:i+look_back].values
          target = series.iloc[i+look_back]
          X.append(seq)
          y.append(target) # TODO: check whether target really is Spread_Close, or whether it is S1_close or S2_close

      X = torch.tensor(X, dtype=torch.float32)
      X = X.to(DEVICE)

      # normalize
      mean = X.mean(dim=-1, keepdim=True)
      std = X.std(dim=-1, keepdim=True)
      X_scaled = (X - mean) / (std + 1e-8)

      y = torch.tensor(y, dtype=torch.float32)
      y = y.to(DEVICE)
      return X, X_scaled, y, None, mean, std

  devX_raw, devX_scaled, devY_raw, devY_scaled, dev_mean, dev_std = create_sequences_rolling(dev, look_back)
  trainX_raw, trainX_scaled, trainY_raw, trainY_scaled, train_mean, train_std = create_sequences_rolling(train, look_back)
  testX_raw, testX_scaled, testY_raw, testY_scaled, test_mean, test_std = create_sequences_rolling(test, look_back)
  if verbose:
    print(f"devX_raw Shape: {devX_raw.shape}") # entire devX_raw has that shape before dataset and dev_loader logic

  dev_ds = TensorDataset(devX_scaled, devY_raw) # goal of TensorDataset class: loading and processing dataset lazily
  train_ds = TensorDataset(trainX_raw, trainY_raw)
  test_ds = TensorDataset(testX_raw, testY_raw)

  dev_loader = DataLoader(dev_ds, batch_size=batch_size, shuffle=False) # DataLoader takes care of shuffling/sampling/weigthed sampling, batching, using multiprocessing to load the data, use pinned memory etc. (source; https://discuss.pytorch.org/t/what-do-tensordataset-and-dataloader-do/107017)
  train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
  test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

  if verbose:
    print(f"dev_loader tensor Shape: {next(iter(dev_loader))[0].shape}, with a total of {len(dev_loader)} batches") # a single tensor in dev_loader now has shape [batch_size, look_back] as expected

  if load_finetuned:
    ## Training (only train in the case where we actually also want to load finetuned :D )
    # save contents of trainX_scaled to jsonl using _get_filename {"sequence": [1.7994326779272853, 2.554412431241829,
    filename_jsonl = filename_base.replace(".pkl", ".jsonl")
    filepath_parent = os.path.join("data", "datasets")
    os.makedirs(filepath_parent, exist_ok=True)
    filepath_jsonl = os.path.join(filepath_parent, filename_jsonl)
    # first method: try to train it on unnormalized sequence
    with open(filepath_jsonl, "w") as f:
        for train in all_train:
          # use all train variables rather than a single one
          json_line = json.dumps({"sequence": train.to_list()})
          f.write(json_line + "\n")

    model_dir = "logs/time_moe"
    train_time_moe(
        data_path=filepath_jsonl,
        dataloader_num_workers=2,
        # output_path="model_dir"
    ) # after this, model is saved to logs/time_moe as model.safetensors (400+ MB)
    config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True)
    model = TimeMoeForPrediction.from_pretrained(model_dir, config=config, torch_dtype=torch.float32)
    model.eval()
  else:
    model = AutoModelForCausalLM.from_pretrained(
        'Maple728/TimeMoE-50M',
        device_map=DEVICE,
        trust_remote_code=True,
    )
    if FLASH_ATTN: # if FLASH_ATTN, we assume the flash-attention module is installed, and adapt the model to use that
      model = AutoModelForCausalLM.from_pretrained('Maple728/TimeMoE-50M', device_map="auto", attn_implementation='flash_attention_2', trust_remote_code=True)

  prediction_length = 1 # TODO: rather than hardcoding prediction length, make a strategy where we can pick and choose different prediction lengths, and see what is affected by this (returns, std dev, ..)

  # forecast in batches from dev dataset
  all_predictions = []
  for i, batch in enumerate(test_loader):
    inputs = batch[0] # is devX_scaled, for now [1] will return error, later [1] will return devY_scaled :D

    yvals = batch[1]
    # means = batch[2]
    # stds = batch[3]

    output = model.generate(inputs, max_new_tokens=prediction_length)  # shape is [batch_size, look_back + prediction_length]
    normed_predictions = output[:, -prediction_length:]

    # from returned test_mean and test_std, slice the appropriate slices from the series
    input_size_current = inputs.size()
    batch_size_current = input_size_current[0]
    local_means = test_mean[batch_size*i : batch_size*i + batch_size_current]
    local_stds = test_std[batch_size*i : batch_size*i + batch_size_current]

    preds = normed_predictions * local_stds + local_means
    all_predictions.append(preds)

  # Concatenate all predictions
  predictions = torch.cat(all_predictions, dim=0)
  predictions = predictions.squeeze(-1)
  predictions = predictions.detach().numpy()

  # Also get dev/val predictions
  dev_predictions = []
  for i, batch in enumerate(dev_loader):
    inputs = batch[0]

    output = model.generate(inputs, max_new_tokens=prediction_length)  # shape is [batch_size, look_back + prediction_length]
    normed_predictions = output[:, -prediction_length:]
    input_size_current = inputs.size()
    batch_size_current = input_size_current[0]
    # get dev rather than test
    local_means = dev_mean[batch_size*i : batch_size*i + batch_size_current]
    local_stds = dev_std[batch_size*i : batch_size*i + batch_size_current]

    preds = normed_predictions * local_stds + local_means
    dev_predictions.append(preds)
  dev_predictions = torch.cat(dev_predictions, dim=0)
  dev_predictions = dev_predictions.squeeze(-1)
  dev_predictions = dev_predictions.detach().numpy()

  ## Trading
  test_s1_shortened = test_multivariate['S1_close'].iloc[look_back:]
  test_s2_shortened = test_multivariate['S2_close'].iloc[look_back:] # use multivariate versions, so we can still access cols like 'S1_close' and 'S2_close'
  test_index_shortened = test_multivariate.index[look_back:] # officially doesn't really matter whether to use `test_multivariate` or `test`, but do it like this for consistency
  forecast_test_shortened_series = pd.Series(predictions, index=test_index_shortened)
  gt_test_shortened_series = pd.Series(testY_raw.numpy(), index=test_index_shortened)

  gt_returns = trade(
      S1 = test_s1_shortened,
      S2 = test_s2_shortened,
      spread = gt_test_shortened_series,
      window_long = 30,
      window_short = 5,
      position_threshold = 1.0,
      clearing_threshold = 0.5
  )
  gt_yoy = ((gt_returns[-1] / gt_returns[0])**(365 / len(gt_returns)) - 1)

  ## Trading: Mean YoY
  min_position = 2.00
  max_position = 4.00
  min_clearing = 0.30
  max_clearing = 0.70
  position_thresholds = np.linspace(min_position, max_position, num=10)
  clearing_thresholds = np.linspace(min_clearing, max_clearing, num=10)
  yoy_mean, yoy_std = calculate_return_uncertainty(test_s1_shortened, test_s2_shortened, forecast_test_shortened_series, position_thresholds=position_thresholds, clearing_thresholds=clearing_thresholds)

  if load_finetuned:
    current_result_dir = filename_base.replace(".pkl", "_timemoe")
  else:
    current_result_dir = filename_base.replace(".pkl", "_timemoe_only_pretrained")
  result_dir = os.path.join(result_parent_dir, current_result_dir)
  if not os.path.exists(result_dir):
      os.makedirs(result_dir)

  ### Plotting #####
  # (no train/val loss plot, as Time-MoE repo did not support plots or even supplying val losses during training)
  train_val_loss_filename = None

  # 1. yoy returns
  yoy_returns_filename = plot_return_uncertainty(test_s1_shortened, test_s2_shortened, forecast_test_shortened_series, test_index_shortened, look_back, position_thresholds=position_thresholds, clearing_thresholds=clearing_thresholds, verbose=verbose, result_dir=result_dir, filename_base=filename_base)

  # 2. predicted vs actual spread plot
  predicted_vs_actual_spread_filename = plot_comparison(gt_test_shortened_series, forecast_test_shortened_series, test_index_shortened, workflow_type="Time-MoE", pair_tup_str=pair_tup_str, verbose=verbose, result_dir=result_dir, filename_base=filename_base)

  ### Plotting #####

  dev_mse = mean_squared_error(devY_raw.numpy(), dev_predictions)
  test_mse = mean_squared_error(testY_raw.numpy(), predictions)
  dev_variance = devY_raw.numpy().var()
  dev_nmse = dev_mse / dev_variance if dev_variance != 0 else float('inf')
  test_variance = testY_raw.numpy().var()
  test_nmse = test_mse / test_variance if test_variance != 0 else float('inf')

  if return_predicted_spread:
    return forecast_test_shortened_series, test_nmse

  plot_filenames = {
      "yoy_returns": yoy_returns_filename,
      "predicted_vs_actual_spread": predicted_vs_actual_spread_filename,
      "train_val_loss": train_val_loss_filename
  }

  output: Dict[str, Any] = dict(
      val_mse=dev_nmse,
      test_mse=test_nmse,
      yoy_mean=yoy_mean,
      yoy_std=yoy_std,
      gt_yoy=gt_yoy,
      result_parent_dir=result_parent_dir,
      plot_filenames=plot_filenames
  )

  results_str = f"""
  Validation MSE: {output['val_mse']}
  Test MSE: {output['test_mse']}
  YOY Returns: {output['yoy_mean'] * 100:.2f}%
  YOY Std: +- {output['yoy_std'] * 100:.2f}%
  GT Yoy: {output['gt_yoy'] * 100:.2f}%
  Plot filepath parent dir: {output['result_parent_dir']}
  Plot filenames: {output['plot_filenames']}
  """

  with open(os.path.join(result_dir, "results.txt"), "w") as f:
      f.write(results_str)
  if verbose:
    print(results_str)
  return output


### Time MoE Workflow Code ###

In [19]:
verbose = True

results_timemoe = []
num_results = min(len(pairs_data_filtered), 30)
for i in tqdm(range(num_results), desc = "Gathering [...]"):
    ticker_a, ticker_b = pairs_data_filtered[i][0][0], pairs_data_filtered[i][0][1]
    pair_tup_str_current = f"({ticker_a},{ticker_b})"
    pairs_timeseries_df = combine_pairs_data(data_close_filtered_2, data_open_filtered_2, data_high_filtered_2, data_low_filtered_2, data_vol_filtered_2, ticker_a, ticker_b)
    output_returns = get_gt_yoy_returns_test_dev(pairs_timeseries_df, dev_frac, train_frac, look_back=20)
    gt_yoy, gt_yoy_for_dev_dataset = output_returns['gt_yoy_test'], output_returns['gt_yoy_dev']

    # model-specific call
    output_model = execute_timemoe_workflow(pairs_timeseries_df, verbose=verbose, result_parent_dir="data/results", filename_base=_get_filename(startDateStr, endDateStr, instrumentIdsNASDAQandNYSE), pair_tup_str=f"({ticker_a},{ticker_b})", load_finetuned=True, train_frac=train_frac, dev_frac=dev_frac)

    yoy_str = f"{output_model['yoy_mean'] * 100:.2f}% +- {output_model['yoy_std'] * 100:.2f}%"
    returns_score = return_score(output_model['yoy_mean'], gt_yoy)
    cointegration_score = pairs_data_filtered[i][1]
    results_timemoe.append((pair_tup_str_current, cointegration_score, output_model['val_mse'], output_model['test_mse'], yoy_str, gt_yoy, returns_score)) # (pair, val, test, yoy_str, gt_yoy, returns_score)

Gathering [...]:   0%|          | 0/30 [00:00<?, ?it/s]

Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 150422.46it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.10883668272360729
  Test MSE: 0.48143777460431886
  YOY Returns: -35.63%
  YOY Std: +- 47.84%
  GT Yoy: 3.81%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 127437.56it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.19307949711421168
  Test MSE: 3.556063573970028
  YOY Returns: -100.00%
  YOY Std: +- 0.00%
  GT Yoy: -100.00%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 211875.09it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.09621654786052036
  Test MSE: 1.6540389701170892
  YOY Returns: -90.42%
  YOY Std: +- 56.51%
  GT Yoy: -100.00%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 246301.77it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.0750359748427156
  Test MSE: 0.2552065463419213
  YOY Returns: 0.91%
  YOY Std: +- 0.68%
  GT Yoy: 3.04%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 201781.09it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.051700777658355374
  Test MSE: 23.625822527193517
  YOY Returns: -100.00%
  YOY Std: +- 0.00%
  GT Yoy: -100.00%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 220078.10it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.09776221262688799
  Test MSE: 1.0866635629984545
  YOY Returns: -17.40%
  YOY Std: +- 4.30%
  GT Yoy: -82.79%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 196102.28it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.15542790127874895
  Test MSE: 4.470834645900972
  YOY Returns: -100.00%
  YOY Std: +- 0.00%
  GT Yoy: -100.00%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 183913.71it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.019505118081706234
  Test MSE: 41.32358216168295
  YOY Returns: -100.00%
  YOY Std: +- 0.00%
  GT Yoy: 3284.18%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 179110.00it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.028364624470369394
  Test MSE: 66.71824403446405
  YOY Returns: -100.00%
  YOY Std: +- 0.00%
  GT Yoy: 1430.14%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 156526.56it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.03037680305278636
  Test MSE: 10.993945161033942
  YOY Returns: -100.00%
  YOY Std: +- 0.00%
  GT Yoy: -100.00%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 269839.67it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.15976001770207207
  Test MSE: 0.4565266555782496
  YOY Returns: 14.20%
  YOY Std: +- 0.66%
  GT Yoy: 11.87%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 118750.22it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.019562940877547598
  Test MSE: 33.51428698458714
  YOY Returns: -100.00%
  YOY Std: +- 0.00%
  GT Yoy: -100.00%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 215467.99it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.03755714441987145
  Test MSE: 32.82601057718935
  YOY Returns: -100.00%
  YOY Std: +- 0.00%
  GT Yoy: 624.11%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 247856.17it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.010224447959370046
  Test MSE: 19.1628775569641
  YOY Returns: -100.00%
  YOY Std: +- 0.00%
  GT Yoy: -100.00%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 173290.54it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.1464663658653257
  Test MSE: 0.803134290759264
  YOY Returns: 4.49%
  YOY Std: +- 0.92%
  GT Yoy: 7.98%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 101245.21it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.09665956376322472
  Test MSE: 3.286897998592766
  YOY Returns: -100.00%
  YOY Std: +- 0.00%
  GT Yoy: -100.00%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 197898.91it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.011985509101995376
  Test MSE: 14.968233751075834
  YOY Returns: -100.00%
  YOY Std: +- 0.00%
  GT Yoy: -100.00%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 196369.69it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.020152112615975343
  Test MSE: 52.65441401503422
  YOY Returns: -100.00%
  YOY Std: +- 0.00%
  GT Yoy: -100.00%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 213339.91it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.04791695854766715
  Test MSE: 5.983630431376823
  YOY Returns: -100.00%
  YOY Std: +- 0.00%
  GT Yoy: -100.00%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 207101.30it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.05365903460683156
  Test MSE: 6.679670863992358
  YOY Returns: -100.00%
  YOY Std: +- 0.00%
  GT Yoy: -100.00%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 112620.78it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.027063401770383588
  Test MSE: 11.030418841759857
  YOY Returns: -100.00%
  YOY Std: +- 0.00%
  GT Yoy: 1300.99%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 266346.06it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.031486622867516356
  Test MSE: 15.010195980079017
  YOY Returns: -100.00%
  YOY Std: +- 0.00%
  GT Yoy: -100.00%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 155232.95it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.026329189926002857
  Test MSE: 38.6417755036848
  YOY Returns: -100.00%
  YOY Std: +- 0.00%
  GT Yoy: -100.00%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 133750.25it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.10476330563053757
  Test MSE: 0.17462459091923446
  YOY Returns: -43.86%
  YOY Std: +- 15.99%
  GT Yoy: -51.19%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 131671.23it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.14559967806203222
  Test MSE: 6.137583259462576
  YOY Returns: -100.00%
  YOY Std: +- 0.00%
  GT Yoy: -100.00%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 213867.98it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.03262074619453748
  Test MSE: 15.463479236790787
  YOY Returns: -100.00%
  YOY Std: +- 0.00%
  GT Yoy: 1153.60%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 254424.80it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.1131269540213303
  Test MSE: 2.275572172497843
  YOY Returns: -100.00%
  YOY Std: +- 0.00%
  GT Yoy: -100.00%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 206408.65it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.07083293760484047
  Test MSE: 8.556745808404818
  YOY Returns: -65.94%
  YOY Std: +- 77.48%
  GT Yoy: -100.00%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 194776.06it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.06602158659611425
  Test MSE: 21.434143651096022
  YOY Returns: -100.00%
  YOY Std: +- 0.00%
  GT Yoy: 230.21%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  
Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches



100%|██████████| 103/103 [00:00<00:00, 187099.75it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

  Validation MSE: 0.03800387353222017
  Test MSE: 0.687647801338406
  YOY Returns: 23.04%
  YOY Std: +- 4.78%
  GT Yoy: 38.44%
  Plot filepath parent dir: data/results
  Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}
  


In [20]:
def results_to_latex(results):
    headers = [
        "Pair",
        "Cointegration Score",
        "val MSE",
        "test MSE",
        "YoY Returns (std)",
        "\makecell{Theoretical Return\\\\Under Perfect\\\\Information}",
        "Return Score"
    ]
    # Latex column alignment: l for first col, c for others
    align_str = "l" + "c" * (len(headers)-1)
    # Begin building latex table string
    lines = []
    lines.append("\\begin{table}[h]")
    lines.append("\\centering")
    lines.append("\\small")
    lines.append("\\resizebox{\\textwidth}{!}{")
    lines.append("\\begin{tabular}{" + align_str + "}")
    lines.append("\\toprule")
    lines.append(" & ".join(headers) + " \\\\")
    lines.append("\\midrule")
    for idx, row in enumerate(results):
        row_out = []
        for col_idx, cell in enumerate(row):
            # Add numbering for pairs
            if col_idx == 0:
                cell = f"{idx+1}. {cell}"
            # Format cointegration score as scientific in latex
            elif col_idx == 1 and isinstance(cell, float):
                base, exp = f"{cell:.2e}".split("e")
                exp = int(exp)
                cell = f"${base}\\times 10^{{{exp}}}$"
            # Theoretical return: show as percent if small, otherwise keep as float
            elif col_idx == 5 and isinstance(cell, float):
                cell = f"{cell*100:.2f}\\%"
            # Format YoY Returns (std) as $a\% \pm b\%$
            elif col_idx == 4 and isinstance(cell, str) and "%" in cell:
                # Convert e.g. '-82.63% +- 30.20%' to latex: $-82.63\% \pm 30.20\%$
                cell = cell.replace("%", "\\%")
                cell = cell.replace("+-", "\\pm")
                cell = f"${cell}$"
            elif col_idx == 6 and isinstance(cell, float):
              cell = f"{cell:.2f}"
            # General float formatting
            elif isinstance(cell, float):
                cell = f"{cell:.5f}"
            # Replace % in any string field (needed for e.g. theoretical return if not float)
            elif isinstance(cell, str) and "%" in cell:
                cell = cell.replace("%", "\\%")
            row_out.append(cell)
        # Join and add row
        lines.append(" & ".join(str(x) for x in row_out) + " \\\\")
    lines.append("\\bottomrule")
    lines.append("\\end{tabular}")
    lines.append("}")
    lines.append("\\caption{Model performance and return statistics for all tested pairs.}")
    lines.append("\\end{table}")
    return "\n".join(lines)



print(results_to_latex(results_timemoe))

\begin{table}[h]
\centering
\small
\resizebox{\textwidth}{!}{
\begin{tabular}{lcccccc}
\toprule
Pair & Cointegration Score & val MSE & test MSE & YoY Returns (std) & \makecell{Theoretical Return\\Under Perfect\\Information} & Return Score \\
\midrule
1. (PFF,EMB) & $1.38\times 10^{-4}$ & 0.10884 & 0.48144 & $-35.63\% \pm 47.84\%$ & 3.31\% & 0.62 \\
2. (IFGL,MBB) & $4.45\times 10^{-4}$ & 0.19308 & 3.55606 & $-100.00\% \pm 0.00\%$ & -100.00\% & -1 \\
3. (IFGL,EMB) & $7.27\times 10^{-4}$ & 0.09622 & 1.65404 & $-90.42\% \pm 56.51\%$ & 11.61\% & 0.09 \\
4. (IGIB,IEI) & $7.97\times 10^{-4}$ & 0.07504 & 0.25521 & $0.91\% \pm 0.68\%$ & 1.38\% & 1.00 \\
5. (IGF,DVY) & $8.40\times 10^{-4}$ & 0.05170 & 23.62582 & $-100.00\% \pm 0.00\%$ & -100.00\% & -1 \\
6. (USIG,IEI) & $1.09\times 10^{-3}$ & 0.09776 & 1.08666 & $-17.40\% \pm 4.30\%$ & -63.21\% & 2.25 \\
7. (IFGL,BND) & $1.20\times 10^{-3}$ & 0.15543 & 4.47083 & $-100.00\% \pm 0.00\%$ & -100.00\% & -1 \\
8. (IFGL,SMH) & $1.64\times 10^{-3}$ & 0.

In [13]:
all_train = []
# get all train variables
for i in range(len(pairs_data_filtered)):
  ticker_a, ticker_b = pairs_data_filtered[i][0][0], pairs_data_filtered[i][0][1]
  pairs_timeseries_df = combine_pairs_data(data_close_filtered_2, data_open_filtered_2, data_high_filtered_2, data_low_filtered_2, data_vol_filtered_2, ticker_a, ticker_b)
  # burn the first 30 elements
  pairs_timeseries_burned = pairs_timeseries.iloc[burn_in:].copy()

  total_len = len(pairs_timeseries_burned)
  train_size = int(total_len * train_frac)
  dev_size   = int(total_len * dev_frac)
  test_size  = total_len - train_size - dev_size # not used, but for clarity

  # Standard version of the Time-MoE model can only take in univariate time series. Therefore, we will train only on the target_col
  # TODO: Convert to using multivariate again, a certain type of "multivariate" processing is possible according to the original time-moe paper, but not the version we would want to use. It is not possible to use many different features to enhance the prediction of the target column
  pairs_timeseries_burned_univariate = pairs_timeseries_burned[target_col]

  train = pairs_timeseries_burned_univariate[:train_size]
  dev   = pairs_timeseries_burned_univariate[train_size:train_size+dev_size] # aka validation
  test  = pairs_timeseries_burned_univariate[train_size+dev_size:]
  all_train.append(train)

In [14]:
len(all_train)

103

In [15]:
current_idx = 2
ticker_a, ticker_b = pairs_data_filtered[current_idx][0][0], pairs_data_filtered[current_idx][0][1]
pair_tup_str_current = f"({ticker_a},{ticker_b})"
pairs_timeseries_df = combine_pairs_data(data_close_filtered_2, data_open_filtered_2, data_high_filtered_2, data_low_filtered_2, data_vol_filtered_2, ticker_a, ticker_b)

### Time MoE Workflow Code Manual ###

pairs_timeseries: pd.DataFrame = pairs_timeseries_df
target_col: str = "Spread_Close"
burn_in: int = 30 # we remove the first 30 elements, because the largest window used for technical indicators is
train_frac: float = train_frac
dev_frac: float = dev_frac   # remaining part is test
seed: int = 3178749 # for reproducibility, my student number
look_back: int = 20
batch_size: int = 8
verbose: bool = True
load_finetuned = True
result_parent_dir: str = "data/results"
pair_tup_str=f"({ticker_a},{ticker_b})"
filename_base=_get_filename(startDateStr, endDateStr, instrumentIdsNASDAQandNYSE)
# Used for showing which tuple was used in plots, example: "(QQQ, SPY)"
return_predicted_spread: bool = False

# Set seeds
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# For GPU (if used)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False  # Might slow down, but ensures determinism

if not target_col in pairs_timeseries.columns:
  raise KeyError(f"pairs_timeseries must contain {target_col}")
FLASH_ATTN = False

# burn the first 30 elements
pairs_timeseries_burned = pairs_timeseries.iloc[burn_in:].copy()

total_len = len(pairs_timeseries_burned)
train_size = int(total_len * train_frac)
dev_size   = int(total_len * dev_frac)
test_size  = total_len - train_size - dev_size # not used, but for clarity

# Standard version of the Time-MoE model can only take in univariate time series. Therefore, we will train only on the target_col
# TODO: Convert to using multivariate again, a certain type of "multivariate" processing is possible according to the original time-moe paper, but not the version we would want to use. It is not possible to use many different features to enhance the prediction of the target column
pairs_timeseries_burned_univariate = pairs_timeseries_burned[target_col]

train = pairs_timeseries_burned_univariate[:train_size]
dev   = pairs_timeseries_burned_univariate[train_size:train_size+dev_size] # aka validation
test  = pairs_timeseries_burned_univariate[train_size+dev_size:]

train_multivariate = pairs_timeseries_burned.iloc[:train_size]
dev_multivariate = pairs_timeseries_burned.iloc[train_size:train_size+dev_size]
test_multivariate = pairs_timeseries_burned.iloc[train_size+dev_size:]


if verbose:
    print(f"Split sizes — train: {len(train)}, dev: {len(dev)}, test: {len(test)}")

# def create_sequences(series, look_back):
#     X_raw = series[:batch_size * look_back].to_numpy() # .reshape(batch_size, look_back)
#     X_raw = torch.tensor(X_raw, dtype=torch.float32)

#     # normalize devX_raw
#     mean, std = devX_raw.mean(dim=-1, keepdim=True), devX_raw.std(dim=-1, keepdim=True)
#     X_scaled = (devX_raw - mean) / std
#     return X_raw, X_scaled, None, None, mean, std
DEVICE = "cpu" #  "cuda" if torch.cuda.is_available() else "cpu"


def create_sequences_rolling(series, look_back):
    X = []
    y = []
    for i in range(len(series) - look_back):
        seq = series.iloc[i:i+look_back].values
        target = series.iloc[i+look_back]
        X.append(seq)
        y.append(target) # TODO: check whether target really is Spread_Close, or whether it is S1_close or S2_close

    X = torch.tensor(X, dtype=torch.float32)
    X = X.to(DEVICE)

    # normalize
    mean = X.mean(dim=-1, keepdim=True)
    std = X.std(dim=-1, keepdim=True)
    X_scaled = (X - mean) / (std + 1e-8)

    y = torch.tensor(y, dtype=torch.float32)
    y = y.to(DEVICE)
    return X, X_scaled, y, None, mean, std

devX_raw, devX_scaled, devY_raw, devY_scaled, dev_mean, dev_std = create_sequences_rolling(dev, look_back)
trainX_raw, trainX_scaled, trainY_raw, trainY_scaled, train_mean, train_std = create_sequences_rolling(train, look_back)
testX_raw, testX_scaled, testY_raw, testY_scaled, test_mean, test_std = create_sequences_rolling(test, look_back)
if verbose:
  print(f"devX_raw Shape: {devX_raw.shape}") # entire devX_raw has that shape before dataset and dev_loader logic

dev_ds = TensorDataset(devX_scaled, devY_raw) # goal of TensorDataset class: loading and processing dataset lazily
train_ds = TensorDataset(trainX_raw, trainY_raw)
test_ds = TensorDataset(testX_raw, testY_raw)

dev_loader = DataLoader(dev_ds, batch_size=batch_size, shuffle=False) # DataLoader takes care of shuffling/sampling/weigthed sampling, batching, using multiprocessing to load the data, use pinned memory etc. (source; https://discuss.pytorch.org/t/what-do-tensordataset-and-dataloader-do/107017)
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

if verbose:
  print(f"dev_loader tensor Shape: {next(iter(dev_loader))[0].shape}, with a total of {len(dev_loader)} batches") # a single tensor in dev_loader now has shape [batch_size, look_back] as expected

if load_finetuned:
  ## Training (only train in the case where we actually also want to load finetuned :D )
  # save contents of trainX_scaled to jsonl using _get_filename {"sequence": [1.7994326779272853, 2.554412431241829,
  filename_jsonl = filename_base.replace(".pkl", ".jsonl")
  filepath_parent = os.path.join("data", "datasets")
  os.makedirs(filepath_parent, exist_ok=True)
  filepath_jsonl = os.path.join(filepath_parent, filename_jsonl)
  # first method: try to train it on unnormalized sequence
  with open(filepath_jsonl, "w") as f:
      for train in all_train:
        # use all train variables rather than a single one
        json_line = json.dumps({"sequence": train.to_list()})
        f.write(json_line + "\n")

  current_model_identifier = "test"
  model_dir = f"logs/time_moe/{current_model_identifier}"
  if not os.path.exists(model_dir):
    os.makedirs(model_dir)
  train_time_moe(
      data_path=filepath_jsonl,
      dataloader_num_workers=2,
      output_path=model_dir
  ) # after this, model is saved to logs/time_moe as model.safetensors (400+ MB)
  config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True)
  model = TimeMoeForPrediction.from_pretrained(model_dir, config=config, torch_dtype=torch.float32)
  model.eval()
else:
  model = AutoModelForCausalLM.from_pretrained(
      'Maple728/TimeMoE-50M',
      device_map=DEVICE,
      trust_remote_code=True,
  )
  if FLASH_ATTN: # if FLASH_ATTN, we assume the flash-attention module is installed, and adapt the model to use that
    model = AutoModelForCausalLM.from_pretrained('Maple728/TimeMoE-50M', device_map="auto", attn_implementation='flash_attention_2', trust_remote_code=True)

prediction_length = 1 # TODO: rather than hardcoding prediction length, make a strategy where we can pick and choose different prediction lengths, and see what is affected by this (returns, std dev, ..)

# forecast in batches from dev dataset
all_predictions = []
for i, batch in enumerate(test_loader):
  inputs = batch[0] # is devX_scaled, for now [1] will return error, later [1] will return devY_scaled :D

  yvals = batch[1]
  # means = batch[2]
  # stds = batch[3]

  output = model.generate(inputs, max_new_tokens=prediction_length)  # shape is [batch_size, look_back + prediction_length]
  normed_predictions = output[:, -prediction_length:]

  # from returned test_mean and test_std, slice the appropriate slices from the series
  input_size_current = inputs.size()
  batch_size_current = input_size_current[0]
  local_means = test_mean[batch_size*i : batch_size*i + batch_size_current]
  local_stds = test_std[batch_size*i : batch_size*i + batch_size_current]

  preds = normed_predictions * local_stds + local_means
  all_predictions.append(preds)

# Concatenate all predictions
predictions = torch.cat(all_predictions, dim=0)
predictions = predictions.squeeze(-1)
predictions = predictions.detach().numpy()

# Also get dev/val predictions
dev_predictions = []
for i, batch in enumerate(dev_loader):
  inputs = batch[0]

  output = model.generate(inputs, max_new_tokens=prediction_length)  # shape is [batch_size, look_back + prediction_length]
  normed_predictions = output[:, -prediction_length:]
  input_size_current = inputs.size()
  batch_size_current = input_size_current[0]
  # get dev rather than test
  local_means = dev_mean[batch_size*i : batch_size*i + batch_size_current]
  local_stds = dev_std[batch_size*i : batch_size*i + batch_size_current]

  preds = normed_predictions * local_stds + local_means
  dev_predictions.append(preds)
dev_predictions = torch.cat(dev_predictions, dim=0)
dev_predictions = dev_predictions.squeeze(-1)
dev_predictions = dev_predictions.detach().numpy()

## Trading
test_s1_shortened = test_multivariate['S1_close'].iloc[look_back:]
test_s2_shortened = test_multivariate['S2_close'].iloc[look_back:] # use multivariate versions, so we can still access cols like 'S1_close' and 'S2_close'
test_index_shortened = test_multivariate.index[look_back:] # officially doesn't really matter whether to use `test_multivariate` or `test`, but do it like this for consistency
forecast_test_shortened_series = pd.Series(predictions, index=test_index_shortened)
gt_test_shortened_series = pd.Series(testY_raw.numpy(), index=test_index_shortened)

gt_returns = trade(
    S1 = test_s1_shortened,
    S2 = test_s2_shortened,
    spread = gt_test_shortened_series,
    window_long = 30,
    window_short = 5,
    position_threshold = 1.0,
    clearing_threshold = 0.5
)
gt_yoy = ((gt_returns[-1] / gt_returns[0])**(365 / len(gt_returns)) - 1)

## Trading: Mean YoY
min_position = 2.00
max_position = 4.00
min_clearing = 0.30
max_clearing = 0.70
position_thresholds = np.linspace(min_position, max_position, num=10)
clearing_thresholds = np.linspace(min_clearing, max_clearing, num=10)
yoy_mean, yoy_std = calculate_return_uncertainty(test_s1_shortened, test_s2_shortened, forecast_test_shortened_series, position_thresholds=position_thresholds, clearing_thresholds=clearing_thresholds)

if load_finetuned:
  current_result_dir = filename_base.replace(".pkl", "_timemoe")
else:
  current_result_dir = filename_base.replace(".pkl", "_timemoe_only_pretrained")
result_dir = os.path.join(result_parent_dir, current_result_dir)
if not os.path.exists(result_dir):
    os.makedirs(result_dir)

### Plotting #####
# (no train/val loss plot, as Time-MoE repo did not support plots or even supplying val losses during training)
train_val_loss_filename = None

# 1. yoy returns
yoy_returns_filename = plot_return_uncertainty(test_s1_shortened, test_s2_shortened, forecast_test_shortened_series, test_index_shortened, look_back, position_thresholds=position_thresholds, clearing_thresholds=clearing_thresholds, verbose=verbose, result_dir=result_dir, filename_base=filename_base)

# 2. predicted vs actual spread plot
predicted_vs_actual_spread_filename = plot_comparison(gt_test_shortened_series, forecast_test_shortened_series, test_index_shortened, workflow_type="Time-MoE", pair_tup_str=pair_tup_str, verbose=verbose, result_dir=result_dir, filename_base=filename_base)

### Plotting #####

dev_mse = mean_squared_error(devY_raw.numpy(), dev_predictions)
test_mse = mean_squared_error(testY_raw.numpy(), predictions)
dev_variance = devY_raw.numpy().var()
dev_nmse = dev_mse / dev_variance if dev_variance != 0 else float('inf')
test_variance = testY_raw.numpy().var()
test_nmse = test_mse / test_variance if test_variance != 0 else float('inf')

# if return_predicted_spread:
#   return forecast_test_shortened_series, test_nmse

plot_filenames = {
    "yoy_returns": yoy_returns_filename,
    "predicted_vs_actual_spread": predicted_vs_actual_spread_filename,
    "train_val_loss": train_val_loss_filename
}

output: Dict[str, Any] = dict(
    val_mse=dev_nmse,
    test_mse=test_nmse,
    yoy_mean=yoy_mean,
    yoy_std=yoy_std,
    gt_yoy=gt_yoy,
    result_parent_dir=result_parent_dir,
    plot_filenames=plot_filenames
)

results_str = f"""
Validation MSE: {output['val_mse']}
Test MSE: {output['test_mse']}
YOY Returns: {output['yoy_mean'] * 100:.2f}%
YOY Std: +- {output['yoy_std'] * 100:.2f}%
GT Yoy: {output['gt_yoy'] * 100:.2f}%
Plot filepath parent dir: {output['result_parent_dir']}
Plot filenames: {output['plot_filenames']}
"""

with open(os.path.join(result_dir, "results.txt"), "w") as f:
    f.write(results_str)
if verbose:
  print(results_str)


### Time MoE Workflow Code Manual ###

Split sizes — train: 3747, dev: 249, test: 251
devX_raw Shape: torch.Size([229, 20])
dev_loader tensor Shape: torch.Size([8, 20]), with a total of 29 batches


100%|██████████| 103/103 [00:00<00:00, 170285.11it/s]


Step,Training Loss
1,0.0953
2,0.0717
3,0.067
4,0.0614
5,0.0594




Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png
Saved plot to data/results/data_2008_01_01_2024_12_31_4416cb3b_timemoe/data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png

Validation MSE: 0.09621654786052036
Test MSE: 1.6540389701170892
YOY Returns: -90.42%
YOY Std: +- 56.51%
GT Yoy: -100.00%
Plot filepath parent dir: data/results
Plot filenames: {'yoy_returns': 'data_2008_01_01_2024_12_31_4416cb3b_plot_thresholds.png', 'predicted_vs_actual_spread': 'data_2008_01_01_2024_12_31_4416cb3b_groundtruth_comparison.png', 'train_val_loss': None}



In [None]:
plt.plot(forecast_test_shortened_series)
plt.plot(gt_test_shortened_series)
test_mse = mean_squared_error(forecast_test_shortened_series, gt_test_shortened_series)
# Make sure to set ddof=0 for population variance
test_variance = gt_test_shortened_series.var(ddof=0)
test_nmse = test_mse / test_variance if test_variance != 0 else float('inf')
test_nmse

In [None]:
first_param = next(model.parameters()).data
print("First 10 weights of first param:", first_param.flatten()[:10].cpu().numpy())
# pairs_timeseries_df of latest element [-1] gave the weight: [ 0.04013866 -0.04795108  0.03871797 -0.03122762 -0.05527523 -0.03969453 0.03159382 -0.04067107 -0.0423359  -0.05141316]
# pairs_timeseries_df of element [-2] gave the weights: [ 0.04013865 -0.04795108  0.03891797 -0.03122762 -0.05527523 -0.03969454 0.03159382 -0.04067108 -0.04213591 -0.05141316]
# Although we are unsure if this is supposed to change, we can definitely check whether it changes when we change the output_path
# pairs_timeseries_df of element [-2] AFTER modifying output path gave the weights [ 0.04013865 -0.04795108  0.03891797 -0.03122762 -0.05527523 -0.03969454 0.03159382 -0.04067108 -0.04213591 -0.05141316]
# the same weights,


In [None]:
import hashlib

def file_hash(filepath):
    with open(filepath, "rb") as f:
        return hashlib.md5(f.read()).hexdigest()

model_new_dir = "logs/time_moe/test/model.safetensors"
model_old_dir = "logs/time_moe/model.safetensors"

hash_new = file_hash(model_new_dir)
hash_old = file_hash(model_old_dir)

print("New model hash:", hash_new)
print("Old model hash:", hash_old)

if hash_new == hash_old:
    print("The files are identical.")
else:
    print("The files are different.")

In [None]:
# goal: first 10 results more than one positive yoy return

print("Hello, World!")
model_new_dir = "logs/time_moe/test/model.safetensors"
model_old_dir = "logs/time_moe/model.safetensors"

In [5]:
def return_score(yoy_mean, gt_yoy):
  if gt_yoy == -1:
    return -1
  return round((1 + yoy_mean) / (1 + gt_yoy), 2)

def results_to_ascii_table(results):
    # Define column headers
    headers = [
        "Pair",
        "Cointegration Score",
        "val MSE",
        "test MSE",
        "YoY Returns (std)",
        "Theoretical Return Under Perfect Information",
        "Return Score"
    ]
    # Calculate the max width for each column
    cols = list(zip(*results))
    col_widths = [
        max(len(str(h)), *(len(str(cell)) for cell in col))
        for h, col in zip(headers, cols)
    ]
    # Helper to create a line separator
    def make_sep(char="+", pad="-"):
        return char + char.join(pad * (w + 2) for w in col_widths) + char

    def format_row(row, col_widths, row_idx=None, number_pair=False):
        formatted_cells = []
        for col_idx, (cell, w) in enumerate(zip(row, col_widths)):
            # Add numbering if required and it's the first column (Pair)
            if number_pair and col_idx == 0 and row_idx is not None:
                cell = f"{row_idx + 1}. {cell}"
            if col_idx == 1 and isinstance(cell, float):
                # format cointegration score in the format "x.x*10^x"
                cell = f"{cell:.2e}".replace("e","*10^")
            if col_idx == 5 and isinstance(cell, float):
                cell = f"{cell*100:.2f}%"
            if isinstance(cell, float):
                cell = f"{cell:.5f}"
            formatted_cells.append(f"{str(cell):<{w}}")
        return "| " + " | ".join(formatted_cells) + " |"

    # Build table
    lines = []
    lines.append(make_sep())
    lines.append(format_row(headers, col_widths))
    lines.append(make_sep(char="+", pad="="))
    for idx, row in enumerate(results):
        lines.append(format_row(row, col_widths, row_idx=idx, number_pair=True))
        lines.append(make_sep())
    return "\n".join(lines)
