In [1]:
import datetime as dt
import sys
import numpy as np
from numpy import cumsum, log, polyfit, sqrt, std, subtract
from numpy.random import randn
import pandas as pd
# from pandas_datareader import data as web
import seaborn as sns
from pylab import rcParams 
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from arch import arch_model
from numpy.linalg import LinAlgError
from scipy import stats
import statsmodels.api as sm
import statsmodels.tsa.api as tsa
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import acf, q_stat, adfuller
from sklearn.metrics import mean_squared_error, mean_absolute_error
from scipy.stats import probplot, moment
from arch import arch_model
from arch.univariate import ConstantMean, GARCH, Normal
from sklearn.model_selection import TimeSeriesSplit

from statsmodels.tsa.arima.model import ARIMA
from itertools import product
from numpy.lib.stride_tricks import sliding_window_view
from sklearn.model_selection import train_test_split

import tqdm

import itertools
import json
import os

import warnings

In [2]:
from utils import read_txn_data, preprocess_txn_data, compute_lob_features, create_lob_dataset, merge_txn_and_lob

In [3]:
%matplotlib inline
pd.set_option('display.max_columns', None)
warnings.filterwarnings('ignore')
sns.set(style="darkgrid", color_codes=True)
rcParams['figure.figsize'] = 8,4

In [4]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F

In [5]:
trx_df = read_txn_data(use_load=False)
trx_df = preprocess_txn_data(trx_df, freq='1min')
trx_df['log_deseasoned_total_volume'] = np.log(trx_df['deseasoned_total_volume'] + 1e-07)

lob_df = create_lob_dataset(use_load=False)

df_merged = merge_txn_and_lob(trx_df, lob_df)
df_merged.head()

trx Data loaded successfully.
preprocessed lob Data loaded successfully.


Unnamed: 0,datetime,buy_volume,sell_volume,buy_txn,sell_txn,volume_imbalance,txn_imbalance,total_volume,mean_volume,deseasoned_total_volume,log_deseasoned_total_volume,ask_volume,bid_volume,ask_slope_1,ask_slope_5,ask_slope_10,bid_slope_1,bid_slope_5,bid_slope_10,spread,lob_volume_imbalance,slope_imbalance_1,slope_imbalance_5,slope_imbalance_10
5819,2018-06-04 22:00:05+00:00,0.059804,0.730357,5.0,10.0,0.670553,5.0,0.790162,4.380444,0.180384,-1.712667,2695.804973,586356.113693,1761.630667,2695.804973,2695.804973,3.972121,53.50245,160.246934,6.19,583660.30872,1757.658546,2642.302523,2535.55804
5820,2018-06-04 22:01:05+00:00,0.089359,0.849477,3.0,4.0,0.760118,1.0,0.938836,3.692009,0.254289,-1.369285,2699.165417,586350.938081,1765.312385,2699.165417,2699.165417,4.017044,52.408273,155.071322,4.97,583651.772664,1761.295341,2646.757144,2544.094095
5821,2018-06-04 22:02:05+00:00,0.313458,0.508952,2.0,4.0,0.195494,2.0,0.82241,3.3249,0.247349,-1.396955,2657.946212,586317.596946,1723.84318,2657.946212,2657.946212,3.831055,46.578294,158.19475,4.9,583659.650734,1720.012125,2611.367918,2499.751462
5822,2018-06-04 22:03:05+00:00,0.000992,0.199219,1.0,4.0,0.198227,3.0,0.200211,4.128645,0.048493,-3.026331,2650.599402,586308.612876,1718.061157,2650.599402,2650.599402,3.631836,51.036074,160.641345,4.32,583658.013474,1714.429321,2599.563327,2489.958056
5823,2018-06-04 22:04:05+00:00,0.172042,0.0,7.0,0.0,0.172042,7.0,0.172042,6.271124,0.027434,-3.595966,2650.082079,586314.173248,1715.979046,2650.082079,2650.082079,3.704804,51.092926,160.489197,4.32,583664.091169,1712.274243,2598.989153,2489.592882


## TME implementation

### Data loaders

In [6]:
h = 10  # window length
batch_size = 128

# -----------------------------
df = df_merged.sort_values('datetime').reset_index(drop=True)
# STEP 1: Create time-of-day feature
df['time_of_day'] = df['datetime'].dt.strftime('%H:%M')

# -----------------------------
# STEP 2: Split indices (AFTER creating lags!)
n_total = len(df)
n_train = int(0.7 * n_total)
n_val = int(0.1 * n_total)

# -----------------------------
# STEP 3: Create deseasonalizing map using per-time volume means from train only
train_deseason_df = df.iloc[:n_train]
mean_volume_by_time = train_deseason_df.groupby('time_of_day')['total_volume'].mean()
df['mean_volume'] = df['time_of_day'].map(mean_volume_by_time)

df['deseasoned_total_volume'] = df['total_volume'] / df['mean_volume']
df['log_deseasoned_total_volume'] = np.log(df['deseasoned_total_volume'] + 1e-7)
df['target'] = df['deseasoned_total_volume']

del train_deseason_df

# -----------------------------
# STEP 4: Define the source-specific features
source1_cols = ['buy_volume', 'sell_volume', 'buy_txn', 'sell_txn', 'volume_imbalance', 'txn_imbalance']
source2_cols = ['ask_volume', 'bid_volume', 'ask_slope_1', 'ask_slope_5', 'ask_slope_10', 'bid_slope_1', 'bid_slope_5', 'bid_slope_10', 'spread',
       'lob_volume_imbalance', 'slope_imbalance_1', 'slope_imbalance_5', 'slope_imbalance_10']
# target_col = 'log_deseasoned_total_volume'
target_col = 'target'
target_direct_col = 'total_volume'
weight_col = 'mean_volume'
datetime_col = 'datetime'

# # Normalize source1 and source2 features using training data only
# from sklearn.preprocessing import StandardScaler

# scaler1 = StandardScaler()
# scaler2 = StandardScaler()

# # Fit only on training portion
# source1_train_raw = df[source1_cols].iloc[:n_train]
# source2_train_raw = df[source2_cols].iloc[:n_train]

# scaler1.fit(source1_train_raw)
# scaler2.fit(source2_train_raw)

# # Apply normalization to the whole dataset
# df[source1_cols] = scaler1.transform(df[source1_cols])
# df[source2_cols] = scaler2.transform(df[source2_cols])

# --- Create rolling windows efficiently ---
source1_array = df[source1_cols].values  # shape (N, F1)
source2_array = df[source2_cols].values  # shape (N, F2)
target_array = df[target_col].values + 1e-7  # shape (N,)
target_direct_array = df[target_direct_col].values + 1e-7  # shape (N,)
weight_array = df[weight_col].values  # shape (N,)
timestamps_array = df[datetime_col].values


# Create sliding windows ([:-1] in windows and [h:] in targets make sure the targets are matched with corresponding features)
source1_windows = sliding_window_view(source1_array, window_shape=(h,), axis=0)[:-1]  # shape (N - h, F1, h)
source2_windows = sliding_window_view(source2_array, window_shape=(h,), axis=0)[:-1]  # shape (N - h, F2, h)
y = target_array[h:]
w = weight_array[h:]
timestamps = timestamps_array[h:]

# Convert to tensors
source1_tensor = torch.tensor(source1_windows, dtype=torch.float32)
source2_tensor = torch.tensor(source2_windows, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32)
w_tensor = torch.tensor(w, dtype=torch.float32)

# --- Time-based split (preserving time order) ---
n_total = len(y_tensor)
n_train = int(n_total * 0.7)
n_val = int(n_total * 0.1)

source1_train, source1_val, source1_test = source1_tensor[:n_train], source1_tensor[n_train:n_train + n_val], source1_tensor[n_train + n_val:]
source2_train, source2_val, source2_test = source2_tensor[:n_train], source2_tensor[n_train:n_train + n_val], source2_tensor[n_train + n_val:]
y_train, y_val, y_test = y_tensor[:n_train], y_tensor[n_train:n_train + n_val], y_tensor[n_train + n_val:]
w_train, w_val, w_test = w_tensor[:n_train], w_tensor[n_train:n_train + n_val], w_tensor[n_train + n_val:]

# (Optional) timestamps split for tracking
timestamps_train = timestamps[:n_train]
timestamps_val = timestamps[n_train:n_train + n_val]
timestamps_test = timestamps[n_train + n_val:]

# Dataset ready for PyTorch training
train_dataset = torch.utils.data.TensorDataset(source1_train, source2_train, y_train, w_train)
val_dataset = torch.utils.data.TensorDataset(source1_val, source2_val, y_val, w_val)
test_dataset = torch.utils.data.TensorDataset(source1_test, source2_test, y_test, w_test)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

## function for dataset creation for hyperparams search
def create_datasets(source1_array, source2_array, target_array, weight_array, batch_size, h):
       # Create sliding windows ([:-1] in windows and [h:] in targets make sure the targets are matched with corresponding features)
       source1_windows = sliding_window_view(source1_array, window_shape=(h,), axis=0)[:-1]  # shape (N - h, F1, h)
       source2_windows = sliding_window_view(source2_array, window_shape=(h,), axis=0)[:-1]  # shape (N - h, F2, h)
       y = target_array[h:]
       w = weight_array[h:]

       # Convert to tensors
       source1_tensor = torch.tensor(source1_windows, dtype=torch.float32)
       source2_tensor = torch.tensor(source2_windows, dtype=torch.float32)
       y_tensor = torch.tensor(y, dtype=torch.float32)
       w_tensor = torch.tensor(w, dtype=torch.float32)

       # --- Time-based split (preserving time order) ---
       n_total = len(y_tensor)
       n_train = int(n_total * 0.7)
       n_val = int(n_total * 0.1)

       source1_train, source1_val, source1_test = source1_tensor[:n_train], source1_tensor[n_train:n_train + n_val], source1_tensor[n_train + n_val:]
       source2_train, source2_val, source2_test = source2_tensor[:n_train], source2_tensor[n_train:n_train + n_val], source2_tensor[n_train + n_val:]
       y_train, y_val, y_test = y_tensor[:n_train], y_tensor[n_train:n_train + n_val], y_tensor[n_train + n_val:]
       w_train, w_val, w_test = w_tensor[:n_train], w_tensor[n_train:n_train + n_val], w_tensor[n_train + n_val:]

       # Dataset ready for PyTorch training
       train_dataset = torch.utils.data.TensorDataset(source1_train, source2_train, y_train, w_train)
       val_dataset = torch.utils.data.TensorDataset(source1_val, source2_val, y_val, w_val)
       test_dataset = torch.utils.data.TensorDataset(source1_test, source2_test, y_test, w_test)

       train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
       val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
       test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

       return train_loader, val_loader, test_loader


### TME components 

In [7]:
class BilinearRegressor(nn.Module):
    def __init__(self, d, h, latent_variable):
        """
        d: number of features in the source data
        h: number of lags in the source data
        latent_variable (bool): if True the class is devoted for modeling latent variable z, if False => y|s_i
        """
        super().__init__()
        self.latent_variable = latent_variable

        # Mean parameters
        self.L_mu = nn.Parameter(torch.empty(d))
        self.R_mu = nn.Parameter(torch.empty(h))
        self.b_mu = nn.Parameter(torch.zeros(1))

        # Xavier init for 1D weight tensors
        # nn.init.xavier_uniform_(self.L_mu.unsqueeze(0))
        # nn.init.xavier_uniform_(self.R_mu.unsqueeze(0))

        nn.init.xavier_normal_(self.L_mu.unsqueeze(0))
        nn.init.xavier_normal_(self.R_mu.unsqueeze(0))

        if not self.latent_variable:
            self.L_sigma = nn.Parameter(torch.empty(d))
            self.R_sigma = nn.Parameter(torch.empty(h))
            self.b_sigma = nn.Parameter(torch.zeros(1))

            # nn.init.xavier_uniform_(self.L_sigma.unsqueeze(0))
            # nn.init.xavier_uniform_(self.R_sigma.unsqueeze(0))

            nn.init.xavier_normal_(self.L_sigma.unsqueeze(0))
            nn.init.xavier_normal_(self.R_sigma.unsqueeze(0))

    def forward(self, x):  # x: (B, d, h)
        mu = torch.einsum('bdh,d,h->b', x, self.L_mu, self.R_mu) + self.b_mu  # [B]
        if self.latent_variable:
            return mu
        log_var = torch.einsum('bdh,d,h->b', x, self.L_sigma, self.R_sigma) + self.b_sigma
        log_var = torch.clamp(log_var, min=-10, max=10)
        var = torch.exp(log_var)  # Ensure positivity
        return mu, var
    

class TME(nn.Module):
    def __init__(self, d1, d2, h):
        super().__init__()
        self.target1 = BilinearRegressor(d1, h, latent_variable=False)
        self.target2 = BilinearRegressor(d2, h, latent_variable=False)
        self.latent1 = BilinearRegressor(d1, h, latent_variable=True)
        self.latent2 = BilinearRegressor(d2, h, latent_variable=True)

    def forward(self, x1, x2, return_all=False):
        # x1: (B, d1, h), x2: (B, d2, h)
        mu1, var1 = self.target1(x1)  # [B], [B]
        mu2, var2 = self.target2(x2)

        logit1 = self.latent1(x1)
        logit2 = self.latent2(x2)

        logits = torch.stack([logit1, logit2], dim=1)  # [B, num_sources]
        probs = F.softmax(logits, dim=1)     # [B, num_sources]

        if True:#not return_all:
            # Clamp to avoid numerical instability
            mu1 = torch.clamp(mu1, -10, 10)
            mu2 = torch.clamp(mu2, -10, 10)
            var1 = torch.clamp(var1, min=1e-5, max=10)
            var2 = torch.clamp(var2, min=1e-5, max=10)

        # Mixture of expected values under log-normal
        exp1 = torch.exp(mu1 + 0.5 * var1)
        exp2 = torch.exp(mu2 + 0.5 * var2)
        final_pred = probs[:, 0] * exp1 + probs[:, 1] * exp2  # [B]

        if return_all:
            return final_pred, mu1, var1, mu2, var2, probs
        return final_pred


### Training routine

![image.png](attachment:image.png)

In [8]:
def tme_loss(y, mu1, var1, mu2, var2, probs, model, l2_lambda=0.1):
    """
    Implements:
        -ln ∑_s [ lognormal(y_t | μ_s, σ_s^2) * P(z_t = s | x) ] + λ * ||θ||^2
    """

    eps = 1e-8  # for numerical stability
    log_y = torch.log(y + eps)

    # Log-normal density terms (not in log-space)
    def lognormal_pdf(y, log_y, mu, var):
        coef = 1.0 / (y * torch.sqrt(2 * torch.pi * var + eps))
        exponent = torch.exp(- (log_y - mu) ** 2 / (2 * var + eps))
        return coef * exponent

    p1 = lognormal_pdf(y, log_y, mu1, var1)
    p2 = lognormal_pdf(y, log_y, mu2, var2)

    # Combine with selector probabilities
    # print(probs[:,1])
    weighted_sum = probs[:,0] * p1 + probs[:,1] * p2

    # Negative log-likelihood (mean over batch)
    nll = -torch.log(weighted_sum + eps).mean() #maybe mean or sum

    # L2 Regularization (Gaussian prior on θ)
    l2_penalty = sum((p**2).sum() for p in model.parameters())
    reg = l2_lambda * l2_penalty

    return nll + reg


def train_tme_model(model, train_loader, val_loader, lr=5e-4, weight_decay=0.1, l2_lambda=0.1,
                    max_epochs=100, patience=10, device='cpu', adam=False, direct_target=False):
    model.to(device)
    # optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    # optimizer = torch.optim.Adam(model.parameters(), lr=5e-4, weight_decay=1e-4)
    # optimizer = torch.optim.SGD(model.parameters(), lr=lr)#, weight_decay=0.1)#, momentum=0.9)
    if adam:
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(), lr=lr)



    best_val_loss = float('inf')
    best_val_rmse = float('inf')
    best_state_dict = None
    patience_counter = 0
    
    for epoch in range(max_epochs):
        model.train()

        for x1, x2, y, w in train_loader:
            x1, x2, y, w = x1.to(device), x2.to(device), y.to(device), w.to(device)

            optimizer.zero_grad()

            final_pred, mu1, var1, mu2, var2, probs = model(x1, x2, return_all=True)

            loss = tme_loss(y, mu1, var1, mu2, var2, probs, model, l2_lambda=l2_lambda)
            loss.backward()

            # total_norm = 0
            # for p in model.parameters():
            #     if p.grad is not None:
            #         param_norm = p.grad.data.norm(2)
            #         total_norm += param_norm.item() ** 2
            # total_norm = total_norm ** 0.5
            # print(f"Gradient norm: {total_norm:.4f}")

            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)
            optimizer.step()

        # Validation loss
        model.eval()
        val_losses = []
        y_preds_val = []
        y_true_val = []
        w_val = []

        train_losses = []
        y_preds_train = []
        y_true_train = []
        w_train = []

        with torch.no_grad():
            for x1, x2, y, w in val_loader:
                x1, x2, y, w = x1.to(device), x2.to(device), y.to(device), w.to(device)

                final_pred, mu1, var1, mu2, var2, probs = model(x1, x2, return_all=True)

                val_loss = tme_loss(y, mu1, var1, mu2, var2, probs, model, l2_lambda=l2_lambda)
                val_losses.append(val_loss.item())
                y_preds_val.append(final_pred.detach().cpu())
                y_true_val.append(y.detach().cpu())
                w_val.append(w.detach().cpu())

            for x1, x2, y, w in train_loader:
                x1, x2, y, w = x1.to(device), x2.to(device), y.to(device), w.to(device)

                final_pred, mu1, var1, mu2, var2, probs = model(x1, x2, return_all=True)

                train_loss = tme_loss(y, mu1, var1, mu2, var2, probs, model, l2_lambda=l2_lambda)
                train_losses.append(train_loss.item())
                y_preds_train.append(final_pred.detach().cpu())
                y_true_train.append(y.detach().cpu())
                w_train.append(w.detach().cpu())
            

        avg_val_loss = sum(val_losses) / len(val_losses)
        y_preds_val = torch.cat(y_preds_val).numpy()
        y_true_val = torch.cat(y_true_val).numpy()
        w_val = torch.cat(w_val).numpy()
        if direct_target:
            rmse_val = np.sqrt(mean_squared_error(y_true_val, y_preds_val))
        else:
            rmse_val = np.sqrt(mean_squared_error(y_true_val*w_val, y_preds_val*w_val))

        avg_train_loss = sum(train_losses) / len(train_losses)
        y_preds_train = torch.cat(y_preds_train).numpy()
        y_true_train = torch.cat(y_true_train).numpy()
        w_train = torch.cat(w_train).numpy()
        if direct_target:
            rmse_train = np.sqrt(mean_squared_error(y_true_train, y_preds_train))
        else:    
            rmse_train = np.sqrt(mean_squared_error(y_true_train*w_train, y_preds_train*w_train))

        if (epoch+1) % 5 == 0 or epoch == 0:
            print(f"Epoch {epoch+1}.      Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}.      Train RMSE: {rmse_train:.4f}, Val RMSE: {rmse_val:.4f}")

        # Early stopping
        if rmse_val < best_val_rmse - 1e-4:#avg_val_loss < best_val_loss - 1e-4
            # best_val_loss = avg_val_loss
            best_val_rmse = rmse_val
            best_state_dict = model.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered.")
                break

    # Restore best model
    model.load_state_dict(best_state_dict)
    return model, best_val_loss


def train_tme_ensemble(train_loader, val_loader, d1, d2, h, num_models=20, device='cpu', adam=False, direct_target=False, **train_kwargs):
    ensemble = []
    val_losses = []

    for i in range(num_models):
        print(f"\n🌱 Training ensemble model {i + 1}/{num_models}")

        # Set seed for reproducibility
        torch.manual_seed(i)
        model = TME(d1, d2, h)  # Initialize new model

        # Train the model using your function
        trained_model, best_val_loss = train_tme_model(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            device=device,
            adam=adam,
            direct_target=direct_target,
            **train_kwargs
        )

        # Save the model and its validation loss
        ensemble.append(trained_model)
        val_losses.append(best_val_loss)

    return ensemble, val_losses



In [120]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

ensemble, losses = train_tme_ensemble(
    train_loader=train_loader,
    val_loader=val_loader,
    d1=source1_tensor.shape[1],  # number of features of the 1st source
    d2=source2_tensor.shape[1],  # number of features of the 2nd source
    h=source1_tensor.shape[2],  # lag length
    num_models=1,#20,
    device=device,
    lr=1e-3,
    weight_decay=0.1,
    l2_lambda=5,
    max_epochs=60,
    patience=10,
    adam=True
)




🌱 Training ensemble model 1/1
Epoch 1.      Train Loss: 3.1812, Val Loss: 4.4807.      Train RMSE: 50.8647, Val RMSE: 53.8183
Epoch 5.      Train Loss: 2.5648, Val Loss: 4.0476.      Train RMSE: 32.3022, Val RMSE: 15.2410
Epoch 10.      Train Loss: 2.8046, Val Loss: 4.2756.      Train RMSE: 16.7381, Val RMSE: 15.3284
Epoch 15.      Train Loss: 4.1822, Val Loss: 2.8934.      Train RMSE: 17.4150, Val RMSE: 15.0587
Epoch 20.      Train Loss: 2.2755, Val Loss: 3.6473.      Train RMSE: 101.5434, Val RMSE: 66.7435
Early stopping triggered.


In [9]:
def evaluate_tme_ensemble(ensemble, test_loader, all_preds=False, device='cpu', direct_target=False):
    all_preds = []
    all_preds_median = []
    y_trues = []
    w_trues = []

    with torch.no_grad():
        for x1, x2, y, w in test_loader:
            x1, x2, y, w = x1.to(device).to(torch.float64), x2.to(device).to(torch.float64), y.to(device).to(torch.float64), w.to(device).to(torch.float64)
            batch_preds = []

            for model in ensemble:
                model.eval()
                model.to(device).to(torch.float64)
                pred = model(x1, x2)
                # print(pred)
                # return
                batch_preds.append(pred.cpu())

            # Average predictions from all models
            avg_pred = torch.stack(batch_preds).mean(dim=0)
            median_pred = torch.stack(batch_preds).median(dim=0).values
            # avg_pred = torch.stack(batch_preds).median(dim=0)
            all_preds.append(avg_pred)
            all_preds_median.append(median_pred)
            y_trues.append(y.cpu())
            w_trues.append(w.cpu())

    y_preds = torch.cat(all_preds).numpy()
    y_preds_median = torch.cat(all_preds_median).numpy()
    y_trues = torch.cat(y_trues).numpy()
    w_trues = torch.cat(w_trues).numpy()

    if direct_target:
        rmse = np.sqrt(mean_squared_error(y_trues, y_preds))
        mae = mean_absolute_error(y_trues, y_preds)
    else:
        rmse = np.sqrt(mean_squared_error(y_trues*w_trues, y_preds*w_trues))
        mae = mean_absolute_error(y_trues*w_trues, y_preds*w_trues)

    # print(f"📊 Ensemble Test RMSE: {rmse:.4f}")
    # print(f"📊 Ensemble Test MAE: {mae:.4f}")
    return y_preds, y_preds_median, rmse, mae


# y_preds, y_preds_median, rmse, mae = evaluate_tme_ensemble(ensemble, test_loader, device='cpu')


In [122]:
print(rmse, mae)

11.595749548023331 3.5011974121583433


Some things to consider:  
    
    - Model training takes a long time (full 20 model ensemble took me more than 3 hours)  
    - For hyperparams tuning we may use the smaller ensemble  
    - Adam vs SGD?  
    - Now I am clamping the values of the predicted variances and means when training and predicting (to avoid numerical blow up)  
    - The RMSE and MAE I got are reasonable (they are not extremely different). This is just an indication that probably the calculations are doing what they are supposed to do.

## Hyperparams search

In [None]:
# Directory containing your saved results
input_dir = 'validation_results'
# Make sure the folder exists
os.makedirs(input_dir, exist_ok=True)

# Prepare a list to collect existing files (i.e. corresponding parameters have been already validated)
existing_results = []

# Loop over all files in the directory
for filename in os.listdir(input_dir):
    if filename.endswith('.json'):
        filepath = os.path.join(input_dir, filename)
        
        # Open and load the JSON file
        with open(filepath, 'r') as f:
            result = json.load(f)
        
        # Parse hyperparameters from the filename
        name_parts = filename.replace('.json', '').split('_')
        h = int(name_parts[0][1:])  # strip the 'h'
        batch_size = int(name_parts[1][5:])  # strip 'batch'
        lr = float(name_parts[2][2:].replace('e', 'e'))  # scientific notation stays
        l2_lambda = float(name_parts[3][6:])  # strip 'lambda'

        existing_results.append([h,batch_size,lr,l2_lambda])

print(existing_results)

[[10, 128, 0.001, 0.1], [10, 128, 0.001, 1.0], [10, 128, 0.001, 3.0], [10, 128, 0.001, 5.0], [10, 128, 0.0001, 0.1], [10, 128, 0.0001, 1.0], [10, 128, 0.0001, 3.0], [10, 128, 0.0001, 5.0], [10, 128, 0.0005, 0.1], [10, 128, 0.0005, 1.0], [10, 128, 0.0005, 3.0], [10, 128, 0.0005, 5.0], [10, 128, 5e-05, 0.1], [10, 128, 5e-05, 1.0], [10, 128, 5e-05, 3.0], [10, 128, 5e-05, 5.0], [10, 256, 5e-05, 0.1], [10, 64, 0.001, 0.1], [10, 64, 0.001, 1.0], [10, 64, 0.001, 3.0], [10, 64, 0.001, 5.0], [10, 64, 0.0001, 0.1], [10, 64, 0.0001, 1.0], [10, 64, 0.0001, 3.0], [10, 64, 0.0001, 5.0], [10, 64, 0.0005, 0.1], [10, 64, 0.0005, 1.0], [10, 64, 0.0005, 3.0], [10, 64, 0.0005, 5.0], [10, 64, 5e-05, 0.1], [10, 64, 5e-05, 1.0], [10, 64, 5e-05, 3.0], [10, 64, 5e-05, 5.0]]


In [123]:
h_list = [10]#[4, 6, 8, 10]
batch_size_list = [128, 256]
lr_list = [5e-5, 1e-4, 5e-4, 1e-3]
l2_lambda_list = [0.1, 1, 3, 5]

d1=source1_tensor.shape[1]  # number of features of the 1st source
d2=source2_tensor.shape[1]  # number of features of the 2nd source

# Make sure the folder exists
output_dir = 'validation_results'
os.makedirs(output_dir, exist_ok=True)

for h, batch_size, lr, l2_lambda in tqdm.tqdm(itertools.product(h_list, batch_size_list, lr_list, l2_lambda_list)):
    # if [h, batch_size, lr, l2_lambda] in existing_results:
    #     continue

    # create dataset
    train_loader, val_loader, test_loader = create_datasets(source1_array, source2_array, target_direct_array, weight_array, batch_size, h)

    print(f"\n📊 h: {h}, batch_size:{batch_size}, lr: {lr}, l2_lambda:{l2_lambda}")
    
    ensemble, losses = train_tme_ensemble(
        train_loader=train_loader,
        val_loader=val_loader,
        d1=d1,  # number of features of the 1st source
        d2=d2,  # number of features of the 2nd source
        h=h,  # lag length
        num_models=3,#20,
        device=device,
        lr=lr,
        weight_decay=0.1,
        l2_lambda=l2_lambda,
        max_epochs=40,
        patience=5,
        adam=True,
        direct_target=True
    )

    y_preds, y_preds_median, rmse, mae = evaluate_tme_ensemble(ensemble, val_loader, device='cpu', direct_target=True)

    results_to_save = {
        'rmse': rmse,
        'mae': mae,
        'val_losses': losses
    }

    # Create a filename based on hyperparameters
    filename = f"h{h}_batch{batch_size}_lr{lr:.0e}_lambda{l2_lambda}.json"
    filepath = os.path.join(output_dir, filename)

    # Save the dictionary to a JSON file
    with open(filepath, 'w') as f:
        json.dump(results_to_save, f, indent=4)

0it [00:00, ?it/s]


📊 h: 10, batch_size:128, lr: 5e-05, l2_lambda:0.1

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 20.3230, Val Loss: 20.3233.      Train RMSE: 26090.2323, Val RMSE: 22021.9251
Epoch 5.      Train Loss: 19.4542, Val Loss: 19.4532.      Train RMSE: 27903.2110, Val RMSE: 43075.3996
Epoch 10.      Train Loss: 7.9277, Val Loss: 8.5330.      Train RMSE: 3268816.9958, Val RMSE: 3268697.9837
Epoch 15.      Train Loss: 3.1595, Val Loss: 4.2051.      Train RMSE: 22458.1866, Val RMSE: 1479.2523
Epoch 20.      Train Loss: 2.6098, Val Loss: 2.7804.      Train RMSE: 37695.2258, Val RMSE: 2092.4844
Epoch 25.      Train Loss: 2.3678, Val Loss: 1.5851.      Train RMSE: 43902.9533, Val RMSE: 3438.0080
Epoch 30.      Train Loss: 2.2986, Val Loss: 1.5700.      Train RMSE: 45554.0259, Val RMSE: 4301.2317
Epoch 35.      Train Loss: 2.1702, Val Loss: 1.4761.      Train RMSE: 100005.8162, Val RMSE: 100143.4057
Epoch 40.      Train Loss: 2.1561, Val Loss: 1.4559.      Train RMSE: 99832.3895, Val RMSE

1it [36:52, 2212.57s/it]


📊 h: 10, batch_size:128, lr: 5e-05, l2_lambda:1

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 37.4464, Val Loss: 37.4467.      Train RMSE: 26090.2360, Val RMSE: 22021.9251
Epoch 5.      Train Loss: 28.7593, Val Loss: 28.7582.      Train RMSE: 27903.2110, Val RMSE: 43075.3996
Epoch 10.      Train Loss: 11.9729, Val Loss: 12.5778.      Train RMSE: 3268951.5609, Val RMSE: 3268907.6153
Epoch 15.      Train Loss: 4.4121, Val Loss: 3.8662.      Train RMSE: 21785.4263, Val RMSE: 67.1153
Epoch 20.      Train Loss: 3.5799, Val Loss: 2.9585.      Train RMSE: 16806.3826, Val RMSE: 579.3373
Epoch 25.      Train Loss: 2.8680, Val Loss: 2.1672.      Train RMSE: 19774.1714, Val RMSE: 546.3184
Epoch 30.      Train Loss: 2.4599, Val Loss: 1.8516.      Train RMSE: 26869.9267, Val RMSE: 641.6730
Epoch 35.      Train Loss: 2.3177, Val Loss: 1.6741.      Train RMSE: 31066.2013, Val RMSE: 688.1283
Epoch 40.      Train Loss: 2.2840, Val Loss: 1.6567.      Train RMSE: 37454.2366, Val RMSE: 1189.92

2it [1:25:31, 2627.97s/it]


📊 h: 10, batch_size:128, lr: 5e-05, l2_lambda:3

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 75.3535, Val Loss: 75.3538.      Train RMSE: 26090.2360, Val RMSE: 22021.9251
Epoch 5.      Train Loss: 47.9568, Val Loss: 47.9558.      Train RMSE: 27903.2110, Val RMSE: 43075.3996
Epoch 10.      Train Loss: 18.9682, Val Loss: 17.4793.      Train RMSE: 17.4283, Val RMSE: 15.0702
Epoch 15.      Train Loss: 8.1663, Val Loss: 7.4971.      Train RMSE: 18588.3224, Val RMSE: 62.5602
Epoch 20.      Train Loss: 5.7918, Val Loss: 5.1531.      Train RMSE: 17447.8973, Val RMSE: 637.5747
Epoch 25.      Train Loss: 3.8199, Val Loss: 3.2093.      Train RMSE: 20664.2919, Val RMSE: 427.4115
Epoch 30.      Train Loss: 2.8159, Val Loss: 2.2201.      Train RMSE: 19626.0496, Val RMSE: 262.1400
Epoch 35.      Train Loss: 2.4558, Val Loss: 1.8258.      Train RMSE: 8819.3782, Val RMSE: 136.6826
Epoch 40.      Train Loss: 2.3823, Val Loss: 1.7386.      Train RMSE: 874.1507, Val RMSE: 73.8644

🌱 Training 

3it [2:45:12, 3611.08s/it]


📊 h: 10, batch_size:128, lr: 5e-05, l2_lambda:5

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 113.3089, Val Loss: 113.3092.      Train RMSE: 26090.2360, Val RMSE: 22021.9251
Epoch 5.      Train Loss: 67.6479, Val Loss: 67.6469.      Train RMSE: 27903.2110, Val RMSE: 43075.3996
Epoch 10.      Train Loss: 26.7123, Val Loss: 25.2234.      Train RMSE: 17.4283, Val RMSE: 15.0702
Epoch 15.      Train Loss: 11.6663, Val Loss: 11.0472.      Train RMSE: 18758.1696, Val RMSE: 22.9881
Epoch 20.      Train Loss: 7.3381, Val Loss: 6.6828.      Train RMSE: 22479.6932, Val RMSE: 733.9451
Epoch 25.      Train Loss: 4.3311, Val Loss: 3.7114.      Train RMSE: 22610.5144, Val RMSE: 374.1846
Epoch 30.      Train Loss: 3.4163, Val Loss: 4.4211.      Train RMSE: 110.4258, Val RMSE: 43.1094
Epoch 35.      Train Loss: 2.4876, Val Loss: 1.8724.      Train RMSE: 295.7268, Val RMSE: 50.5667
Epoch 40.      Train Loss: 2.3693, Val Loss: 1.9622.      Train RMSE: 10516.1175, Val RMSE: 21.0255

🌱 Training

4it [3:27:34, 3189.17s/it]


📊 h: 10, batch_size:128, lr: 0.0001, l2_lambda:0.1

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 20.0597, Val Loss: 20.0593.      Train RMSE: 26090.2201, Val RMSE: 34208.5893
Epoch 5.      Train Loss: 7.9602, Val Loss: 8.5658.      Train RMSE: 3268742.2529, Val RMSE: 3268593.2431
Epoch 10.      Train Loss: 2.6966, Val Loss: 3.0983.      Train RMSE: 28714.0239, Val RMSE: 1022.0173
Epoch 15.      Train Loss: 2.3443, Val Loss: 1.5500.      Train RMSE: 44670.9327, Val RMSE: 3587.4922
Epoch 20.      Train Loss: 2.1648, Val Loss: 1.4708.      Train RMSE: 99124.3169, Val RMSE: 98233.6832
Epoch 25.      Train Loss: 2.1586, Val Loss: 1.4402.      Train RMSE: 88904.5986, Val RMSE: 51431.6634
Epoch 30.      Train Loss: 2.1428, Val Loss: 1.4524.      Train RMSE: 82133.9924, Val RMSE: 34399.0716
Epoch 35.      Train Loss: 2.1442, Val Loss: 1.4364.      Train RMSE: 84985.5720, Val RMSE: 45141.5877
Epoch 40.      Train Loss: 2.1429, Val Loss: 1.4384.      Train RMSE: 85106.9223, Val RMSE:

5it [3:57:51, 2694.15s/it]


📊 h: 10, batch_size:128, lr: 0.0001, l2_lambda:1

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 34.8134, Val Loss: 34.8130.      Train RMSE: 26090.2225, Val RMSE: 34208.5893
Epoch 5.      Train Loss: 12.2928, Val Loss: 12.8979.      Train RMSE: 3268891.8974, Val RMSE: 3268802.8814
Epoch 10.      Train Loss: 3.5740, Val Loss: 4.4330.      Train RMSE: 10049.5763, Val RMSE: 145.0134
Epoch 15.      Train Loss: 2.4996, Val Loss: 1.9465.      Train RMSE: 26217.2047, Val RMSE: 548.5112
Epoch 20.      Train Loss: 2.2894, Val Loss: 1.6604.      Train RMSE: 31327.4699, Val RMSE: 721.7778
Epoch 25.      Train Loss: 2.2829, Val Loss: 1.6360.      Train RMSE: 32949.9907, Val RMSE: 855.6383
Early stopping triggered.

🌱 Training ensemble model 2/3
Epoch 1.      Train Loss: 36.1298, Val Loss: 36.1287.      Train RMSE: 24141.1940, Val RMSE: 50405.7774
Epoch 5.      Train Loss: 13.5955, Val Loss: 14.2008.      Train RMSE: 3268906.8134, Val RMSE: 3268907.6153
Epoch 10.      Train Loss: 4.7686,

6it [4:27:28, 2382.42s/it]


📊 h: 10, batch_size:128, lr: 0.0001, l2_lambda:3

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 67.0944, Val Loss: 67.0940.      Train RMSE: 26090.2213, Val RMSE: 34208.5893
Epoch 5.      Train Loss: 19.0201, Val Loss: 19.6249.      Train RMSE: 3268951.7213, Val RMSE: 3268907.6153
Epoch 10.      Train Loss: 5.7651, Val Loss: 5.1090.      Train RMSE: 22507.3357, Val RMSE: 1095.9491
Epoch 15.      Train Loss: 2.9742, Val Loss: 2.4107.      Train RMSE: 1794.8229, Val RMSE: 90.6431
Epoch 20.      Train Loss: 2.4096, Val Loss: 1.8190.      Train RMSE: 2794.7941, Val RMSE: 102.6285
Epoch 25.      Train Loss: 2.3895, Val Loss: 1.7251.      Train RMSE: 651.8258, Val RMSE: 65.1787
Epoch 30.      Train Loss: 2.3975, Val Loss: 1.7203.      Train RMSE: 385.1858, Val RMSE: 55.1889
Epoch 35.      Train Loss: 2.3828, Val Loss: 1.7323.      Train RMSE: 621.7877, Val RMSE: 66.4217
Early stopping triggered.

🌱 Training ensemble model 2/3
Epoch 1.      Train Loss: 71.0599, Val Loss: 71.0588.  

7it [4:59:46, 2237.02s/it]


📊 h: 10, batch_size:128, lr: 0.0001, l2_lambda:5

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 99.5438, Val Loss: 99.5434.      Train RMSE: 26090.2213, Val RMSE: 34208.5893
Epoch 5.      Train Loss: 26.7187, Val Loss: 27.3235.      Train RMSE: 3268951.7213, Val RMSE: 3268907.6153
Epoch 10.      Train Loss: 6.4684, Val Loss: 5.7901.      Train RMSE: 21082.1631, Val RMSE: 518.4248
Epoch 15.      Train Loss: 3.1755, Val Loss: 2.5732.      Train RMSE: 14723.0258, Val RMSE: 191.5680
Epoch 20.      Train Loss: 2.4612, Val Loss: 1.8810.      Train RMSE: 66.6935, Val RMSE: 32.6069
Epoch 25.      Train Loss: 2.4032, Val Loss: 1.9664.      Train RMSE: 21.8992, Val RMSE: 22.8434
Early stopping triggered.

🌱 Training ensemble model 2/3
Epoch 1.      Train Loss: 106.1529, Val Loss: 106.1519.      Train RMSE: 24141.1940, Val RMSE: 50405.7774
Epoch 5.      Train Loss: 33.8267, Val Loss: 34.4312.      Train RMSE: 3268769.5198, Val RMSE: 3268907.6153
Epoch 10.      Train Loss: 11.1050, Val 

8it [5:27:26, 2053.43s/it]


📊 h: 10, batch_size:128, lr: 0.0005, l2_lambda:0.1

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 7.9921, Val Loss: 8.5970.      Train RMSE: 3268742.2529, Val RMSE: 3268697.9837
Epoch 5.      Train Loss: 2.4916, Val Loss: 2.3034.      Train RMSE: 52713.1285, Val RMSE: 7405.5568
Epoch 10.      Train Loss: 2.4819, Val Loss: 2.2442.      Train RMSE: 64684.3373, Val RMSE: 22702.5010
Epoch 15.      Train Loss: 18.4384, Val Loss: 18.4185.      Train RMSE: 17.4302, Val RMSE: 15.0717
Early stopping triggered.

🌱 Training ensemble model 2/3
Epoch 1.      Train Loss: 19.0588, Val Loss: 19.0657.      Train RMSE: 94884.2900, Val RMSE: 50449.0022
Epoch 5.      Train Loss: 2.4968, Val Loss: 2.2594.      Train RMSE: 54626.8142, Val RMSE: 11167.2496
Epoch 10.      Train Loss: 2.4818, Val Loss: 2.2397.      Train RMSE: 53241.3530, Val RMSE: 10699.7413
Early stopping triggered.

🌱 Training ensemble model 3/3
Epoch 1.      Train Loss: 3.0737, Val Loss: 2.9015.      Train RMSE: 276555.4267, Val

9it [5:42:05, 1686.25s/it]


📊 h: 10, batch_size:128, lr: 0.0005, l2_lambda:1

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 12.6594, Val Loss: 13.2644.      Train RMSE: 3268742.2529, Val RMSE: 3268697.9837
Epoch 5.      Train Loss: 2.8297, Val Loss: 3.6512.      Train RMSE: 89.7619, Val RMSE: 61.7254
Epoch 10.      Train Loss: 2.8019, Val Loss: 3.2421.      Train RMSE: 449.1404, Val RMSE: 132.0141
Epoch 15.      Train Loss: 3.5050, Val Loss: 4.5740.      Train RMSE: 16.9418, Val RMSE: 14.5999
Early stopping triggered.

🌱 Training ensemble model 2/3
Epoch 1.      Train Loss: 24.8824, Val Loss: 24.8893.      Train RMSE: 94884.3007, Val RMSE: 50449.0174
Epoch 5.      Train Loss: 7.5907, Val Loss: 8.1959.      Train RMSE: 3269011.7044, Val RMSE: 3269012.5063
Early stopping triggered.

🌱 Training ensemble model 3/3
Epoch 1.      Train Loss: 5.1148, Val Loss: 5.3030.      Train RMSE: 126201.3994, Val RMSE: 127366.2511
Epoch 5.      Train Loss: 2.4270, Val Loss: 2.0709.      Train RMSE: 37.8255, Val RMSE: 25.

10it [5:52:47, 1363.79s/it]


📊 h: 10, batch_size:128, lr: 0.0005, l2_lambda:3

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 18.7746, Val Loss: 17.2867.      Train RMSE: 17.4283, Val RMSE: 15.0702
Epoch 5.      Train Loss: 2.3943, Val Loss: 1.7685.      Train RMSE: 46.7717, Val RMSE: 28.6892
Epoch 10.      Train Loss: 2.9491, Val Loss: 3.8602.      Train RMSE: 47.0645, Val RMSE: 30.9748
Early stopping triggered.

🌱 Training ensemble model 2/3
Epoch 1.      Train Loss: 23.2210, Val Loss: 23.8261.      Train RMSE: 3268891.8974, Val RMSE: 3268907.6153
Epoch 5.      Train Loss: 3.0263, Val Loss: 3.9298.      Train RMSE: 47.3855, Val RMSE: 34.4612
Epoch 10.      Train Loss: 2.4148, Val Loss: 1.8757.      Train RMSE: 30.6925, Val RMSE: 19.8919
Epoch 15.      Train Loss: 2.9485, Val Loss: 3.8965.      Train RMSE: 39.1634, Val RMSE: 25.3606
Early stopping triggered.

🌱 Training ensemble model 3/3
Epoch 1.      Train Loss: 7.4505, Val Loss: 8.0912.      Train RMSE: 52593.4457, Val RMSE: 43858.6559
Epoch 5.      

11it [6:05:12, 1174.61s/it]


📊 h: 10, batch_size:128, lr: 0.0005, l2_lambda:5

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 26.3883, Val Loss: 24.9004.      Train RMSE: 17.4283, Val RMSE: 15.0702
Epoch 5.      Train Loss: 2.5254, Val Loss: 2.0483.      Train RMSE: 39.6204, Val RMSE: 25.7379
Epoch 10.      Train Loss: 2.4492, Val Loss: 1.8228.      Train RMSE: 24.8438, Val RMSE: 16.9739
Epoch 15.      Train Loss: 2.4804, Val Loss: 1.9549.      Train RMSE: 25.8838, Val RMSE: 17.6929
Early stopping triggered.

🌱 Training ensemble model 2/3
Epoch 1.      Train Loss: 33.7198, Val Loss: 34.3249.      Train RMSE: 3268891.8974, Val RMSE: 3268907.6153
Epoch 5.      Train Loss: 3.1562, Val Loss: 2.5029.      Train RMSE: 29.8565, Val RMSE: 23.4263
Epoch 10.      Train Loss: 2.5283, Val Loss: 2.0927.      Train RMSE: 24.7007, Val RMSE: 17.3693
Epoch 15.      Train Loss: 2.4500, Val Loss: 1.8388.      Train RMSE: 32.4772, Val RMSE: 21.9345
Early stopping triggered.

🌱 Training ensemble model 3/3
Epoch 1.      Train

12it [6:19:44, 1082.38s/it]


📊 h: 10, batch_size:128, lr: 0.001, l2_lambda:0.1

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 7.5986, Val Loss: 8.2035.      Train RMSE: 3268966.7973, Val RMSE: 3269012.5063
Epoch 5.      Train Loss: 2.4818, Val Loss: 2.2336.      Train RMSE: 50471.6646, Val RMSE: 7450.1976
Epoch 10.      Train Loss: 7.4746, Val Loss: 8.0798.      Train RMSE: 3269011.7044, Val RMSE: 3269012.5063
Early stopping triggered.

🌱 Training ensemble model 2/3
Epoch 1.      Train Loss: 7.7056, Val Loss: 8.3108.      Train RMSE: 3268996.6285, Val RMSE: 3269012.5063
Epoch 5.      Train Loss: 2.4819, Val Loss: 2.2567.      Train RMSE: 49164.4750, Val RMSE: 6030.3585
Epoch 10.      Train Loss: 2.4821, Val Loss: 2.2313.      Train RMSE: 55843.7019, Val RMSE: 13635.9935
Early stopping triggered.

🌱 Training ensemble model 3/3
Epoch 1.      Train Loss: 7.6509, Val Loss: 8.2564.      Train RMSE: 3268951.7213, Val RMSE: 3268907.6153
Epoch 5.      Train Loss: 7.4787, Val Loss: 8.0840.      Train RMSE: 32690

13it [6:31:23, 966.37s/it] 


📊 h: 10, batch_size:128, lr: 0.001, l2_lambda:1

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 8.7373, Val Loss: 9.3422.      Train RMSE: 3268966.7973, Val RMSE: 3269012.5063
Epoch 5.      Train Loss: 7.5228, Val Loss: 8.1280.      Train RMSE: 3269011.3836, Val RMSE: 3269012.5063
Early stopping triggered.

🌱 Training ensemble model 2/3
Epoch 1.      Train Loss: 9.8032, Val Loss: 10.4083.      Train RMSE: 3268996.6285, Val RMSE: 3269012.5063
Epoch 5.      Train Loss: 2.8016, Val Loss: 3.1877.      Train RMSE: 764.8955, Val RMSE: 146.9589
Epoch 10.      Train Loss: 2.8034, Val Loss: 3.1550.      Train RMSE: 1195.3352, Val RMSE: 156.7029
Epoch 15.      Train Loss: 2.8030, Val Loss: 3.1582.      Train RMSE: 21386.8677, Val RMSE: 275.4210
Early stopping triggered.

🌱 Training ensemble model 3/3
Epoch 1.      Train Loss: 3.0937, Val Loss: 3.6035.      Train RMSE: 17291.7201, Val RMSE: 406.2484
Epoch 5.      Train Loss: 7.4134, Val Loss: 5.9227.      Train RMSE: 17.4283, Val RMSE: 

14it [6:43:07, 887.08s/it]


📊 h: 10, batch_size:128, lr: 0.001, l2_lambda:3

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 8.7314, Val Loss: 7.2407.      Train RMSE: 17.4283, Val RMSE: 15.0702
Epoch 5.      Train Loss: 2.9510, Val Loss: 3.8524.      Train RMSE: 50.0820, Val RMSE: 32.9926
Epoch 10.      Train Loss: 2.9890, Val Loss: 3.8583.      Train RMSE: 50.8430, Val RMSE: 36.1062
Early stopping triggered.

🌱 Training ensemble model 2/3
Epoch 1.      Train Loss: 6.7570, Val Loss: 7.5414.      Train RMSE: 69.1503, Val RMSE: 66.3323
Epoch 5.      Train Loss: 7.3565, Val Loss: 5.8659.      Train RMSE: 17.4283, Val RMSE: 15.0702
Early stopping triggered.

🌱 Training ensemble model 3/3
Epoch 1.      Train Loss: 7.8345, Val Loss: 6.3440.      Train RMSE: 13992.4168, Val RMSE: 15.0702
Epoch 5.      Train Loss: 2.9486, Val Loss: 3.8687.      Train RMSE: 44.8619, Val RMSE: 29.8554
Epoch 10.      Train Loss: 7.5131, Val Loss: 8.1183.      Train RMSE: 3269011.7044, Val RMSE: 3269012.5063
Early stopping triggere

15it [6:57:28, 879.14s/it]


📊 h: 10, batch_size:128, lr: 0.001, l2_lambda:5

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 8.5913, Val Loss: 7.1006.      Train RMSE: 17.4283, Val RMSE: 15.0702
Epoch 5.      Train Loss: 3.0338, Val Loss: 4.0063.      Train RMSE: 35.4024, Val RMSE: 23.1083
Epoch 10.      Train Loss: 3.0353, Val Loss: 3.9985.      Train RMSE: 37.1176, Val RMSE: 24.0744
Epoch 15.      Train Loss: 3.0328, Val Loss: 4.0245.      Train RMSE: 31.8512, Val RMSE: 20.6032
Early stopping triggered.

🌱 Training ensemble model 2/3
Epoch 1.      Train Loss: 8.7031, Val Loss: 9.5352.      Train RMSE: 60.0884, Val RMSE: 57.8389
Epoch 5.      Train Loss: 3.0327, Val Loss: 4.0103.      Train RMSE: 34.2949, Val RMSE: 22.1758
Epoch 10.      Train Loss: 2.9925, Val Loss: 2.1175.      Train RMSE: 17.1086, Val RMSE: 14.7654
Epoch 15.      Train Loss: 3.0345, Val Loss: 4.0380.      Train RMSE: 29.5189, Val RMSE: 19.2690
Early stopping triggered.

🌱 Training ensemble model 3/3
Epoch 1.      Train Loss: 3.4234, 

16it [7:23:08, 1078.14s/it]


📊 h: 10, batch_size:256, lr: 5e-05, l2_lambda:0.1

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 20.4715, Val Loss: 20.4719.      Train RMSE: 26090.1379, Val RMSE: 22021.9251
Epoch 5.      Train Loss: 19.9388, Val Loss: 19.9384.      Train RMSE: 26090.2078, Val RMSE: 34208.5893
Epoch 10.      Train Loss: 19.4532, Val Loss: 19.4522.      Train RMSE: 27903.2110, Val RMSE: 43075.3996
Epoch 15.      Train Loss: 8.0458, Val Loss: 6.5559.      Train RMSE: 68.9020, Val RMSE: 15.0702
Epoch 20.      Train Loss: 7.8074, Val Loss: 6.3186.      Train RMSE: 17.4283, Val RMSE: 15.0702
Epoch 25.      Train Loss: 7.6402, Val Loss: 6.1506.      Train RMSE: 17.4283, Val RMSE: 15.0702
Epoch 30.      Train Loss: 2.5927, Val Loss: 1.8788.      Train RMSE: 22592.4759, Val RMSE: 249.7546
Epoch 35.      Train Loss: 2.3997, Val Loss: 1.7049.      Train RMSE: 20092.0577, Val RMSE: 699.6084
Epoch 40.      Train Loss: 2.3213, Val Loss: 1.6696.      Train RMSE: 33497.8256, Val RMSE: 2312.7356

🌱 Trainin

17it [8:20:53, 1795.90s/it]


📊 h: 10, batch_size:256, lr: 5e-05, l2_lambda:1

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 38.9321, Val Loss: 38.9324.      Train RMSE: 26090.1404, Val RMSE: 22021.9251
Epoch 5.      Train Loss: 33.6045, Val Loss: 33.6041.      Train RMSE: 26090.2090, Val RMSE: 34208.5893
Epoch 10.      Train Loss: 28.7497, Val Loss: 28.7487.      Train RMSE: 27903.2110, Val RMSE: 43075.3996
Epoch 15.      Train Loss: 14.2490, Val Loss: 12.7597.      Train RMSE: 68.9020, Val RMSE: 15.0702
Epoch 20.      Train Loss: 11.8456, Val Loss: 10.3555.      Train RMSE: 17.4283, Val RMSE: 15.0702
Epoch 25.      Train Loss: 10.2036, Val Loss: 8.7120.      Train RMSE: 17.4283, Val RMSE: 15.0702
Epoch 30.      Train Loss: 4.3794, Val Loss: 3.7135.      Train RMSE: 20863.6771, Val RMSE: 67.2791
Epoch 35.      Train Loss: 4.0092, Val Loss: 3.3841.      Train RMSE: 20025.0587, Val RMSE: 860.8380
Epoch 40.      Train Loss: 3.5470, Val Loss: 2.9170.      Train RMSE: 17794.6347, Val RMSE: 593.6912

🌱 Traini

18it [8:56:55, 1905.94s/it]


📊 h: 10, batch_size:256, lr: 5e-05, l2_lambda:3

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 79.9163, Val Loss: 79.9167.      Train RMSE: 26090.1404, Val RMSE: 22021.9251
Epoch 5.      Train Loss: 63.3188, Val Loss: 63.3184.      Train RMSE: 26090.2078, Val RMSE: 34208.5893
Epoch 10.      Train Loss: 47.9267, Val Loss: 47.9257.      Train RMSE: 27903.2110, Val RMSE: 43075.3996
Epoch 15.      Train Loss: 26.2846, Val Loss: 26.8895.      Train RMSE: 3268787.0026, Val RMSE: 3268697.9837
Epoch 20.      Train Loss: 19.0697, Val Loss: 19.6734.      Train RMSE: 3268360.3315, Val RMSE: 3268907.6153
Epoch 25.      Train Loss: 14.4047, Val Loss: 15.0092.      Train RMSE: 3268981.5526, Val RMSE: 3269012.5063
Epoch 30.      Train Loss: 8.1657, Val Loss: 9.2370.      Train RMSE: 31469.9125, Val RMSE: 35929.1000
Epoch 35.      Train Loss: 6.5401, Val Loss: 5.8327.      Train RMSE: 18412.5470, Val RMSE: 666.0637
Epoch 40.      Train Loss: 5.3280, Val Loss: 4.6889.      Train RMSE: 17638.

19it [9:30:11, 1932.96s/it]


📊 h: 10, batch_size:256, lr: 5e-05, l2_lambda:5

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 120.9137, Val Loss: 120.9140.      Train RMSE: 26090.1404, Val RMSE: 22021.9251
Epoch 5.      Train Loss: 93.2512, Val Loss: 93.2508.      Train RMSE: 26090.2078, Val RMSE: 34208.5893
Epoch 10.      Train Loss: 67.5977, Val Loss: 67.5966.      Train RMSE: 27903.2110, Val RMSE: 43075.3996
Epoch 15.      Train Loss: 38.8256, Val Loss: 39.4305.      Train RMSE: 3268787.0026, Val RMSE: 3268697.9837
Epoch 20.      Train Loss: 26.8009, Val Loss: 27.4045.      Train RMSE: 3268357.6044, Val RMSE: 3268907.6153
Epoch 25.      Train Loss: 18.9483, Val Loss: 19.5528.      Train RMSE: 3268981.5526, Val RMSE: 3269012.5063
Epoch 30.      Train Loss: 10.4985, Val Loss: 9.8565.      Train RMSE: 6037.0185, Val RMSE: 47.7102
Epoch 35.      Train Loss: 8.7196, Val Loss: 8.0014.      Train RMSE: 25068.4005, Val RMSE: 1462.6384
Epoch 40.      Train Loss: 6.6265, Val Loss: 5.9883.      Train RMSE: 27264.

20it [10:04:14, 1965.85s/it]


📊 h: 10, batch_size:256, lr: 0.0001, l2_lambda:0.1

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 20.3227, Val Loss: 20.3230.      Train RMSE: 26090.2348, Val RMSE: 22021.9251
Epoch 5.      Train Loss: 19.4680, Val Loss: 19.4670.      Train RMSE: 27903.2110, Val RMSE: 43075.3996
Epoch 10.      Train Loss: 7.9592, Val Loss: 8.5643.      Train RMSE: 3268742.0925, Val RMSE: 3268593.2431
Epoch 15.      Train Loss: 7.6779, Val Loss: 8.2823.      Train RMSE: 3268921.8897, Val RMSE: 3268907.6153
Epoch 20.      Train Loss: 2.6564, Val Loss: 2.9457.      Train RMSE: 31053.6826, Val RMSE: 1407.4275
Epoch 25.      Train Loss: 2.3768, Val Loss: 1.6415.      Train RMSE: 40601.0018, Val RMSE: 2914.3982
Epoch 30.      Train Loss: 2.3363, Val Loss: 1.5751.      Train RMSE: 45607.0451, Val RMSE: 4091.9438
Epoch 35.      Train Loss: 2.1755, Val Loss: 1.4966.      Train RMSE: 101192.8193, Val RMSE: 104961.6975
Epoch 40.      Train Loss: 2.1586, Val Loss: 1.4518.      Train RMSE: 104475.4180, V

21it [10:36:15, 1952.63s/it]


📊 h: 10, batch_size:256, lr: 0.0001, l2_lambda:1

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 37.4434, Val Loss: 37.4437.      Train RMSE: 26090.2360, Val RMSE: 22021.9251
Epoch 5.      Train Loss: 28.8976, Val Loss: 28.8966.      Train RMSE: 27903.2110, Val RMSE: 43075.3996
Epoch 10.      Train Loss: 12.2835, Val Loss: 12.8881.      Train RMSE: 3268891.8974, Val RMSE: 3268802.8814
Epoch 15.      Train Loss: 9.5001, Val Loss: 10.1047.      Train RMSE: 3268966.7973, Val RMSE: 3269012.5063
Epoch 20.      Train Loss: 3.2041, Val Loss: 2.6407.      Train RMSE: 13571.7613, Val RMSE: 341.7332
Epoch 25.      Train Loss: 2.8472, Val Loss: 2.1797.      Train RMSE: 20431.2999, Val RMSE: 651.9859
Epoch 30.      Train Loss: 2.5018, Val Loss: 1.9199.      Train RMSE: 26128.0857, Val RMSE: 612.4619
Epoch 35.      Train Loss: 2.3426, Val Loss: 1.6718.      Train RMSE: 28985.3348, Val RMSE: 618.3293
Epoch 40.      Train Loss: 2.8210, Val Loss: 3.6354.      Train RMSE: 2945.5938, Val RMSE:

22it [11:04:13, 1870.21s/it]


📊 h: 10, batch_size:256, lr: 0.0001, l2_lambda:3

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 75.3433, Val Loss: 75.3436.      Train RMSE: 26090.2360, Val RMSE: 22021.9251
Epoch 5.      Train Loss: 47.9776, Val Loss: 47.9766.      Train RMSE: 27903.2110, Val RMSE: 43075.3996
Epoch 10.      Train Loss: 19.0221, Val Loss: 19.6265.      Train RMSE: 3268951.5609, Val RMSE: 3268907.6153
Epoch 15.      Train Loss: 6.8750, Val Loss: 6.1620.      Train RMSE: 13861.7436, Val RMSE: 15.2620
Epoch 20.      Train Loss: 5.6992, Val Loss: 5.1193.      Train RMSE: 21446.9847, Val RMSE: 877.1610
Epoch 25.      Train Loss: 4.0690, Val Loss: 3.3908.      Train RMSE: 20423.8418, Val RMSE: 447.4381
Epoch 30.      Train Loss: 2.8673, Val Loss: 2.2515.      Train RMSE: 12576.2678, Val RMSE: 187.8165
Epoch 35.      Train Loss: 2.4380, Val Loss: 1.7933.      Train RMSE: 3446.2615, Val RMSE: 104.5026
Epoch 40.      Train Loss: 2.3877, Val Loss: 1.7483.      Train RMSE: 1585.0720, Val RMSE: 86.6188


23it [11:42:03, 1990.17s/it]


📊 h: 10, batch_size:256, lr: 0.0001, l2_lambda:5

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 113.2919, Val Loss: 113.2923.      Train RMSE: 26090.2360, Val RMSE: 22021.9251
Epoch 5.      Train Loss: 67.6825, Val Loss: 67.6815.      Train RMSE: 27903.2110, Val RMSE: 43075.3996
Epoch 10.      Train Loss: 26.6634, Val Loss: 27.2678.      Train RMSE: 3268951.5609, Val RMSE: 3268907.6153
Epoch 15.      Train Loss: 9.4503, Val Loss: 10.5619.      Train RMSE: 2292.0388, Val RMSE: 18.9440
Epoch 20.      Train Loss: 6.1481, Val Loss: 5.4778.      Train RMSE: 21080.5981, Val RMSE: 420.3572
Epoch 25.      Train Loss: 4.3588, Val Loss: 3.6982.      Train RMSE: 22005.0147, Val RMSE: 319.2543
Epoch 30.      Train Loss: 3.2611, Val Loss: 4.2600.      Train RMSE: 66.4326, Val RMSE: 35.8851
Epoch 35.      Train Loss: 2.4946, Val Loss: 1.8560.      Train RMSE: 445.8572, Val RMSE: 55.7967
Epoch 40.      Train Loss: 2.4590, Val Loss: 1.8008.      Train RMSE: 65.6301, Val RMSE: 30.5736

🌱 Tra

24it [12:09:27, 1886.21s/it]


📊 h: 10, batch_size:256, lr: 0.0005, l2_lambda:0.1

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 19.4769, Val Loss: 19.4758.      Train RMSE: 26090.1943, Val RMSE: 43075.3996
Epoch 5.      Train Loss: 7.5277, Val Loss: 8.1318.      Train RMSE: 3268936.6451, Val RMSE: 3268907.6153
Epoch 10.      Train Loss: 2.4950, Val Loss: 2.3390.      Train RMSE: 52927.6179, Val RMSE: 8533.6023
Epoch 15.      Train Loss: 2.4813, Val Loss: 2.2348.      Train RMSE: 54719.4643, Val RMSE: 13534.6967
Early stopping triggered.

🌱 Training ensemble model 2/3
Epoch 1.      Train Loss: 19.6040, Val Loss: 19.6031.      Train RMSE: 27903.1124, Val RMSE: 50405.7774
Epoch 5.      Train Loss: 2.8073, Val Loss: 3.3156.      Train RMSE: 51340.5870, Val RMSE: 10952.5811
Epoch 10.      Train Loss: 7.3876, Val Loss: 5.8981.      Train RMSE: 17.4283, Val RMSE: 15.0702
Epoch 15.      Train Loss: 2.4820, Val Loss: 2.2452.      Train RMSE: 53005.0636, Val RMSE: 11516.1237
Epoch 20.      Train Loss: 2.4812, Val 

25it [12:18:48, 1488.58s/it]


📊 h: 10, batch_size:256, lr: 0.0005, l2_lambda:1

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 28.9853, Val Loss: 28.9842.      Train RMSE: 26090.1943, Val RMSE: 43075.3996
Epoch 5.      Train Loss: 3.3869, Val Loss: 4.2703.      Train RMSE: 11911.0060, Val RMSE: 233.9440
Epoch 10.      Train Loss: 2.8021, Val Loss: 3.2214.      Train RMSE: 512.8055, Val RMSE: 135.4022
Epoch 15.      Train Loss: 2.3389, Val Loss: 1.8477.      Train RMSE: 28688.3123, Val RMSE: 535.2252
Epoch 20.      Train Loss: 2.3134, Val Loss: 1.7778.      Train RMSE: 39663.6031, Val RMSE: 1400.6666
Early stopping triggered.

🌱 Training ensemble model 2/3
Epoch 1.      Train Loss: 30.2586, Val Loss: 30.2576.      Train RMSE: 27903.1124, Val RMSE: 50405.7774
Epoch 5.      Train Loss: 4.1680, Val Loss: 4.8576.      Train RMSE: 590.4712, Val RMSE: 109.6564
Epoch 10.      Train Loss: 7.4640, Val Loss: 5.9745.      Train RMSE: 17.4283, Val RMSE: 15.0702
Early stopping triggered.

🌱 Training ensemble model 3/3


26it [12:25:41, 1165.88s/it]


📊 h: 10, batch_size:256, lr: 0.0005, l2_lambda:3

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 47.9948, Val Loss: 47.9938.      Train RMSE: 27903.2133, Val RMSE: 43075.3996
Epoch 5.      Train Loss: 4.5312, Val Loss: 3.8823.      Train RMSE: 17745.6239, Val RMSE: 36.2151
Epoch 10.      Train Loss: 2.7375, Val Loss: 2.1274.      Train RMSE: 12765.1664, Val RMSE: 169.3573
Epoch 15.      Train Loss: 2.3858, Val Loss: 1.7349.      Train RMSE: 1094.3165, Val RMSE: 75.5994
Epoch 20.      Train Loss: 2.9476, Val Loss: 3.8805.      Train RMSE: 41.8192, Val RMSE: 27.1829
Early stopping triggered.

🌱 Training ensemble model 2/3
Epoch 1.      Train Loss: 51.9672, Val Loss: 51.9662.      Train RMSE: 27903.1124, Val RMSE: 50405.7774
Epoch 5.      Train Loss: 5.0850, Val Loss: 5.8867.      Train RMSE: 64.5910, Val RMSE: 59.2530
Epoch 10.      Train Loss: 3.0670, Val Loss: 2.5271.      Train RMSE: 38.7578, Val RMSE: 31.5656
Epoch 15.      Train Loss: 2.9613, Val Loss: 3.8828.      Train R

27it [12:34:49, 980.35s/it] 


📊 h: 10, batch_size:256, lr: 0.0005, l2_lambda:5

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 67.7112, Val Loss: 67.7102.      Train RMSE: 27903.2133, Val RMSE: 43075.3996
Epoch 5.      Train Loss: 3.5055, Val Loss: 3.0296.      Train RMSE: 1600.9967, Val RMSE: 15.0414
Epoch 10.      Train Loss: 2.4568, Val Loss: 1.8636.      Train RMSE: 26.0961, Val RMSE: 17.8783
Epoch 15.      Train Loss: 3.0321, Val Loss: 4.0252.      Train RMSE: 31.4933, Val RMSE: 20.3810
Early stopping triggered.

🌱 Training ensemble model 2/3
Epoch 1.      Train Loss: 74.3318, Val Loss: 74.3309.      Train RMSE: 27903.1124, Val RMSE: 50405.7774
Epoch 5.      Train Loss: 5.7140, Val Loss: 6.5870.      Train RMSE: 52.9187, Val RMSE: 47.6646
Epoch 10.      Train Loss: 3.1935, Val Loss: 2.5691.      Train RMSE: 29.8331, Val RMSE: 23.4061
Epoch 15.      Train Loss: 2.4860, Val Loss: 1.7901.      Train RMSE: 23.8471, Val RMSE: 16.3296
Epoch 20.      Train Loss: 3.0320, Val Loss: 4.0152.      Train RMSE: 33

28it [12:43:31, 842.86s/it]


📊 h: 10, batch_size:256, lr: 0.001, l2_lambda:0.1

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 7.9965, Val Loss: 8.6016.      Train RMSE: 3268936.6451, Val RMSE: 3268802.8814
Epoch 5.      Train Loss: 2.4820, Val Loss: 2.2428.      Train RMSE: 52105.8639, Val RMSE: 8694.7239
Epoch 10.      Train Loss: 7.4729, Val Loss: 8.0777.      Train RMSE: 3269011.7044, Val RMSE: 3269012.5063
Early stopping triggered.

🌱 Training ensemble model 2/3
Epoch 1.      Train Loss: 8.1214, Val Loss: 8.7261.      Train RMSE: 3268906.6530, Val RMSE: 3268907.6153
Epoch 5.      Train Loss: 2.5219, Val Loss: 2.4728.      Train RMSE: 47562.4161, Val RMSE: 4374.8554
Epoch 10.      Train Loss: 2.4812, Val Loss: 2.2308.      Train RMSE: 56528.3147, Val RMSE: 13143.9665
Early stopping triggered.

🌱 Training ensemble model 3/3
Epoch 1.      Train Loss: 3.0137, Val Loss: 2.8409.      Train RMSE: 252497.6420, Val RMSE: 214115.2880
Epoch 5.      Train Loss: 7.5089, Val Loss: 8.1136.      Train RMSE: 3268981

29it [12:48:05, 672.19s/it]


📊 h: 10, batch_size:256, lr: 0.001, l2_lambda:1

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 12.6686, Val Loss: 13.2736.      Train RMSE: 3268921.7293, Val RMSE: 3268802.8814
Epoch 5.      Train Loss: 2.8121, Val Loss: 3.4160.      Train RMSE: 517.9571, Val RMSE: 115.5022
Epoch 10.      Train Loss: 2.8081, Val Loss: 3.1553.      Train RMSE: 626.7453, Val RMSE: 146.4218
Early stopping triggered.

🌱 Training ensemble model 2/3
Epoch 1.      Train Loss: 13.9501, Val Loss: 14.5545.      Train RMSE: 3268861.7445, Val RMSE: 3268802.8814
Epoch 5.      Train Loss: 7.5284, Val Loss: 6.0372.      Train RMSE: 17.4283, Val RMSE: 15.0702
Epoch 10.      Train Loss: 2.8012, Val Loss: 3.1983.      Train RMSE: 725.7118, Val RMSE: 143.4726
Epoch 15.      Train Loss: 7.3632, Val Loss: 5.8718.      Train RMSE: 17.4283, Val RMSE: 15.0702
Early stopping triggered.

🌱 Training ensemble model 3/3
Epoch 1.      Train Loss: 5.0277, Val Loss: 5.2115.      Train RMSE: 117365.3736, Val RMSE: 115006.10

30it [12:55:57, 612.19s/it]


📊 h: 10, batch_size:256, lr: 0.001, l2_lambda:3

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 18.8592, Val Loss: 19.4642.      Train RMSE: 3268936.6451, Val RMSE: 3268802.8814
Epoch 5.      Train Loss: 7.3868, Val Loss: 5.8960.      Train RMSE: 17.4283, Val RMSE: 15.0702
Epoch 10.      Train Loss: 2.9485, Val Loss: 3.8625.      Train RMSE: 46.0582, Val RMSE: 30.2357
Epoch 15.      Train Loss: 2.9636, Val Loss: 3.8672.      Train RMSE: 49.6469, Val RMSE: 34.7105
Early stopping triggered.

🌱 Training ensemble model 2/3
Epoch 1.      Train Loss: 23.1928, Val Loss: 23.7974.      Train RMSE: 3268891.8974, Val RMSE: 3268907.6153
Epoch 5.      Train Loss: 3.0498, Val Loss: 3.9542.      Train RMSE: 46.7751, Val RMSE: 34.4832
Epoch 10.      Train Loss: 2.9487, Val Loss: 3.8797.      Train RMSE: 42.2306, Val RMSE: 27.4681
Epoch 15.      Train Loss: 2.9508, Val Loss: 3.8894.      Train RMSE: 40.5996, Val RMSE: 26.3837
Early stopping triggered.

🌱 Training ensemble model 3/3
Epoch 1.  

31it [13:02:14, 541.68s/it]


📊 h: 10, batch_size:256, lr: 0.001, l2_lambda:5

🌱 Training ensemble model 1/3
Epoch 1.      Train Loss: 26.4498, Val Loss: 27.0548.      Train RMSE: 3268936.6451, Val RMSE: 3268802.8814
Epoch 5.      Train Loss: 7.4777, Val Loss: 8.0823.      Train RMSE: 3269011.3836, Val RMSE: 3269012.5063
Epoch 10.      Train Loss: 3.0335, Val Loss: 4.0059.      Train RMSE: 35.0410, Val RMSE: 22.7617
Epoch 15.      Train Loss: 3.0320, Val Loss: 4.0233.      Train RMSE: 31.8013, Val RMSE: 20.5744
Early stopping triggered.

🌱 Training ensemble model 2/3
Epoch 1.      Train Loss: 33.6719, Val Loss: 34.2765.      Train RMSE: 3268891.8974, Val RMSE: 3268907.6153
Epoch 5.      Train Loss: 3.0821, Val Loss: 4.0470.      Train RMSE: 34.8480, Val RMSE: 24.0935
Epoch 10.      Train Loss: 3.0608, Val Loss: 4.0002.      Train RMSE: 37.6673, Val RMSE: 25.3910
Epoch 15.      Train Loss: 3.0330, Val Loss: 4.0205.      Train RMSE: 31.9824, Val RMSE: 20.6636
Early stopping triggered.

🌱 Training ensemble model 3/3


32it [13:09:00, 1479.39s/it]


In [124]:
# Directory containing your saved results
input_dir = 'validation_results'

# Prepare a list to collect all results
all_results = []
existing_results = []

# Loop over all files in the directory
for filename in os.listdir(input_dir):
    if filename.endswith('.json'):
        filepath = os.path.join(input_dir, filename)
        
        # Open and load the JSON file
        with open(filepath, 'r') as f:
            result = json.load(f)
        
        # Parse hyperparameters from the filename
        name_parts = filename.replace('.json', '').split('_')
        h = int(name_parts[0][1:])  # strip the 'h'
        batch_size = int(name_parts[1][5:])  # strip 'batch'
        lr = float(name_parts[2][2:].replace('e', 'e'))  # scientific notation stays
        l2_lambda = float(name_parts[3][6:])  # strip 'lambda'

        existing_results.append([h,batch_size,lr,l2_lambda])
        
        # Combine hyperparameters and results
        entry = {
            'h': h,
            'batch_size': batch_size,
            'lr': lr,
            'l2_lambda': l2_lambda,
            **result  # unpack the RMSE, MAE, etc.
        }
        all_results.append(entry)

# Now `all_results` is a list of dictionaries
print(all_results)

[{'h': 10, 'batch_size': 128, 'lr': 0.001, 'l2_lambda': 0.1, 'rmse': 1090049.2927131238, 'mae': 1090033.8120599193, 'val_losses': [2.2335792174104783, 2.2248938916648022, 2.2156353583101365]}, {'h': 10, 'batch_size': 128, 'lr': 0.001, 'l2_lambda': 1.0, 'rmse': 147.9306646846928, 'mae': 91.75115358035862, 'val_losses': [3.205442184307536, 3.155019033150595, 3.1157379365358198]}, {'h': 10, 'batch_size': 128, 'lr': 0.001, 'l2_lambda': 3.0, 'rmse': 1089672.4822779745, 'mae': 1089672.4821690312, 'val_losses': [3.8523679932609936, 3.8758978902316485, 3.868692558319842]}, {'h': 10, 'batch_size': 128, 'lr': 0.001, 'l2_lambda': 5.0, 'rmse': 15.735735470120613, 'mae': 5.61969919791881, 'val_losses': [3.9867418515877646, 2.1175166185731524, 3.9932443470251364]}, {'h': 10, 'batch_size': 128, 'lr': 0.0001, 'l2_lambda': 0.1, 'rmse': 34193.6550314017, 'mae': 1176.888619656484, 'val_losses': [1.4340720767246895, 1.4380639163685627, 2.084253795933528]}, {'h': 10, 'batch_size': 128, 'lr': 0.0001, 'l2_la

In [125]:
pd.DataFrame(all_results).sort_values("rmse")

Unnamed: 0,h,batch_size,lr,l2_lambda,rmse,mae,val_losses
3,10,128,0.001,5.0,15.73574,5.619699,"[3.9867418515877646, 2.1175166185731524, 3.993..."
19,10,256,0.001,5.0,16.41833,5.853721,"[3.767888710147045, 3.9999983037104374, 1.9362..."
10,10,128,0.0005,3.0,24.74484,7.08983,"[1.7247410456909509, 1.8098387199408206, 1.727..."
23,10,256,0.0001,5.0,30.59022,26.34872,"[1.8007626371061216, 2.9390000163531695, 2.006..."
26,10,256,0.0005,3.0,33.02706,9.76857,"[1.7297815034379724, 2.052061457125867, 1.7190..."
15,10,128,5e-05,5.0,37.21788,35.54003,"[1.8198008092089755, 3.408201170260789, 2.0205..."
27,10,256,0.0005,5.0,37.87881,35.25905,"[1.8094378482611453, 1.7900777059744617, 1.958..."
14,10,128,5e-05,3.0,40.15474,17.68114,"[1.7360578602576842, 2.8795792064705834, 1.947..."
9,10,128,0.0005,1.0,40.20711,30.84474,"[3.2421056198292093, 3.493430658442075, 2.0708..."
22,10,256,0.0001,3.0,42.1265,10.91575,"[1.7482722812011593, 2.5364784168415384, 1.911..."


In [None]:
h_list = [10]#[4, 6, 8, 10]
batch_size_list = [64, 128, 256]
lr_list = [5e-5, 1e-4, 5e-4, 1e-3]
l2_lambda_list = [0.1, 1, 3, 5]

d1=source1_tensor.shape[1]  # number of features of the 1st source
d2=source2_tensor.shape[1]  # number of features of the 2nd source

# Make sure the folder exists
output_dir = 'validation_results'
os.makedirs(output_dir, exist_ok=True)

# printing such hyperparams combination that are not already processed
for h, batch_size, lr, l2_lambda in tqdm.tqdm(itertools.product(h_list, batch_size_list, lr_list, l2_lambda_list)):
    if [h, batch_size, lr, l2_lambda] in existing_results:
        continue
    
    print(h, batch_size, lr, l2_lambda)

48it [00:00, ?it/s]


## Training final model

In [105]:
h, batch_size, lr, l2_lambda = 10, 256, 1e-3, 5

train_loader, val_loader, test_loader = create_datasets(source1_array, source2_array, target_array, weight_array, batch_size, h)

print(f"\n📊 h: {h}, batch_size:{batch_size}, lr: {lr}, l2_lambda:{l2_lambda}")

ensemble, losses = train_tme_ensemble(
    train_loader=train_loader,
    val_loader=val_loader,
    d1=d1,  # number of features of the 1st source
    d2=d2,  # number of features of the 2nd source
    h=h,  # lag length
    num_models=20,#20,
    device=device,
    lr=lr,
    weight_decay=0.1,
    l2_lambda=l2_lambda,
    max_epochs=100,
    patience=10,
    adam=True
)

y_preds, y_preds_median, rmse, mae = evaluate_tme_ensemble(ensemble, test_loader, device='cpu')


📊 h: 10, batch_size:256, lr: 0.001, l2_lambda:5

🌱 Training ensemble model 1/20


KeyboardInterrupt: 

In [None]:
# y_preds, y_preds_median, rmse, mae = evaluate_tme_ensemble(ensemble, test_loader, device='cpu')

In [91]:
print(rmse, mae)

106.65681468511072 89.68795543227324


In [92]:
y_preds, y_preds_median, rmse, mae = evaluate_tme_ensemble(ensemble, val_loader, device='cpu')

In [93]:
print(rmse, mae)

107.48528647251614 90.31830594075106


## Direct target

In [106]:
h, batch_size, lr, l2_lambda = 10, 256, 1e-3, 5

train_loader, val_loader, test_loader = create_datasets(source1_array, source2_array, target_direct_array, weight_array, batch_size, h)

print(f"\n📊 h: {h}, batch_size:{batch_size}, lr: {lr}, l2_lambda:{l2_lambda}")

ensemble, losses = train_tme_ensemble(
    train_loader=train_loader,
    val_loader=val_loader,
    d1=d1,  # number of features of the 1st source
    d2=d2,  # number of features of the 2nd source
    h=h,  # lag length
    num_models=8,#20,
    device=device,
    lr=lr,
    weight_decay=0.1,
    l2_lambda=l2_lambda,
    max_epochs=100,
    patience=10,
    adam=True,
    direct_target=True
)

y_preds, y_preds_median, rmse, mae = evaluate_tme_ensemble(ensemble, test_loader, device='cpu', direct_target=True)


📊 h: 10, batch_size:256, lr: 0.001, l2_lambda:5

🌱 Training ensemble model 1/8
Epoch 1.      Train Loss: 26.4498, Val Loss: 27.0549.      Train RMSE: 3268936.6451, Val RMSE: 3268802.8814
Epoch 5.      Train Loss: 7.3809, Val Loss: 5.8902.      Train RMSE: 17.4283, Val RMSE: 15.0702
Epoch 10.      Train Loss: 2.5106, Val Loss: 2.0291.      Train RMSE: 29.9533, Val RMSE: 19.5687
Early stopping triggered.

🌱 Training ensemble model 2/8
Epoch 1.      Train Loss: 33.6719, Val Loss: 34.2765.      Train RMSE: 3268891.8974, Val RMSE: 3268907.6153
Epoch 5.      Train Loss: 3.0258, Val Loss: 3.9920.      Train RMSE: 30.6748, Val RMSE: 19.6259
Epoch 10.      Train Loss: 2.4538, Val Loss: 1.8122.      Train RMSE: 22.5426, Val RMSE: 15.8390
Epoch 15.      Train Loss: 2.5813, Val Loss: 2.2480.      Train RMSE: 31.5759, Val RMSE: 21.8355
Early stopping triggered.

🌱 Training ensemble model 3/8
Epoch 1.      Train Loss: 10.4106, Val Loss: 11.2312.      Train RMSE: 44895.0321, Val RMSE: 31727.2839
Epo

In [107]:
print(rmse, mae)

1733.741818863391 1733.6873583279287


In [16]:
h, batch_size, lr, l2_lambda = 10, 256, 1e-3, 5

train_loader, val_loader, test_loader = create_datasets(source1_array, source2_array, target_direct_array, weight_array, batch_size, h)

d1=source1_tensor.shape[1]  # number of features of the 1st source
d2=source2_tensor.shape[1]  # number of features of the 2nd source

print(f"\n📊 h: {h}, batch_size:{batch_size}, lr: {lr}, l2_lambda:{l2_lambda}")

device = 'cuda' if torch.cuda.is_available() else 'cpu'

ensemble, losses = train_tme_ensemble(
    train_loader=train_loader,
    val_loader=val_loader,
    d1=d1,  # number of features of the 1st source
    d2=d2,  # number of features of the 2nd source
    h=h,  # lag length
    num_models=20,#20,
    device=device,
    lr=lr,
    weight_decay=0.1,
    l2_lambda=l2_lambda,
    max_epochs=100,
    patience=15,
    adam=True,
    direct_target=True
)

y_preds, y_preds_median, rmse, mae = evaluate_tme_ensemble(ensemble, test_loader, device='cpu', direct_target=True)


📊 h: 10, batch_size:256, lr: 0.001, l2_lambda:5

🌱 Training ensemble model 1/20
Epoch 1.      Train Loss: 26.4498, Val Loss: 27.0548.      Train RMSE: 3268936.6451, Val RMSE: 3268802.8814
Epoch 5.      Train Loss: 7.4777, Val Loss: 8.0823.      Train RMSE: 3269011.3836, Val RMSE: 3269012.5063
Epoch 10.      Train Loss: 3.0335, Val Loss: 4.0059.      Train RMSE: 35.0410, Val RMSE: 22.7617
Epoch 15.      Train Loss: 3.0320, Val Loss: 4.0233.      Train RMSE: 31.8013, Val RMSE: 20.5744
Epoch 20.      Train Loss: 7.3631, Val Loss: 5.8706.      Train RMSE: 17.4283, Val RMSE: 15.0702
Epoch 25.      Train Loss: 4.0139, Val Loss: 2.7961.      Train RMSE: 16.3452, Val RMSE: 14.0884
Epoch 30.      Train Loss: 3.0332, Val Loss: 4.0308.      Train RMSE: 30.3432, Val RMSE: 19.7020
Epoch 35.      Train Loss: 3.0329, Val Loss: 4.0135.      Train RMSE: 33.3686, Val RMSE: 21.5440
Epoch 40.      Train Loss: 3.0321, Val Loss: 4.0121.      Train RMSE: 33.7835, Val RMSE: 21.8402
Early stopping triggered.


In [14]:
print(rmse, mae)

17.02415103234464 5.254571181093764


In [129]:
print(rmse, mae)

18.311172156212304 8.250561849001937


In [17]:
print(rmse, mae)

18.311172156212304 8.250561849001937
