In [1]:
import torch
from utils import DataLoader, evaluate_model
from models import FFNNModel, BiLSTMModel
from matplotlib import pyplot as plt

In [2]:
import logging
logging.basicConfig(level=logging.INFO)

## Ablation Study: Impact of Inclusion of Temporal Axes

In [11]:
dl_without_axes = DataLoader(
    benchmark="benchmark_1",
    split=None,
    embedding="bert_cls", 
    max_len=128,
    include_axes=False,
    shuffle_axes=False,  
    normalization="zscore"
)

INFO:utils.dataloader:Initializing DataLoader...
INFO:utils.dataloader:Using device: cuda
INFO:utils.dataloader:Loading dataset for benchmark benchmark_1 with split None
INFO:utils.dataloader:No split specified; loading all splits (train, validation, test).
INFO:utils.dataloader:Dataset loaded successfully.
INFO:utils.dataloader:Initializing embedding for method bert_cls...
INFO:utils.dataloader:Initialized BERT model and tokenizer.
INFO:utils.dataloader:Embedding resources initialized.


In [None]:
data_without_axes = dl_without_axes.preprocess()
X_train, y_train = data_without_axes["train"]
X_valid, y_valid = data_without_axes["validation"]
X_test, y_test = data_without_axes["test"]

INFO:utils.dataloader:Starting preprocessing of dataset(s)...
INFO:utils.dataloader:Processing split: train with 878 samples...
INFO:utils.dataloader:Processing a single dataset split...
INFO:utils.dataloader:Feature vector shape without axes: torch.Size([878, 768])
INFO:utils.dataloader:Extracting and normalizing target values...
INFO:utils.dataloader:Normalizing target values...
INFO:utils.dataloader:Z-score parameters: mean=[ 5.44271868e+01  1.15348462e+01 -2.48804100e-02], std=[20.1219028   3.81394874  1.39502752]
INFO:utils.dataloader:Processing split: validation with 247 samples...
INFO:utils.dataloader:Processing a single dataset split...
INFO:utils.dataloader:Feature vector shape without axes: torch.Size([247, 768])
INFO:utils.dataloader:Extracting and normalizing target values...
INFO:utils.dataloader:Normalizing target values...
INFO:utils.dataloader:Z-score parameters: mean=[53.97165992 11.64925101  0.09862348], std=[21.3092882   3.78886061  1.38975187]
INFO:utils.dataloader

In [13]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")

Device: cuda


In [14]:
input_dim = X_train.shape[1]
print(f"Input dimension: {input_dim}")

Input dimension: 768


### FFNN Performance Comparison

In [20]:
ffnn_save_path = "baseline_weights/ablation_studies/ffnn_without_axes.pth"

# Get best config for FFNN (from grid search)
checkpoint = torch.load("baseline_weights/benchmark_1/ffnn.pth")
hyperparams = checkpoint["hyperparameters"]
print(f"Best hyperparameters: {hyperparams}")
ffnn = FFNNModel(
    input_dim,
    hidden_dim=hyperparams.get("hidden_dim"),
    dropout=hyperparams.get("dropout"),
    weight_decay=hyperparams.get("weight_decay"),
    l1=hyperparams.get("l1")
)

  checkpoint = torch.load("baseline_weights/benchmark_1/ffnn.pth")
INFO:models.ffnn:FFNN built with input_dim=768, hidden_dim=64, dropout=0.0, weight_decay=0.0, l1=0.001


Best hyperparameters: {'input_dim': 6912, 'hidden_dim': 64, 'dropout': 0.0, 'weight_decay': 0.0, 'l1': 0.001}


In [16]:
train_losses, valid_losses = ffnn.fit(train_data=(X_train.cpu().numpy(), y_train), valid_data=(X_valid.cpu().numpy(), y_valid), lr=1e-3, epochs=50, device=device)

INFO:models.ffnn:Loaded best model state based on validation loss.


In [17]:
test_preds = ffnn.evaluate(test_data=(X_test.cpu().numpy(), y_test), device=device)

In [18]:
mse, mae, r2, nll, crps = evaluate_model(y_test, test_preds)
print(f"MSE: {mse:.4f}, MAE: {mae:.4f}, R2: {r2:.4f}, NLL: {nll:.4f}, CRPS: {crps:.4f}")

MSE: 0.9368, MAE: 0.7531, R2: 0.0632, NLL: 1.3863, CRPS: 0.7531


### BiLSTM Performance Comparison

In [21]:
lstm_save_path = "baseline_weights/ablation_studies/lstm_without_axes.pth"

# Get best config for FFNN (from grid search)
checkpoint = torch.load("baseline_weights/benchmark_1/bilstm.pth")
hyperparams = checkpoint["hyperparameters"]
print(f"Best hyperparameters: {hyperparams}")
lstm = BiLSTMModel(
    input_dim,
    hidden_dim=hyperparams.get("hidden_dim"),
    bidirectional=hyperparams.get("bidirectional"),
)

  checkpoint = torch.load("baseline_weights/benchmark_1/bilstm.pth")
INFO:models.bilstm:BiLSTM built with input_dim=768, hidden_dim=64, bidirectional=True


Best hyperparameters: {'input_dim': 768, 'hidden_dim': 64, 'bidirectional': True}


In [22]:
train_losses, valid_losses = lstm.fit(train_data=(X_train.cpu().numpy(), y_train), valid_data=(X_valid.cpu().numpy(), y_valid), lr=1e-4, epochs=50, device=device)

INFO:models.bilstm:Loaded best BiLSTM model state based on validation loss.


In [23]:
test_preds = lstm.evaluate(test_data=(X_test.cpu().numpy(), y_test), device=device)

In [24]:
mse, mae, r2, nll, crps = evaluate_model(y_test, test_preds)
print(f"MSE: {mse:.4f}, MAE: {mae:.4f}, R2: {r2:.4f}, NLL: {nll:.4f}, CRPS: {crps:.4f}")

MSE: 0.9625, MAE: 0.7659, R2: 0.0375, NLL: 1.3998, CRPS: 0.7659


## Ablation Study: Impact of Shuffling of Temporal Axes

In [3]:
dl_shuffled_axes = DataLoader(
    benchmark="benchmark_1",
    split=None,
    embedding="bert_cls", 
    max_len=128,
    include_axes=True,
    shuffle_axes=True,  
    normalization="zscore"
)

INFO:utils.dataloader:Initializing DataLoader...
INFO:utils.dataloader:Using device: cuda
INFO:utils.dataloader:Loading dataset for benchmark benchmark_1 with split None
INFO:utils.dataloader:No split specified; loading all splits (train, validation, test).
INFO:utils.dataloader:Dataset loaded successfully.
INFO:utils.dataloader:Initializing embedding for method bert_cls...
INFO:utils.dataloader:Initialized BERT model and tokenizer.
INFO:utils.dataloader:Embedding resources initialized.


In [4]:
data_shuffled_axes = dl_shuffled_axes.preprocess()
X_train, y_train = data_shuffled_axes["train"]
X_valid, y_valid = data_shuffled_axes["validation"]
X_test, y_test = data_shuffled_axes["test"]

INFO:utils.dataloader:Starting preprocessing of dataset(s)...
INFO:utils.dataloader:Processing split: train with 878 samples (shuffle_axes=True)...
INFO:utils.dataloader:Processing a single dataset split...
INFO:utils.dataloader:Combined feature vector shape: torch.Size([878, 6912])
INFO:utils.dataloader:Extracting and normalizing target values...
INFO:utils.dataloader:Normalizing target values...
INFO:utils.dataloader:Z-score parameters: mean=[ 5.44271868e+01  1.15348462e+01 -2.48804100e-02], std=[20.1219028   3.81394874  1.39502752]
INFO:utils.dataloader:Processing split: validation with 247 samples (shuffle_axes=False)...
INFO:utils.dataloader:Processing a single dataset split...
INFO:utils.dataloader:Combined feature vector shape: torch.Size([247, 6912])
INFO:utils.dataloader:Extracting and normalizing target values...
INFO:utils.dataloader:Normalizing target values...
INFO:utils.dataloader:Z-score parameters: mean=[53.97165992 11.64925101  0.09862348], std=[21.3092882   3.78886061

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")

Device: cuda


In [6]:
input_dim = X_train.shape[1]
print(f"Input dimension: {input_dim}")

Input dimension: 6912


### FFNN Performance Comparison

In [7]:
ffnn_save_path = "baseline_weights/ablation_studies/ffnn_without_axes.pth"

# Get best config for FFNN (from grid search)
checkpoint = torch.load("baseline_weights/benchmark_1/ffnn.pth")
hyperparams = checkpoint["hyperparameters"]
print(f"Best hyperparameters: {hyperparams}")
ffnn = FFNNModel(
    input_dim,
    hidden_dim=hyperparams.get("hidden_dim"),
    dropout=hyperparams.get("dropout"),
    weight_decay=hyperparams.get("weight_decay"),
    l1=hyperparams.get("l1")
)

  checkpoint = torch.load("baseline_weights/benchmark_1/ffnn.pth")
INFO:models.ffnn:FFNN built with input_dim=6912, hidden_dim=64, dropout=0.0, weight_decay=0.0, l1=0.001


Best hyperparameters: {'input_dim': 6912, 'hidden_dim': 64, 'dropout': 0.0, 'weight_decay': 0.0, 'l1': 0.001}


In [8]:
train_losses, valid_losses = ffnn.fit(train_data=(X_train.cpu().numpy(), y_train), valid_data=(X_valid.cpu().numpy(), y_valid), lr=1e-3, epochs=50, device=device)

INFO:models.ffnn:Loaded best model state based on validation loss.


In [9]:
test_preds = ffnn.evaluate(test_data=(X_test.cpu().numpy(), y_test), device=device)

In [10]:
mse, mae, r2, nll, crps = evaluate_model(y_test, test_preds)
print(f"MSE: {mse:.4f}, MAE: {mae:.4f}, R2: {r2:.4f}, NLL: {nll:.4f}, CRPS: {crps:.4f}")

MSE: 0.9933, MAE: 0.7591, R2: 0.0067, NLL: 1.4156, CRPS: 0.7591


### BiLSTM Performance Comparison

In [11]:
lstm_save_path = "baseline_weights/ablation_studies/lstm_without_axes.pth"

# Get best config for FFNN (from grid search)
checkpoint = torch.load("baseline_weights/benchmark_1/bilstm.pth")
hyperparams = checkpoint["hyperparameters"]
print(f"Best hyperparameters: {hyperparams}")
lstm = BiLSTMModel(
    input_dim,
    hidden_dim=hyperparams.get("hidden_dim"),
    bidirectional=hyperparams.get("bidirectional"),
)

  checkpoint = torch.load("baseline_weights/benchmark_1/bilstm.pth")
INFO:models.bilstm:BiLSTM built with input_dim=6912, hidden_dim=64, bidirectional=True


Best hyperparameters: {'input_dim': 768, 'hidden_dim': 64, 'bidirectional': True}


In [12]:
train_losses, valid_losses = lstm.fit(train_data=(X_train.cpu().numpy(), y_train), valid_data=(X_valid.cpu().numpy(), y_valid), lr=1e-4, epochs=50, device=device)

INFO:models.bilstm:Loaded best BiLSTM model state based on validation loss.


In [13]:
test_preds = lstm.evaluate(test_data=(X_test.cpu().numpy(), y_test), device=device)

In [14]:
mse, mae, r2, nll, crps = evaluate_model(y_test, test_preds)
print(f"MSE: {mse:.4f}, MAE: {mae:.4f}, R2: {r2:.4f}, NLL: {nll:.4f}, CRPS: {crps:.4f}")

MSE: 1.0107, MAE: 0.7984, R2: -0.0107, NLL: 1.4243, CRPS: 0.7984
