In [1]:
import pandas as pd
import numpy as np
import setuptools
import openml
from sklearn.linear_model import LinearRegression 
import lightgbm as lgbm
import lightgbmlss
import optuna
from scipy.spatial.distance import mahalanobis
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestRegressor
from sklearn.gaussian_process.kernels import Matern
from engression import engression, engression_bagged
import torch
from sklearn.preprocessing import StandardScaler
from scipy.spatial.distance import mahalanobis
from scipy.stats import norm
from sklearn.metrics import mean_squared_error
from rtdl_revisiting_models import MLP, ResNet, FTTransformer
from properscoring import crps_gaussian, crps_ensemble
import random
import gpytorch
import tqdm.auto as tqdm
from lightgbmlss.model import *
from lightgbmlss.distributions.Gaussian import *
from drf import drf
import os
from pygam import LinearGAM, s, f
from utils import EarlyStopping, train, train_trans, train_no_early_stopping, train_trans_no_early_stopping, train_GP, ExactGPModel
from torch.utils.data import TensorDataset, DataLoader

# OpenML benchmark suite the task is taken from.
# Other suites: 337 (classification, numerical features),
# 335 (regression, numerical and categorical features),
# 334 (classification, numerical and categorical features).
SUITE_ID = 336  # Regression on numerical features
benchmark_suite = openml.study.get_suite(SUITE_ID)  # obtain the benchmark suite

task_id = 361072
task = openml.tasks.get_task(task_id)  # download the OpenML task
dataset = task.get_dataset()

# Feature frame X and target y, with the dataset's default target column.
X, y, categorical_indicator, attribute_names = dataset.get_data(
    dataset_format="dataframe", target=dataset.default_target_attribute
)

# Experiment configuration.
N_TRIALS = 10         # number of Optuna trials per study
N_SAMPLES = 100       # not read in this cell -- presumably samples drawn per point by sampling models
PATIENCE = 40         # patience handed to EarlyStopping
N_EPOCHS = 1000       # epoch budget handed to train()
GP_ITERATIONS = 1000  # not read in this cell -- presumably the GP training iterations
BATCH_SIZE = 1024     # batch size of every DataLoader
seed = 10

# Set the random seed of every generator in use, for reproducibility.
# (The CUDA seeding call was issued twice before; once is sufficient.)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)


# --- Outer split: the 20% of points farthest from the data centre become the test set ---
# Mean and covariance matrix of the full feature matrix.
mean = np.mean(X, axis=0)
cov = np.cov(X.T)
# Invert the covariance once: the inverse is the same for every row, so
# recomputing it inside the comprehension only wasted time.
inv_cov = np.linalg.inv(cov)

# Mahalanobis distance of each data point to the mean.
mahalanobis_dist = pd.Series(
    [mahalanobis(x, mean, inv_cov) for x in X.values], index=X.index
)
# 80% quantile of the distances, computed once and shared by both masks.
threshold = np.quantile(mahalanobis_dist, 0.8)
far_index = mahalanobis_dist.index[(mahalanobis_dist >= threshold).to_numpy()]
close_index = mahalanobis_dist.index[(mahalanobis_dist < threshold).to_numpy()]

X_train = X.loc[close_index, :]
X_test = X.loc[far_index, :]
y_train = y.loc[close_index]
y_test = y.loc[far_index]

# --- Inner split: repeat the procedure on the training data to carve out a validation set ---
mean = np.mean(X_train, axis=0)
cov = np.cov(X_train.T)
inv_cov_ = np.linalg.inv(cov)

# Mahalanobis distance of each training point to the training mean.
mahalanobis_dist_ = pd.Series(
    [mahalanobis(x, mean, inv_cov_) for x in X_train.values], index=X_train.index
)
threshold_ = np.quantile(mahalanobis_dist_, 0.8)
far_index_ = mahalanobis_dist_.index[(mahalanobis_dist_ >= threshold_).to_numpy()]
close_index_ = mahalanobis_dist_.index[(mahalanobis_dist_ < threshold_).to_numpy()]

# Trailing-underscore names: training data with the validation points removed.
X_train_ = X_train.loc[close_index_, :]
X_val = X_train.loc[far_index_, :]
y_train_ = y_train.loc[close_index_]
y_val = y_train.loc[far_index_]


def _to_float_tensor(frame):
    """Return the values of *frame* as a float32 tensor, on the GPU when CUDA is available."""
    tensor = torch.tensor(frame.values, dtype=torch.float32)
    return tensor.cuda() if torch.cuda.is_available() else tensor


# Tensor copies of every split. The double-underscore names hold the reduced
# training set (training data without the validation points).
X_train__tensor = _to_float_tensor(X_train_)
y_train__tensor = _to_float_tensor(y_train_)
X_train_tensor = _to_float_tensor(X_train)
y_train_tensor = _to_float_tensor(y_train)
X_val_tensor = _to_float_tensor(X_val)
y_val_tensor = _to_float_tensor(y_val)
X_test_tensor = _to_float_tensor(X_test)
y_test_tensor = _to_float_tensor(y_test)

# Flat NumPy views of the validation and test targets, used when scoring.
y_val_np = y_val.values.flatten()
y_test_np = y_test.values.flatten()

# One TensorDataset per split.
train__dataset = TensorDataset(X_train__tensor, y_train__tensor)
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Training loaders reshuffle; evaluation loaders keep the row order so that
# predictions line up with y_val_np / y_test_np.
train__loader = DataLoader(train__dataset, shuffle=True, batch_size=BATCH_SIZE)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=BATCH_SIZE)


# #### MLP
# Network dimensions: one input per feature column, a single regression output.
d_in = X_train_.shape[1]
d_out = 1

def MLP_opt(trial):
    """Optuna objective: train an MLP and return its mean validation CRPS.

    Architecture and optimiser hyper-parameters are sampled from ``trial``,
    the network is trained on ``train__loader`` with early stopping against
    ``val_loader``, and the validation predictions are scored with a
    Gaussian CRPS whose scale is the standard deviation of the validation
    residuals.

    The value returned by ``train`` (presumably the number of epochs that
    were run -- confirm in ``utils.train``) is recorded as the ``n_epochs``
    parameter so that it can later be read from ``study.best_params``.

    Args:
        trial: the Optuna trial supplying hyper-parameter suggestions.

    Returns:
        Mean CRPS over the validation set (lower is better).
    """
    # Re-seed so that every trial starts from the same random state.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)

    n_blocks = trial.suggest_int("n_blocks", 1, 5)
    d_block = trial.suggest_int("d_block", 10, 500)
    dropout = trial.suggest_float("dropout", 0, 1)

    MLP_model = MLP(
        d_in=d_in,
        d_out=d_out,
        n_blocks=n_blocks,
        d_block=d_block,
        dropout=dropout,
    )
    n_epochs = N_EPOCHS
    learning_rate = trial.suggest_float('learning_rate', 0.0001, 0.05, log=True)
    weight_decay = trial.suggest_float('weight_decay', 1e-8, 1e-3, log=True)
    optimizer = torch.optim.Adam(MLP_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    criterion = torch.nn.MSELoss()

    if torch.cuda.is_available():
        MLP_model = MLP_model.cuda()

    early_stopping = EarlyStopping(patience=PATIENCE, verbose=False)
    n_epochs = train(MLP_model, criterion, optimizer, n_epochs, train__loader, val_loader, early_stopping)
    # Degenerate range: stores the value in the trial's parameters without
    # actually searching over it.
    n_epochs = trial.suggest_int('n_epochs', n_epochs, n_epochs)

    # Point prediction. Switch to evaluation mode first so that dropout is
    # disabled; otherwise the predictions may be randomly perturbed.
    MLP_model.eval()
    predictions = []
    with torch.no_grad():
        for batch_X, _ in val_loader:
            batch_predictions = MLP_model(batch_X).reshape(-1,)
            predictions.append(batch_predictions.cpu().numpy())

    y_val_hat_MLP = np.concatenate(predictions)

    # Estimate standard deviation of the prediction error.
    std_dev_error = np.std(y_val_np - y_val_hat_MLP)

    # crps_gaussian is vectorised, so all validation points are scored in one call.
    crps_values = crps_gaussian(y_val_np, mu=y_val_hat_MLP, sig=std_dev_error)

    return np.mean(crps_values)

# Hyper-parameter search for the MLP (minimises the mean validation CRPS).
sampler_MLP = optuna.samplers.TPESampler(seed=seed)
study_MLP = optuna.create_study(sampler=sampler_MLP, direction='minimize')
study_MLP.optimize(MLP_opt, n_trials=N_TRIALS)

# Rebuild the best architecture and refit it on the full training set.
best_params_MLP = study_MLP.best_params

MLP_model = MLP(
    d_in=d_in,
    d_out=d_out,
    n_blocks=best_params_MLP['n_blocks'],
    d_block=best_params_MLP['d_block'],
    dropout=best_params_MLP['dropout'],
)

if torch.cuda.is_available():
    MLP_model = MLP_model.cuda()

# 'n_epochs' is the value the best trial recorded after early stopping.
n_epochs = best_params_MLP['n_epochs']
learning_rate = best_params_MLP['learning_rate']
weight_decay = best_params_MLP['weight_decay']
optimizer = torch.optim.Adam(MLP_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
criterion = torch.nn.MSELoss()

train_no_early_stopping(MLP_model, criterion, optimizer, n_epochs, train_loader)

# Point prediction. Evaluation mode disables dropout, which would otherwise
# randomly perturb the test predictions.
MLP_model.eval()
predictions = []
with torch.no_grad():
    for batch_X, _ in test_loader:
        batch_predictions = MLP_model(batch_X).reshape(-1,)
        predictions.append(batch_predictions.cpu().numpy())

y_test_hat_MLP = np.concatenate(predictions)

# Estimate standard deviation of the prediction error.
# NOTE(review): this scale is computed from the test targets themselves, so
# the reported CRPS uses information unavailable at prediction time --
# confirm this is the intended evaluation protocol.
std_dev_error = np.std(y_test_np - y_test_hat_MLP)

# One normal distribution per prediction. Not read again in this cell; kept
# in case a later cell uses it.
pred_distributions = [norm(loc=y_test_hat_MLP[i], scale=std_dev_error) for i in range(len(y_test_hat_MLP))]

# crps_gaussian is vectorised: score every test point in one call.
crps_values = crps_gaussian(y_test_np, mu=y_test_hat_MLP, sig=std_dev_error)

# Mean CRPS over the test set.
crps_MLP = np.mean(crps_values)

print("CRPS MLP: ", crps_MLP)

Starting from Version 0.15.0 `download_splits` will default to ``False`` instead of ``True`` and be independent from `download_data`. To disable this message until version 0.15 explicitly set `download_splits` to a bool.
Starting from Version 0.15 `download_data`, `download_qualities`, and `download_features_meta_data` will all be ``False`` instead of ``True`` by default to enable lazy loading. To disable this message until version 0.15 explicitly set `download_data`, `download_qualities`, and `download_features_meta_data` to a bool while calling `get_dataset`.
[I 2024-02-20 17:25:27,801] A new study created in memory with name: no-name-f102c980-3a5b-4376-9fbd-645716f7c368


Train:   0%|          | 0/1000 [00:00<?, ?it/s]

[I 2024-02-20 17:25:38,486] Trial 0 finished with value: 61.8878542160432 and parameters: {'n_blocks': 4, 'd_block': 20, 'dropout': 0.6336482349262754, 'learning_rate': 0.010495405390719734, 'weight_decay': 3.1083868392602017e-06, 'n_epochs': 43}. Best is trial 0 with value: 61.8878542160432.


Early stopping


Train:   0%|          | 0/1000 [00:00<?, ?it/s]

[I 2024-02-20 17:26:56,364] Trial 1 finished with value: 367.74707685166317 and parameters: {'n_blocks': 2, 'd_block': 107, 'dropout': 0.7605307121989587, 'learning_rate': 0.0002860388842288948, 'weight_decay': 2.765025054332623e-08, 'n_epochs': 563}. Best is trial 0 with value: 61.8878542160432.


Early stopping


Train:   0%|          | 0/1000 [00:00<?, ?it/s]

[I 2024-02-20 17:28:12,094] Trial 2 finished with value: 10.012117630178597 and parameters: {'n_blocks': 4, 'd_block': 478, 'dropout': 0.003948266327914451, 'learning_rate': 0.002412079153798176, 'weight_decay': 0.00011563912803570738, 'n_epochs': 156}. Best is trial 2 with value: 10.012117630178597.


Early stopping


Train:   0%|          | 0/1000 [00:00<?, ?it/s]

[I 2024-02-20 17:28:37,723] Trial 3 finished with value: 687.1707511457577 and parameters: {'n_blocks': 4, 'd_block': 364, 'dropout': 0.29187606817063316, 'learning_rate': 0.029994721053560828, 'weight_decay': 3.7400629930578146e-05, 'n_epochs': 48}. Best is trial 2 with value: 10.012117630178597.


Early stopping


Train:   0%|          | 0/1000 [00:00<?, ?it/s]

[I 2024-02-20 17:28:53,694] Trial 4 finished with value: 34.32406426910931 and parameters: {'n_blocks': 3, 'd_block': 79, 'dropout': 0.3733407600514692, 'learning_rate': 0.006598821883612051, 'weight_decay': 1.618698156523955e-06, 'n_epochs': 77}. Best is trial 2 with value: 10.012117630178597.


Early stopping


Train:   0%|          | 0/1000 [00:00<?, ?it/s]

[I 2024-02-20 17:29:29,798] Trial 5 finished with value: 34.30768456132583 and parameters: {'n_blocks': 3, 'd_block': 313, 'dropout': 0.5131382425543909, 'learning_rate': 0.005693803629695728, 'weight_decay': 1.0120332166548561e-05, 'n_epochs': 125}. Best is trial 2 with value: 10.012117630178597.


Early stopping


Train:   0%|          | 0/1000 [00:00<?, ?it/s]

[I 2024-02-20 17:34:01,958] Trial 6 finished with value: 656.4224662058078 and parameters: {'n_blocks': 5, 'd_block': 266, 'dropout': 0.9086488808086682, 'learning_rate': 0.0007271242493848924, 'weight_decay': 2.8333273009960152e-08, 'n_epochs': 623}. Best is trial 2 with value: 10.012117630178597.


Early stopping


Train:   0%|          | 0/1000 [00:00<?, ?it/s]

[I 2024-02-20 17:36:09,539] Trial 7 finished with value: 815.261944553728 and parameters: {'n_blocks': 2, 'd_block': 65, 'dropout': 0.8286813263076767, 'learning_rate': 0.00013383563361780206, 'weight_decay': 1.3534298216580227e-05, 'n_epochs': 714}. Best is trial 2 with value: 10.012117630178597.


Early stopping


Train:   0%|          | 0/1000 [00:00<?, ?it/s]

[I 2024-02-20 17:36:25,372] Trial 8 finished with value: 9136.531093068048 and parameters: {'n_blocks': 3, 'd_block': 412, 'dropout': 0.1989475396788123, 'learning_rate': 0.020540606581753273, 'weight_decay': 5.731432699830849e-07, 'n_epochs': 3}. Best is trial 2 with value: 10.012117630178597.


Early stopping


Train:   0%|          | 0/1000 [00:00<?, ?it/s]

[I 2024-02-20 17:37:42,874] Trial 9 finished with value: 1190.8960749744613 and parameters: {'n_blocks': 4, 'd_block': 155, 'dropout': 0.8839364795611863, 'learning_rate': 0.0007560423904195248, 'weight_decay': 6.684662552536899e-08, 'n_epochs': 282}. Best is trial 2 with value: 10.012117630178597.


Early stopping


Train:   0%|          | 0/156 [00:00<?, ?it/s]

CRPS MLP:  11.790757381123871


In [1]:
import os
import pandas as pd
import numpy as np
import setuptools
import openml
from sklearn.linear_model import LinearRegression 
import lightgbm as lgbm
import lightgbmlss
import optuna
from scipy.spatial.distance import mahalanobis
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestRegressor
from sklearn.gaussian_process.kernels import Matern
from engression import engression, engression_bagged
import torch
from sklearn.preprocessing import StandardScaler
from scipy.spatial.distance import mahalanobis
from scipy.stats import norm
from sklearn.metrics import mean_squared_error
from rtdl_revisiting_models import MLP, ResNet, FTTransformer
from properscoring import crps_gaussian, crps_ensemble
import random
import gpytorch
import tqdm.auto as tqdm
from lightgbmlss.model import *
from lightgbmlss.distributions.Gaussian import *
from pygam import LinearGAM, s, f
from utils import EarlyStopping, train, train_trans, train_no_early_stopping, train_trans_no_early_stopping, train_GP, ExactGPModel
from torch.utils.data import TensorDataset, DataLoader
from drf import drf

# Identifier of the OpenML benchmark suite. Alternatives:
#   337 -- classification on numerical features
#   335 -- regression on numerical and categorical features
#   334 -- classification on numerical and categorical features
SUITE_ID = 336  # regression on numerical features
benchmark_suite = openml.study.get_suite(SUITE_ID)

# OpenML task processed by the following cells.
task_id = 361072

print(f"Task {task_id}")

Task 361072


In [2]:
# Create the checkpoint directory if it doesn't exist. The directory is
# named once so that the folder and the checkpoint path cannot drift apart.
CHECKPOINT_DIR = 'CHECKPOINTS/MAHALANOBIS'
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
CHECKPOINT_PATH = f'{CHECKPOINT_DIR}/task_{task_id}.pt'

print(f"Task {task_id}")

task = openml.tasks.get_task(task_id)  # download the OpenML task
dataset = task.get_dataset()

# Feature frame X and target y, with the dataset's default target column.
X, y, categorical_indicator, attribute_names = dataset.get_data(
    dataset_format="dataframe", target=dataset.default_target_attribute
)

# Experiment configuration.
N_TRIALS = 2          # number of Optuna trials per study
N_SAMPLES = 100       # samples / quantile levels drawn per point by the sampling models
PATIENCE = 40         # not read in the visible part of this notebook run
N_EPOCHS = 1000       # not read in the visible part of this notebook run
GP_ITERATIONS = 1000  # not read in the visible part of this notebook run
BATCH_SIZE = 1024     # batch size of the DataLoaders and of engression
seed = 10

# Set the random seed of every generator in use, for reproducibility.
# (The CUDA seeding call was issued twice before; once is sufficient.)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)


# --- Outer split: the 20% of points farthest from the data centre become the test set ---
# Mean and covariance matrix of the full feature matrix.
mean = np.mean(X, axis=0)
cov = np.cov(X.T)
# Invert the covariance once: the inverse is the same for every row, so
# recomputing it inside the comprehension only wasted time.
inv_cov = np.linalg.inv(cov)

# Mahalanobis distance of each data point to the mean.
mahalanobis_dist = pd.Series(
    [mahalanobis(x, mean, inv_cov) for x in X.values], index=X.index
)
# 80% quantile of the distances, computed once and shared by both masks.
threshold = np.quantile(mahalanobis_dist, 0.8)
far_index = mahalanobis_dist.index[(mahalanobis_dist >= threshold).to_numpy()]
close_index = mahalanobis_dist.index[(mahalanobis_dist < threshold).to_numpy()]

X_train = X.loc[close_index, :]
X_test = X.loc[far_index, :]
y_train = y.loc[close_index]
y_test = y.loc[far_index]

# --- Inner split: repeat the procedure on the training data to carve out a validation set ---
mean = np.mean(X_train, axis=0)
cov = np.cov(X_train.T)
inv_cov_ = np.linalg.inv(cov)

# Mahalanobis distance of each training point to the training mean.
mahalanobis_dist_ = pd.Series(
    [mahalanobis(x, mean, inv_cov_) for x in X_train.values], index=X_train.index
)
threshold_ = np.quantile(mahalanobis_dist_, 0.8)
far_index_ = mahalanobis_dist_.index[(mahalanobis_dist_ >= threshold_).to_numpy()]
close_index_ = mahalanobis_dist_.index[(mahalanobis_dist_ < threshold_).to_numpy()]

# Trailing-underscore names: training data with the validation points removed.
X_train_ = X_train.loc[close_index_, :]
X_val = X_train.loc[far_index_, :]
y_train_ = y_train.loc[close_index_]
y_val = y_train.loc[far_index_]


def _to_float_tensor(frame):
    """Return the values of *frame* as a float32 tensor, on the GPU when CUDA is available."""
    tensor = torch.tensor(frame.values, dtype=torch.float32)
    return tensor.cuda() if torch.cuda.is_available() else tensor


# Tensor copies of every split. The double-underscore names hold the reduced
# training set (training data without the validation points).
X_train__tensor = _to_float_tensor(X_train_)
y_train__tensor = _to_float_tensor(y_train_)
X_train_tensor = _to_float_tensor(X_train)
y_train_tensor = _to_float_tensor(y_train)
X_val_tensor = _to_float_tensor(X_val)
y_val_tensor = _to_float_tensor(y_val)
X_test_tensor = _to_float_tensor(X_test)
y_test_tensor = _to_float_tensor(y_test)

# Flat NumPy views of the validation and test targets, used when scoring.
y_val_np = y_val.values.flatten()
y_test_np = y_test.values.flatten()

# One TensorDataset per split.
train__dataset = TensorDataset(X_train__tensor, y_train__tensor)
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Training loaders reshuffle; evaluation loaders keep the row order so that
# predictions line up with y_val_np / y_test_np.
train__loader = DataLoader(train__dataset, shuffle=True, batch_size=BATCH_SIZE)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=BATCH_SIZE)

Task 361072


Starting from Version 0.15.0 `download_splits` will default to ``False`` instead of ``True`` and be independent from `download_data`. To disable this message until version 0.15 explicitly set `download_splits` to a bool.
Starting from Version 0.15 `download_data`, `download_qualities`, and `download_features_meta_data` will all be ``False`` instead of ``True`` by default to enable lazy loading. To disable this message until version 0.15 explicitly set `download_data`, `download_qualities`, and `download_features_meta_data` to a bool while calling `get_dataset`.


In [3]:
# Training data for the hyper-parameter search (reduced training set).
# `lgbm` is the alias this file imports explicitly; the previous `lgb` name
# was only available through a wildcard import. The features are passed as a
# float32 NumPy array, a documented Dataset input type, rather than as a
# torch tensor.
dtrain_ = lgbm.Dataset(X_train_.values.astype(np.float32), label=y_train_.values)


def boosted(trial):
    """Optuna objective: fit a Gaussian LightGBMLSS model, return its mean validation CRPS.

    Args:
        trial: the Optuna trial supplying hyper-parameter suggestions.

    Returns:
        Mean Gaussian CRPS of the predicted (loc, scale) pairs on the
        validation set (lower is better).
    """
    params = {
        'learning_rate': trial.suggest_float('learning_rate', 0.0001, 0.5, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True),
        'max_depth': trial.suggest_int('max_depth', 1, 30),
        'min_child_samples': trial.suggest_int('min_child_samples', 10, 100),
    }
    opt_params = params.copy()
    # The number of boosting rounds is handed to train() separately.
    n_rounds = opt_params.pop("n_estimators")
    opt_params['feature_pre_filter'] = False

    # LightGBMLSS with a Gaussian response gives a distributional prediction.
    boosted_tree_model = LightGBMLSS(Gaussian(stabilization="None", response_fn="exp", loss_fn="nll"))
    boosted_tree_model.train(opt_params, dtrain_, num_boost_round=n_rounds)

    # Predict both the mean and standard deviation.
    pred_params = boosted_tree_model.predict(X_val, pred_type="parameters")
    y_val_hat_boost = np.asarray(pred_params['loc'])
    y_val_hat_std = np.asarray(pred_params['scale'])

    # crps_gaussian is vectorised: score every validation point in one call.
    crps_values = crps_gaussian(y_val_np, mu=y_val_hat_boost, sig=y_val_hat_std)

    # Return the mean CRPS as the objective to be minimized.
    return np.mean(crps_values)


sampler_boost = optuna.samplers.TPESampler(seed=seed)
study_boost = optuna.create_study(sampler=sampler_boost, direction='minimize')
study_boost.optimize(boosted, n_trials=N_TRIALS)

def rf(trial):
    """Optuna objective: fit a distributional random forest, return its mean validation CRPS.

    The forest is fitted on the reduced training set; for every validation
    point it predicts quantiles at N_SAMPLES uniformly drawn levels, and that
    set of quantiles is scored as an ensemble forecast.
    """
    num_trees = trial.suggest_int('num_trees', 100, 500)
    mtry = trial.suggest_int('mtry', 1, 30)
    min_node_size = trial.suggest_int('min_node_size', 10, 100)

    drf_model = drf(num_trees=num_trees, mtry=mtry, min_node_size=min_node_size)
    drf_model.fit(X_train_, y_train_)

    # Random quantile levels, shared by all validation points.
    levels = list(np.random.uniform(0, 1, N_SAMPLES))
    y_val_hat = drf_model.predict(newdata=X_val, functional="quantile", quantiles=levels)

    # Ensemble CRPS of each validation point's predicted quantiles.
    crps_values = []
    for i, observed in enumerate(y_val_np):
        crps_values.append(crps_ensemble(observed, y_val_hat.quantile[i].reshape(-1)))

    # Mean CRPS is the objective to be minimized.
    return np.mean(crps_values)


sampler_drf = optuna.samplers.TPESampler(seed=seed)
study_drf = optuna.create_study(direction='minimize', sampler=sampler_drf)
study_drf.optimize(rf, n_trials=N_TRIALS)


def engressor_NN(trial):
    """Optuna objective: fit an engression model, return its mean validation CRPS.

    Args:
        trial: the Optuna trial supplying hyper-parameter suggestions.

    Returns:
        Mean ensemble CRPS of N_SAMPLES samples per validation point
        (lower is better).
    """
    params = {'learning_rate': trial.suggest_float('learning_rate', 0.0001, 0.01, log=True),
              'num_epoches': trial.suggest_int('num_epoches', 100, 1000),
              'num_layer': trial.suggest_int('num_layer', 2, 5),
              'hidden_dim': trial.suggest_int('hidden_dim', 100, 500),
              'resblock': trial.suggest_categorical('resblock', [True, False])}
    params['noise_dim'] = params['hidden_dim']

    # Build the keyword arguments once; only the device differs between the
    # CUDA and the CPU case.
    fit_kwargs = dict(
        lr=params['learning_rate'],
        num_epoches=params['num_epoches'],
        num_layer=params['num_layer'],
        hidden_dim=params['hidden_dim'],
        noise_dim=params['noise_dim'],
        batch_size=BATCH_SIZE,
        resblock=params['resblock'],
    )
    if torch.cuda.is_available():
        fit_kwargs['device'] = "cuda"

    # The target is passed as a column: the run recorded in this notebook
    # raised "IndexError: tuple index out of range" when y was left 1-D.
    engressor_model = engression(X_train__tensor, y_train__tensor.reshape(-1, 1), **fit_kwargs)

    # Draw a sample per validation point and score it. X_val_tensor already
    # lives on the same device as the training tensors, so the rows are
    # sliced from it instead of being rebuilt on the CPU for every point.
    crps_values = []
    for i, observed in enumerate(y_val_np):
        samples = engressor_model.sample(X_val_tensor[i:i + 1], sample_size=N_SAMPLES)
        if isinstance(samples, torch.Tensor):
            # np.array() cannot read a CUDA tensor directly.
            samples = samples.detach().cpu().numpy()
        crps_values.append(crps_ensemble(observed, np.asarray(samples).reshape(-1,)))

    return np.mean(crps_values)


sampler_engression = optuna.samplers.TPESampler(seed=seed)
study_engression = optuna.create_study(sampler=sampler_engression, direction='minimize')
study_engression.optimize(engressor_NN, n_trials=N_TRIALS)


# --- Boosted trees: refit on the full training set with the best hyper-parameters ---
# `lgbm` is the alias this file imports explicitly; the features are passed
# as a float32 NumPy array, a documented Dataset input type, rather than as
# a torch tensor.
dtrain = lgbm.Dataset(X_train.values.astype(np.float32), label=y_train.values)
opt_params = study_boost.best_params.copy()
# The number of boosting rounds is handed to train() separately.
n_rounds = opt_params.pop("n_estimators")
opt_params['feature_pre_filter'] = False
# LightGBMLSS with a Gaussian response gives a distributional prediction.
boosted_tree_model = LightGBMLSS(Gaussian(stabilization="None", response_fn="exp", loss_fn="nll"))
boosted_tree_model.train(opt_params, dtrain, num_boost_round=n_rounds)
# Predict both the mean and standard deviation.
pred_params = boosted_tree_model.predict(X_test, pred_type="parameters")
y_test_hat_boost = np.asarray(pred_params['loc'])
y_test_hat_std = np.asarray(pred_params['scale'])
# crps_gaussian is vectorised: score every test point in one call.
crps_values = crps_gaussian(y_test_np, mu=y_test_hat_boost, sig=y_test_hat_std)
# Mean test CRPS of the boosted-tree model.
CRPS_boosted = np.mean(crps_values)

# --- Distributional random forest: refit on the full training set ---
drf_model = drf(**study_drf.best_params)
drf_model.fit(X_train, y_train)
# Quantiles at N_SAMPLES uniformly drawn levels act as a sample per test point.
test_levels = list(np.random.uniform(0, 1, N_SAMPLES))
y_test_hat_drf = drf_model.predict(newdata=X_test, functional="quantile", quantiles=test_levels)
# Ensemble CRPS of each test point's predicted quantiles.
crps_values = []
for i, observed in enumerate(y_test_np):
    crps_values.append(crps_ensemble(observed, y_test_hat_drf.quantile[i].reshape(-1)))
# Mean test CRPS of the random forest.
CRPS_rf = np.mean(crps_values)

# --- Linear regression baseline, fitted on the full training set ---
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)
y_test_hat_linreg = lin_reg.predict(X_test)
# Standard deviation of the residuals, used as the Gaussian predictive scale.
# NOTE(review): the residuals are test residuals, so the scale uses the test
# targets -- confirm this is the intended evaluation protocol.
std_dev = np.std(y_test - y_test_hat_linreg)
# crps_gaussian is vectorised: score every test point in one call instead of
# looping over the predictions in Python.
crps_values = crps_gaussian(y_test_np, mu=np.asarray(y_test_hat_linreg), sig=std_dev)
CRPS_linreg = np.mean(crps_values)

# --- Engression: refit on the full training set with the best hyper-parameters ---
params = study_engression.best_params
params['noise_dim'] = params['hidden_dim']
# NOTE: this rebinds the module-level training tensors; the target becomes a
# column, the form that engression() is given in the tuning objective.
X_train_tensor = torch.Tensor(np.array(X_train))
y_train_tensor = torch.Tensor(np.array(y_train).reshape(-1, 1))

# Build the keyword arguments once; only the device differs between the CUDA
# and the CPU case.
fit_kwargs = dict(
    lr=params['learning_rate'],
    num_epoches=params['num_epoches'],
    num_layer=params['num_layer'],
    hidden_dim=params['hidden_dim'],
    noise_dim=params['noise_dim'],
    batch_size=BATCH_SIZE,
    resblock=params['resblock'],
)
if torch.cuda.is_available():
    fit_kwargs['device'] = "cuda"
engressor_model = engression(X_train_tensor, y_train_tensor, **fit_kwargs)


def _samples_to_numpy(samples):
    """Return *samples* as a flat NumPy array, copying from the GPU if necessary."""
    if isinstance(samples, torch.Tensor):
        # np.array() cannot read a CUDA tensor directly.
        samples = samples.detach().cpu().numpy()
    return np.asarray(samples).reshape(-1,)


# Draw a sample per test point. X_test_tensor is already float32 and on the
# GPU when one is available, so the rows are sliced from it instead of being
# rebuilt (and conditionally moved) for every point.
y_test_hat_engression_samples = [
    engressor_model.sample(X_test_tensor[i:i + 1], sample_size=N_SAMPLES)
    for i in range(len(X_test))
]
# Ensemble CRPS of each test point's sample.
crps_values = [
    crps_ensemble(observed, _samples_to_numpy(samples))
    for observed, samples in zip(y_test_np, y_test_hat_engression_samples)
]
CRPS_engression = np.mean(crps_values)

# Report the mean test CRPS of every model.
print("CRPS linear regression: ",CRPS_linreg)
print("CRPS boosted trees", CRPS_boosted)
print("CRPS random forest", CRPS_rf)
print("CRPS engression", CRPS_engression)

[I 2024-03-04 16:49:45,125] A new study created in memory with name: no-name-10d2b83f-29d7-4fb8-a298-b4f4973893db
[I 2024-03-04 16:49:48,843] Trial 0 finished with value: 7.675922492698488 and parameters: {'learning_rate': 0.0713003929222653, 'n_estimators': 108, 'reg_lambda': 0.005044685709888605, 'max_depth': 23, 'min_child_samples': 55}. Best is trial 0 with value: 7.675922492698488.
[I 2024-03-04 16:49:52,140] Trial 1 finished with value: 11.478143692353438 and parameters: {'learning_rate': 0.0006784471913345375, 'n_estimators': 179, 'reg_lambda': 0.0699481785242808, 'max_depth': 6, 'min_child_samples': 18}. Best is trial 0 with value: 7.675922492698488.
[I 2024-03-04 16:49:52,145] A new study created in memory with name: no-name-b335370b-45fb-4937-9932-596578599fdc


[0.771320643266746, 0.0207519493594015, 0.6336482349262754, 0.7488038825386119, 0.4985070123025904, 0.22479664553084766, 0.19806286475962398, 0.7605307121989587, 0.16911083656253545, 0.08833981417401027, 0.6853598183677972, 0.9533933461949365, 0.003948266327914451, 0.5121922633857766, 0.8126209616521135, 0.6125260668293881, 0.7217553174317995, 0.29187606817063316, 0.9177741225129434, 0.7145757833976906, 0.5425443680112613, 0.14217004760152696, 0.3733407600514692, 0.6741336150663453, 0.4418331744229961, 0.4340139933332937, 0.6177669784693172, 0.5131382425543909, 0.6503971819314672, 0.6010389534045444, 0.8052231968327465, 0.5216471523936341, 0.9086488808086682, 0.3192360889885453, 0.09045934927090737, 0.30070005663620336, 0.11398436186354977, 0.8286813263076767, 0.04689631938924976, 0.6262871483113925, 0.5475861559192435, 0.8192869956700687, 0.1989475396788123, 0.8568503024577332, 0.3516526394320879, 0.7546476915298572, 0.2959617068796787, 0.8839364795611863, 0.3255116378322488, 0.165015

[I 2024-03-04 16:49:58,743] Trial 0 finished with value: 8.820375070178413 and parameters: {'num_trees': 409, 'mtry': 1, 'min_node_size': 67}. Best is trial 0 with value: 8.820375070178413.


[0.5781364298824675, 0.8539337505004864, 0.06809727353795003, 0.46453080777933253, 0.7819491186191484, 0.7186028103822503, 0.5860219800531759, 0.037094413234407875, 0.350656391283133, 0.563190684492745, 0.29972987242456284, 0.5123341532735493, 0.6734669252847205, 0.1591937333780935, 0.05047767015399762, 0.33781588706467947, 0.10806377277945256, 0.17890280857109042, 0.8858270961677057, 0.3653649712141158, 0.21876934917953672, 0.7524961702186028, 0.10687958439356915, 0.7446032407755606, 0.46978529344049447, 0.5982556712791092, 0.14762019228529766, 0.18403482209315125, 0.6450721264682419, 0.048628006263405577, 0.24861250780276944, 0.5424085162280042, 0.2267733432700092, 0.3814115349046321, 0.9222327869035463, 0.9253568728677768, 0.566749924575, 0.5334708849890026, 0.014860024633228108, 0.977899263402005, 0.5730289040331858, 0.791756996276624, 0.5615573602763689, 0.8773352415649347, 0.5841958285306755, 0.7088498263689552, 0.14853345135645857, 0.4284507389678964, 0.6938900663424117, 0.10461

[I 2024-03-04 16:50:05,759] Trial 1 finished with value: 7.719169244181002 and parameters: {'num_trees': 400, 'mtry': 15, 'min_node_size': 30}. Best is trial 1 with value: 7.719169244181002.
[I 2024-03-04 16:50:05,765] A new study created in memory with name: no-name-0a8b2bbb-fd89-453e-a6ce-fae258250946
[W 2024-03-04 16:50:05,775] Trial 0 failed with parameters: {'learning_rate': 0.0034885205571560775, 'num_epoches': 118, 'num_layer': 4, 'hidden_dim': 400, 'resblock': True} because of the following error: IndexError('tuple index out of range').
Traceback (most recent call last):
  File "c:\Users\dalma\Desktop\THESIS_ETH_NEW\CODE\.venv\lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\dalma\AppData\Local\Temp\ipykernel_6772\2861052019.py", line 71, in engressor_NN
    engressor_model=engression(X_train__tensor, y_train__tensor, lr=params['learning_rate'], num_epoches=params['num_epoches'],num_layer=params['num_layer'

IndexError: tuple index out of range

In [None]:
# Training data for the hyper-parameter search (reduced training set).
# `lgbm` is the alias this file imports explicitly; the previous `lgb` name
# was only available through a wildcard import. The features are passed as a
# float32 NumPy array, a documented Dataset input type, rather than as a
# torch tensor.
dtrain_ = lgbm.Dataset(X_train_.values.astype(np.float32), label=y_train_.values)


def boosted(trial):
    """Optuna objective: fit a Gaussian LightGBMLSS model, return its mean validation CRPS.

    Args:
        trial: the Optuna trial supplying hyper-parameter suggestions.

    Returns:
        Mean Gaussian CRPS of the predicted (loc, scale) pairs on the
        validation set (lower is better).
    """
    params = {
        'learning_rate': trial.suggest_float('learning_rate', 0.0001, 0.5, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True),
        'max_depth': trial.suggest_int('max_depth', 1, 30),
        'min_child_samples': trial.suggest_int('min_child_samples', 10, 100),
    }
    opt_params = params.copy()
    # The number of boosting rounds is handed to train() separately.
    n_rounds = opt_params.pop("n_estimators")
    opt_params['feature_pre_filter'] = False

    # LightGBMLSS with a Gaussian response gives a distributional prediction.
    boosted_tree_model = LightGBMLSS(Gaussian(stabilization="None", response_fn="exp", loss_fn="nll"))
    boosted_tree_model.train(opt_params, dtrain_, num_boost_round=n_rounds)

    # Predict both the mean and standard deviation.
    pred_params = boosted_tree_model.predict(X_val, pred_type="parameters")
    y_val_hat_boost = np.asarray(pred_params['loc'])
    y_val_hat_std = np.asarray(pred_params['scale'])

    # crps_gaussian is vectorised: score every validation point in one call.
    crps_values = crps_gaussian(y_val_np, mu=y_val_hat_boost, sig=y_val_hat_std)

    # Return the mean CRPS as the objective to be minimized.
    return np.mean(crps_values)


sampler_boost = optuna.samplers.TPESampler(seed=seed)
study_boost = optuna.create_study(sampler=sampler_boost, direction='minimize')
study_boost.optimize(boosted, n_trials=N_TRIALS)

def rf(trial):
    """Optuna objective: mean validation CRPS of a distributional random forest.

    Fits a ``drf`` model on ``X_train_`` / ``y_train_`` with the suggested
    hyper-parameters, asks it for N_SAMPLES quantiles of ``X_val`` at uniformly
    drawn levels and scores them as an ensemble forecast against ``y_val_np``.

    Args:
        trial: the Optuna trial used to suggest hyper-parameters.

    Returns:
        The mean ensemble CRPS over the validation set (lower is better).
    """
    num_trees = trial.suggest_int('num_trees', 100, 500)
    mtry = trial.suggest_int('mtry', 1, 30)
    min_node_size = trial.suggest_int('min_node_size', 10, 100)

    forest = drf(num_trees=num_trees, mtry=mtry, min_node_size=min_node_size)
    forest.fit(X_train_, y_train_)

    # Quantiles at uniformly random levels stand in for draws from the
    # predictive distribution of each validation point.
    levels = list(np.random.uniform(0, 1, N_SAMPLES))
    y_val_hat = forest.predict(newdata=X_val, functional="quantile", quantiles=levels)

    # One ensemble CRPS per validation observation.
    scores = []
    for i in range(len(y_val_np)):
        ensemble = y_val_hat.quantile[i].reshape(-1)
        scores.append(crps_ensemble(y_val_np[i], ensemble))

    # Mean CRPS is the quantity Optuna minimises.
    return np.mean(scores)

# Hyper-parameter search for the distributional random forest: a seeded TPE
# sampler drives N_TRIALS evaluations of `rf`, minimising its return value.
sampler_drf = optuna.samplers.TPESampler(seed=seed)
study_drf = optuna.create_study(direction='minimize', sampler=sampler_drf)
study_drf.optimize(rf, n_trials=N_TRIALS)


def engressor_NN(trial):
    """Optuna objective: mean validation CRPS of an engression model.

    Trains an engression model on ``X_train__tensor`` / ``y_train__tensor`` with
    the suggested hyper-parameters (on CUDA when available), draws N_SAMPLES
    samples for every row of ``X_val`` and scores them as an ensemble forecast
    against ``y_val_np``.

    Args:
        trial: the Optuna trial used to suggest hyper-parameters.

    Returns:
        The mean ensemble CRPS over the validation set (lower is better).
    """
    params = {'learning_rate': trial.suggest_float('learning_rate', 0.0001, 0.01, log=True),
              'num_epoches': trial.suggest_int('num_epoches', 100, 1000),
              'num_layer': trial.suggest_int('num_layer', 2, 5),
              'hidden_dim': trial.suggest_int('hidden_dim', 100, 500),
              'resblock': trial.suggest_categorical('resblock', [True, False])}
    # The noise dimension is tied to the hidden width rather than tuned separately.
    params['noise_dim'] = params['hidden_dim']

    use_cuda = torch.cuda.is_available()

    # Single call site for both devices; `device` is only passed when CUDA is
    # available so the CPU path keeps relying on the library default.
    fit_kwargs = dict(
        lr=params['learning_rate'],
        num_epoches=params['num_epoches'],
        num_layer=params['num_layer'],
        hidden_dim=params['hidden_dim'],
        noise_dim=params['noise_dim'],
        batch_size=BATCH_SIZE,
        resblock=params['resblock'],
    )
    if use_cuda:
        fit_kwargs['device'] = "cuda"

    # The final refit in this script passes the target as an (n, 1) column;
    # do the same here if a flat vector was supplied.
    # NOTE(review): a 1-D target is the likely cause of the
    # "IndexError: tuple index out of range" seen on this call -- confirm.
    y_fit = y_train__tensor
    if y_fit.ndim == 1:
        y_fit = y_fit.reshape(-1, 1)

    engressor_model = engression(X_train__tensor, y_fit, **fit_kwargs)

    # One ensemble CRPS per validation row.
    crps_values = []
    for y_obs, x_row in zip(y_val_np, X_val.values):
        x_tensor = torch.Tensor(np.array([x_row]))
        if use_cuda:
            # Same device handling as the final test-set evaluation.
            x_tensor = x_tensor.cuda()
        draws = engressor_model.sample(x_tensor, sample_size=N_SAMPLES)
        # Copy to host memory before the NumPy conversion; a no-op on CPU.
        draws_np = torch.as_tensor(draws).detach().cpu().numpy().reshape(-1)
        crps_values.append(crps_ensemble(y_obs, draws_np))

    # Mean CRPS is the quantity Optuna minimises.
    return np.mean(crps_values)

# Hyper-parameter search for engression: a seeded TPE sampler drives N_TRIALS
# evaluations of `engressor_NN`, minimising its return value.
sampler_engression = optuna.samplers.TPESampler(seed=seed)
study_engression = optuna.create_study(direction='minimize', sampler=sampler_engression)
study_engression.optimize(engressor_NN, n_trials=N_TRIALS)


# --- Boosted trees: refit with the best hyper-parameters, score on the test set ---
# lgb.Dataset is documented for NumPy / pandas / SciPy-sparse inputs, not torch
# tensors, so the float32 cast is done directly in NumPy.
dtrain = lgb.Dataset(X_train.values.astype(np.float32), label=y_train.values)

# n_estimators is passed to train() as the number of boosting rounds, not as a
# booster parameter, so it is split off from the tuned parameter dict.
opt_params = study_boost.best_params.copy()
n_rounds = opt_params.pop("n_estimators")
opt_params['feature_pre_filter'] = False

# Distributional boosting: LightGBMLSS with a Gaussian response.
boosted_tree_model = LightGBMLSS(Gaussian(stabilization="None", response_fn="exp", loss_fn="nll"))
boosted_tree_model.train(opt_params, dtrain, num_boost_round=n_rounds)

# Predicted location (mean) and scale (standard deviation) per test row.
pred_params = boosted_tree_model.predict(X_test, pred_type="parameters")
y_test_hat_boost = pred_params['loc']
y_test_hat_std = pred_params['scale']

# crps_gaussian is called once on whole arrays; positional arrays are used so
# the score does not depend on the index labels of the prediction frame.
crps_values = crps_gaussian(
    np.ravel(y_test_np),
    mu=np.asarray(y_test_hat_boost),
    sig=np.asarray(y_test_hat_std),
)
# Test-set mean CRPS of the boosted model.
CRPS_boosted = np.mean(crps_values)

# --- Distributional random forest: refit with the best hyper-parameters, score on the test set ---
drf_model = drf(**study_drf.best_params)
drf_model.fit(X_train, y_train)

# Quantiles at N_SAMPLES uniformly random levels stand in for draws from the
# predictive distribution of each test point.
y_test_hat_drf = drf_model.predict(
    newdata=X_test,
    functional="quantile",
    quantiles=list(np.random.uniform(0, 1, N_SAMPLES)),
)

# One ensemble CRPS per test observation, then the test-set mean.
crps_values = [
    crps_ensemble(y_test_np[row], y_test_hat_drf.quantile[row].reshape(-1))
    for row in range(len(y_test_np))
]
CRPS_rf = np.mean(crps_values)

# --- Linear-regression baseline: Gaussian forecast with a constant standard deviation ---
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)
y_test_hat_linreg = lin_reg.predict(X_test)

# The predictive standard deviation is estimated from the TRAINING residuals.
# Estimating it from the test residuals would use the test labels to build the
# forecast that is then scored on those same labels.
std_dev = np.std(np.ravel(y_train) - np.ravel(lin_reg.predict(X_train)))

# crps_gaussian is called once on whole arrays; the scalar std_dev is shared
# by every test point.
crps_values = crps_gaussian(np.ravel(y_test_np), mu=np.ravel(y_test_hat_linreg), sig=std_dev)
# Test-set mean CRPS of the linear baseline.
CRPS_linreg = np.mean(crps_values)

# --- Engression: refit with the best hyper-parameters, score on the test set ---
params = study_engression.best_params
# The noise dimension is tied to the hidden width, as in the tuning objective.
params['noise_dim'] = params['hidden_dim']
X_train_tensor = torch.Tensor(np.array(X_train))
# engression is given the target as an (n, 1) column.
y_train_tensor = torch.Tensor(np.array(y_train).reshape(-1, 1))

# Decide the device once instead of once per test row.
_engression_use_cuda = torch.cuda.is_available()

# Single call site for both devices; `device` is only passed when CUDA is
# available so the CPU path keeps relying on the library default.
_engression_kwargs = dict(
    lr=params['learning_rate'],
    num_epoches=params['num_epoches'],
    num_layer=params['num_layer'],
    hidden_dim=params['hidden_dim'],
    noise_dim=params['noise_dim'],
    batch_size=BATCH_SIZE,
    resblock=params['resblock'],
)
if _engression_use_cuda:
    _engression_kwargs['device'] = "cuda"
engressor_model = engression(X_train_tensor, y_train_tensor, **_engression_kwargs)

# Draw N_SAMPLES samples from the fitted model for every test row; each row is
# fed as a (1, n_features) tensor placed on the same device as the model.
y_test_hat_engression_samples = [
    engressor_model.sample(
        torch.Tensor(np.array([x_row])).cuda() if _engression_use_cuda
        else torch.Tensor(np.array([x_row])),
        sample_size=N_SAMPLES,
    )
    for x_row in X_test.values
]

# One ensemble CRPS per test observation. Samples are copied to host memory
# before the NumPy conversion (a no-op when they are already on the CPU).
crps_values = [
    crps_ensemble(y_obs, torch.as_tensor(draws).detach().cpu().numpy().reshape(-1))
    for y_obs, draws in zip(y_test_np, y_test_hat_engression_samples)
]
# Test-set mean CRPS of the engression model.
CRPS_engression = np.mean(crps_values)

# Report the test-set mean CRPS of every model, one line per model.
for _label, _score in (
    ("CRPS linear regression: ", CRPS_linreg),
    ("CRPS boosted trees", CRPS_boosted),
    ("CRPS random forest", CRPS_rf),
    ("CRPS engression", CRPS_engression),
):
    print(_label, _score)