In [1]:
import pandas as pd
import numpy as np
import setuptools
import openml
from sklearn.linear_model import LinearRegression 
import lightgbm as lgbm
import lightgbmlss
import optuna
from scipy.spatial.distance import mahalanobis
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestRegressor
from sklearn.gaussian_process.kernels import Matern
from engression import engression, engression_bagged
import torch
from sklearn.preprocessing import StandardScaler
from scipy.spatial.distance import mahalanobis
from scipy.stats import norm
from sklearn.metrics import mean_squared_error
from rtdl_revisiting_models import MLP, ResNet, FTTransformer
from properscoring import crps_gaussian, crps_ensemble
import random
import gpytorch
import tqdm.auto as tqdm
from lightgbmlss.model import *
from lightgbmlss.distributions.Gaussian import *
from drf import drf
import os
from pygam import LinearGAM, s, f
from utils import EarlyStopping, train, train_trans, train_no_early_stopping, train_trans_no_early_stopping, train_GP, ExactGPModel
from torch.utils.data import TensorDataset, DataLoader

# --- Benchmark data -------------------------------------------------------
SUITE_ID = 336  # Regression on numerical features
# Other suite ids listed alongside this one:
#   337 - Classification on numerical features
#   335 - Regression on numerical and categorical features
#   334 - Classification on numerical and categorical features
benchmark_suite = openml.study.get_suite(SUITE_ID)  # obtain the benchmark suite

task_id = 361072
task = openml.tasks.get_task(task_id)  # download the OpenML task
dataset = task.get_dataset()

# X: feature DataFrame, y: target column named by the dataset's default target.
X, y, categorical_indicator, attribute_names = dataset.get_data(
    dataset_format="dataframe", target=dataset.default_target_attribute
)

# --- Experiment settings --------------------------------------------------
N_TRIALS = 10         # number of Optuna trials per study
N_SAMPLES = 100       # not used in this section -- presumably for sampling-based models; confirm
PATIENCE = 40         # patience handed to EarlyStopping
N_EPOCHS = 1000       # epoch budget handed to the training helper
GP_ITERATIONS = 1000  # not used in this section -- presumably for GP training; confirm
BATCH_SIZE = 1024     # batch size of every DataLoader

# --- Reproducibility ------------------------------------------------------
# Seed every random number generator used below (Python, NumPy, PyTorch CPU
# and all CUDA devices). Each generator is seeded exactly once.
seed = 10
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

def _mahalanobis_split(features, quantile=0.8):
    """Split the rows of ``features`` by Mahalanobis distance to their mean.

    Parameters
    ----------
    features : pandas.DataFrame
        Rows are observations, columns are numeric features.
    quantile : float, default 0.8
        Rows whose distance is at or above this quantile of all distances
        are labelled "far"; rows strictly below it are labelled "close".

    Returns
    -------
    tuple
        ``(mean, cov, distances, close_index, far_index)`` where ``mean`` and
        ``cov`` are the column means and covariance matrix of ``features``,
        ``distances`` is a Series aligned with ``features.index`` and the two
        index objects hold the labels of the close / far rows.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the covariance matrix is singular and cannot be inverted.
    """
    center = np.mean(features, axis=0)
    covariance = np.cov(features.T)

    # The inverse covariance is the same for every row: invert it once
    # instead of once per observation.
    inv_covariance = np.linalg.inv(covariance)

    distances = pd.Series(
        [mahalanobis(row, center, inv_covariance) for row in features.values],
        index=features.index,
    )

    # Compute the cut-off once and reuse it for both sides of the split.
    threshold = np.quantile(distances, quantile)
    far_labels = distances.index[np.where(distances >= threshold)[0]]
    close_labels = distances.index[np.where(distances < threshold)[0]]
    return center, covariance, distances, close_labels, far_labels


# Stage 1: the 20% of points farthest from the centre of the full data set
# become the test set, the remaining 80% the training set.
mean, cov, mahalanobis_dist, close_index, far_index = _mahalanobis_split(X)

X_train = X.loc[close_index, :]
X_test = X.loc[far_index, :]
y_train = y.loc[close_index]
y_test = y.loc[far_index]

# Stage 2: repeat the same split inside the training set to carve out a
# validation set (far points) from the data used for fitting (close points).
mean, cov, mahalanobis_dist_, close_index_, far_index_ = _mahalanobis_split(X_train)

X_train_ = X_train.loc[close_index_, :]
X_val = X_train.loc[far_index_, :]
y_train_ = y_train.loc[close_index_]
y_val = y_train.loc[far_index_]


# Choose the compute device once: the GPU when one is available, else the CPU.
_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def _as_float_tensor(frame):
    """Return the values of a pandas object as a float32 tensor on ``_device``."""
    return torch.tensor(frame.values, dtype=torch.float32).to(_device)


def _make_loader(dataset, shuffle):
    """Wrap ``dataset`` in a DataLoader that uses the global batch size."""
    return DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=shuffle)


# Tensors for every split. A trailing double underscore marks the reduced
# training set (training data without the validation points).
X_train__tensor, y_train__tensor = _as_float_tensor(X_train_), _as_float_tensor(y_train_)
X_train_tensor, y_train_tensor = _as_float_tensor(X_train), _as_float_tensor(y_train)
X_val_tensor, y_val_tensor = _as_float_tensor(X_val), _as_float_tensor(y_val)
X_test_tensor, y_test_tensor = _as_float_tensor(X_test), _as_float_tensor(y_test)

# Flat NumPy copies of the validation / test targets, used when scoring.
y_val_np = y_val.values.flatten()
y_test_np = y_test.values.flatten()

# One TensorDataset per split.
train__dataset = TensorDataset(X_train__tensor, y_train__tensor)
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Only the two training loaders shuffle; validation and test keep row order
# so that concatenated predictions line up with y_val_np / y_test_np.
train__loader = _make_loader(train__dataset, shuffle=True)
train_loader = _make_loader(train_dataset, shuffle=True)
val_loader = _make_loader(val_dataset, shuffle=False)
test_loader = _make_loader(test_dataset, shuffle=False)


# #### MLP
d_out = 1  # one regression output
d_in = X_train_.shape[1]  # number of feature columns


def MLP_opt(trial):
    """Optuna objective for the MLP: mean validation CRPS (lower is better).

    Samples an architecture (``n_blocks``, ``d_block``, ``dropout``) and Adam
    settings (``learning_rate``, ``weight_decay``), trains on the reduced
    training loader with early stopping against the validation loader, and
    scores the validation predictions with a Gaussian CRPS. The Gaussian is
    centred on the point prediction; its standard deviation is the standard
    deviation of the validation residuals, shared by all points.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample hyperparameters. The value returned by ``train``
        is additionally stored under the parameter name ``'n_epochs'``.

    Returns
    -------
    float
        Mean CRPS over the validation set.
    """
    # Reseed so every trial starts from the same initial RNG state.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)

    n_blocks = trial.suggest_int("n_blocks", 1, 5)
    d_block = trial.suggest_int("d_block", 10, 500)
    dropout = trial.suggest_float("dropout", 0, 1)

    MLP_model = MLP(
        d_in=d_in,
        d_out=d_out,
        n_blocks=n_blocks,
        d_block=d_block,
        dropout=dropout,
    )
    if torch.cuda.is_available():
        MLP_model = MLP_model.cuda()

    learning_rate = trial.suggest_float('learning_rate', 0.0001, 0.05, log=True)
    weight_decay = trial.suggest_float('weight_decay', 1e-8, 1e-3, log=True)
    optimizer = torch.optim.Adam(MLP_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    criterion = torch.nn.MSELoss()

    early_stopping = EarlyStopping(patience=PATIENCE, verbose=False)
    n_epochs = train(MLP_model, criterion, optimizer, N_EPOCHS, train__loader, val_loader, early_stopping)
    # Degenerate range (low == high): nothing is searched, the call only
    # records the value so it shows up in ``study.best_params``.
    # NOTE(review): presumably ``train`` returns the epoch count to reuse when
    # refitting -- confirm against utils.train.
    n_epochs = trial.suggest_int('n_epochs', n_epochs, n_epochs)

    # Point prediction. Switch to evaluation mode first so dropout is disabled
    # while predicting; whether the training helper already does this is not
    # visible here, and calling eval() again is harmless.
    MLP_model.eval()
    with torch.no_grad():
        predictions = [
            MLP_model(batch_X).reshape(-1).cpu().numpy()
            for batch_X, _ in val_loader
        ]
    y_val_hat_MLP = np.concatenate(predictions)

    # Estimate standard deviation of the prediction error (plain NumPy arrays
    # on both sides, matching the array used for scoring below).
    std_dev_error = np.std(y_val_np - y_val_hat_MLP)

    # crps_gaussian is called on whole arrays: one call scores every point.
    crps_values = crps_gaussian(y_val_np, mu=y_val_hat_MLP, sig=std_dev_error)

    return np.mean(crps_values)

# Hyperparameter search: seeded TPE sampler, minimising the objective value.
sampler_MLP = optuna.samplers.TPESampler(seed=seed)
study_MLP = optuna.create_study(direction='minimize', sampler=sampler_MLP)
study_MLP.optimize(MLP_opt, n_trials=N_TRIALS)

# Rebuild the network from the architecture parameters of the best trial.
_best_MLP = study_MLP.best_params
MLP_model = MLP(
    d_in=d_in,
    d_out=d_out,
    **{key: _best_MLP[key] for key in ('n_blocks', 'd_block', 'dropout')},
)
if torch.cuda.is_available():
    MLP_model = MLP_model.cuda()

# Training settings of the best trial; 'n_epochs' is the value the objective
# stored for that trial.
n_epochs = _best_MLP['n_epochs']
learning_rate = _best_MLP['learning_rate']
weight_decay = _best_MLP['weight_decay']
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(
    MLP_model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
)

# Refit on the full training loader for a fixed number of epochs.
train_no_early_stopping(MLP_model, criterion, optimizer, n_epochs, train_loader)

# Point prediction on the test set. Put the model in evaluation mode first so
# dropout is disabled during inference; whether the training helper already
# does this is not visible here, and calling eval() again is harmless.
MLP_model.eval()
with torch.no_grad():
    predictions = [
        MLP_model(batch_X).reshape(-1).cpu().numpy()
        for batch_X, _ in test_loader
    ]

y_test_hat_MLP = np.concatenate(predictions)

# Estimate standard deviation of the prediction error (plain NumPy arrays on
# both sides, matching the array used for scoring below).
# NOTE(review): this spread is estimated from the test residuals themselves,
# i.e. it uses the test targets -- confirm this is the intended protocol.
std_dev_error = np.std(y_test_np - y_test_hat_MLP)

# Create a normal distribution for each prediction.
# Not used in this section; kept because it is a module-level name that code
# further down the file may rely on.
pred_distributions = [norm(loc=y_hat, scale=std_dev_error) for y_hat in y_test_hat_MLP]

# Calculate the CRPS for each prediction; crps_gaussian is called on whole
# arrays, so one call scores every point.
crps_values = crps_gaussian(y_test_np, mu=y_test_hat_MLP, sig=std_dev_error)

# Calculate the mean CRPS
crps_MLP = np.mean(crps_values)

print("CRPS MLP: ", crps_MLP)

Starting from Version 0.15.0 `download_splits` will default to ``False`` instead of ``True`` and be independent from `download_data`. To disable this message until version 0.15 explicitly set `download_splits` to a bool.
Starting from Version 0.15 `download_data`, `download_qualities`, and `download_features_meta_data` will all be ``False`` instead of ``True`` by default to enable lazy loading. To disable this message until version 0.15 explicitly set `download_data`, `download_qualities`, and `download_features_meta_data` to a bool while calling `get_dataset`.
[I 2024-02-20 17:25:27,801] A new study created in memory with name: no-name-f102c980-3a5b-4376-9fbd-645716f7c368


Train:   0%|          | 0/1000 [00:00<?, ?it/s]

[I 2024-02-20 17:25:38,486] Trial 0 finished with value: 61.8878542160432 and parameters: {'n_blocks': 4, 'd_block': 20, 'dropout': 0.6336482349262754, 'learning_rate': 0.010495405390719734, 'weight_decay': 3.1083868392602017e-06, 'n_epochs': 43}. Best is trial 0 with value: 61.8878542160432.


Early stopping


Train:   0%|          | 0/1000 [00:00<?, ?it/s]

[I 2024-02-20 17:26:56,364] Trial 1 finished with value: 367.74707685166317 and parameters: {'n_blocks': 2, 'd_block': 107, 'dropout': 0.7605307121989587, 'learning_rate': 0.0002860388842288948, 'weight_decay': 2.765025054332623e-08, 'n_epochs': 563}. Best is trial 0 with value: 61.8878542160432.


Early stopping


Train:   0%|          | 0/1000 [00:00<?, ?it/s]

[I 2024-02-20 17:28:12,094] Trial 2 finished with value: 10.012117630178597 and parameters: {'n_blocks': 4, 'd_block': 478, 'dropout': 0.003948266327914451, 'learning_rate': 0.002412079153798176, 'weight_decay': 0.00011563912803570738, 'n_epochs': 156}. Best is trial 2 with value: 10.012117630178597.


Early stopping


Train:   0%|          | 0/1000 [00:00<?, ?it/s]

[I 2024-02-20 17:28:37,723] Trial 3 finished with value: 687.1707511457577 and parameters: {'n_blocks': 4, 'd_block': 364, 'dropout': 0.29187606817063316, 'learning_rate': 0.029994721053560828, 'weight_decay': 3.7400629930578146e-05, 'n_epochs': 48}. Best is trial 2 with value: 10.012117630178597.


Early stopping


Train:   0%|          | 0/1000 [00:00<?, ?it/s]

[I 2024-02-20 17:28:53,694] Trial 4 finished with value: 34.32406426910931 and parameters: {'n_blocks': 3, 'd_block': 79, 'dropout': 0.3733407600514692, 'learning_rate': 0.006598821883612051, 'weight_decay': 1.618698156523955e-06, 'n_epochs': 77}. Best is trial 2 with value: 10.012117630178597.


Early stopping


Train:   0%|          | 0/1000 [00:00<?, ?it/s]

[I 2024-02-20 17:29:29,798] Trial 5 finished with value: 34.30768456132583 and parameters: {'n_blocks': 3, 'd_block': 313, 'dropout': 0.5131382425543909, 'learning_rate': 0.005693803629695728, 'weight_decay': 1.0120332166548561e-05, 'n_epochs': 125}. Best is trial 2 with value: 10.012117630178597.


Early stopping


Train:   0%|          | 0/1000 [00:00<?, ?it/s]

[I 2024-02-20 17:34:01,958] Trial 6 finished with value: 656.4224662058078 and parameters: {'n_blocks': 5, 'd_block': 266, 'dropout': 0.9086488808086682, 'learning_rate': 0.0007271242493848924, 'weight_decay': 2.8333273009960152e-08, 'n_epochs': 623}. Best is trial 2 with value: 10.012117630178597.


Early stopping


Train:   0%|          | 0/1000 [00:00<?, ?it/s]

[I 2024-02-20 17:36:09,539] Trial 7 finished with value: 815.261944553728 and parameters: {'n_blocks': 2, 'd_block': 65, 'dropout': 0.8286813263076767, 'learning_rate': 0.00013383563361780206, 'weight_decay': 1.3534298216580227e-05, 'n_epochs': 714}. Best is trial 2 with value: 10.012117630178597.


Early stopping


Train:   0%|          | 0/1000 [00:00<?, ?it/s]

[I 2024-02-20 17:36:25,372] Trial 8 finished with value: 9136.531093068048 and parameters: {'n_blocks': 3, 'd_block': 412, 'dropout': 0.1989475396788123, 'learning_rate': 0.020540606581753273, 'weight_decay': 5.731432699830849e-07, 'n_epochs': 3}. Best is trial 2 with value: 10.012117630178597.


Early stopping


Train:   0%|          | 0/1000 [00:00<?, ?it/s]

[I 2024-02-20 17:37:42,874] Trial 9 finished with value: 1190.8960749744613 and parameters: {'n_blocks': 4, 'd_block': 155, 'dropout': 0.8839364795611863, 'learning_rate': 0.0007560423904195248, 'weight_decay': 6.684662552536899e-08, 'n_epochs': 282}. Best is trial 2 with value: 10.012117630178597.


Early stopping


Train:   0%|          | 0/156 [00:00<?, ?it/s]

CRPS MLP:  11.790757381123871
