In [1]:
#!pip uninstall -y MLBenchmarks && pip install git+https://github.com/rcpsilva/MLBenchmarks@main

In [17]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, HistGradientBoostingRegressor
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.decomposition import PCA
from sklearn.preprocessing import PolynomialFeatures
from MLBenchmarks.benchmarking_methods  import load_regression_datasets, run_cross_dataset_benchmark_models, load_specific_datasets
from MLBenchmarks.regression_datasets_loaders import load_spm_demagnetization_analytical

In [18]:
datasets = load_specific_datasets(['load_spm_demagnetization_analytical','load_spm_demagnetization_FEM'])

Running load_spm_demagnetization_FEM ...
Running load_spm_demagnetization_analytical ...


In [21]:
# Create tabnet
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor
import torch
from sklearn.model_selection import train_test_split

# define the model
tabnet = TabNetRegressor(optimizer_fn=torch.optim.Adam,
                       scheduler_params={"step_size":10, 
                                         "gamma":0.9},
                       scheduler_fn=torch.optim.lr_scheduler.StepLR,
                      )

dataset = load_spm_demagnetization_analytical()
X = dataset['data']
y = dataset['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

tabnet.fit(
    X_train,y_train,
    eval_set=[(X_train, y_train), (X_test, y_test)],
    eval_name=['train', 'test'],
    eval_metric=['mae','rmse'],
)  

epoch 0  | loss: 17.23214| train_mae: 20.41526| train_rmse: 28.74273| test_mae: 20.2621 | test_rmse: 28.85676|  0:00:00s
epoch 1  | loss: 6.33205 | train_mae: 15.03458| train_rmse: 17.80155| test_mae: 15.00564| test_rmse: 17.66862|  0:00:01s
epoch 2  | loss: 3.84964 | train_mae: 9.95161 | train_rmse: 11.63141| test_mae: 9.93508 | test_rmse: 11.59449|  0:00:01s
epoch 3  | loss: 3.11514 | train_mae: 8.85134 | train_rmse: 10.51914| test_mae: 8.76839 | test_rmse: 10.4081 |  0:00:02s
epoch 4  | loss: 2.44551 | train_mae: 8.07292 | train_rmse: 9.34721 | test_mae: 7.90293 | test_rmse: 9.16986 |  0:00:02s
epoch 5  | loss: 2.29441 | train_mae: 5.95344 | train_rmse: 7.05739 | test_mae: 5.80124 | test_rmse: 6.86128 |  0:00:03s
epoch 6  | loss: 1.94214 | train_mae: 6.05291 | train_rmse: 7.3489  | test_mae: 5.80998 | test_rmse: 7.03121 |  0:00:03s
epoch 7  | loss: 2.07131 | train_mae: 3.9985  | train_rmse: 5.09701 | test_mae: 3.92312 | test_rmse: 5.05385 |  0:00:03s
epoch 8  | loss: 1.70225 | train

In [26]:
models = {'LR':LinearRegression(),
          'DT':DecisionTreeRegressor(),
          'RF':RandomForestRegressor(),
          'GB':GradientBoostingRegressor(),
          'HGB':HistGradientBoostingRegressor()}

In [27]:
metrics = ['neg_mean_absolute_percentage_error','neg_mean_absolute_error','neg_root_mean_squared_error','explained_variance'] # accepts scikit-learn metrics

In [28]:
import warnings
warnings.filterwarnings("ignore")
output_json = 'regression_benchmarks.json'
res = run_cross_dataset_benchmark_models(models, datasets, metrics, output_json, cv=10)

100%|██████████| 2/2 [00:00<00:00, 14.98it/s]
100%|██████████| 2/2 [00:01<00:00,  1.61it/s]
100%|██████████| 2/2 [01:09<00:00, 34.51s/it]
100%|██████████| 2/2 [00:33<00:00, 16.62s/it]
100%|██████████| 2/2 [00:18<00:00,  9.40s/it]
100%|██████████| 5/5 [02:02<00:00, 24.50s/it]


In [29]:
import numpy as np
models = list(res.keys())
datasets = list(res[models[0]].keys())
metrics = list(res[models[0]][datasets[0]].keys())

print(models)
print(datasets)
print(metrics)

for dataset in datasets:
    print(f'{dataset}')
    for metric in metrics[3:4]: # assesing neg_mean_absolute_error
        print(f'\t{metric}')
        for model in models:
            print(f'\t\t{model:>35}:\t {np.abs(np.mean(res[model][dataset][metric])):.3f} \t +- {np.std(res[model][dataset][metric]):.3f}')

['LR', 'DT', 'RF', 'GB', 'HGB']
['load_spm_demagnetization_FEM', 'load_spm_demagnetization_analytical']
['fit_time', 'score_time', 'test_neg_mean_absolute_percentage_error', 'test_neg_mean_absolute_error', 'test_neg_root_mean_squared_error', 'test_explained_variance', 'memory_usage(MB)']
load_spm_demagnetization_FEM
	test_neg_mean_absolute_error
		                                 LR:	 0.631 	 +- 0.019
		                                 DT:	 0.163 	 +- 0.157
		                                 RF:	 0.185 	 +- 0.077
		                                 GB:	 0.311 	 +- 0.023
		                                HGB:	 0.225 	 +- 0.039
load_spm_demagnetization_analytical
	test_neg_mean_absolute_error
		                                 LR:	 0.812 	 +- 0.021
		                                 DT:	 0.258 	 +- 0.255
		                                 RF:	 0.192 	 +- 0.085
		                                 GB:	 0.239 	 +- 0.029
		                                HGB:	 0.159 	 +- 0.036
