In [1]:
from download_delgado.delgado_datasets import DownloadAndConvertDelgadoDatasets
from mlaut.data import Data
from mlaut.estimators.estimators import instantiate_default_estimators
from mlaut.experiments import Orchestrator
from mlaut.analyze_results import AnalyseResults
from download_delgado.delgado_datasets import DownloadAndConvertDelgadoDatasets


  from ._conv import register_converters as _register_converters


### Step 1: Download the datasets

In [2]:
# Build the Delgado downloader and call download_and_extract_datasets with
# verbose output switched off. The two returned objects, `datasets` and
# `metadata`, are later handed to `data.pandas_to_db`.
delgado = DownloadAndConvertDelgadoDatasets()
datasets, metadata = delgado.download_and_extract_datasets(verbose=False)

Error: Dataset Delgado_data/molec-biol-protein-second has a different number of arff files


### Step 2: Define Input and Output HDF5 objects

In [3]:
# Locations of the two HDF5 files used by the rest of the notebook.
input_hdf5_path = 'data/delgado.hdf5'
output_hdf5_path = 'data/classification.hdf5'

data = Data()
# Both files are opened with mode='a'.
# NOTE(review): presumably forwarded to h5py, where 'a' means read/write and
# create-if-missing -- confirm against Data.open_hdf5.
input_io = data.open_hdf5(input_hdf5_path, mode='a')
out_io = data.open_hdf5(output_hdf5_path, mode='a')

### Step 3: Save the datasets in the HDF5 database

In [4]:
# TODO (carried over from the original notes):
#   - the files io object needs to be used in the test orch;
#   - the object returned by this call still needs to be integrated in the code.

# Store the downloaded datasets and their metadata under the
# 'delgado_datasets/' location of the input HDF5 file.
data.pandas_to_db(
    save_loc_hdf5='delgado_datasets/',
    datasets=datasets,
    dts_metadata=metadata,
    input_io=input_io,
)

### Step 4: Split datasets

In [5]:
# List what is stored under 'delgado_datasets/' in the input file; the call
# returns two values, unpacked as the dataset names and their full paths.
dts_names_list, dts_names_list_full_path = data.list_datasets(
    hdf5_io=input_io,
    hdf5_group='delgado_datasets/',
)

# Split every listed dataset, passing the input file as `hdf5_in` and the
# output file as `hdf5_out`.
split_dts_list = data.split_datasets(
    hdf5_in=input_io,
    hdf5_out=out_io,
    dataset_paths=dts_names_list_full_path,
)

### Step 5: Instantiate estimator objects and the experiments orchestrator class.

In [6]:
# Only the default SVC estimator is requested for this run.
instantiated_models = instantiate_default_estimators(estimators=['SVC'])

# The orchestrator receives just the first two dataset names ([0:2]), so the
# experiment below covers two datasets rather than the whole collection.
orchest = Orchestrator(
    hdf5_input_io=input_io,
    hdf5_output_io=out_io,
    dts_names=dts_names_list[0:2],
    original_datasets_group_h5_path='delgado_datasets/',
)

### Step 6: Run the experiments

In [7]:
# Run the orchestrator with the instantiated estimators. The recorded output
# reports one "Training estimator ... on dataset ..." line per dataset.
orchest.run(modelling_strategies=instantiated_models)

** Training estimator: SVC on dataset: abalone. Datasets processed: 1/2 **
** Training estimator: SVC on dataset: acute_inflammation. Datasets processed: 2/2 **


### Step 7: Make predictions on the test sets

In [8]:
# Ask the orchestrator for predictions, pointing it at 'data/trained_models'
# and passing the same estimator objects that were given to `orchest.run`.
# NOTE(review): the output recorded for this cell is
# "KeyError: 'Unable to open object (component not found)'", i.e. the call
# failed in the saved run and the analysis cells below were never executed.
# TODO confirm which HDF5 group/path is missing before relying on this step.
orchest.predict_all(trained_models_dir='data/trained_models', estimators=instantiated_models)

KeyError: 'Unable to open object (component not found)'

### Step 8: Analyze the results

In [None]:
# Wire the analysis helper to both HDF5 files, naming the group that holds the
# original datasets and the group passed as the predictions location.
analyze = AnalyseResults(
    hdf5_output_io=out_io,
    hdf5_input_io=input_io,
    input_h5_original_datasets_group='delgado_datasets/',
    output_h5_predictions_group='experiments/predictions/',
)

# NOTE(review): the metric is mean squared error although the estimator is SVC
# and the output file is named classification.hdf5 -- confirm this is intended.
error_all_datasets = analyze.calculate_error_all_datasets(metric='mean_squared_error')

# Alternative call that was left disabled in the original notebook:
# observations = analyze.calculate_loss_all_datasets(input_h5_original_datasets_group='delgado_datasets/',
#                                     output_h5_predictions_group='experiments/predictions/',
#                                     metric='mean_squared_error')

#### t-test

In [None]:
# Run the t-test on the errors computed above. The call returns two values;
# the second one (presumably a DataFrame, going by the `_df` suffix -- confirm)
# is shown as the cell output.
t_test, t_test_df = analyze.t_test(error_all_datasets)
t_test_df

#### Sign test

In [None]:
# Run the sign test on the errors computed above and display the second of the
# two returned values as the cell output.
sign_test, sign_test_df = analyze.sign_test(error_all_datasets)
sign_test_df

#### t-test with Bonferroni correction

In [None]:
# Run the Bonferroni-corrected t-test on the errors computed above and display
# the second of the two returned values as the cell output.
t_test_bonferroni, t_test_bonferroni_df = analyze.t_test_with_bonferroni_correction(error_all_datasets)
t_test_bonferroni_df

#### Wilcoxon test

In [None]:
# Run the Wilcoxon test on the errors computed above and display the second of
# the two returned values as the cell output.
wilcoxon_test, wilcoxon_test_df = analyze.wilcoxon_test(error_all_datasets)
wilcoxon_test_df

#### Friedman test

In [None]:
# Run the Friedman test on the errors computed above and display the second of
# the two returned values as the cell output.
friedman_test, friedman_test_df = analyze.friedman_test(error_all_datasets)
friedman_test_df

#### Nemenyi test

In [None]:
# Run the Nemenyi test on the errors computed above. Unlike the other tests,
# this call's result is bound to a single name, which is then displayed.
# NOTE: the variable is spelled `nemeniy_test` (sic); the name is left unchanged.
nemeniy_test = analyze.nemenyi(error_all_datasets)
nemeniy_test

In [None]:
# NOTE(review): this cell repeats the preceding Nemenyi cell verbatim (same
# call, same variable); consider deleting one of the two copies.
nemeniy_test = analyze.nemenyi(error_all_datasets)
nemeniy_test