# Libraries 

In [1]:
from util_machine_learning import ensemble_design_generalization, multi_objective_design_generalization, expansion_design_generalization, likelihood_design_generalization
from util_machine_learning import ensemble_design_cross_validation, multi_objective_design_cross_validation, expansion_design_cross_validation, likelihood_design_cross_validation
from util_metrics import table_metrics
import pandas as pd
import numpy as np

# Dataset and configuration 

In [2]:
data = pd.read_csv('Data/dataset_odeint_3_inertial.csv')
number_of_trees = 15 

# Generalization 
##  Train and test designs

In [3]:
result_ensemble = ensemble_design_generalization(data, number_of_trees)
labels_train_mo, train_predictions_mo, labels_test_mo, test_predictions_mo, list_labels_mo = multi_objective_design_generalization(data, number_of_trees)
results_mo = [labels_train_mo, train_predictions_mo, labels_test_mo, test_predictions_mo, list_labels_mo]
result_expansion = expansion_design_generalization(data, number_of_trees)
result_likelihood = likelihood_design_generalization(data, number_of_trees)

## Metrics 

In [4]:
table_metrics(result_ensemble, result_expansion, result_likelihood, results_mo)

Test Set Metrics:
+------------------------+----------+---------------------+---------------------+
|         Model          | Accuracy |         MAE         |        RMSE         |
+------------------------+----------+---------------------+---------------------+
|     Ensemble Mean      |  0.4175  |       0.06825       | 0.10072652745594544 |
|    Ensemble Voting     |  0.8295  |       0.1705        | 0.4129164564412516  |
|  Multi-objective Mean  |  0.417   | 0.06833333333333332 | 0.10065756029451758 |
| Multi-objective Voting |  0.8325  |       0.1675        | 0.40926763859362253 |
|       Expansion        |   0.0    | 0.29641666666666666 | 0.29641666666666666 |
|       Likelihood       |  917.0   | 0.06590555555555555 | 0.09408674090866795 |
+------------------------+----------+---------------------+---------------------+
Train Set Metrics:
+------------------------+----------+-----------------------+----------------------+
|         Model          | Accuracy |          MAE        

# Cross validation 
## Train and test designs

In [3]:
result_ensemble = ensemble_design_cross_validation(data, number_of_trees)
result_mo, list_labels_mo = multi_objective_design_cross_validation(data, number_of_trees)
result_expansion = expansion_design_cross_validation(data, number_of_trees)
result_likelihood = likelihood_design_cross_validation(data, number_of_trees)

## Metrics

In [4]:
from util_metrics import calculate_overall_metrics, process_ensemble_multi_objective

In [6]:


print('------------------OVERALL METRICS - EXPANSION----------------------------------')
print('Train Metrics:')
calculate_overall_metrics(result_expansion[result_expansion['Set'] == 'Train']['Label'].tolist(),
                          result_expansion[result_expansion['Set'] == 'Train']['Predicted'].tolist())
print('Test Metrics:')
calculate_overall_metrics(result_expansion[result_expansion['Set'] == 'Test']['Label'].tolist(),
                          result_expansion[result_expansion['Set'] == 'Test']['Predicted'].tolist())

print('------------------OVERALL METRICS - LIKELIHOOD----------------------------------')
print('Train Metrics:')
calculate_overall_metrics(result_likelihood[result_likelihood['Set'] == 'Train']['Label'].tolist(),
                          result_likelihood[result_likelihood['Set'] == 'Train']['Predicted'].tolist(),
                          binary_classification=False)
print('Test Metrics:')
calculate_overall_metrics(result_likelihood[result_likelihood['Set'] == 'Test']['Label'].tolist(),
                          result_likelihood[result_likelihood['Set'] == 'Test']['Predicted'].tolist(),
                          binary_classification=False)

# Calcular las métricas para los conjuntos de prueba y entrenamiento por diseño
print('------------------OVERALL METRICS - ENSEMBLE----------------------------------')
process_ensemble_multi_objective([result_ensemble], 'Ensemble')

print('------------------OVERALL METRICS - MULTI-OBJECTIVE----------------------------------')
process_ensemble_multi_objective(result_mo, 'Multi-objective', binary_classification=False)

print('------------------OVERALL METRICS - EXPANSION----------------------------------')
calculate_overall_metrics(np.vstack(result_expansion[result_expansion['Set'] == 'Train']['Label'].tolist()).flatten(),
                          np.vstack(result_expansion[result_expansion['Set'] == 'Train']['Predicted'].tolist()).flatten())
calculate_overall_metrics(np.vstack(result_expansion[result_expansion['Set'] == 'Test']['Label'].tolist()).flatten(),
                          np.vstack(result_expansion[result_expansion['Set'] == 'Test']['Predicted'].tolist()).flatten())

print('------------------OVERALL METRICS - LIKELIHOOD----------------------------------')
calculate_overall_metrics(np.vstack(result_likelihood[result_likelihood['Set'] == 'Train']['Label'].tolist()).flatten(),
                          np.vstack(result_likelihood[result_likelihood['Set'] == 'Train']['Predicted'].tolist()).flatten(),
                          binary_classification=False)
calculate_overall_metrics(np.vstack(result_likelihood[result_likelihood['Set'] == 'Test']['Label'].tolist()).flatten(),
                          np.vstack(result_likelihood[result_likelihood['Set'] == 'Test']['Predicted'].tolist()).flatten(),
                          binary_classification=False)


------------------OVERALL METRICS - EXPANSION----------------------------------
Train Metrics:
Accuracy 0.6985645833333333
MAE 0.3014354166666666
RMSE 0.5490310530144773
Test Metrics:
Accuracy 0.6115833333333334
MAE 0.38841666666666674
RMSE 0.6232277230134029
------------------OVERALL METRICS - LIKELIHOOD----------------------------------
Train Metrics:
Accuracy 0.934175
MAE 0.018561527777777802
RMSE 0.02730790746697297
Test Metrics:
Accuracy 0.6251
MAE 0.049158333333333346
RMSE 0.0688857581972901
------------------OVERALL METRICS - EXPANSION----------------------------------


InvalidParameterError: The 'y_true' parameter of accuracy_score must be an array-like or a sparse matrix. Got 1 instead.

In [7]:
result_ensemble.columns

Index(['Fold', 'Label name', 'Label', 'Predicted', 'Set',
       'Feature importance'],
      dtype='object')

In [8]:
result_mo

[{'Fold': 1,
  'Labels_train':       RP_dynamical_threshold  RP_mach_number  RP_acoustic_emissions  \
  1                          1               0                      1   
  2                          0               1                      0   
  4                          0               1                      0   
  5                          0               1                      0   
  6                          0               1                      0   
  ...                      ...             ...                    ...   
  9994                       0               1                      0   
  9995                       0               1                      0   
  9996                       0               1                      1   
  9997                       0               1                      1   
  9999                       0               1                      1   
  
        RP_expansion_radius  KM_dynamical_threshold  KM_mach_number  \
  1                  