In [132]:
import os
import pickle

from glob import glob
from itertools import product

import numpy as np
import pandas as pd

from interval import interval, inf

from brb.attr_input import AttributeInput
from brb.brb import csv2BRB

# Read the Rule Base and create the Expert System

In [133]:
# Belief rule base (CSV export) from which the expert system is built.
# Other rule bases that were kept commented out next to this one:
#   'csv_rulebases/csv_ML_BeliefRuleBase_v5.csv_spec_refvals*ant_imp--scaled.csv'
#   'csv_rulebases/csv_HPO_BeliefRuleBase_v11.csv_spec_refvals*ant_imp--scaled.csv'
filename = 'csv_HPO_BeliefRuleBase_wKO_v15.csv_RefVals_AntImp-1Mglobscaled.csv'
filepath = os.path.join('csv_rulebases', filename)

# Fail early if the rule base file is missing
assert os.path.exists(filepath), "rulebase doesn't exist"

# Create the model from the rules CSV. The prefixes are presumably the
# column-name prefixes of antecedents, consequents and deltas in that file
# (TODO confirm against csv2BRB).
model = csv2BRB(filepath, antecedents_prefix='A_', consequents_prefix='D_', deltas_prefix='del_')

In [134]:
# Show the names of the attributes loaded from the rule base (all carry the 'A_' prefix)
model.U_names

['A_UR: quality demands',
 "A_User's programming ability",
 'A_UR: need for model transparency',
 'A_UR: Availability of a well documented library',
 'A_UR: Computer operating system',
 'A_Hardware: Number of workers/kernels for parallel computing',
 'A_Production application area',
 'A_Number of maximum function evaluations/ trials budget',
 'A_Running time per trial [s]',
 'A_Total Computing Time [s]',
 'A_Machine Learning Algorithm',
 'A_Obtainability of good approximate',
 'A_Supports parallel evaluations',
 'A_Dimensionality of HPs',
 'A_Conditional HP space',
 'A_HP datatypes',
 'A_Availability of a warm-start HP configuration',
 'A_Obtainability of gradients',
 'A_Input Data',
 'A_#Instances training dataset',
 'A_Ratio training to test dataset',
 'A_Noise in dataset',
 'A_Training Technique',
 'A_ML task',
 'A_Detailed ML task']

In [135]:
# Show the referential values of attribute 10. In the U_names listing, position 10
# is 'A_Machine Learning Algorithm' -- assumes model.U is ordered like model.U_names.
model.U[10].referential_values

['Synthetic Function',
 'Deep Neural Net',
 'Decision Tree',
 'Convolutional Neural Net',
 'Gradient Boosting Machine',
 'Multilayer Perceptron',
 'Logistic Regression',
 'XGBoost',
 'Latent Dirichlet Allocation (HPOlib)',
 'Random Forest',
 'Support Vector Machine',
 'LSTM']

# Max's results

In [136]:
# Benchmark selection
#   dataset: 'turbofan', 'scania' or 'sensor'
#   ml_task: 'Regression', 'Binary Classification' or 'Multiclass Classification'
dataset = 'scania'
ml_task = 'Binary Classification'

# The metrics of each benchmark are stored as max_results/expanded_metrics_<dataset>.csv
file_name = 'expanded_metrics_{}.csv'.format(dataset)
maxr_fpath = os.path.join('max_results', file_name)

# The first CSV column is used as the row index
maxr = pd.read_csv(maxr_fpath, index_col=0)

maxr.columns

Index(['Trial-ID', 'HPO-library', 'HPO-method', 'ML-algorithm', 'Runs',
       'Evaluations', 'Workers', 'GPU', 'Warmstart', 'Wall clock time [s]',
       't outperform default [s]', 'Mean (final validation loss)',
       'Validation baseline', 'Area under curve (AUC)',
       'Mean (final test loss)', 'Test loss ratio (default / best)',
       'Test baseline', 'Interquartile range (final test loss)',
       't best configuration [s]', 'Generalization error',
       'Evaluations for best configuration', 'Crashes', '# training instances',
       '# training features', '# test instances', '# test features', 'dataset',
       '# cont. HPs', '# int. HPs', '# cat. HPs', 'loss_metric'],
      dtype='object')

In [137]:
# Preview the first rows of the benchmark metrics
maxr.head()

Unnamed: 0,Trial-ID,HPO-library,HPO-method,ML-algorithm,Runs,Evaluations,Workers,GPU,Warmstart,Wall clock time [s],...,Crashes,# training instances,# training features,# test instances,# test features,dataset,# cont. HPs,# int. HPs,# cat. HPs,loss_metric
0,e3c90f48-3ecb-426a-94d1-769dd6794e16,robo,Bohamiann,AdaBoostRegressor,5,200.0,1,False,False,415.614018,...,0,16584,17,4147,17,turbofan,1,2,1,RUL-loss
1,e3c90f48-3ecb-426a-94d1-769dd6794e16,robo,Bohamiann,AdaBoostRegressor,5,,1,False,False,40.0,...,0,16584,17,4147,17,turbofan,1,2,1,RUL-loss
2,a53df618-2580-48d5-b647-e741a08d73cc,hpbandster,BOHB,AdaBoostRegressor,10,200.0,1,False,False,415.614018,...,0,16584,17,4147,17,turbofan,1,2,1,RUL-loss
3,a53df618-2580-48d5-b647-e741a08d73cc,hpbandster,BOHB,AdaBoostRegressor,10,,1,False,False,40.0,...,0,16584,17,4147,17,turbofan,1,2,1,RUL-loss
4,65f01428-62bb-4163-b4bb-3e8aba31aa52,optuna,CMA-ES,AdaBoostRegressor,10,200.0,1,False,False,415.614018,...,0,16584,17,4147,17,turbofan,1,2,1,RUL-loss


# Input generation for the Expert System (Use Cases)

In [138]:
# Mapping between BRB and BM notation

# Map ML algorithms: BRB -> BM
# The benchmark uses different estimator names for regression and for
# classification, so the map is chosen according to the selected ML task.

if ml_task == 'Regression':

    ml_brb2bm_map = {
        'Ada Boost': 'AdaBoostRegressor',
        'Decision Tree': 'DecisionTreeRegressor',
        'Support Vector Machine': 'SVR',
        'KNN': 'KNNRegressor',
        'Gradient Boosting Machine': 'LGBMRegressor',
        'Random Forest': 'RandomForestRegressor',
        'XGBoost': 'XGBoostRegressor',
        'ElasticNet': 'ElasticNet',
        'Multilayer Perceptron': 'KerasRegressor'
    }

elif ml_task in ('Binary Classification', 'Multiclass Classification'):

    ml_brb2bm_map = {
        'Ada Boost': 'AdaBoostClassifier',
        'Decision Tree': 'DecisionTreeClassifier',
        'Support Vector Machine': 'SVC',
        'KNN': 'KNNClassifier',
        'Gradient Boosting Machine': 'LGBMClassifier',
        'Random Forest': 'RandomForestClassifier',
        'XGBoost': 'XGBoostClassifier',
        'Logistic Regression': 'LogisticRegression',
        'Multilayer Perceptron': 'KerasClassifier',
        'NaiveBayes': 'NaiveBayes'
    }

else:
    # ValueError is a subclass of Exception, so handlers written for the
    # previous generic Exception keep working; the offending value is reported.
    raise ValueError('Unknown ML task! Got {!r}'.format(ml_task))

# Map ML algorithms: BM -> BRB
ml_bm2brb_map = {v: k for k, v in ml_brb2bm_map.items()}

# HPO technique names: BRB notation -> BM notation
hpo_brb2bm_map = dict([
    ('BOHAMIANN', 'Bohamiann'),
    ('BOHB', 'BOHB'),
    ('CMA-ES', 'CMA-ES'),
    ('FABOLAS', 'Fabolas'),
    ('GPBO', 'GPBO'),
    ('HB', 'Hyperband'),
    ('Random Search', 'RandomSearch'),
    ('SMAC', 'SMAC'),
    ('TPE', 'TPE'),
    ('Default Values', 'Default Values'),
])

# HPO technique names: BM notation -> BRB notation
hpo_bm2brb_map = dict(zip(hpo_brb2bm_map.values(), hpo_brb2bm_map.keys()))

# Warm-start notation: BRB ('yes' / 'no') -> BM (boolean flag)
wst_brb2bm_map = dict(yes=True, no=False)

# Warm-start notation: BM -> BRB
wst_bm2brb_map = dict(zip(wst_brb2bm_map.values(), wst_brb2bm_map.keys()))

# HP data types of every benchmark estimator (BM notation)
bmalgo2paratype_map = {
    'RandomForestRegressor': '[continuous, discrete, nominal]', 'RandomForestClassifier': '[continuous, discrete, nominal]',
    'MLPRegressor': '[discrete, nominal]', 'MLPClassifier': '[discrete, nominal]',
    'SVR': '[continuous, nominal]', 'SVC': '[continuous, nominal]',
    'KerasRegressor': '[continuous, discrete, nominal]', 'KerasClassifier': '[continuous, discrete, nominal]',
    'XGBoostClassifier': '[continuous, discrete, nominal]', 'XGBoostRegressor': '[continuous, discrete, nominal]',
    'AdaBoostRegressor': '[continuous, discrete, nominal]', 'AdaBoostClassifier': '[continuous, discrete, nominal]',
    'DecisionTreeRegressor': '[continuous, discrete]', 'DecisionTreeClassifier': '[continuous, discrete]',
    'LinearRegression': '[nominal]',
    'KNNRegressor': '[discrete, nominal]', 'KNNClassifier': '[discrete, nominal]',
    'LGBMRegressor': '[continuous, discrete]', 'LGBMClassifier': '[continuous, discrete]',
    'LogisticRegression': '[continuous, discrete, nominal]',
    'ElasticNet': '[continuous, discrete, nominal]',
    'NaiveBayes': '[continuous]',
}

# Conditional ('yes') / non-conditional ('no') HP space of every benchmark estimator.
# The map covers the same estimators, in the same order, as bmalgo2paratype_map;
# only the two XGBoost estimators are marked 'yes'.
bmalgo2cond_map = dict.fromkeys(bmalgo2paratype_map, 'no')
bmalgo2cond_map.update(XGBoostClassifier='yes', XGBoostRegressor='yes')

# Antecedents that are constant per dataset. The datasets differ only in the
# detailed ML task, the production application area and the ML task; the input
# data type and the training/test ratio are the same for all of them.
datset2constant_map = {
    dataset_name: {
        "Detailed ML task": detailed_task,
        "Production application area": application_area,
        "Input Data": 'Tabular Data',
        "Ratio training to test dataset": 4,
        "ML task": task,
    }
    for dataset_name, detailed_task, application_area, task in [
        ('turbofan', 'Prediction of Remaining Useful Lifetime', 'Predictive Maintenance', 'Regression'),
        ('scania', 'Prediction of Part Failure', 'Predictive Maintenance', 'Binary Classification'),
        ('sensor', 'Prediction of Product Quality', 'Predictive Quality', 'Multiclass Classification'),
    ]
}

In [139]:
# Create a DataFrame to store the use cases for the evaluation of the BRBES
# (each row corresponds to a single use case). The frame uses the index of
# `maxr`, so every row describes the benchmark experiment with the same label.
# NOTE: the column order matters -- the de-duplication cell selects its columns
# by position -- so new columns must not be inserted between existing ones.
df_use_case = pd.DataFrame(index=maxr.index)

# Constant antecedents of the selected dataset
dataset_constants = datset2constant_map[dataset]

# Antecedents derived from the benchmark metrics
df_use_case['Machine Learning Algorithm'] = maxr['ML-algorithm'].map(ml_bm2brb_map)
df_use_case['Hardware: Number of workers/kernels for parallel computing'] = maxr['Workers']
df_use_case['Availability of a warm-start HP configuration'] = maxr['Warmstart'].map(wst_bm2brb_map)
df_use_case['Number of maximum function evaluations/ trials budget'] = maxr['Evaluations']
# Average time per trial; NaN wherever the number of evaluations is missing
df_use_case['Running time per trial [s]'] = maxr['Wall clock time [s]'] / maxr['Evaluations']
df_use_case['Total Computing Time [s]'] = maxr['Wall clock time [s]']
df_use_case['Dimensionality of HPs'] = maxr['# cont. HPs'] + maxr['# int. HPs'] + maxr['# cat. HPs']
df_use_case['HP datatypes'] = maxr['ML-algorithm'].map(bmalgo2paratype_map)
df_use_case['Conditional HP space'] = maxr['ML-algorithm'].map(bmalgo2cond_map)
df_use_case["Detailed ML task"] = dataset_constants["Detailed ML task"]
df_use_case["Production application area"] = dataset_constants["Production application area"]
df_use_case['Input Data'] = dataset_constants["Input Data"]
df_use_case['#Instances training dataset'] = maxr['# training instances']
df_use_case['Ratio training to test dataset'] = dataset_constants["Ratio training to test dataset"]
df_use_case['ML task'] = dataset_constants["ML task"]

# fixed antecedents (cannot yet be derived from the metrics .csv file)
n_use_cases = len(maxr)
df_use_case["UR: quality demands"] = 'high'
df_use_case["User's programming ability"] = ''
df_use_case["UR: Computer operating system"] = 'Linux'
# Unknown yes/no antecedents get a uniform belief distribution. Every row gets
# its OWN dict: `[d] * n` would store the same object in all rows, so mutating
# one row's distribution would silently change every other row as well.
df_use_case["Obtainability of good approximate"] = [{'yes': 0.5, 'no': 0.5} for _ in range(n_use_cases)]
df_use_case["Supports parallel evaluations"] = [{'yes': 0.5, 'no': 0.5} for _ in range(n_use_cases)]
df_use_case["Obtainability of gradients"] = [{'yes': 0.5, 'no': 0.5} for _ in range(n_use_cases)]
df_use_case["Noise in dataset"] = [{'yes': 0.5, 'no': 0.5} for _ in range(n_use_cases)]
df_use_case["Training Technique"] = "Offline"
df_use_case["UR: need for model transparency"] = ''
df_use_case["UR: Availability of a well documented library"] = ''


# TODO: Iterate over quality demands, robustness, UR antecedents, etc. to create new use cases

In [140]:
# Remove duplicate use cases. Duplicates are detected on every column except the
# last seven, which include the dict-valued belief columns (presumably left out
# because dicts cannot be hashed -- TODO confirm). The index is renumbered
# 0..n-1 afterwards.
col_subset = df_use_case.columns[:-7]
df_use_case = df_use_case.drop_duplicates(subset=col_subset, ignore_index=True)

In [141]:
# Preview the first de-duplicated use cases
df_use_case.head()

Unnamed: 0,Machine Learning Algorithm,Hardware: Number of workers/kernels for parallel computing,Availability of a warm-start HP configuration,Number of maximum function evaluations/ trials budget,Running time per trial [s],Total Computing Time [s],Dimensionality of HPs,HP datatypes,Conditional HP space,Detailed ML task,...,UR: quality demands,User's programming ability,UR: Computer operating system,Obtainability of good approximate,Supports parallel evaluations,Obtainability of gradients,Noise in dataset,Training Technique,UR: need for model transparency,UR: Availability of a well documented library
0,Ada Boost,1,no,200.0,2.07807,415.614018,4,"[continuous, discrete, nominal]",no,Prediction of Remaining Useful Lifetime,...,high,,Linux,"{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}",Offline,,
1,Ada Boost,1,no,,,40.0,4,"[continuous, discrete, nominal]",no,Prediction of Remaining Useful Lifetime,...,high,,Linux,"{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}",Offline,,
2,Ada Boost,1,yes,200.0,2.090257,418.051303,4,"[continuous, discrete, nominal]",no,Prediction of Remaining Useful Lifetime,...,high,,Linux,"{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}",Offline,,
3,Ada Boost,1,yes,,,10.0,4,"[continuous, discrete, nominal]",no,Prediction of Remaining Useful Lifetime,...,high,,Linux,"{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}",Offline,,
4,Ada Boost,1,yes,,,80.0,4,"[continuous, discrete, nominal]",no,Prediction of Remaining Useful Lifetime,...,high,,Linux,"{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}",Offline,,


# Run the BRBES for the BM use cases

In [142]:
# Run the BRB model for the use cases from the BM study and compute the beliefs
# and rankings of the HPO techniques.

# One record (dict) per use case; the frames are built once after the loop
# instead of appending row by row (DataFrame.append is quadratic and no longer
# exists in pandas >= 2.0).
belief_records = []
rank_records = []

for _, row in df_use_case.iterrows():

    # The model is addressed with 'A_'-prefixed attribute names
    use_case_dict = {'A_' + k: v for k, v in row.to_dict().items()}

    X = AttributeInput(use_case_dict)

    # run brb model; k[2:] drops the first two characters of each consequent
    # name (the 'D_' consequents prefix)
    belief_degrees = model.run(X)
    belief_degrees = {k[2:]: v for k, v in zip(model.D, belief_degrees)}
    belief_records.append(belief_degrees)

    # Order the techniques by descending belief. Sorting the (name, belief)
    # pairs keeps techniques with identical beliefs apart; inverting the dict to
    # {belief: name} would let them overwrite each other and drop their ranks.
    hpo_beliefs = sorted(belief_degrees.items(), key=lambda item: item[1], reverse=True)

    # BEWARE: THE RANK IS 0-BASED
    hpo_ranks = {hpo_name: rank for rank, (hpo_name, _belief) in enumerate(hpo_beliefs)}
    rank_records.append(hpo_ranks)

# Columns are sorted alphabetically so that both tables have the same, stable layout
df_belief = pd.DataFrame(belief_records).sort_index(axis='columns')
df_rank = pd.DataFrame(rank_records).sort_index(axis='columns')

df_use_case.to_csv('use_cases.csv')
df_belief.to_csv('hpo_beliefs.csv')
df_rank.to_csv('hpo_ranks.csv')


In [143]:
# Preview the belief degrees (one row per use case, one column per HPO technique)
df_belief.head()

Unnamed: 0,ASHA,BOHAMIANN,BOHB,CMA-ES,DNGO,Default Values,FABOLAS,GPBO,Grid Search,HB,HB-LCNet,HOAG,MTBO,PBT,Random Search,SHA,SMAC,TPE
0,0.051417,0.018876,0.138582,0.04205,0.008856,0.064043,0.019548,0.072556,0.015336,0.142985,0.028152,0.007255,0.005377,0.056019,0.05376,0.072904,0.11793,0.07169
1,0.05506,0.018346,0.136457,0.03597,0.007344,0.104301,0.032509,0.063428,0.015297,0.158395,0.031254,0.006406,0.001878,0.051676,0.042812,0.078816,0.097167,0.048965
2,0.050827,0.020221,0.138673,0.043166,0.008757,0.063304,0.019327,0.07336,0.015163,0.143025,0.027833,0.007174,0.005316,0.055375,0.053142,0.072057,0.118252,0.072503
3,0.054804,0.019988,0.141474,0.037561,0.007311,0.103806,0.03068,0.062865,0.015227,0.1577,0.03111,0.006377,0.00187,0.051436,0.040586,0.078445,0.09643,0.048473
4,0.055383,0.020195,0.139249,0.037953,0.007386,0.087795,0.022565,0.067707,0.015385,0.161372,0.031435,0.006443,0.001889,0.051978,0.045097,0.079285,0.101786,0.0531


In [144]:
# HPO technique with the highest belief degree for each use case
df_belief.idxmax(axis='columns')

0       HB
1       HB
2       HB
3       HB
4       HB
5       HB
6       HB
7       HB
8       HB
9       HB
10      HB
11      HB
12      HB
13      HB
14      HB
15      HB
16      HB
17      HB
18      HB
19      HB
20      HB
21      HB
22      HB
23      HB
24      HB
25      HB
26      HB
27      HB
28      HB
29      HB
30      HB
31      HB
32      HB
33      HB
34      HB
35      HB
36      HB
37    BOHB
38      HB
39    BOHB
40      HB
41      HB
42      HB
43    GPBO
44    GPBO
45    GPBO
46    GPBO
47    GPBO
48    GPBO
49    GPBO
dtype: object

# Translation to max results

In [145]:
# Compare the BRBES recommendation of every use case with the benchmark results.
# For each use case the losses of all HPO techniques in the matching experiments
# are min-max scaled (0 = best loss, 1 = worst finite loss) and the scaled loss
# of the recommended technique is recorded next to RandomSearch and the defaults.

use_test_loss = False  # True: rank on the test loss where it is available

brb_scores = list()  # Stores the scaled score achieved by the BRBES in each use case
rs_scores = list()  # Stores the scaled score achieved by RS in each use case
dv_scores = list()  # Stores the scaled score achieved by the Default HPs in each use case

summary_df = df_use_case.copy(deep=True)

for idx, use_case in df_use_case.iterrows():

    # Identify the experiments (from the benchmarking study), that correspond to this specific use case
    # TODO: Specify additional antecedent values here  -> e.g. quality demands, UR antecedents, robustness, etc.(Be careful: e.g. a 'high' robustness value should also be selected, if  only a 'low' robustness is required)
    # NOTE(review): the two dict lookups raise KeyError for a use case whose ML
    # algorithm or warm-start value could not be mapped (NaN).
    exp = maxr.loc[(maxr['ML-algorithm'] == ml_brb2bm_map[use_case['Machine Learning Algorithm']]) &
        (maxr['Workers'] == use_case['Hardware: Number of workers/kernels for parallel computing']) &
        (maxr['Warmstart'] == wst_brb2bm_map[use_case['Availability of a warm-start HP configuration']]) &
        (maxr['Wall clock time [s]'] == use_case['Total Computing Time [s]']), :]

    if len(exp) == 0:
        continue

    # Rank on the test loss only if it was requested AND is available for every
    # matching experiment; in all other cases rank on the validation loss.
    if use_test_loss and not exp['Mean (final test loss)'].isnull().any():
        loss_col, baseline_col = 'Mean (final test loss)', 'Test baseline'
    else:
        loss_col, baseline_col = 'Mean (final validation loss)', 'Validation baseline'

    # Copy before adding the baseline row, so the enlargement never acts on data
    # that is still tied to the intermediate frame.
    hpos = exp.set_index('HPO-method')[loss_col].copy()
    hpos.loc['Default Values'] = np.nanmean(exp[baseline_col])

    # Compute the scaled loss deviation for each HPO technique in this experiment
    # (NaNs are ignored for the minimum; NaNs and +inf are ignored for the maximum)
    loss_arr = hpos.to_numpy()
    min_value = np.nanmin(loss_arr)
    max_value = np.nanmax(loss_arr[loss_arr != np.inf])
    scaled_hpos = (hpos - min_value) / (max_value - min_value)

    # `idx` is used as a row position: this relies on df_use_case having a
    # 0..n-1 index and on df_belief holding its rows in the same order.
    # A row is a Series, so idxmax takes no axis argument (passing
    # axis='columns' raises on newer pandas versions).
    rec_hpo = df_belief.iloc[idx].idxmax()

    # NOTE(review): KeyError if the top-ranked technique has no entry in
    # hpo_brb2bm_map or was not part of the matching experiments.
    rec_hpo_bm = hpo_brb2bm_map[rec_hpo]
    rec_score = scaled_hpos.loc[rec_hpo_bm]

    brb_scores.append(rec_score)
    rs_scores.append(scaled_hpos.loc['RandomSearch'])
    dv_scores.append(scaled_hpos.loc['Default Values'])

    summary_df.loc[idx, 'BRBES Recommendation'] = rec_hpo_bm
    summary_df.loc[idx, 'Best HPO Technique'] = scaled_hpos.idxmin(axis=0)
    summary_df.loc[idx, 'Distance Value'] = rec_score

print('BRBES avg. score (stdev): \t\t{:.3f} ({:.2f})'.format(np.mean(brb_scores), np.std(brb_scores)))
print('RandomSearch avg. score (stdev): \t{:.3f} ({:.2f})'.format(np.mean(rs_scores), np.std(rs_scores)))
print('Default Values avg. score (stdev): \t{:.3f} ({:.2f})'.format(np.mean(dv_scores), np.std(dv_scores)))

# The file name records which loss the ranking was based on
if use_test_loss:
    summary_fname = dataset + '_brbes_results_val_and_test_loss.csv'
else:
    summary_fname = dataset + '_brbes_results_val_loss_only.csv'

summary_df.to_csv(os.path.join('./max_results', summary_fname))

BRBES avg. score (stdev): 		0.155 (0.23)
RandomSearch avg. score (stdev): 	0.171 (0.24)
Default Values avg. score (stdev): 	0.739 (0.39)
