In [1]:
import os
import pickle

from glob import glob
from itertools import product

import numpy as np
import pandas as pd

from interval import interval, inf

from brb.attr_input import AttributeInput
from brb.brb import csv2BRB

# Read the Rule Base and create the Expert System

In [2]:
filename =  'csv_HPO_BeliefRuleBase_wKO_v16.csv_RefVals_AntImp-1Mglobscaled.csv'
filepath = os.path.join('csv_rulebases', filename)

assert os.path.exists(filepath), "rulebase doesn't exist"

# create model from rules.csv
model = csv2BRB(filepath,
                #'csv_rulebases/csv_ML_BeliefRuleBase_v5.csv_spec_refvals*ant_imp--scaled.csv',
                #'csv_rulebases/csv_HPO_BeliefRuleBase_v11.csv_spec_refvals*ant_imp--scaled.csv',
                antecedents_prefix='A_',
                consequents_prefix='D_',
                deltas_prefix='del_')

In [None]:
model.U_names

In [None]:
model.U[10].referential_values

# Max's results

In [3]:
dataset = 'turbofan'  # 'turbofan', 'scania', 'sensor
ml_task = 'Regression'  # 'Regression', 'Binary Classification', 'Multiclass Classification'

file_name = 'expanded_metrics_' + dataset + '.csv'
maxr_fpath = os.path.join('max_results', file_name)

maxr = pd.read_csv(maxr_fpath, index_col=0)

maxr.columns

Index(['Trial-ID', 'HPO-library', 'HPO-method', 'ML-algorithm', 'Runs',
       'Evaluations', 'Workers', 'GPU', 'Warmstart', 'Wall clock time [s]',
       't outperform default [s]', 'Mean (final validation loss)',
       'Validation baseline', 'Area under curve (AUC)',
       'Mean (final test loss)', 'Test loss ratio (default / best)',
       'Test baseline', 'Interquartile range (final test loss)',
       't best configuration [s]', 'Generalization error',
       'Evaluations for best configuration', 'Crashes', '# training instances',
       '# training features', '# test instances', '# test features', 'dataset',
       '# cont. HPs', '# int. HPs', '# cat. HPs', 'loss_metric', 'Robustness',
       'UR: Availability of a well documented library',
       'UR: Need for model transparency', 'User's programming ability'],
      dtype='object')

In [None]:
maxr.head()

# Input generation for the Expert System (Use Cases)

In [4]:
# Mapping between BRB and BM notation

# Map ML algorithms: BRB -> BM
# TODO: Needs to be expanded to include classification algorithms

if ml_task == 'Regression':
    
    ml_brb2bm_map = {
        'Ada Boost': 'AdaBoostRegressor',
        'Decision Tree': 'DecisionTreeRegressor',
        'Support Vector Machine': 'SVR',
        'KNN': 'KNNRegressor',
        'Gradient Boosting Machine': 'LGBMRegressor',
        'Random Forest': 'RandomForestRegressor',
        'XGBoost': 'XGBoostRegressor',
        'ElasticNet': 'ElasticNet',
        'Multilayer Perceptron': 'KerasRegressor'
    }

elif ml_task == 'Binary Classification' or ml_task == 'Multiclass Classification':

    ml_brb2bm_map = {
        'Ada Boost': 'AdaBoostClassifier',
        'Decision Tree': 'DecisionTreeClassifier',
        'Support Vector Machine': 'SVC',
        'KNN': 'KNNClassifier',
        'Gradient Boosting Machine': 'LGBMClassifier',
        'Random Forest': 'RandomForestClassifier',
        'XGBoost': 'XGBoostClassifier',
        'Logistic Regression': 'LogisticRegression',
        'Multilayer Perceptron': 'KerasClassifier',
        'NaiveBayes': 'NaiveBayes'
    }

else:
    raise Exception('Unknown ML task!')

# Map ML algorithms: BM -> BRB
ml_bm2brb_map = {v: k for k, v in ml_brb2bm_map.items()}

# Map HPO techniques: BRB -> BM
hpo_brb2bm_map = {
    'BOHAMIANN': 'Bohamiann',
    'BOHB': 'BOHB',
    'CMA-ES': 'CMA-ES',
    'FABOLAS': 'Fabolas',
    'GPBO': 'GPBO',
    'HB': 'Hyperband',
    'Random Search': 'RandomSearch',
    'SMAC': 'SMAC',
    'TPE': 'TPE',
    'Default Values': 'Default Values'
}

# Map HPO techniques: BM -> BRB
hpo_bm2brb_map = {v: k for k, v in hpo_brb2bm_map.items()}

# Map warm tart notation: BRB -> BM
wst_brb2bm_map = {'yes': True, 'no': False}

wst_bm2brb_map = {v: k for k, v in wst_brb2bm_map.items()}

# Map ML algorithms with HP data types
bmalgo2paratype_map = {
    'RandomForestRegressor': '[continuous, discrete, nominal]',
    'RandomForestClassifier': '[continuous, discrete, nominal]',
    'MLPRegressor': '[discrete, nominal]',
    'MLPClassifier': '[discrete, nominal]',
    'SVR': '[continuous, nominal]',
    'SVC': '[continuous, nominal]',
    'KerasRegressor': '[continuous, discrete, nominal]',
    'KerasClassifier': '[continuous, discrete, nominal]',
    'XGBoostClassifier': '[continuous, discrete, nominal]',
    'XGBoostRegressor': '[continuous, discrete, nominal]',
    'AdaBoostRegressor': '[continuous, discrete, nominal]',
    'AdaBoostClassifier': '[continuous, discrete, nominal]',
    'DecisionTreeRegressor': '[continuous, discrete]',
    'DecisionTreeClassifier': '[continuous, discrete]',
    'LinearRegression': '[nominal]',
    'KNNRegressor': '[discrete, nominal]',
    'KNNClassifier': '[discrete, nominal]',
    'LGBMRegressor': '[continuous, discrete]',
    'LGBMClassifier': '[continuous, discrete]',
    'LogisticRegression': '[continuous, discrete, nominal]',
    'ElasticNet': '[continuous, discrete, nominal]',
    'NaiveBayes': '[continuous]'}

# Map ML algorithms with conditional / non-conditional HPs
bmalgo2cond_map = {
    'RandomForestRegressor': 'no',
    'RandomForestClassifier': 'no',
    'MLPRegressor': 'no',
    'MLPClassifier': 'no',
    'SVR': 'no',
    'SVC': 'no',
    'KerasRegressor': 'no',
    'KerasClassifier': 'no',
    'XGBoostClassifier': 'yes',
    'XGBoostRegressor': 'yes',
    'AdaBoostRegressor': 'no',
    'AdaBoostClassifier': 'no',
    'DecisionTreeRegressor': 'no',
    'DecisionTreeClassifier': 'no',
    'LinearRegression': 'no',
    'KNNRegressor': 'no',
    'KNNClassifier': 'no',
    'LGBMRegressor': 'no',
    'LGBMClassifier': 'no',
    'LogisticRegression': 'no',
    'ElasticNet': 'no',
    'NaiveBayes': 'no'}

# Map dataset information with some (constant) antecedents
datset2constant_map = {
    'turbofan': {
        "Detailed ML task": 'Prediction of Remaining Useful Lifetime',
        "Production application area": 'Predictive Maintenance',
        "Input Data": 'Tabular Data',
        "Ratio training to test dataset": 4,
        "ML task": 'Regression'
        },
    'scania': {
        "Detailed ML task": 'Prediction of Part Failure',
        "Production application area": 'Predictive Maintenance',
        "Input Data": 'Tabular Data',
        "Ratio training to test dataset": 4,
        "ML task": 'Binary Classification'
    },
    'sensor': {
        "Detailed ML task": 'Prediction of Product Quality',
        "Production application area": 'Predictive Quality',
        "Input Data": 'Tabular Data',
        "Ratio training to test dataset": 4,
        "ML task": 'Multiclass Classification'
    }
}

In [5]:
# Create a DataFrame to store the use cases for the evaluation of the BRBES (each row corresponds to a single use case)
df_use_case = pd.DataFrame([])
df_use_case['Machine Learning Algorithm'] = maxr['ML-algorithm'].map(ml_bm2brb_map)
df_use_case['Hardware: Number of workers/kernels for parallel computing'] = maxr['Workers']
df_use_case['Availability of a warm-start HP configuration'] = maxr['Warmstart'].map(wst_bm2brb_map)
df_use_case['Number of maximum function evaluations/ trials budget'] = maxr['Evaluations']
# df_use_case['Running time per trial [s]'] = [interval[0, 30]] * len(maxr['ML-algorithm']) # TODO: Calculation necessary
df_use_case['Running time per trial [s]'] = maxr['Wall clock time [s]'] / maxr['Evaluations']
df_use_case['Total Computing Time [s]'] = maxr['Wall clock time [s]']
df_use_case['Dimensionality of HPs'] = maxr['# cont. HPs'] + maxr['# int. HPs'] + maxr['# cat. HPs']
df_use_case['HP datatypes'] = maxr['ML-algorithm'].map(bmalgo2paratype_map)
df_use_case['Conditional HP space'] = maxr['ML-algorithm'].map(bmalgo2cond_map)
df_use_case["Detailed ML task"] = datset2constant_map[dataset]["Detailed ML task"]
df_use_case["Production application area"] = datset2constant_map[dataset]["Production application area"]
df_use_case['Input Data'] = datset2constant_map[dataset]["Input Data"]
df_use_case['#Instances training dataset'] = maxr['# training instances']
df_use_case['Ratio training to test dataset'] = datset2constant_map[dataset]["Ratio training to test dataset"]
df_use_case['ML task'] = datset2constant_map[dataset]["ML task"]
df_use_case["UR: need for model transparency"] = maxr["UR: Need for model transparency"]
df_use_case["UR: Availability of a well documented library"] = maxr["UR: Availability of a well documented library"]
df_use_case["User's programming ability"] = maxr["User's programming ability"]

# fixed antecedents (cannot yet be derived from the metrics .csv file)
df_use_case["UR: quality demands"] = 'high'  # TODO: Keep this antecedent fixed?
df_use_case["UR: Computer operating system"] = 'Linux'
df_use_case["Obtainability of good approximate"] = [{'yes':0.5, 'no':0.5}] * len(maxr['ML-algorithm'])
df_use_case["Supports parallel evaluations"] = [{'yes':0.5, 'no':0.5}] * len(maxr['ML-algorithm'])
df_use_case["Obtainability of gradients"] = [{'yes':0.5, 'no':0.5}] * len(maxr['ML-algorithm'])
df_use_case["Noise in dataset"] = [{'yes':0.5, 'no':0.5}] * len(maxr['ML-algorithm'])
df_use_case["Training Technique"] = "Offline"

# 'Robustness' antecedent is not available for the current version of the rule base
# df_use_case['Robustness'] = maxr['Robustness']


In [6]:
# Columns with dict entries cause problems when identifying duplicates (not hashable)
dict_cols = {'Obtainability of good approximate', 'Supports parallel evaluations', 'Obtainability of gradients', 'Noise in dataset'}
non_dict_cols = set(df_use_case.columns) - dict_cols

# Remove duplicate use cases (Each use case is contained x times, where x is the number of HPO techniques applied on this use case)
df_use_case.drop_duplicates(subset=non_dict_cols, inplace=True, ignore_index=True)

In [7]:
df_use_case.head()

Unnamed: 0,Machine Learning Algorithm,Hardware: Number of workers/kernels for parallel computing,Availability of a warm-start HP configuration,Number of maximum function evaluations/ trials budget,Running time per trial [s],Total Computing Time [s],Dimensionality of HPs,HP datatypes,Conditional HP space,Detailed ML task,...,UR: need for model transparency,UR: Availability of a well documented library,User's programming ability,UR: quality demands,UR: Computer operating system,Obtainability of good approximate,Supports parallel evaluations,Obtainability of gradients,Noise in dataset,Training Technique
0,Ada Boost,1,no,200.0,2.07807,415.614018,4,"[continuous, discrete, nominal]",no,Prediction of Remaining Useful Lifetime,...,no,no,medium,high,Linux,"{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}",Offline
1,Ada Boost,1,no,,,40.0,4,"[continuous, discrete, nominal]",no,Prediction of Remaining Useful Lifetime,...,no,no,medium,high,Linux,"{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}",Offline
2,Ada Boost,1,no,200.0,2.07807,415.614018,4,"[continuous, discrete, nominal]",no,Prediction of Remaining Useful Lifetime,...,no,no,high,high,Linux,"{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}",Offline
3,Ada Boost,1,no,,,40.0,4,"[continuous, discrete, nominal]",no,Prediction of Remaining Useful Lifetime,...,no,no,high,high,Linux,"{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}",Offline
4,Ada Boost,1,no,200.0,2.07807,415.614018,4,"[continuous, discrete, nominal]",no,Prediction of Remaining Useful Lifetime,...,no,yes,medium,high,Linux,"{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}","{'yes': 0.5, 'no': 0.5}",Offline


# Run the BRBES for the BM use cases

In [8]:
# Run the brb model for the use cases from the BM study on compute the beliefs and rankings of the HPO techniques

df_belief = pd.DataFrame([])
df_rank = pd.DataFrame([])

for idx, row in df_use_case.iterrows():
    
    use_case_dict = row.to_dict()
    use_case_dict = {'A_' + k: v for k, v in use_case_dict.items()}

    X = AttributeInput(use_case_dict)

    # run brb model
    belief_degrees = model.run(X)
    belief_degrees = {k[2:]: v for k, v in zip(model.D, belief_degrees)}

    # append results
    df_belief = df_belief.append(belief_degrees, ignore_index=True)
    
    hpo_beliefs = {v: k for k, v in belief_degrees.items()}
    hpo_beliefs = sorted(hpo_beliefs.items(), reverse=True)

    # BEWARE: THE RANK IS 0-BASED
    hpo_ranks = {hpo_belief[-1]: i for i, hpo_belief in enumerate(hpo_beliefs)}
    
    df_rank = df_rank.append(hpo_ranks, ignore_index=True)

df_use_case.to_csv('use_cases.csv')
df_belief.to_csv('hpo_beliefs.csv')
df_rank.to_csv('hpo_ranks.csv')


In [54]:
df_belief.head()

Unnamed: 0,ASHA,BOHAMIANN,BOHB,CMA-ES,DNGO,Default Values,FABOLAS,GPBO,Grid Search,HB,HB-LCNet,HOAG,MTBO,PBT,Random Search,SHA,SMAC,TPE
0,0.049629,0.018307,0.13588,0.044356,0.008615,0.058729,0.018883,0.075486,0.014942,0.138979,0.027187,0.007015,0.0052,0.057323,0.060831,0.070711,0.120655,0.075036
1,0.053025,0.017763,0.133739,0.038233,0.007142,0.096893,0.031323,0.067044,0.014882,0.153554,0.030114,0.006179,0.001814,0.053319,0.050949,0.076279,0.100993,0.053334
2,0.049861,0.01839,0.137677,0.044646,0.008654,0.058476,0.020235,0.075206,0.015025,0.139375,0.028588,0.007971,0.005224,0.05753,0.053816,0.070998,0.120895,0.075144
3,0.053292,0.01785,0.135686,0.038516,0.007177,0.096788,0.032887,0.06669,0.014971,0.154053,0.031669,0.007222,0.001822,0.053521,0.043269,0.076616,0.101145,0.053338
4,0.046065,0.017017,0.134681,0.049092,0.00801,0.061992,0.017547,0.077629,0.021576,0.13755,0.025256,0.006524,0.004835,0.053198,0.063653,0.065708,0.12042,0.077865


In [9]:
df_belief.idxmax(axis='columns')

0      HB
1      HB
2      HB
3      HB
4      HB
       ..
595    HB
596    HB
597    HB
598    HB
599    HB
Length: 600, dtype: object

# Translation to max results

In [18]:
scaled_hpos.keys()


Index(['CMA-ES', 'GPBO', 'RandomSearch', 'SMAC', 'TPE', 'Default Values'], dtype='object', name='HPO-method')

In [19]:
use_test_loss = False

brb_scores = list()  # Stores the scaled score achieved by the BRBES in each use case
rs_scores = list()  # Stores the scaled score achieved by RS in each use case
dv_scores = list()  # Stores the scaled score achieved by the Default HPs in each use case

summary_df = df_use_case.copy(deep=True)

for idx, use_case in df_use_case.iterrows():

    # Identify the experiments (from the benchmarking study), that correspond to this specific use case
    exp = maxr.loc[(maxr['ML-algorithm'] == ml_brb2bm_map[use_case['Machine Learning Algorithm']]) &
        (maxr['Workers'] == use_case['Hardware: Number of workers/kernels for parallel computing']) &
        (maxr['Warmstart'] == wst_brb2bm_map[use_case['Availability of a warm-start HP configuration']]) &
        (maxr['Wall clock time [s]'] == use_case['Total Computing Time [s]']) &
        (maxr["User's programming ability"] == use_case["User's programming ability"]) &
        (maxr['UR: Need for model transparency'] == use_case['UR: need for model transparency']) &
        (maxr['UR: Availability of a well documented library'] == use_case['UR: Availability of a well documented library']) &
        (maxr['Robustness'] == 'low'), :] 
        # TODO: Specify additional antecedent values here  -> i.e. Quality Demands, Robustness

    if len(exp) == 0:
        continue
    
    if use_test_loss:  # If available, use the test loss for the ranking of HPO techniques
    
        # Check whether the test loss is available for this experiment
        if exp['Mean (final test loss)'].isnull().any():
            
            # If not -> rank the HPO techniques based on the validation loss
            hpos = exp.set_index('HPO-method')['Mean (final validation loss)'] 
            hpos.loc['Default Values'] = np.nanmean(exp['Validation baseline'])

        else: 
            
            # If available -> rank the HPO techniques based on the test loss
            hpos = exp.set_index('HPO-method')['Mean (final test loss)'] 
            hpos.loc['Default Values'] = np.nanmean(exp['Test baseline'])
    
    else:  # Always use the validation loss

        hpos = exp.set_index('HPO-method')['Mean (final validation loss)'] 
        hpos.loc['Default Values'] = np.nanmean(exp['Validation baseline'])

    # Compute the scaled loss deviation for each HPO technique in this experiment
    loss_arr = hpos.to_numpy()
    min_value = np.nanmin(loss_arr)
    max_value = np.nanmax(loss_arr[loss_arr != np.inf])
    scaled_hpos = (hpos - min_value) / (max_value - min_value)

    rec_hpo = df_belief.iloc[idx].idxmax(axis='columns')

    # TODO: Continue here
    if hpo_brb2bm_map[rec_hpo] not in scaled_hpos.keys():
        brb_scores.append(np.nan)
    else:
        brb_scores.append(scaled_hpos.loc[hpo_brb2bm_map[rec_hpo]])

    rs_scores.append(scaled_hpos.loc['RandomSearch'])
    dv_scores.append(scaled_hpos.loc['Default Values'])

    summary_df.loc[idx, 'BRBES Recommendation'] = hpo_brb2bm_map[rec_hpo]
    summary_df.loc[idx, 'Best HPO Technique'] = scaled_hpos.idxmin(axis=0)
    summary_df.loc[idx, 'Distance Value'] = scaled_hpos.loc[hpo_brb2bm_map[rec_hpo]]

print('BRBES avg. score (stdev): \t\t{:.3f} ({:.2f})'.format(np.nanmean(brb_scores), np.nanstd(brb_scores)))
print('RandomSearch avg. score (stdev): \t{:.3f} ({:.2f})'.format(np.nanmean(rs_scores), np.nanstd(rs_scores)))
print('Default Values avg. score (stdev): \t{:.3f} ({:.2f})'.format(np.nanmean(dv_scores), np.nanstd(dv_scores)))

if use_test_loss:
    summary_fname = dataset + '_brbes_results_val_and_test_loss.csv'
else:
    summary_fname = dataset + '_brbes_results_val_loss_only.csv'

summary_df.to_csv(os.path.join('./max_results', summary_fname))

KeyError: 'Hyperband'