In [18]:
# This notebook serializes the collected data to disk.
# The serialized dataframe contains only the essential columns.
import sys
import pandas as pd
import pyarrow

sys.path.append('../')
sys.path.insert(0, '../../syne-tune/benchmarking/examples/benchmark_hypertune')
from benchmark_definitions import benchmark_definitions
import ipynb_utils

In [None]:
# Load the experiment
#df = ipynb_utils.load_experiment_synetune('benchtest-1')
df = ipynb_utils.load_experiment_synetune('baselines-4')

algorithm_to_remove = 'DyHPO-lim'

# Remove the algorithm
df = df.drop(algorithm_to_remove, level='algorithm')
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() only appended a stray "None" line.
df.info()

# Remove extra benchmarks (too low runtime)
benchmarks_to_remove = ['lcbench-jannis', 'lcbench-volkert']
df = df.drop(benchmarks_to_remove, level='benchmark')

In [None]:
# Rename index labels (e.g. 'SYNCHB' -> 'Hyperband'), then list what is left.
label_map = {
    'HYPERTUNE-INDEP': 'HYPERTUNE',
    'MOBSTER-JOINT': 'MOBSTER',
    'SYNCHB': 'Hyperband',
}
df = df.rename(index=label_map)

# Print the distinct values of the 'benchmark' and 'algorithm' index levels.
for level_name, title in (('benchmark', 'Benchmarks'), ('algorithm', 'Algorithms')):
    print(f"{title}: {df.index.get_level_values(level_name).unique()}")

In [None]:
# Add all missing columns and values into the dataframe
new_dfs = []

benchmark_names = df.index.get_level_values('benchmark').unique().tolist()
# Value written to 'max_num_evaluations', looked up by the first character of
# the benchmark name.
budgets = {'n' : 4000, 'l' : 1000, 'f' : 2000}
# Columns kept for every benchmark, in output order.
essential_columns = [
    'benchmark', 'algorithm', 'repeat', 'trial',
    'max_num_evaluations', 'optimization_metric', 'mode', 'metric',
    'elapsed_time', 'metric_elapsed_time', 'st_decision', 'trial_id',
]

for benchmark_name in benchmark_names:
    benchmark = benchmark_definitions[benchmark_name]
    budget = budgets[benchmark_name[0]]
    # Load correct subset of the dataframe
    print(f"Calculating {benchmark_name}")
    # Take an explicit copy: the columns added below must land in an
    # independent frame, not in something that may be a view of `df`
    # (which is what triggers SettingWithCopyWarning).
    bench_df = df.loc[(benchmark_name, slice(None), slice(None), slice(None))].copy()
    bench_df = bench_df.drop(columns=['metric_mode'])
    bench_df['max_num_evaluations'] = budget
    bench_df['optimization_metric'] = benchmark.metric
    bench_df['mode'] = benchmark.mode
    # Copy the benchmark-specific metric column under the common name 'metric'.
    bench_df['metric'] = bench_df[benchmark.metric]
    bench_df['benchmark'] = benchmark_name
    # Turn the remaining index levels into ordinary columns so they can be
    # selected together with the data columns.
    bench_df = bench_df.reset_index()
    new_dfs.append(bench_df[essential_columns])

# ignore_index=True discards the per-benchmark row numbers directly, so no
# temporary 'index' column has to be created and dropped afterwards.
new_df = pd.concat(new_dfs, ignore_index=True)
new_df = new_df.set_index(['benchmark', 'algorithm', 'repeat', 'trial'])
new_df

In [None]:
# Preprocessing of the data - cumulative metric and regret calculation
def compute_cumulative_min(group):
    """Add running-best columns to one group of rows.

    The mode is read from the first row of the 'mode' column and decides how
    the 'metric' column is accumulated:

    * 'min': 'cumulative' and 'cumulative_min' both hold the running minimum.
    * 'max': 'cumulative' holds the running maximum and 'cumulative_min' its
      negation (presumably so that lower is better in both modes).
    * any other value: no column is added.

    Args:
        group: DataFrame with 'mode' and 'metric' columns whose index has the
            levels 'benchmark', 'algorithm' and 'repeat'. It is not modified.

    Returns:
        A new DataFrame with the columns described above and with the index
        levels 'benchmark', 'algorithm' and 'repeat' dropped.
    """
    mode = group['mode'].iloc[0]
    # Build the result with assign() instead of writing into `group`:
    # groupby.apply does not support functions that mutate the object passed in.
    if mode == 'min':
        best_so_far = group['metric'].cummin()
        result = group.assign(cumulative_min=best_so_far, cumulative=best_so_far)
    elif mode == 'max':
        best_so_far = group['metric'].cummax()
        result = group.assign(cumulative_min=best_so_far * -1, cumulative=best_so_far)
    else:
        result = group
    return result.reset_index(['benchmark', 'algorithm', 'repeat'], drop=True)

# Add the cumulative columns separately for every (benchmark, algorithm, repeat) group
run_keys = ['benchmark', 'algorithm', 'repeat']
new_df = new_df.groupby(run_keys).apply(compute_cumulative_min)


def _min_max_scale(values):
    """Rescale a series linearly by its own minimum and maximum."""
    lowest = values.min()
    return (values - lowest) / (values.max() - lowest)


# Calculate regret for the Wilcoxon test - 'cumulative_min' normalized to the 0-1 range per benchmark
per_benchmark = new_df.groupby(['benchmark'])['cumulative_min']
new_df['regret'] = per_benchmark.transform(_min_max_scale)
new_df

In [None]:
# Write the processed dataframe to a Feather file
# NOTE(review): new_df has a non-default index at this point - confirm the
# installed pandas version accepts that in to_feather.
feather_path = '../../results/tabular-1.feather'
new_df.to_feather(feather_path)