# Finding the Minimum and Maximum of all YAHPO instances

In [1]:
# Build a data frame with one row per (scenario, instance) pair; the min/max values are computed in later cells

import pandas as pd
from yahpo_gym import benchmark_set
from yahpo_gym.configuration import list_scenarios
scns = list_scenarios()

# One small frame per scenario: the scalar scenario name is broadcast across
# that scenario's list of instances.
dfs = []
for sc in scns:
    bench = benchmark_set.BenchmarkSet(sc)
    # A scenario without instances still gets one row, keyed by "".  The
    # placeholder has to be wrapped in a list: a dict made only of scalars
    # makes pd.DataFrame raise "If using all scalar values, you must pass an
    # index".
    instances = bench.instances if bench.instances is not None else [""]
    dfs.append(pd.DataFrame({
        "scenario": sc,
        "instance": instances,
    }))

# Long table with one row per (scenario, instance) pair.
df = pd.concat(dfs)

In [17]:
def get_min_max_rs(row, bs=10000, iters=50):
    """Estimate per-metric minima and maxima for one instance by random search.

    Draws ``iters`` batches of ``bs`` random configurations from the
    benchmark's configuration space, evaluates every batch with the benchmark's
    objective function and keeps the smallest and largest value seen for each
    metric.  The summary is appended (without a header) to
    ``yahpo_min_max.csv`` with the columns
    ``metric, statistic, value, scenario, instance``.

    Args:
        row: Record exposing ``scenario`` and ``instance`` attributes, e.g. a
            row yielded by ``DataFrame.iterrows()``.
        bs: Number of configurations sampled per batch.
        iters: Number of batches to sample.

    Returns:
        The marker string ``"procced"``; the results themselves are only
        written to disk.
    """
    # Set up the surrogate once; nothing about it changes between batches.
    bench = benchmark_set.BenchmarkSet(row.scenario)
    if row.scenario != "nb301":
        bench.set_instance(row.instance)

    batches = []
    for _ in range(iters):
        # Sample batch size configurations & evaluate
        configs = bench.config_space.sample_configuration(bs)
        evaluated = pd.DataFrame(bench.objective_function(configs))
        # String names instead of the Python builtins: same Series.min/max
        # reduction, without relying on pandas' deprecated builtin aliasing.
        extremes = (
            evaluated.agg(["min", "max"])
            .rename_axis("statistic")
            .reset_index()
            .melt(id_vars='statistic', var_name='metric')
        )
        batches.append(extremes)
    stacked = pd.concat(batches, ignore_index=True)

    # Reduce the per-batch extremes to one overall min and one overall max per
    # metric; the (metric, statistic) index becomes the first two CSV columns.
    keys = ['metric', 'statistic']
    summary = pd.concat([
        stacked[stacked.statistic == "min"].groupby(keys).min(),
        stacked[stacked.statistic == "max"].groupby(keys).max(),
    ])
    summary['scenario'] = row.scenario
    summary['instance'] = row.instance
    # NOTE(review): when this runs on several workers at once, all of them
    # append to the same file without synchronization -- confirm that the
    # small appended writes cannot interleave on the target filesystem.
    summary.to_csv('yahpo_min_max.csv', mode='a', header=False)
    return "procced"
    
# Expected header line of yahpo_min_max.csv (result rows are appended without one):
# metric,statistic,value,scenario,instance

In [18]:
import dask
from dask.distributed import Client, progress
from dask.diagnostics import ProgressBar
# Results written by earlier runs.  get_min_max_rs appends rows without a
# header, so the file has to start with the header line; create it on the very
# first run instead of failing with FileNotFoundError.
# The key columns are read as strings: left to type inference, identifiers
# that look numeric would be parsed as numbers and never compare equal to
# string instance names.
try:
    done = pd.read_csv(
        'yahpo_min_max.csv',
        dtype={"scenario": str, "instance": str},
    )
except FileNotFoundError:
    done = pd.DataFrame(
        columns=["metric", "statistic", "value", "scenario", "instance"]
    )
    done.to_csv('yahpo_min_max.csv', index=False)

client = Client(threads_per_worker=1, n_workers=6)

# (scenario, instance) pairs that already have results.  An empty instance is
# read back from the CSV as NaN, so map it to "" again before comparing.
finished = set(zip(done.scenario, done.instance.fillna("")))

# Only run instances that were not run before
procd = [
    dask.delayed(get_min_max_rs)(rw)
    for _, rw in df.iterrows()
    if (rw.scenario, str(rw.instance)) not in finished
]

if procd:
    # dask.diagnostics.ProgressBar only hooks into the local schedulers.  With
    # a distributed Client active, submit through the client and track the
    # futures with dask.distributed's `progress` instead.
    futures = client.compute(procd)
    progress(futures)
    client.gather(futures)