In [1]:
from typing import List

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

# Create the "figs/" directory if it is missing (presumably where figures get saved — confirm).
os.makedirs("figs/", exist_ok=True)

# Global matplotlib text settings, applied in a single update.
plt.rcParams.update(
    {
        "font.family": "Times New Roman",
        "font.size": 24,
        "mathtext.fontset": "stix",  # The setting of math font
    }
)

In [2]:
# Load the setting table (its first CSV column becomes the index) and the result summary.
config_table = pd.read_csv("results/setting-table.csv", index_col=0)
results = pd.read_csv("results/summary.csv")
target_col = results["target"]
target_names = target_col.unique()

# Boolean mask over the rows of `results`: True where the setting index is
# not one of the two Optuna settings listed below.
OPTUNA_INDICES = [60, 108]  # 60: univariate, 108: multivariate
NON_OPTUNA_FLAG = ~results.setting_index.isin(OPTUNA_INDICES)

# Targets grouped by the dimension tag ("05d", "10d", "30d") contained in their name.
BENCH05d_TARGETS, BENCH10d_TARGETS, BENCH30d_TARGETS = (
    [name for name in target_names if dim_tag in name]
    for dim_tag in ("05d", "10d", "30d")
)
BENCH_TARGETS = [*BENCH05d_TARGETS, *BENCH10d_TARGETS, *BENCH30d_TARGETS]

# Hand-written target groups; TABULAR_TARGETS is HPOBench followed by HPOLib.
JAHS_TARGETS = ["fashion_mnist", "cifar10"]
HPOLIB_TARGETS = ["protein_structure", "parkinsons_telemonitoring", "slice_localization", "naval_propulsion"]
HPOBENCH_TARGETS = ["vehicle", "segment", "car", "australian", "credit_g", "blood_transfusion", "kc1", "phoneme"]
TABULAR_TARGETS = [*HPOBENCH_TARGETS, *HPOLIB_TARGETS]

# Columns of `config_table` that are read when counting top configurations.
feat_names = [
    "multivariate",
    "quantile",
    "alpha",
    "weight",
    "min_bandwidth_factor",
    "min_bandwidth_factor_for_discrete",
]

# Show the available result columns and the distinct target names.
print(results.columns)
print(target_names)

Index(['setting_index', 'target', 'mean@n_evals050', 'mean@n_evals100',
       'mean@n_evals150', 'mean@n_evals200', 'ste@n_evals050',
       'ste@n_evals100', 'ste@n_evals150', 'ste@n_evals200'],
      dtype='object')
['fashion_mnist' 'cifar10' 'Styblinski_05d' 'Ackley_05d' 'Schwefel_30d'
 'Schwefel_10d' 'Rosenbrock_30d' 'KTablet_30d' 'Ackley_30d' 'Griewank_10d'
 'Ackley_10d' 'Rastrigin_30d' 'Sphere_10d' 'Schwefel_05d' 'Griewank_05d'
 'WeightedSphere_30d' 'WeightedSphere_05d' 'KTablet_10d' 'Rosenbrock_10d'
 'Levy_30d' 'Sphere_30d' 'Levy_10d' 'Perm_30d' 'Sphere_05d' 'KTablet_05d'
 'WeightedSphere_10d' 'Rastrigin_05d' 'Rastrigin_10d' 'Rosenbrock_05d'
 'Styblinski_30d' 'Perm_10d' 'Perm_05d' 'Styblinski_10d' 'Levy_05d'
 'Griewank_30d' 'vehicle' 'parkinsons_telemonitoring' 'protein_structure'
 'segment' 'car' 'australian' 'credit_g' 'naval_propulsion'
 'blood_transfusion' 'kc1' 'phoneme' 'slice_localization']


In [3]:
def compute_rank_loss(targets: List[str], key: str = "mean@n_evals200", return_sorted_index: bool = True):
    """Accumulate a rank-based value per setting over the given targets.

    For each target, the rows of ``results`` are sorted by ``key`` in ascending
    order. The setting on the first row contributes ``n_settings``, the next
    one ``n_settings - 1``, and so on down to ``1`` for the last row. The
    contributions are summed per setting across all targets.

    NOTE(review): with an ascending sort the first row receives the *largest*
    contribution even though the quantity is called a "loss" — confirm that
    this direction is intended.

    Args:
        targets: Target names to aggregate over. The set of settings is taken
            from ``targets[0]``; every other target is expected to have exactly
            one row for each of those setting indices.
        key: Column of ``results`` used to rank the settings.
        return_sorted_index: If True, also return the setting indices ordered
            by ascending accumulated value.

    Returns:
        ``rank_loss``, whose i-th entry belongs to the i-th smallest setting
        index. If ``return_sorted_index`` is True, a tuple
        ``(rank_loss, sorted_setting_indices)`` is returned instead.
    """
    indices = np.sort(results.setting_index[results.target == targets[0]].to_numpy())
    n_settings = indices.size
    rank_loss = np.zeros(n_settings)

    for target_name in targets:
        sorted_df = results[target_col == target_name].sort_values(by=key)
        # rank_of_setting[i] is the 0-based position, in the order sorted by
        # `key`, of the i-th smallest setting index, so it lines up with
        # `indices` and with the slots of `rank_loss`.
        rank_of_setting = np.argsort(sorted_df.setting_index.to_numpy())
        # Add each setting's contribution at that setting's own slot. Using
        # the argsort result as the *destination* index would file the values
        # under rank positions instead of under settings.
        rank_loss += n_settings - rank_of_setting

    if return_sorted_index:
        order = np.argsort(rank_loss)
        return rank_loss, indices[order]
    else:
        return rank_loss


def get_counts(targets: List[str], key: str = "mean@n_evals200", top: int = 20):
    """Count feature values among the first ``top`` settings of each target.

    Rows flagged by ``NON_OPTUNA_FLAG`` are kept, the rest are dropped. For
    each target the remaining rows are sorted by ``key`` in ascending order and
    the first ``top`` setting indices are used as positional (``iloc``)
    indices into ``config_table``.

    Args:
        targets: Target names to aggregate over.
        key: Column of ``results`` used for sorting.
        top: Number of leading rows taken per target.

    Returns:
        A dict mapping each name in ``feat_names`` to the
        ``(unique_values, counts)`` pair from ``np.unique`` over the values
        pooled from all targets.
    """
    non_optuna_results = results[NON_OPTUNA_FLAG]
    non_optuna_targets = target_col[NON_OPTUNA_FLAG]
    collected = {feat_name: [] for feat_name in feat_names}

    for target_name in targets:
        is_target = non_optuna_targets == target_name
        ranked = non_optuna_results[is_target].sort_values(by=key)
        best_settings = ranked.setting_index.head(top).to_numpy()
        best_configs = config_table.iloc[best_settings]
        for feat_name, bucket in collected.items():
            bucket.append(best_configs[feat_name].to_numpy())

    # Pool the per-target arrays of every feature into one flat array first,
    # then count the distinct values of each feature.
    pooled = {feat_name: np.asarray(chunks).flatten() for feat_name, chunks in collected.items()}
    counters = {}
    for feat_name in feat_names:
        counters[feat_name] = np.unique(pooled[feat_name], return_counts=True)
    return counters

In [4]:
# Show the configurations of the first 20 settings for one target when the
# results are sorted by the chosen column in ascending order.
target_name = "cifar10"
key = "mean@n_evals200"
sorted_df = results.loc[target_col == target_name].sort_values(by=key)
top20_indices = sorted_df["setting_index"].to_numpy()[:20]
# The setting indices are used as positional (iloc) row indices into the table.
config_table.iloc[top20_indices]

Unnamed: 0,multivariate,quantile,alpha,weight,min_bandwidth_factor,min_bandwidth_factor_for_discrete
148,True,linear,0.05,expected-improvement,0.1,0.5
455,True,linear,0.05,expected-improvement,0.02,0.5
293,True,linear,0.05,expected-improvement,0.01,0.5
36,True,linear,0.05,expected-improvement,0.2,0.5
901,False,sqrt,0.25,older-smaller,0.02,1.0
619,False,sqrt,0.25,older-smaller,0.2,1.0
714,False,sqrt,0.25,older-smaller,0.1,1.0
543,False,sqrt,0.25,older-smaller,0.01,1.0
790,True,linear,0.05,uniform,0.1,0.5
171,True,linear,0.05,uniform,0.2,0.5


In [30]:
# For every benchmark group, print how often each feature value appears among
# the first 5 settings per target (sorted by "mean@n_evals100").
for name, bench in {
    "05d": BENCH05d_TARGETS,
    "10d": BENCH10d_TARGETS,
    "30d": BENCH30d_TARGETS,
    "JAHS": JAHS_TARGETS,
    "HPOLib": HPOLIB_TARGETS,
    "HPOBench": HPOBENCH_TARGETS,
}.items():
    print(name)
    counters = get_counts(targets=bench, key="mean@n_evals100", top=5)
    for k, v in counters.items():
        # v is the (unique_values, counts) pair. A feature is printed only if
        # its first unique value is a string or is not NaN; features whose
        # first unique value is NaN are skipped.
        if isinstance(v[0][0], str) or not np.isnan(v[0][0]):
            print(k)
            print("\t", v[0].tolist())
            print("\t", v[1].tolist())

    print()

05d
multivariate
	 [False, True]
	 [3, 52]
quantile
	 ['linear', 'sqrt']
	 [24, 31]
alpha
	 [0.05, 0.1, 0.15, 0.2, 0.25, 0.5, 0.75, 1.0]
	 [17, 3, 1, 3, 14, 8, 4, 5]
weight
	 ['expected-improvement', 'older-smaller', 'uniform']
	 [30, 7, 18]
min_bandwidth_factor
	 [0.01, 0.02]
	 [36, 19]

10d
multivariate
	 [False, True]
	 [4, 51]
quantile
	 ['linear', 'sqrt']
	 [28, 27]
alpha
	 [0.05, 0.1, 0.15, 0.2, 0.25, 0.5, 0.75, 1.0]
	 [16, 7, 2, 3, 17, 6, 2, 2]
weight
	 ['expected-improvement', 'uniform']
	 [41, 14]
min_bandwidth_factor
	 [0.01, 0.02]
	 [28, 27]

30d
multivariate
	 [False, True]
	 [5, 50]
quantile
	 ['linear', 'sqrt']
	 [22, 33]
alpha
	 [0.05, 0.1, 0.15, 0.2, 0.25, 0.5, 0.75, 1.0]
	 [6, 6, 7, 3, 3, 13, 10, 7]
weight
	 ['expected-improvement', 'older-smaller', 'uniform']
	 [43, 5, 7]
min_bandwidth_factor
	 [0.01, 0.02]
	 [21, 34]

JAHS
multivariate
	 [False, True]
	 [9, 1]
quantile
	 ['linear', 'sqrt']
	 [1, 9]
alpha
	 [0.05, 0.25, 0.75, 1.0]
	 [1, 4, 1, 4]
weight
	 ['expected-im

True