In [1]:
from yahpo_gym import benchmark_set, local_config
import numpy as np
from idhb import *
import os
from py_experimenter.result_processor import ResultProcessor
from py_experimenter.experimenter import PyExperimenter
import time


local_config.init_config()
local_config.set_data_path("yahpodata")

class YAHPOEvaluationFunction:
    def __init__(self, bench, metric, maximum_budget, repl):
        self.bench = bench
        self.metric = metric
        self.maximum_budget = maximum_budget
        self.repl = repl

    def evaluate(self, candidate, budget):
        xs = candidate.get_dictionary()
        # print("evaluate candidate ", xs, " for budget " , budget)
        xs["trainsize"] = budget / self.maximum_budget
        xs["repl"] = self.repl % 10 + 1
        return (-1) * self.bench.objective_function(xs)[0][self.metric]


class YAHPOCandidateSampler:
    def __init__(self, cs, seed):
        self.bracket_random_state = dict()
        self.global_random_state = seed
        self.cs = cs

    def get(self, bracket, n):
        candidates = list()
        # draw a list of n candidates
        for i in range(n):
            # retrieve random state to set seed for sampling
            if bracket in self.bracket_random_state:
                random_state = self.bracket_random_state[bracket]
            else:
                np.random.seed(self.global_random_state)
                self.global_random_state = np.random.randint(0, 2 ** 16 - 1)
                random_state = np.random.randint(0, 2 ** 16 - 1)
            np.random.seed(random_state)
            random_state = np.random.randint(0, 2 ** 16 -1)
            self.bracket_random_state[bracket] = random_state
            
            self.cs.seed(seed=random_state)
            candidates.append(Candidate(candidate=self.cs.sample_configuration(1)))
        return candidates


def run_experiment(keyfields: dict, result_processor: ResultProcessor, custom_fields: dict):
    debug = False
    seed = int(keyfields['seed'])
    benchmark = keyfields['benchmark']
    instance = keyfields['instance']
    algo = keyfields['algo']
    metric = keyfields['metric']
    eta = int(keyfields['eta'])
    initial_max_budget = int(keyfields['init_max_budget'])

    bench = benchmark_set.BenchmarkSet(benchmark)
    bench.set_instance(instance)
    cs = bench.get_opt_space(drop_fidelity_params=True)
    
    eval = BudgetTrackingPerformanceMeasure(YAHPOEvaluationFunction(bench, metric, initial_max_budget * eta, seed).evaluate)

    sampler = YAHPOCandidateSampler(cs=cs, seed=seed)
    if algo == "pid-hb":
        hb = IDHyperband(max_budget=initial_max_budget, eta=eta, eval_func=eval.evaluate, conservative=True, strict=False, debug=debug)
    elif algo == "did-hb":
        hb = IDHyperband(max_budget=initial_max_budget, eta=eta, eval_func=eval.evaluate, conservative=True, strict=True, debug=debug)
    else:
        hb = IDHyperband(max_budget=initial_max_budget, eta=eta, eval_func=eval.evaluate, conservative=False, debug=debug)
    hb.hyperband(sampler)

    if algo == "ih-hb":
        sampler = YAHPOCandidateSampler(cs=cs, seed=seed)
        hb = IDHyperband(max_budget=initial_max_budget * eta, eta=eta, eval_func=eval.evaluate, conservative=False, debug=debug)
        res = hb.hyperband(sampler)
    elif algo == "eid-hb" or algo == "pid-hb" or algo == "did-hb":
        hb.incrementMaxBudget()
        res = hb.hyperband(sampler)
    else:
        print("Not supported algo!")

    results = {
        'final_incumbent': str(res.getCandidate()),
        'performance': (-1) * res.performanceMap[initial_max_budget * eta],
        'total_budget': eval.getAccumulatedBudget()
    }
    result_processor.process_results(results)


In [2]:
experimenter = PyExperimenter(experiment_configuration_file_path="config/yahpo_fraction.cfg", database_credential_file_path="config/database_credentials.cfg")


In [15]:
experimenter.fill_table_from_config()

In [33]:
#experimenter.reset_experiments({'status': 'done'})


instance_list = []
for benchmark in ["rbv2_xgboost", "rbv2_svm", "rbv2_ranger"]:
    bench = benchmark_set.BenchmarkSet(benchmark)
    for inst in bench.instances:
        if inst not in instance_list:
            instance_list.append(inst)
    
first = True

instances = ""
for x in instance_list:
    if first:
        first = False
    else:
        instances += ","
    instances += x
print(instances)

print(len(instance_list))

16,40923,41143,470,1487,40499,40966,41164,1497,40975,1461,41278,11,54,300,40984,31,1067,1590,40983,41163,41165,182,1220,41159,41169,42,188,1457,1480,6332,181,1479,40670,40536,41138,41166,6,14,29,458,1056,1462,1494,40701,12,1493,44,307,334,40982,41142,38,1050,469,23381,41157,15,4541,23,4134,40927,40981,41156,3,1049,40900,1063,23512,40979,1040,1068,41161,22,1489,41027,24,4135,23517,1053,1468,312,377,1515,18,1476,1510,41162,28,375,1464,40685,40996,41146,41216,40668,41212,32,60,4538,40496,41150,37,46,554,1475,1485,1501,1111,4534,41168,151,4154,40978,40994,50,1478,1486,40498
119


In [4]:
experimenter.execute(experiment_function=run_experiment, max_experiments=-1, random_order=True)

In [42]:
keyfields = {
    'benchmark': 'rbv2_svm',
    'instance': '16',
    'seed': '12',
    'eta': 2,
    'init_max_budget': 16,
    'metric': 'acc'
}

class MyResProc:
    def process_results(self, res):
        print(res)

for algo in ['did-hb', 'eih-hb', 'ih-hb']:
    keyfields['algo'] = algo
    run_experiment(keyfields, MyResProc(), dict())

s 4 r 1.0 R 16 eta 2
Sample 16 new candidates because we need 16 candidates in the first iteration of this bracket and 0 are already assigned to the bracket
Sampled  16  new candidates
n_i 16  candidates  16
n_i 8  candidates  8
n_i 4  candidates  4
n_i 2  candidates  2
n_i 1  candidates  1
Result of bracket  0 :  Configuration(values={
  'cost': 2720.425375035524,
  'gamma': 0.00012672946405209006,
  'kernel': 'radial',
  'num.impute.selected.cpo': 'impute.hist',
  'task_id': '16',
  'tolerance': 0.0009600200764008336,
})
 Performances: {1.0: -0.8450555801391602, 2.0: -0.8813543319702148, 4.0: -0.92461097240448, 8.0: -0.9452806115150452, 16.0: -0.95566326379776}
The result of bracket  0  yielded a new incumbent with performance  -0.95566326379776
s 3 r 2.0 R 16 eta 2
Sample 10 new candidates because we need 10 candidates in the first iteration of this bracket and 0 are already assigned to the bracket
Sampled  10  new candidates
n_i 10  candidates  10
n_i 5  candidates  5
n_i 2  candid

UnboundLocalError: local variable 'res' referenced before assignment