# Benchmarking

We want to test the behavior of ProcessOptimizer on different model systems with
different settings. First, we create a list of dicts, where each dict contains the
settings to use.

In [1]:
from itertools import product
from typing import Union

from ProcessOptimizer.model_systems.benchmark import run_test_optimization

# ---- Benchmark configuration ----
# Model systems to benchmark.
MODEL_SYSTEM_NAMES = [
    # "branin_hoo",
    "hart3",
    "hart6",
]
# Total number of evaluations available to each optimization.
EXPERIMENT_BUDGET = 100
# Number of times every settings combination is repeated.
NUM_REPLICATIONS = 2
# Factors the noise of the model system is multiplied by
# (1.0, 5.0 and 10.0 are currently left out).
NOISE_LEVELS = [0.0, 0.2]
# How close to the true minimum we want to get.
TARGET_LEVEL = [0.01, 0.1]
N_INITIAL_POINTS = [4, 10]

# ---- One settings dict per (combination, replication) ----
# Consider making a "test" dataclass for better typing.
tests: list[dict[str, Union[str, float, int]]] = []

# Seed for "randomly" making noise, ensures reproducibility. Every test gets
# its own seed, counted from 1 in generation order.
seed = 0
for seed, (
    model_system_name,
    relative_noise_level,
    n_initial_points,
    target_level,
    _,
) in enumerate(
    product(
        MODEL_SYSTEM_NAMES,
        NOISE_LEVELS,
        N_INITIAL_POINTS,
        TARGET_LEVEL,
        range(NUM_REPLICATIONS),  # innermost factor: replications of one combination stay adjacent
    ),
    start=1,
):
    tests.append(
        {
            "model_system_name": model_system_name,
            "noise_level": relative_noise_level,
            "n_initial_points": n_initial_points,
            "target_level": target_level,
            "experiment_budget": EXPERIMENT_BUDGET,
            "seed": seed,
        }
    )

In [2]:
# Print the complete list of generated settings dicts so the grid can be inspected.
print(tests)

[{'model_system_name': 'hart3', 'noise_level': 0.0, 'n_initial_points': 4, 'target_level': 0.01, 'experiment_budget': 100, 'seed': 1}, {'model_system_name': 'hart3', 'noise_level': 0.0, 'n_initial_points': 4, 'target_level': 0.01, 'experiment_budget': 100, 'seed': 2}, {'model_system_name': 'hart3', 'noise_level': 0.0, 'n_initial_points': 4, 'target_level': 0.1, 'experiment_budget': 100, 'seed': 3}, {'model_system_name': 'hart3', 'noise_level': 0.0, 'n_initial_points': 4, 'target_level': 0.1, 'experiment_budget': 100, 'seed': 4}, {'model_system_name': 'hart3', 'noise_level': 0.0, 'n_initial_points': 10, 'target_level': 0.01, 'experiment_budget': 100, 'seed': 5}, {'model_system_name': 'hart3', 'noise_level': 0.0, 'n_initial_points': 10, 'target_level': 0.01, 'experiment_budget': 100, 'seed': 6}, {'model_system_name': 'hart3', 'noise_level': 0.0, 'n_initial_points': 10, 'target_level': 0.1, 'experiment_budget': 100, 'seed': 7}, {'model_system_name': 'hart3', 'noise_level': 0.0, 'n_initial

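The function that runs a single test, `run_test_optimization`, was imported from `ProcessOptimizer.model_systems.benchmark` in the first cell. It takes one settings dict and returns that dict together with the number of evaluations used and a flag telling whether the optimization finished.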

We can run one of the tests.

In [18]:
# Run the optimization for the first settings dict only. The displayed value is
# the returned tuple: (settings, number of evaluations, finished flag).
run_test_optimization(tests[0])

({'model_system_name': 'hart3',
  'noise_level': 0.0,
  'n_initial_points': 4,
  'target_level': 0.01,
  'experiment_budget': 100,
  'seed': 1},
 15,
 True)

In [2]:
# Run the tests one after another in this process and report each outcome.
for test in tests:
    outcome = run_test_optimization(test)
    # The call hands back the settings together with the evaluation count and
    # a flag telling whether the optimization finished.
    test, n_evaluations, finished = outcome
    print(f"Test {test} took {n_evaluations} evaluations and {'finished' if finished else 'did not finish'}")

Test {'model_system_name': 'hart3', 'noise_level': 0.0, 'n_initial_points': 4, 'target_level': 0.01, 'experiment_budget': 100, 'seed': 1} took 15 evaluations and finished
Test {'model_system_name': 'hart3', 'noise_level': 0.0, 'n_initial_points': 4, 'target_level': 0.01, 'experiment_budget': 100, 'seed': 2} took 15 evaluations and finished
Test {'model_system_name': 'hart3', 'noise_level': 0.0, 'n_initial_points': 4, 'target_level': 0.1, 'experiment_budget': 100, 'seed': 3} took 1 evaluations and finished
Test {'model_system_name': 'hart3', 'noise_level': 0.0, 'n_initial_points': 4, 'target_level': 0.1, 'experiment_budget': 100, 'seed': 4} took 1 evaluations and finished
Test {'model_system_name': 'hart3', 'noise_level': 0.0, 'n_initial_points': 10, 'target_level': 0.01, 'experiment_budget': 100, 'seed': 5} took 14 evaluations and finished
Test {'model_system_name': 'hart3', 'noise_level': 0.0, 'n_initial_points': 10, 'target_level': 0.01, 'experiment_budget': 100, 'seed': 6} took 14 e

We can run all of the tests in parallel.

In [5]:
from multiprocessing import Pool

# Only start worker processes when this code runs as the main module.
if __name__ == '__main__':
    # The context manager shuts the pool down once the mapping is done.
    with Pool() as worker_pool:
        # One call of run_test_optimization per settings dict, spread over the workers.
        result = worker_pool.map(run_test_optimization, tests)

In [4]:
# Print the collected results: one (settings, number of evaluations, finished flag) tuple per test.
print(result)

[({'model_system_name': 'hart3', 'noise_level': 0.0, 'n_initial_points': 4, 'target_level': 0.01, 'experiment_budget': 100, 'seed': 1}, 15, True), ({'model_system_name': 'hart3', 'noise_level': 0.0, 'n_initial_points': 4, 'target_level': 0.01, 'experiment_budget': 100, 'seed': 2}, 15, True), ({'model_system_name': 'hart3', 'noise_level': 0.0, 'n_initial_points': 4, 'target_level': 0.1, 'experiment_budget': 100, 'seed': 3}, 1, True), ({'model_system_name': 'hart3', 'noise_level': 0.0, 'n_initial_points': 4, 'target_level': 0.1, 'experiment_budget': 100, 'seed': 4}, 1, True), ({'model_system_name': 'hart3', 'noise_level': 0.0, 'n_initial_points': 10, 'target_level': 0.01, 'experiment_budget': 100, 'seed': 5}, 16, True), ({'model_system_name': 'hart3', 'noise_level': 0.0, 'n_initial_points': 10, 'target_level': 0.01, 'experiment_budget': 100, 'seed': 6}, 14, True), ({'model_system_name': 'hart3', 'noise_level': 0.0, 'n_initial_points': 10, 'target_level': 0.1, 'experiment_budget': 100, 's