From 616f44ef787f2c8fc847133f1a4ca6bc5ba6656e Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Wed, 21 Feb 2024 15:34:05 -0800 Subject: [PATCH 001/121] do not pass the optimizer into _run() --- mlos_bench/mlos_bench/run.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mlos_bench/mlos_bench/run.py b/mlos_bench/mlos_bench/run.py index fc78d08055..bc571ad0ab 100755 --- a/mlos_bench/mlos_bench/run.py +++ b/mlos_bench/mlos_bench/run.py @@ -109,7 +109,8 @@ def _optimize(*, opt_context.bulk_register(configs, scores, status) # Complete any pending trials. for trial in exp.pending_trials(datetime.utcnow(), running=True): - _run(env_context, opt_context, trial, global_config) + (status, score) = _run(env_context, trial, global_config) + opt_context.register(trial.tunables, status, score) else: _LOG.warning("Skip pending trials and warm-up: %s", opt) @@ -144,7 +145,8 @@ def _optimize(*, "repeat_i": repeat_i, "is_defaults": tunables.is_defaults, }) - _run(env_context, opt_context, trial, global_config) + (status, score) = _run(env_context, trial, global_config) + opt_context.register(trial.tunables, status, score) if do_teardown: env_context.teardown() @@ -154,7 +156,8 @@ def _optimize(*, return (best_score, best_config) -def _run(env: Environment, opt: Optimizer, trial: Storage.Trial, global_config: Dict[str, Any]) -> None: +def _run(env: Environment, trial: Storage.Trial, + global_config: Dict[str, Any]) -> Tuple[Status, Optional[Dict[str, float]]]: """ Run a single trial. @@ -162,8 +165,6 @@ def _run(env: Environment, opt: Optimizer, trial: Storage.Trial, global_config: ---------- env : Environment Benchmarking environment context to run the optimization on. - opt : Optimizer - An interface to mlos_core optimizers. storage : Storage A storage system to persist the experiment data. global_config : dict @@ -175,8 +176,7 @@ def _run(env: Environment, opt: Optimizer, trial: Storage.Trial, global_config: _LOG.warning("Setup failed: %s :: %s", env, trial.tunables) # FIXME: Use the actual timestamp from the environment. trial.update(Status.FAILED, datetime.utcnow()) - opt.register(trial.tunables, Status.FAILED) - return + return (Status.FAILED, None) (status, timestamp, results) = env.run() # Block and wait for the final result. _LOG.info("Results: %s :: %s\n%s", trial.tunables, status, results) @@ -193,7 +193,7 @@ def _run(env: Environment, opt: Optimizer, trial: Storage.Trial, global_config: # Filter out non-numeric scores from the optimizer. scores = results if not isinstance(results, dict) \ else {k: float(v) for (k, v) in results.items() if isinstance(v, (int, float))} - opt.register(trial.tunables, status, scores) + return (status, scores) if __name__ == "__main__": From 33e332a419ce5ae3d88785a9244441426d01e3ae Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Wed, 21 Feb 2024 15:44:18 -0800 Subject: [PATCH 002/121] mypy fixes --- mlos_bench/mlos_bench/run.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/mlos_bench/mlos_bench/run.py b/mlos_bench/mlos_bench/run.py index bc571ad0ab..deb8891ad2 100755 --- a/mlos_bench/mlos_bench/run.py +++ b/mlos_bench/mlos_bench/run.py @@ -109,8 +109,8 @@ def _optimize(*, opt_context.bulk_register(configs, scores, status) # Complete any pending trials. 
for trial in exp.pending_trials(datetime.utcnow(), running=True): - (status, score) = _run(env_context, trial, global_config) - opt_context.register(trial.tunables, status, score) + (trial_status, trial_score) = _run(env_context, trial, global_config) + opt_context.register(trial.tunables, trial_status, trial_score) else: _LOG.warning("Skip pending trials and warm-up: %s", opt) @@ -145,8 +145,8 @@ def _optimize(*, "repeat_i": repeat_i, "is_defaults": tunables.is_defaults, }) - (status, score) = _run(env_context, trial, global_config) - opt_context.register(trial.tunables, status, score) + (trial_status, trial_score) = _run(env_context, trial, global_config) + opt_context.register(trial.tunables, trial_status, trial_score) if do_teardown: env_context.teardown() @@ -169,6 +169,11 @@ def _run(env: Environment, trial: Storage.Trial, A storage system to persist the experiment data. global_config : dict Global configuration parameters. + + Returns + ------- + (trial_status, trial_score) : (Status, Optional[Dict[str, float]]) + Status and results of the trial. """ _LOG.info("Trial: %s", trial) From 0247259ac964788043d9adfc0bdbb1fe4f8b49ad Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Wed, 21 Feb 2024 18:36:58 -0800 Subject: [PATCH 003/121] start splitting the optimization loop into two --- mlos_bench/mlos_bench/run.py | 74 +++++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 14 deletions(-) diff --git a/mlos_bench/mlos_bench/run.py b/mlos_bench/mlos_bench/run.py index deb8891ad2..49e3eb1ee8 100755 --- a/mlos_bench/mlos_bench/run.py +++ b/mlos_bench/mlos_bench/run.py @@ -30,7 +30,7 @@ def _main() -> None: launcher = Launcher("mlos_bench", "Systems autotuning and benchmarking tool") - result = _optimize( + result = _optimization_loop( env=launcher.environment, opt=launcher.optimizer, storage=launcher.storage, @@ -43,15 +43,15 @@ def _main() -> None: _LOG.info("Final result: %s", result) -def _optimize(*, - env: Environment, - opt: Optimizer, - storage: Storage, - root_env_config: str, - global_config: Dict[str, Any], - do_teardown: bool, - trial_config_repeat_count: int = 1, - ) -> Tuple[Optional[float], Optional[TunableGroups]]: +def _optimization_loop(*, + env: Environment, + opt: Optimizer, + storage: Storage, + root_env_config: str, + global_config: Dict[str, Any], + do_teardown: bool, + trial_config_repeat_count: int = 1, + ) -> Tuple[Optional[float], Optional[TunableGroups]]: """ Main optimization loop. @@ -109,7 +109,7 @@ def _optimize(*, opt_context.bulk_register(configs, scores, status) # Complete any pending trials. 
for trial in exp.pending_trials(datetime.utcnow(), running=True): - (trial_status, trial_score) = _run(env_context, trial, global_config) + (trial_status, trial_score) = _run_trial(env_context, trial, global_config) opt_context.register(trial.tunables, trial_status, trial_score) else: _LOG.warning("Skip pending trials and warm-up: %s", opt) @@ -145,7 +145,7 @@ def _optimize(*, "repeat_i": repeat_i, "is_defaults": tunables.is_defaults, }) - (trial_status, trial_score) = _run(env_context, trial, global_config) + (trial_status, trial_score) = _run_trial(env_context, trial, global_config) opt_context.register(trial.tunables, trial_status, trial_score) if do_teardown: @@ -156,8 +156,54 @@ def _optimize(*, return (best_score, best_config) -def _run(env: Environment, trial: Storage.Trial, - global_config: Dict[str, Any]) -> Tuple[Status, Optional[Dict[str, float]]]: +def _scheduler(*, exp: Storage.Experiment, env_context: Environment, + global_config: Dict[str, Any], running: bool = False) -> None: + """ + Scheduler part of the loop. Check for pending trials in the queue and run them. + """ + for trial in exp.pending_trials(datetime.utcnow(), running=running): + _run_trial(env_context, trial, global_config) + + +def _optimizer(exp: Storage.Experiment, opt_context: Optimizer, + *, last_trial_id: int = -1, trial_config_repeat_count: int = 1) -> None: + """ + Optimizer part of the loop. Load the results of the executed trials + into the optimizer, suggest new configurations, and add them to the queue. + """ + (configs, scores, status) = exp.load(last_trial_id) + opt_context.bulk_register(configs, scores, status) + + tunables = opt_context.suggest() + _schedule_trial(exp, opt_context, tunables, trial_config_repeat_count) + + +def _schedule_trial(exp: Storage.Experiment, opt: Optimizer, + tunables: TunableGroups, trial_config_repeat_count: int = 1) -> None: + """ + Add one configuration to the queue of trials. + """ + for repeat_i in range(1, trial_config_repeat_count + 1): + exp.new_trial(tunables, config={ + # Add some additional metadata to track for the trial such as the + # optimizer config used. + # Note: these values are unfortunately mutable at the moment. + # Consider them as hints of what the config was the trial *started*. + # It is possible that the experiment configs were changed + # between resuming the experiment (since that is not currently + # prevented). + # TODO: Improve for supporting multi-objective + # (e.g., opt_target_1, opt_target_2, ... and opt_direction_1, opt_direction_2, ...) + "optimizer": opt.name, + "opt_target": opt.target, + "opt_direction": opt.direction, + "repeat_i": repeat_i, + "is_defaults": tunables.is_defaults, + }) + + +def _run_trial(env: Environment, trial: Storage.Trial, + global_config: Dict[str, Any]) -> Tuple[Status, Optional[Dict[str, float]]]: """ Run a single trial. From 483e378ea8977247c7f18f696acc0ae3e27ab60e Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Thu, 22 Feb 2024 19:18:30 -0800 Subject: [PATCH 004/121] first complete version of the optimization loop (not tested yet) --- mlos_bench/mlos_bench/run.py | 86 ++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 47 deletions(-) diff --git a/mlos_bench/mlos_bench/run.py b/mlos_bench/mlos_bench/run.py index 49e3eb1ee8..bc55b19592 100755 --- a/mlos_bench/mlos_bench/run.py +++ b/mlos_bench/mlos_bench/run.py @@ -72,7 +72,6 @@ def _optimization_loop(*, trial_config_repeat_count : int How many trials to repeat for the same configuration. 
""" - # pylint: disable=too-many-locals if trial_config_repeat_count <= 0: raise ValueError(f"Invalid trial_config_repeat_count: {trial_config_repeat_count}") @@ -101,52 +100,27 @@ def _optimization_loop(*, _LOG.info("Experiment: %s Env: %s Optimizer: %s", exp, env, opt) + last_trial_id = -1 if opt_context.supports_preload: - # Load (tunable values, benchmark scores) to warm-up the optimizer. - # `.load()` returns data from ALL merged-in experiments and attempts - # to impute the missing tunable values. - (configs, scores, status) = exp.load() - opt_context.bulk_register(configs, scores, status) - # Complete any pending trials. - for trial in exp.pending_trials(datetime.utcnow(), running=True): - (trial_status, trial_score) = _run_trial(env_context, trial, global_config) - opt_context.register(trial.tunables, trial_status, trial_score) + # Complete trials that are pending or in-progress. + _scheduler(exp, env_context, global_config, running=True) + # Load past trials data into the optimizer + last_trial_id = _optimizer(exp, opt_context) else: _LOG.warning("Skip pending trials and warm-up: %s", opt) + if config_id > 0: + tunables = _load_config(exp, env_context, config_id) + last_trial_id = _schedule_trial(exp, opt_context, tunables, + trial_config_repeat_count) + # Now run new trials until the optimizer is done. while opt_context.not_converged(): - - tunables = opt_context.suggest() - - if config_id > 0: - tunable_values = exp.load_tunable_config(config_id) - tunables.assign(tunable_values) - _LOG.info("Load config from storage: %d", config_id) - if _LOG.isEnabledFor(logging.DEBUG): - _LOG.debug("Config %d ::\n%s", - config_id, json.dumps(tunable_values, indent=2)) - config_id = -1 - - for repeat_i in range(1, trial_config_repeat_count + 1): - trial = exp.new_trial(tunables, config={ - # Add some additional metadata to track for the trial such as the - # optimizer config used. - # Note: these values are unfortunately mutable at the moment. - # Consider them as hints of what the config was the trial *started*. - # It is possible that the experiment configs were changed - # between resuming the experiment (since that is not currently - # prevented). - # TODO: Improve for supporting multi-objective - # (e.g., opt_target_1, opt_target_2, ... and opt_direction_1, opt_direction_2, ...) - "optimizer": opt.name, - "opt_target": opt.target, - "opt_direction": opt.direction, - "repeat_i": repeat_i, - "is_defaults": tunables.is_defaults, - }) - (trial_status, trial_score) = _run_trial(env_context, trial, global_config) - opt_context.register(trial.tunables, trial_status, trial_score) + # TODO: In the future, _scheduler and _optimizer + # can be run in parallel in two independent loops. + _scheduler(exp, env_context, global_config) + last_trial_id = _optimizer(exp, opt_context, last_trial_id, + trial_config_repeat_count) if do_teardown: env_context.teardown() @@ -156,7 +130,21 @@ def _optimization_loop(*, return (best_score, best_config) -def _scheduler(*, exp: Storage.Experiment, env_context: Environment, +def _load_config(exp: Storage.Experiment, env_context: Environment, + config_id: int) -> TunableGroups: + """ + Load the existing tunable configuration from the storage. 
+ """ + tunable_values = exp.load_tunable_config(config_id) + tunables = env_context.tunable_params.assign(tunable_values) + _LOG.info("Load config from storage: %d", config_id) + if _LOG.isEnabledFor(logging.DEBUG): + _LOG.debug("Config %d ::\n%s", + config_id, json.dumps(tunable_values, indent=2)) + return tunables + + +def _scheduler(exp: Storage.Experiment, env_context: Environment, global_config: Dict[str, Any], running: bool = False) -> None: """ Scheduler part of the loop. Check for pending trials in the queue and run them. @@ -166,7 +154,7 @@ def _scheduler(*, exp: Storage.Experiment, env_context: Environment, def _optimizer(exp: Storage.Experiment, opt_context: Optimizer, - *, last_trial_id: int = -1, trial_config_repeat_count: int = 1) -> None: + last_trial_id: int = -1, trial_config_repeat_count: int = 1) -> int: """ Optimizer part of the loop. Load the results of the executed trials into the optimizer, suggest new configurations, and add them to the queue. @@ -175,16 +163,17 @@ def _optimizer(exp: Storage.Experiment, opt_context: Optimizer, opt_context.bulk_register(configs, scores, status) tunables = opt_context.suggest() - _schedule_trial(exp, opt_context, tunables, trial_config_repeat_count) + return _schedule_trial(exp, opt_context, tunables, trial_config_repeat_count) def _schedule_trial(exp: Storage.Experiment, opt: Optimizer, - tunables: TunableGroups, trial_config_repeat_count: int = 1) -> None: + tunables: TunableGroups, trial_config_repeat_count: int = 1) -> int: """ - Add one configuration to the queue of trials. + Add a configuration to the queue of trials. """ + last_trial_id = -1 for repeat_i in range(1, trial_config_repeat_count + 1): - exp.new_trial(tunables, config={ + trial = exp.new_trial(tunables, config={ # Add some additional metadata to track for the trial such as the # optimizer config used. # Note: these values are unfortunately mutable at the moment. 
@@ -200,6 +189,9 @@ def _schedule_trial(exp: Storage.Experiment, opt: Optimizer, "repeat_i": repeat_i, "is_defaults": tunables.is_defaults, }) + last_trial_id = trial.trial_id + + return last_trial_id def _run_trial(env: Environment, trial: Storage.Trial, From e97266f149f42c6831e16cf828e9c028967a9a52 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 23 Feb 2024 14:20:58 -0800 Subject: [PATCH 005/121] allow running mlos_bench.run._main directly from unit tests + add a unit test for bench (not tested) --- mlos_bench/mlos_bench/run.py | 3 ++- .../mlos_bench/tests/launcher_run_test.py | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/run.py b/mlos_bench/mlos_bench/run.py index bc55b19592..95548a2366 100755 --- a/mlos_bench/mlos_bench/run.py +++ b/mlos_bench/mlos_bench/run.py @@ -26,7 +26,7 @@ _LOG = logging.getLogger(__name__) -def _main() -> None: +def _main() -> Tuple[Optional[float], Optional[TunableGroups]]: launcher = Launcher("mlos_bench", "Systems autotuning and benchmarking tool") @@ -41,6 +41,7 @@ def _main() -> None: ) _LOG.info("Final result: %s", result) + return result def _optimization_loop(*, diff --git a/mlos_bench/mlos_bench/tests/launcher_run_test.py b/mlos_bench/mlos_bench/tests/launcher_run_test.py index db8339a645..1043adb4f4 100644 --- a/mlos_bench/mlos_bench/tests/launcher_run_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_run_test.py @@ -10,7 +10,9 @@ from typing import List import pytest +from unittest.mock import MagicMock, patch +from mlos_bench.run import _main from mlos_bench.services.local.local_exec import LocalExecService from mlos_bench.services.config_persistence import ConfigPersistenceService from mlos_bench.util import path_join @@ -109,3 +111,19 @@ def test_launch_main_app_opt(root_path: str, local_exec_service: LocalExecServic r"_optimize INFO Env: Mock environment best score: 64\.53\d+\s*$", ] ) + + +@patch("sys.argv") +def test_main_bench(mock_argv: MagicMock, root_path: str) -> None: + """ + Run mlos_bench command-line application with given config + and check the results in the log. + """ + mock_argv.sys.argv = [ + "run.py", + "--config", + "mlos_bench/mlos_bench/tests/config/cli/mock-bench.jsonc", + ] + + (score, _config) = _main() + assert pytest.approx(score, 1e-6) == 65.67 From 64771fd536f05339cae617de5990046b9813392c Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 23 Feb 2024 14:34:51 -0800 Subject: [PATCH 006/121] move in-process launch to a separate unit test file --- .../tests/launcher_in_process_test.py | 32 +++++++++++++++++++ .../mlos_bench/tests/launcher_run_test.py | 18 ----------- 2 files changed, 32 insertions(+), 18 deletions(-) create mode 100644 mlos_bench/mlos_bench/tests/launcher_in_process_test.py diff --git a/mlos_bench/mlos_bench/tests/launcher_in_process_test.py b/mlos_bench/mlos_bench/tests/launcher_in_process_test.py new file mode 100644 index 0000000000..cd88442274 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/launcher_in_process_test.py @@ -0,0 +1,32 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +""" +Unit tests to check the launcher and the main optimization loop in-process. +""" + +import pytest + +from mlos_bench.launcher import Launcher +from mlos_bench.run import _optimization_loop + + +def test_main_bench() -> None: + """ + Run mlos_bench optimization loop with given config and check the results. 
+ """ + launcher = Launcher("mlos_bench", "TEST RUN", argv=[ + "--config", + "mlos_bench/mlos_bench/tests/config/cli/mock-bench.jsonc", + ]) + (score, _config) = _optimization_loop( + env=launcher.environment, + opt=launcher.optimizer, + storage=launcher.storage, + root_env_config=launcher.root_env_config, + global_config=launcher.global_config, + do_teardown=launcher.teardown, + trial_config_repeat_count=launcher.trial_config_repeat_count, + ) + assert pytest.approx(score, 1e-6) == 65.67 diff --git a/mlos_bench/mlos_bench/tests/launcher_run_test.py b/mlos_bench/mlos_bench/tests/launcher_run_test.py index 1043adb4f4..db8339a645 100644 --- a/mlos_bench/mlos_bench/tests/launcher_run_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_run_test.py @@ -10,9 +10,7 @@ from typing import List import pytest -from unittest.mock import MagicMock, patch -from mlos_bench.run import _main from mlos_bench.services.local.local_exec import LocalExecService from mlos_bench.services.config_persistence import ConfigPersistenceService from mlos_bench.util import path_join @@ -111,19 +109,3 @@ def test_launch_main_app_opt(root_path: str, local_exec_service: LocalExecServic r"_optimize INFO Env: Mock environment best score: 64\.53\d+\s*$", ] ) - - -@patch("sys.argv") -def test_main_bench(mock_argv: MagicMock, root_path: str) -> None: - """ - Run mlos_bench command-line application with given config - and check the results in the log. - """ - mock_argv.sys.argv = [ - "run.py", - "--config", - "mlos_bench/mlos_bench/tests/config/cli/mock-bench.jsonc", - ] - - (score, _config) = _main() - assert pytest.approx(score, 1e-6) == 65.67 From bd7c55e7131f7fd6dca6511463e896108045ea3c Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 23 Feb 2024 15:21:15 -0800 Subject: [PATCH 007/121] add is_warm_up flag to the optimization step --- mlos_bench/mlos_bench/optimizers/base_optimizer.py | 7 +++++-- mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py | 4 ++-- mlos_bench/mlos_bench/optimizers/mock_optimizer.py | 8 +++++--- mlos_bench/mlos_bench/run.py | 7 ++++--- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/mlos_bench/mlos_bench/optimizers/base_optimizer.py b/mlos_bench/mlos_bench/optimizers/base_optimizer.py index 38d3a0d6c0..bca4b4f06e 100644 --- a/mlos_bench/mlos_bench/optimizers/base_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/base_optimizer.py @@ -195,7 +195,7 @@ def supports_preload(self) -> bool: @abstractmethod def bulk_register(self, configs: Sequence[dict], scores: Sequence[Optional[float]], - status: Optional[Sequence[Status]] = None) -> bool: + status: Optional[Sequence[Status]] = None, is_warm_up: bool = True) -> bool: """ Pre-load the optimizer with the bulk data from previous experiments. @@ -207,13 +207,16 @@ def bulk_register(self, configs: Sequence[dict], scores: Sequence[Optional[float Benchmark results from experiments that correspond to `configs`. status : Optional[Sequence[float]] Status of the experiments that correspond to `configs`. + is_warm_up : bool + True for the initial load, False for subsequent calls. Returns ------- is_not_empty : bool True if there is data to register, false otherwise. 
""" - _LOG.info("Warm-up the optimizer with: %d configs, %d scores, %d status values", + _LOG.info("%s the optimizer with: %d configs, %d scores, %d status values", + "Warm-up" if is_warm_up else "Load", len(configs or []), len(scores or []), len(status or [])) if len(configs or []) != len(scores or []): raise ValueError("Numbers of configs and scores do not match.") diff --git a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py index a24745d8f9..a02b475bac 100644 --- a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py @@ -99,8 +99,8 @@ def name(self) -> str: return f"{self.__class__.__name__}:{self._opt.__class__.__name__}" def bulk_register(self, configs: Sequence[dict], scores: Sequence[Optional[float]], - status: Optional[Sequence[Status]] = None) -> bool: - if not super().bulk_register(configs, scores, status): + status: Optional[Sequence[Status]] = None, is_warm_up: bool = True) -> bool: + if not super().bulk_register(configs, scores, status, is_warm_up): return False df_configs = self._to_df(configs) # Impute missing values, if necessary df_scores = pd.Series(scores, dtype=float) * self._opt_sign diff --git a/mlos_bench/mlos_bench/optimizers/mock_optimizer.py b/mlos_bench/mlos_bench/optimizers/mock_optimizer.py index 801da49d8f..11d1b597b1 100644 --- a/mlos_bench/mlos_bench/optimizers/mock_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/mock_optimizer.py @@ -42,15 +42,17 @@ def __init__(self, self._best_score: Optional[float] = None def bulk_register(self, configs: Sequence[dict], scores: Sequence[Optional[float]], - status: Optional[Sequence[Status]] = None) -> bool: - if not super().bulk_register(configs, scores, status): + status: Optional[Sequence[Status]] = None, is_warm_up: bool = True) -> bool: + if not super().bulk_register(configs, scores, status, is_warm_up): return False if status is None: status = [Status.SUCCEEDED] * len(configs) for (params, score, trial_status) in zip(configs, scores, status): tunables = self._tunables.copy().assign(params) self.register(tunables, trial_status, None if score is None else float(score)) - self._iter -= 1 # Do not advance the iteration counter during warm-up. + if is_warm_up: + # Do not advance the iteration counter during warm-up. + self._iter -= 1 if _LOG.isEnabledFor(logging.DEBUG): (score, _) = self.get_best_observation() _LOG.debug("Warm-up end: %s = %s", self.target, score) diff --git a/mlos_bench/mlos_bench/run.py b/mlos_bench/mlos_bench/run.py index 95548a2366..10ba43436c 100755 --- a/mlos_bench/mlos_bench/run.py +++ b/mlos_bench/mlos_bench/run.py @@ -106,7 +106,7 @@ def _optimization_loop(*, # Complete trials that are pending or in-progress. _scheduler(exp, env_context, global_config, running=True) # Load past trials data into the optimizer - last_trial_id = _optimizer(exp, opt_context) + last_trial_id = _optimizer(exp, opt_context, is_warm_up=True) else: _LOG.warning("Skip pending trials and warm-up: %s", opt) @@ -155,13 +155,14 @@ def _scheduler(exp: Storage.Experiment, env_context: Environment, def _optimizer(exp: Storage.Experiment, opt_context: Optimizer, - last_trial_id: int = -1, trial_config_repeat_count: int = 1) -> int: + last_trial_id: int = -1, trial_config_repeat_count: int = 1, + is_warm_up: bool = False) -> int: """ Optimizer part of the loop. Load the results of the executed trials into the optimizer, suggest new configurations, and add them to the queue. 
""" (configs, scores, status) = exp.load(last_trial_id) - opt_context.bulk_register(configs, scores, status) + opt_context.bulk_register(configs, scores, status, is_warm_up) tunables = opt_context.suggest() return _schedule_trial(exp, opt_context, tunables, trial_config_repeat_count) From 9f15aeedeb66d2d1ad3a8256186e34b3648b78bc Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 23 Feb 2024 15:37:57 -0800 Subject: [PATCH 008/121] in-process optimizaiton loop invocation works! --- mlos_bench/mlos_bench/run.py | 2 +- mlos_bench/mlos_bench/tests/launcher_in_process_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/run.py b/mlos_bench/mlos_bench/run.py index 10ba43436c..0f5f0e9970 100755 --- a/mlos_bench/mlos_bench/run.py +++ b/mlos_bench/mlos_bench/run.py @@ -161,7 +161,7 @@ def _optimizer(exp: Storage.Experiment, opt_context: Optimizer, Optimizer part of the loop. Load the results of the executed trials into the optimizer, suggest new configurations, and add them to the queue. """ - (configs, scores, status) = exp.load(last_trial_id) + (configs, scores, status) = exp.load(last_trial_id - 1) opt_context.bulk_register(configs, scores, status, is_warm_up) tunables = opt_context.suggest() diff --git a/mlos_bench/mlos_bench/tests/launcher_in_process_test.py b/mlos_bench/mlos_bench/tests/launcher_in_process_test.py index cd88442274..4365a8fc26 100644 --- a/mlos_bench/mlos_bench/tests/launcher_in_process_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_in_process_test.py @@ -29,4 +29,4 @@ def test_main_bench() -> None: do_teardown=launcher.teardown, trial_config_repeat_count=launcher.trial_config_repeat_count, ) - assert pytest.approx(score, 1e-6) == 65.67 + assert pytest.approx(score, 1e-6) == 65.6742 From 65cd07242c74d0df3231a54f4e00061f1221eb32 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 23 Feb 2024 16:03:44 -0800 Subject: [PATCH 009/121] add multi-iteration optimization to in-process test; fix the mlos_core bulk registration (check for is_warm_up) --- .../optimizers/mlos_core_optimizer.py | 2 ++ .../tests/launcher_in_process_test.py | 23 ++++++++++++++----- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py index a02b475bac..aee3b7662f 100644 --- a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py @@ -111,6 +111,8 @@ def bulk_register(self, configs: Sequence[dict], scores: Sequence[Optional[float df_configs = df_configs[df_status_completed] df_scores = df_scores[df_status_completed] self._opt.register(df_configs, df_scores) + if not is_warm_up: + self._iter += len(df_scores) if _LOG.isEnabledFor(logging.DEBUG): (score, _) = self.get_best_observation() _LOG.debug("Warm-up end: %s = %s", self.target, score) diff --git a/mlos_bench/mlos_bench/tests/launcher_in_process_test.py b/mlos_bench/mlos_bench/tests/launcher_in_process_test.py index 4365a8fc26..fa4979f818 100644 --- a/mlos_bench/mlos_bench/tests/launcher_in_process_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_in_process_test.py @@ -6,20 +6,31 @@ Unit tests to check the launcher and the main optimization loop in-process. 
""" +from typing import List + import pytest from mlos_bench.launcher import Launcher from mlos_bench.run import _optimization_loop -def test_main_bench() -> None: +@pytest.mark.parametrize( + ("argv", "expected_score"), [ + ([ + "--config", "mlos_bench/mlos_bench/tests/config/cli/mock-bench.jsonc", + ], 65.6742), + ([ + "--config", "mlos_bench/mlos_bench/tests/config/cli/mock-opt.jsonc", + "--trial_config_repeat_count", "3", + "--max_iterations", "3", + ], 64.53), + ] +) +def test_main_bench(argv: List[str], expected_score: float) -> None: """ Run mlos_bench optimization loop with given config and check the results. """ - launcher = Launcher("mlos_bench", "TEST RUN", argv=[ - "--config", - "mlos_bench/mlos_bench/tests/config/cli/mock-bench.jsonc", - ]) + launcher = Launcher("mlos_bench", "TEST RUN", argv=argv) (score, _config) = _optimization_loop( env=launcher.environment, opt=launcher.optimizer, @@ -29,4 +40,4 @@ def test_main_bench() -> None: do_teardown=launcher.teardown, trial_config_repeat_count=launcher.trial_config_repeat_count, ) - assert pytest.approx(score, 1e-6) == 65.6742 + assert pytest.approx(score, 1e-6) == expected_score From c010d957dbc04e3f99d9d81ca19687be36298611 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 23 Feb 2024 16:06:06 -0800 Subject: [PATCH 010/121] make in-process launcerh tests pass --- mlos_bench/mlos_bench/tests/launcher_in_process_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/launcher_in_process_test.py b/mlos_bench/mlos_bench/tests/launcher_in_process_test.py index fa4979f818..74c9c084d8 100644 --- a/mlos_bench/mlos_bench/tests/launcher_in_process_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_in_process_test.py @@ -23,7 +23,7 @@ "--config", "mlos_bench/mlos_bench/tests/config/cli/mock-opt.jsonc", "--trial_config_repeat_count", "3", "--max_iterations", "3", - ], 64.53), + ], 64.8847), ] ) def test_main_bench(argv: List[str], expected_score: float) -> None: From 7cfef3acd7170493b448ad2ac560640048b31e7f Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 23 Feb 2024 16:16:03 -0800 Subject: [PATCH 011/121] remove unnecessary local variables to make pylint happy --- mlos_bench/mlos_bench/run.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/mlos_bench/mlos_bench/run.py b/mlos_bench/mlos_bench/run.py index 0f5f0e9970..016fd191c9 100755 --- a/mlos_bench/mlos_bench/run.py +++ b/mlos_bench/mlos_bench/run.py @@ -79,10 +79,6 @@ def _optimization_loop(*, if _LOG.isEnabledFor(logging.INFO): _LOG.info("Root Environment:\n%s", env.pprint()) - experiment_id = global_config["experiment_id"].strip() - trial_id = int(global_config["trial_id"]) - config_id = int(global_config.get("config_id", -1)) - # Start new or resume the existing experiment. Verify that the # experiment configuration is compatible with the previous runs. 
# If the `merge` config parameter is present, merge in the data @@ -90,8 +86,8 @@ def _optimization_loop(*, with env as env_context, \ opt as opt_context, \ storage.experiment( - experiment_id=experiment_id, - trial_id=trial_id, + experiment_id=global_config["experiment_id"].strip(), + trial_id=int(global_config["trial_id"]), root_env_config=root_env_config, description=env.name, tunables=env.tunable_params, @@ -110,6 +106,7 @@ def _optimization_loop(*, else: _LOG.warning("Skip pending trials and warm-up: %s", opt) + config_id = int(global_config.get("config_id", -1)) if config_id > 0: tunables = _load_config(exp, env_context, config_id) last_trial_id = _schedule_trial(exp, opt_context, tunables, From 7233180bf50df8d68f3610554dc99f3397936646 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 23 Feb 2024 16:19:19 -0800 Subject: [PATCH 012/121] move trial_config_repeat_count checks to the launcher --- mlos_bench/mlos_bench/launcher.py | 6 +++++- mlos_bench/mlos_bench/run.py | 3 --- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 22dc7d4666..e851581ec8 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -76,7 +76,11 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st else: config = {} - self.trial_config_repeat_count: int = args.trial_config_repeat_count or config.get("trial_config_repeat_count", 1) + self.trial_config_repeat_count: int = ( + args.trial_config_repeat_count or config.get("trial_config_repeat_count", 1) + ) + if self.trial_config_repeat_count <= 0: + raise ValueError(f"Invalid trial_config_repeat_count: {self.trial_config_repeat_count}") log_level = args.log_level or config.get("log_level", _LOG_LEVEL) try: diff --git a/mlos_bench/mlos_bench/run.py b/mlos_bench/mlos_bench/run.py index 016fd191c9..e6ba50600a 100755 --- a/mlos_bench/mlos_bench/run.py +++ b/mlos_bench/mlos_bench/run.py @@ -73,9 +73,6 @@ def _optimization_loop(*, trial_config_repeat_count : int How many trials to repeat for the same configuration. """ - if trial_config_repeat_count <= 0: - raise ValueError(f"Invalid trial_config_repeat_count: {trial_config_repeat_count}") - if _LOG.isEnabledFor(logging.INFO): _LOG.info("Root Environment:\n%s", env.pprint()) From be7dcecd6c7b8be402398a0c26f415b3cd4aa6d6 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 23 Feb 2024 16:45:17 -0800 Subject: [PATCH 013/121] make experiment.load() return trial_ids and use them in the optimization loop --- mlos_bench/mlos_bench/run.py | 21 ++++++++----------- mlos_bench/mlos_bench/storage/base_storage.py | 7 ++++--- .../mlos_bench/storage/sql/experiment.py | 8 ++++--- .../mlos_bench/tests/storage/exp_load_test.py | 7 +++++-- .../tests/storage/trial_schedule_test.py | 14 +++++++------ 5 files changed, 31 insertions(+), 26 deletions(-) diff --git a/mlos_bench/mlos_bench/run.py b/mlos_bench/mlos_bench/run.py index e6ba50600a..ed005349f1 100755 --- a/mlos_bench/mlos_bench/run.py +++ b/mlos_bench/mlos_bench/run.py @@ -99,23 +99,21 @@ def _optimization_loop(*, # Complete trials that are pending or in-progress. 
_scheduler(exp, env_context, global_config, running=True) # Load past trials data into the optimizer - last_trial_id = _optimizer(exp, opt_context, is_warm_up=True) + _optimizer(exp, opt_context, is_warm_up=True) else: _LOG.warning("Skip pending trials and warm-up: %s", opt) config_id = int(global_config.get("config_id", -1)) if config_id > 0: tunables = _load_config(exp, env_context, config_id) - last_trial_id = _schedule_trial(exp, opt_context, tunables, - trial_config_repeat_count) + _schedule_trial(exp, opt_context, tunables, trial_config_repeat_count) # Now run new trials until the optimizer is done. while opt_context.not_converged(): # TODO: In the future, _scheduler and _optimizer # can be run in parallel in two independent loops. _scheduler(exp, env_context, global_config) - last_trial_id = _optimizer(exp, opt_context, last_trial_id, - trial_config_repeat_count) + _optimizer(exp, opt_context, last_trial_id, trial_config_repeat_count) if do_teardown: env_context.teardown() @@ -154,20 +152,22 @@ def _optimizer(exp: Storage.Experiment, opt_context: Optimizer, """ Optimizer part of the loop. Load the results of the executed trials into the optimizer, suggest new configurations, and add them to the queue. + Return the last trial ID processed by the optimizer. """ - (configs, scores, status) = exp.load(last_trial_id - 1) + (trial_ids, configs, scores, status) = exp.load(last_trial_id) opt_context.bulk_register(configs, scores, status, is_warm_up) tunables = opt_context.suggest() - return _schedule_trial(exp, opt_context, tunables, trial_config_repeat_count) + _schedule_trial(exp, opt_context, tunables, trial_config_repeat_count) + + return max(trial_ids, default=last_trial_id) def _schedule_trial(exp: Storage.Experiment, opt: Optimizer, - tunables: TunableGroups, trial_config_repeat_count: int = 1) -> int: + tunables: TunableGroups, trial_config_repeat_count: int = 1) -> None: """ Add a configuration to the queue of trials. """ - last_trial_id = -1 for repeat_i in range(1, trial_config_repeat_count + 1): trial = exp.new_trial(tunables, config={ # Add some additional metadata to track for the trial such as the @@ -185,9 +185,6 @@ def _schedule_trial(exp: Storage.Experiment, opt: Optimizer, "repeat_i": repeat_i, "is_defaults": tunables.is_defaults, }) - last_trial_id = trial.trial_id - - return last_trial_id def _run_trial(env: Environment, trial: Storage.Trial, diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 687d01ca4a..e8bc9cdcac 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -258,7 +258,8 @@ def load_telemetry(self, trial_id: int) -> List[Tuple[datetime, str, Any]]: @abstractmethod def load(self, last_trial_id: int = -1, - opt_target: Optional[str] = None) -> Tuple[List[dict], List[Optional[float]], List[Status]]: + opt_target: Optional[str] = None + ) -> Tuple[List[int], List[dict], List[Optional[float]], List[Status]]: """ Load (tunable values, benchmark scores, status) to warm-up the optimizer. @@ -275,8 +276,8 @@ def load(self, Returns ------- - (configs, scores, status) : Tuple[List[dict], List[Optional[float]], List[Status]] - Tunable values, benchmark scores, and status of the trials. + (trial_ids, configs, scores, status) : ([dict], [Optional[float]], [Status]) + Trial ids, Tunable values, benchmark scores, and status of the trials. 
""" @abstractmethod diff --git a/mlos_bench/mlos_bench/storage/sql/experiment.py b/mlos_bench/mlos_bench/storage/sql/experiment.py index 7b56a424dc..0be5bc64d2 100644 --- a/mlos_bench/mlos_bench/storage/sql/experiment.py +++ b/mlos_bench/mlos_bench/storage/sql/experiment.py @@ -124,9 +124,10 @@ def load_telemetry(self, trial_id: int) -> List[Tuple[datetime, str, Any]]: def load(self, last_trial_id: int = -1, - opt_target: Optional[str] = None) -> Tuple[List[dict], List[Optional[float]], List[Status]]: + opt_target: Optional[str] = None + ) -> Tuple[List[int], List[dict], List[Optional[float]], List[Status]]: opt_target = opt_target or self._opt_target - (configs, scores, status) = ([], [], []) + (trial_ids, configs, scores, status) = ([], [], [], []) with self._engine.connect() as conn: cur_trials = conn.execute( self._schema.trial.select().with_only_columns( @@ -154,10 +155,11 @@ def load(self, for trial in cur_trials.fetchall(): tunables = self._get_params( conn, self._schema.config_param, config_id=trial.config_id) + trial_ids.append(trial.trial_id) configs.append(tunables) scores.append(None if trial.metric_value is None else float(trial.metric_value)) status.append(Status[trial.status]) - return (configs, scores, status) + return (trial_ids, configs, scores, status) @staticmethod def _get_params(conn: Connection, table: Table, **kwargs: Any) -> Dict[str, Any]: diff --git a/mlos_bench/mlos_bench/tests/storage/exp_load_test.py b/mlos_bench/mlos_bench/tests/storage/exp_load_test.py index df19fe7729..3067156a4f 100644 --- a/mlos_bench/mlos_bench/tests/storage/exp_load_test.py +++ b/mlos_bench/mlos_bench/tests/storage/exp_load_test.py @@ -18,7 +18,8 @@ def test_exp_load_empty(exp_storage: Storage.Experiment) -> None: """ Try to retrieve old experimental data from the empty storage. 
""" - (configs, scores, status) = exp_storage.load() + (trial_ids, configs, scores, status) = exp_storage.load() + assert not trial_ids assert not configs assert not scores assert not status @@ -93,6 +94,7 @@ def test_exp_trial_update_categ(exp_storage: Storage.Experiment, trial = exp_storage.new_trial(tunable_groups) trial.update(Status.SUCCEEDED, datetime.utcnow(), {"score": 99.9, "benchmark": "test"}) assert exp_storage.load() == ( + [trial.trial_id], [{ 'idle': 'halt', 'kernel_sched_latency_ns': '2000000', @@ -133,7 +135,8 @@ def test_exp_trial_pending_3(exp_storage: Storage.Experiment, (pending,) = list(exp_storage.pending_trials(datetime.utcnow(), running=True)) assert pending.trial_id == trial_pend.trial_id - (configs, scores, status) = exp_storage.load() + (trial_ids, configs, scores, status) = exp_storage.load() + assert trial_ids == [trial_fail.trial_id, trial_succ.trial_id] assert len(configs) == 2 assert scores == [None, score] assert status == [Status.FAILED, Status.SUCCEEDED] diff --git a/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py b/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py index cac6ddd9b4..3a582d559d 100644 --- a/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py +++ b/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py @@ -72,14 +72,14 @@ def test_schedule_trial(exp_storage: Storage.Experiment, # Optimizer side: get trials completed after some known trial: # No completed trials yet: - assert exp_storage.load() == ([], [], []) + assert exp_storage.load() == ([], [], [], []) # Update the status of some trials: trial_now1.update(Status.RUNNING, timestamp + timedelta_1min) trial_now2.update(Status.RUNNING, timestamp + timedelta_1min) # Still no completed trials: - assert exp_storage.load() == ([], [], []) + assert exp_storage.load() == ([], [], [], []) # Get trials scheduled to run within the next 3 hours: pending_ids = _trial_ids( @@ -107,11 +107,13 @@ def test_schedule_trial(exp_storage: Storage.Experiment, trial_1h.update(Status.SUCCEEDED, timestamp + timedelta_1hr * 2, metrics={"score": 1.0}) # Check that three trials have completed so far: - (trial_configs, _scores, trial_status) = exp_storage.load() - assert len(trial_configs) == 3 + (trial_ids, trial_configs, trial_scores, trial_status) = exp_storage.load() + assert trial_ids == [trial_now1.trial_id, trial_now2.trial_id, trial_1h.trial_id] + assert len(trial_configs) == len(trial_scores) == 3 assert trial_status == [Status.SUCCEEDED, Status.FAILED, Status.SUCCEEDED] # Get only trials completed after trial_now2: - (trial_configs, _scores, trial_status) = exp_storage.load(last_trial_id=trial_now2.trial_id) - assert len(trial_configs) == 1 + (trial_ids, trial_configs, trial_scores, trial_status) = exp_storage.load(last_trial_id=trial_now2.trial_id) + assert trial_ids == [trial_1h.trial_id] + assert len(trial_configs) == len(trial_scores) == 1 assert trial_status == [Status.SUCCEEDED] From 3c52e038a95e70308b1883ce7d84a0f783d9509e Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 23 Feb 2024 16:47:45 -0800 Subject: [PATCH 014/121] use proper last_trial_id in the main loop; fix the unit tests --- mlos_bench/mlos_bench/run.py | 4 ++-- mlos_bench/mlos_bench/tests/launcher_in_process_test.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/run.py b/mlos_bench/mlos_bench/run.py index ed005349f1..3293aec3ae 100755 --- a/mlos_bench/mlos_bench/run.py +++ b/mlos_bench/mlos_bench/run.py @@ -99,7 +99,7 @@ def _optimization_loop(*, # 
Complete trials that are pending or in-progress. _scheduler(exp, env_context, global_config, running=True) # Load past trials data into the optimizer - _optimizer(exp, opt_context, is_warm_up=True) + last_trial_id = _optimizer(exp, opt_context, is_warm_up=True) else: _LOG.warning("Skip pending trials and warm-up: %s", opt) @@ -113,7 +113,7 @@ def _optimization_loop(*, # TODO: In the future, _scheduler and _optimizer # can be run in parallel in two independent loops. _scheduler(exp, env_context, global_config) - _optimizer(exp, opt_context, last_trial_id, trial_config_repeat_count) + last_trial_id = _optimizer(exp, opt_context, last_trial_id, trial_config_repeat_count) if do_teardown: env_context.teardown() diff --git a/mlos_bench/mlos_bench/tests/launcher_in_process_test.py b/mlos_bench/mlos_bench/tests/launcher_in_process_test.py index 74c9c084d8..e8a60ad29c 100644 --- a/mlos_bench/mlos_bench/tests/launcher_in_process_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_in_process_test.py @@ -23,7 +23,7 @@ "--config", "mlos_bench/mlos_bench/tests/config/cli/mock-opt.jsonc", "--trial_config_repeat_count", "3", "--max_iterations", "3", - ], 64.8847), + ], 64.2758), ] ) def test_main_bench(argv: List[str], expected_score: float) -> None: From 0d9dc97aad5f81ec9ce7491cb7563f5637609ad1 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 23 Feb 2024 17:12:59 -0800 Subject: [PATCH 015/121] update launcher tests with the new output patterns --- mlos_bench/mlos_bench/tests/launcher_run_test.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/launcher_run_test.py b/mlos_bench/mlos_bench/tests/launcher_run_test.py index db8339a645..021cead6ba 100644 --- a/mlos_bench/mlos_bench/tests/launcher_run_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_run_test.py @@ -81,7 +81,7 @@ def test_launch_main_app_bench(root_path: str, local_exec_service: LocalExecServ "--config mlos_bench/mlos_bench/tests/config/cli/mock-bench.jsonc", [ f"^{_RE_DATE} run\\.py:\\d+ " + - r"_optimize INFO Env: Mock environment best score: 65\.67\d+\s*$", + r"_optimization_loop INFO Env: Mock environment best score: 65\.67\d+\s*$", ] ) @@ -97,15 +97,15 @@ def test_launch_main_app_opt(root_path: str, local_exec_service: LocalExecServic [ # Iteration 1: Expect first value to be the baseline f"^{_RE_DATE} mlos_core_optimizer\\.py:\\d+ " + - r"register DEBUG Score: 64\.88\d+ Dataframe:\s*$", + r"bulk_register DEBUG Warm-up end: score = 64\.88\d+$", # Iteration 2: The result may not always be deterministic f"^{_RE_DATE} mlos_core_optimizer\\.py:\\d+ " + - r"register DEBUG Score: \d+\.\d+ Dataframe:\s*$", + r"bulk_register DEBUG Warm-up end: score = \d+\.\d+$", # Iteration 3: non-deterministic (depends on the optimizer) f"^{_RE_DATE} mlos_core_optimizer\\.py:\\d+ " + - r"register DEBUG Score: \d+\.\d+ Dataframe:\s*$", + r"bulk_register DEBUG Warm-up end: score = \d+\.\d+$", # Final result: baseline is the optimum for the mock environment f"^{_RE_DATE} run\\.py:\\d+ " + - r"_optimize INFO Env: Mock environment best score: 64\.53\d+\s*$", + r"_optimization_loop INFO Env: Mock environment best score: 64\.27\d+\s*$", ] ) From 4e171e0b5f0f35fc9f22ec3a75a0258b75ef8b69 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 23 Feb 2024 17:21:19 -0800 Subject: [PATCH 016/121] remove unused variable --- mlos_bench/mlos_bench/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/run.py b/mlos_bench/mlos_bench/run.py index 3293aec3ae..8328a26fdc 
100755 --- a/mlos_bench/mlos_bench/run.py +++ b/mlos_bench/mlos_bench/run.py @@ -169,7 +169,7 @@ def _schedule_trial(exp: Storage.Experiment, opt: Optimizer, Add a configuration to the queue of trials. """ for repeat_i in range(1, trial_config_repeat_count + 1): - trial = exp.new_trial(tunables, config={ + exp.new_trial(tunables, config={ # Add some additional metadata to track for the trial such as the # optimizer config used. # Note: these values are unfortunately mutable at the moment. From 52adab86f1a8751d068d4271351ae1a733f72714 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 26 Feb 2024 13:43:34 -0800 Subject: [PATCH 017/121] better naming for functions in the optimization loop --- mlos_bench/mlos_bench/run.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mlos_bench/mlos_bench/run.py b/mlos_bench/mlos_bench/run.py index 8328a26fdc..18bf779180 100755 --- a/mlos_bench/mlos_bench/run.py +++ b/mlos_bench/mlos_bench/run.py @@ -97,9 +97,9 @@ def _optimization_loop(*, last_trial_id = -1 if opt_context.supports_preload: # Complete trials that are pending or in-progress. - _scheduler(exp, env_context, global_config, running=True) + _run_schedule(exp, env_context, global_config, running=True) # Load past trials data into the optimizer - last_trial_id = _optimizer(exp, opt_context, is_warm_up=True) + last_trial_id = _get_optimizer_suggestions(exp, opt_context, is_warm_up=True) else: _LOG.warning("Skip pending trials and warm-up: %s", opt) @@ -112,8 +112,8 @@ def _optimization_loop(*, while opt_context.not_converged(): # TODO: In the future, _scheduler and _optimizer # can be run in parallel in two independent loops. - _scheduler(exp, env_context, global_config) - last_trial_id = _optimizer(exp, opt_context, last_trial_id, trial_config_repeat_count) + _run_schedule(exp, env_context, global_config) + last_trial_id = _get_optimizer_suggestions(exp, opt_context, last_trial_id, trial_config_repeat_count) if do_teardown: env_context.teardown() @@ -137,8 +137,8 @@ def _load_config(exp: Storage.Experiment, env_context: Environment, return tunables -def _scheduler(exp: Storage.Experiment, env_context: Environment, - global_config: Dict[str, Any], running: bool = False) -> None: +def _run_schedule(exp: Storage.Experiment, env_context: Environment, + global_config: Dict[str, Any], running: bool = False) -> None: """ Scheduler part of the loop. Check for pending trials in the queue and run them. """ @@ -146,9 +146,9 @@ def _scheduler(exp: Storage.Experiment, env_context: Environment, _run_trial(env_context, trial, global_config) -def _optimizer(exp: Storage.Experiment, opt_context: Optimizer, - last_trial_id: int = -1, trial_config_repeat_count: int = 1, - is_warm_up: bool = False) -> int: +def _get_optimizer_suggestions(exp: Storage.Experiment, opt_context: Optimizer, + last_trial_id: int = -1, trial_config_repeat_count: int = 1, + is_warm_up: bool = False) -> int: """ Optimizer part of the loop. Load the results of the executed trials into the optimizer, suggest new configurations, and add them to the queue. 
From df893d9af09d2e7a0f5e4ded30c931f8f5d1255a Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 26 Feb 2024 16:02:04 -0800 Subject: [PATCH 018/121] start implementing the scheduler class --- mlos_bench/mlos_bench/schedulers/__init__.py | 15 ++ .../mlos_bench/schedulers/base_scheduler.py | 186 ++++++++++++++++++ 2 files changed, 201 insertions(+) create mode 100644 mlos_bench/mlos_bench/schedulers/__init__.py create mode 100644 mlos_bench/mlos_bench/schedulers/base_scheduler.py diff --git a/mlos_bench/mlos_bench/schedulers/__init__.py b/mlos_bench/mlos_bench/schedulers/__init__.py new file mode 100644 index 0000000000..c54e3c0efc --- /dev/null +++ b/mlos_bench/mlos_bench/schedulers/__init__.py @@ -0,0 +1,15 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +""" +Interfaces and implementations of the optimization loop scheduling policies. +""" + +from mlos_bench.schedulers.base_scheduler import Scheduler +from mlos_bench.schedulers.sync_scheduler import SyncScheduler + +__all__ = [ + 'Scheduler', + 'SyncScheduler', +] diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py new file mode 100644 index 0000000000..3bfb6d7147 --- /dev/null +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -0,0 +1,186 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +""" +Base class for the optimization loop scheduling policies. +""" + +import json +import logging +from datetime import datetime + +from abc import ABCMeta, abstractmethod +from types import TracebackType +from typing import Any, Dict, Optional, Tuple, Type +from typing_extensions import Literal + +from mlos_bench.environments.base_environment import Environment +from mlos_bench.environments.status import Status +from mlos_bench.optimizers.base_optimizer import Optimizer +from mlos_bench.storage.base_storage import Storage +from mlos_bench.tunables.tunable_groups import TunableGroups + +_LOG = logging.getLogger(__name__) + + +class Scheduler(metaclass=ABCMeta): + + def __init__(self, *, + environment: Environment, + optimizer: Optimizer, + storage: Storage, + root_env_config: str, + global_config: Dict[str, Any]): + """" + Initialize the scheduler. + """ + self.environment = environment + self.optimizer = optimizer + self.storage = storage + self.root_env_config = root_env_config + self.global_config = global_config + self.experiment: Optional[Storage.Experiment] = None + self._trial_config_repeat_count = 1 # TODO: Make this configurable. + + def __enter__(self) -> 'Scheduler': + """ + Enter the scheduler's context. + """ + _LOG.debug("Optimizer START :: %s", self) + assert self.experiment is None + self.environment.__enter__() + self.optimizer.__enter__() + self.experiment = self.storage.experiment( + experiment_id=self.global_config["experiment_id"].strip(), + trial_id=int(self.global_config["trial_id"]), + root_env_config=self.root_env_config, + description=self.environment.name, + tunables=self.environment.tunable_params, + opt_target=self.optimizer.target, + opt_direction=self.optimizer.direction, + ).__enter__() + self._in_context = True + return self + + def __exit__(self, + ex_type: Optional[Type[BaseException]], + ex_val: Optional[BaseException], + ex_tb: Optional[TracebackType]) -> Literal[False]: + """ + Exit the context of the scheduler. 
+ """ + if ex_val is None: + _LOG.debug("Scheduler END :: %s", self) + else: + assert ex_type and ex_val + _LOG.warning("Scheduler END :: %s", self, exc_info=(ex_type, ex_val, ex_tb)) + assert self.experiment is not None + self.experiment.__exit__(ex_type, ex_val, ex_tb) + self.optimizer.__exit__(ex_type, ex_val, ex_tb) + self.environment.__exit__(ex_type, ex_val, ex_tb) + self.experiment = None + return False # Do not suppress exceptions + + def _load_config(self, config_id: int) -> TunableGroups: + """ + Load the existing tunable configuration from the storage. + """ + assert self.experiment is not None + tunable_values = self.experiment.load_tunable_config(config_id) + tunables = self.environment.tunable_params.assign(tunable_values) + _LOG.info("Load config from storage: %d", config_id) + if _LOG.isEnabledFor(logging.DEBUG): + _LOG.debug("Config %d ::\n%s", + config_id, json.dumps(tunable_values, indent=2)) + return tunables + + def _run_schedule(self, running: bool = False) -> None: + """ + Scheduler part of the loop. Check for pending trials in the queue and run them. + """ + assert self.experiment is not None + for trial in self.experiment.pending_trials(datetime.utcnow(), running=running): + self._run_trial(trial) + + def _get_optimizer_suggestions(self, last_trial_id: int = -1, is_warm_up: bool = False) -> int: + """ + Optimizer part of the loop. Load the results of the executed trials + into the optimizer, suggest new configurations, and add them to the queue. + Return the last trial ID processed by the optimizer. + """ + assert self.experiment is not None + (trial_ids, configs, scores, status) = self.experiment.load(last_trial_id) + self.optimizer.bulk_register(configs, scores, status, is_warm_up) + + tunables = self.optimizer.suggest() + self._schedule_trial(tunables) + + return max(trial_ids, default=last_trial_id) + + def _schedule_trial(self, tunables: TunableGroups) -> None: + """ + Add a configuration to the queue of trials. + """ + assert self.experiment is not None + for repeat_i in range(1, self._trial_config_repeat_count + 1): + self.experiment.new_trial(tunables, config={ + # Add some additional metadata to track for the trial such as the + # optimizer config used. + # Note: these values are unfortunately mutable at the moment. + # Consider them as hints of what the config was the trial *started*. + # It is possible that the experiment configs were changed + # between resuming the experiment (since that is not currently + # prevented). + # TODO: Improve for supporting multi-objective + # (e.g., opt_target_1, opt_target_2, ... and opt_direction_1, opt_direction_2, ...) + "optimizer": self.optimizer.name, + "opt_target": self.optimizer.target, + "opt_direction": self.optimizer.direction, + "repeat_i": repeat_i, + "is_defaults": tunables.is_defaults, + }) + + def _run_trial(self, trial: Storage.Trial) -> Tuple[Status, Optional[Dict[str, float]]]: + """ + Run a single trial. + + Parameters + ---------- + env : Environment + Benchmarking environment context to run the optimization on. + storage : Storage + A storage system to persist the experiment data. + global_config : dict + Global configuration parameters. + + Returns + ------- + (trial_status, trial_score) : (Status, Optional[Dict[str, float]]) + Status and results of the trial. 
+ """ + _LOG.info("Trial: %s", trial) + assert self.experiment is not None + + if not self.environment.setup(trial.tunables, trial.config(self.global_config)): + _LOG.warning("Setup failed: %s :: %s", self.environment, trial.tunables) + # FIXME: Use the actual timestamp from the environment. + trial.update(Status.FAILED, datetime.utcnow()) + return (Status.FAILED, None) + + (status, timestamp, results) = self.environment.run() # Block and wait for the final result. + _LOG.info("Results: %s :: %s\n%s", trial.tunables, status, results) + + # In async mode (TODO), poll the environment for status and telemetry + # and update the storage with the intermediate results. + (_status, _timestamp, telemetry) = self.environment.status() + + # Use the status and timestamp from `.run()` as it is the final status of the experiment. + # TODO: Use the `.status()` output in async mode. + trial.update_telemetry(status, timestamp, telemetry) + + trial.update(status, timestamp, results) + # Filter out non-numeric scores from the optimizer. + scores = results if not isinstance(results, dict) \ + else {k: float(v) for (k, v) in results.items() if isinstance(v, (int, float))} + return (status, scores) From 5aca76450973458c35102bf6cbce1ca66cc0ab6b Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 26 Feb 2024 16:10:59 -0800 Subject: [PATCH 019/121] change the default value for is_warm_up parameter to False --- mlos_bench/mlos_bench/optimizers/base_optimizer.py | 2 +- mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py | 2 +- mlos_bench/mlos_bench/optimizers/mock_optimizer.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/optimizers/base_optimizer.py b/mlos_bench/mlos_bench/optimizers/base_optimizer.py index bca4b4f06e..e41693e88a 100644 --- a/mlos_bench/mlos_bench/optimizers/base_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/base_optimizer.py @@ -195,7 +195,7 @@ def supports_preload(self) -> bool: @abstractmethod def bulk_register(self, configs: Sequence[dict], scores: Sequence[Optional[float]], - status: Optional[Sequence[Status]] = None, is_warm_up: bool = True) -> bool: + status: Optional[Sequence[Status]] = None, is_warm_up: bool = False) -> bool: """ Pre-load the optimizer with the bulk data from previous experiments. 
diff --git a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py index aee3b7662f..6c1f885444 100644 --- a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py @@ -99,7 +99,7 @@ def name(self) -> str: return f"{self.__class__.__name__}:{self._opt.__class__.__name__}" def bulk_register(self, configs: Sequence[dict], scores: Sequence[Optional[float]], - status: Optional[Sequence[Status]] = None, is_warm_up: bool = True) -> bool: + status: Optional[Sequence[Status]] = None, is_warm_up: bool = False) -> bool: if not super().bulk_register(configs, scores, status, is_warm_up): return False df_configs = self._to_df(configs) # Impute missing values, if necessary diff --git a/mlos_bench/mlos_bench/optimizers/mock_optimizer.py b/mlos_bench/mlos_bench/optimizers/mock_optimizer.py index 11d1b597b1..ed156fe44b 100644 --- a/mlos_bench/mlos_bench/optimizers/mock_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/mock_optimizer.py @@ -42,7 +42,7 @@ def __init__(self, self._best_score: Optional[float] = None def bulk_register(self, configs: Sequence[dict], scores: Sequence[Optional[float]], - status: Optional[Sequence[Status]] = None, is_warm_up: bool = True) -> bool: + status: Optional[Sequence[Status]] = None, is_warm_up: bool = False) -> bool: if not super().bulk_register(configs, scores, status, is_warm_up): return False if status is None: From 309e10cb3a46049633b0b2815c7b7cac608b72c4 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 26 Feb 2024 16:27:34 -0800 Subject: [PATCH 020/121] started to implement teh start() method of the sync scheduler --- .../mlos_bench/schedulers/base_scheduler.py | 58 ++++++++++++++++--- 1 file changed, 50 insertions(+), 8 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 3bfb6d7147..b41a6422a1 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -51,6 +51,10 @@ def __enter__(self) -> 'Scheduler': assert self.experiment is None self.environment.__enter__() self.optimizer.__enter__() + # Start new or resume the existing experiment. Verify that the + # experiment configuration is compatible with the previous runs. + # If the `merge` config parameter is present, merge in the data + # from other experiments and check for compatibility. self.experiment = self.storage.experiment( experiment_id=self.global_config["experiment_id"].strip(), trial_id=int(self.global_config["trial_id"]), @@ -82,6 +86,44 @@ def __exit__(self, self.experiment = None return False # Do not suppress exceptions + def start(self) -> Tuple[Optional[float], Optional[TunableGroups]]: + """ + Start the optimization loop. + """ + assert self.experiment is not None + _LOG.info("START: Experiment: %s Env: %s Optimizer: %s", + self.experiment, self.environment, self.optimizer) + if _LOG.isEnabledFor(logging.INFO): + _LOG.info("Root Environment:\n%s", self.environment.pprint()) + + last_trial_id = -1 + if self.optimizer.supports_preload: + # Complete trials that are pending or in-progress. 
+ self._run_schedule(running=True) + # Load past trials data into the optimizer + last_trial_id = self._get_optimizer_suggestions(is_warm_up=True) + else: + _LOG.warning("Skip pending trials and warm-up: %s", self.optimizer) + + config_id = int(self.global_config.get("config_id", -1)) + if config_id > 0: + tunables = self._load_config(config_id) + self._schedule_trial(tunables) + + # Now run new trials until the optimizer is done. + while self.optimizer.not_converged(): + # TODO: In the future, _scheduler and _optimizer + # can be run in parallel in two independent loops. + self._run_schedule() + last_trial_id = self._get_optimizer_suggestions(last_trial_id) + + if self._do_teardown: + self.environment.teardown() + + (best_score, best_config) = self.optimizer.get_best_observation() + _LOG.info("Env: %s best score: %s", self.environment, best_score) + return (best_score, best_config) + def _load_config(self, config_id: int) -> TunableGroups: """ Load the existing tunable configuration from the storage. @@ -95,14 +137,6 @@ def _load_config(self, config_id: int) -> TunableGroups: config_id, json.dumps(tunable_values, indent=2)) return tunables - def _run_schedule(self, running: bool = False) -> None: - """ - Scheduler part of the loop. Check for pending trials in the queue and run them. - """ - assert self.experiment is not None - for trial in self.experiment.pending_trials(datetime.utcnow(), running=running): - self._run_trial(trial) - def _get_optimizer_suggestions(self, last_trial_id: int = -1, is_warm_up: bool = False) -> int: """ Optimizer part of the loop. Load the results of the executed trials @@ -141,6 +175,14 @@ def _schedule_trial(self, tunables: TunableGroups) -> None: "is_defaults": tunables.is_defaults, }) + def _run_schedule(self, running: bool = False) -> None: + """ + Scheduler part of the loop. Check for pending trials in the queue and run them. + """ + assert self.experiment is not None + for trial in self.experiment.pending_trials(datetime.utcnow(), running=running): + self._run_trial(trial) + def _run_trial(self, trial: Storage.Trial) -> Tuple[Status, Optional[Dict[str, float]]]: """ Run a single trial. From ffe23e1bc43283fac2cfe3c9fd3e04bc8fb9a18e Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Tue, 27 Feb 2024 13:38:21 -0800 Subject: [PATCH 021/121] implement proper Scheduler constructor --- .../mlos_bench/schedulers/base_scheduler.py | 33 ++++++++++++------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index b41a6422a1..7e58958d3d 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -20,6 +20,7 @@ from mlos_bench.optimizers.base_optimizer import Optimizer from mlos_bench.storage.base_storage import Storage from mlos_bench.tunables.tunable_groups import TunableGroups +from mlos_bench.util import merge_parameters _LOG = logging.getLogger(__name__) @@ -27,21 +28,31 @@ class Scheduler(metaclass=ABCMeta): def __init__(self, *, + config: Dict[str, Any], + global_config: Dict[str, Any], environment: Environment, optimizer: Optimizer, storage: Storage, - root_env_config: str, - global_config: Dict[str, Any]): - """" - Initialize the scheduler. + root_env_config: str): """ + Create a new instance of the scheduler. The constructor of this + and the derived classes is called by the persistence service + after reading the class JSON configuration. 
Other objects like + the Environment and Optimizer are provided by the Launcher. + """ + self.global_config = global_config + config = merge_parameters(dest=config.copy(), source=global_config) + + self._experiment_id = config["experiment_id"].strip() + self._trial_id = int(config["trial_id"]) + self._trial_config_repeat_count: int = config.get("trial_config_repeat_count", 1) + self._do_teardown = bool(config.get("teardown", True)) + + self.experiment: Optional[Storage.Experiment] = None self.environment = environment self.optimizer = optimizer self.storage = storage - self.root_env_config = root_env_config - self.global_config = global_config - self.experiment: Optional[Storage.Experiment] = None - self._trial_config_repeat_count = 1 # TODO: Make this configurable. + self._root_env_config = root_env_config def __enter__(self) -> 'Scheduler': """ @@ -56,9 +67,9 @@ def __enter__(self) -> 'Scheduler': # If the `merge` config parameter is present, merge in the data # from other experiments and check for compatibility. self.experiment = self.storage.experiment( - experiment_id=self.global_config["experiment_id"].strip(), - trial_id=int(self.global_config["trial_id"]), - root_env_config=self.root_env_config, + experiment_id=self._experiment_id, + trial_id=self._trial_id, + root_env_config=self._root_env_config, description=self.environment.name, tunables=self.environment.tunable_params, opt_target=self.optimizer.target, From cb863e07de1cf64a9ad73d7664c7771d087d3180 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Tue, 27 Feb 2024 14:00:44 -0800 Subject: [PATCH 022/121] more clean-ups to the base scheduler --- .../mlos_bench/schedulers/base_scheduler.py | 74 ++++++++----------- 1 file changed, 29 insertions(+), 45 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 7e58958d3d..f8f950a6b3 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -47,6 +47,7 @@ def __init__(self, *, self._trial_id = int(config["trial_id"]) self._trial_config_repeat_count: int = config.get("trial_config_repeat_count", 1) self._do_teardown = bool(config.get("teardown", True)) + self._last_trial_id = -1 self.experiment: Optional[Storage.Experiment] = None self.environment = environment @@ -97,7 +98,7 @@ def __exit__(self, self.experiment = None return False # Do not suppress exceptions - def start(self) -> Tuple[Optional[float], Optional[TunableGroups]]: + def start(self) -> None: """ Start the optimization loop. """ @@ -107,30 +108,31 @@ def start(self) -> Tuple[Optional[float], Optional[TunableGroups]]: if _LOG.isEnabledFor(logging.INFO): _LOG.info("Root Environment:\n%s", self.environment.pprint()) - last_trial_id = -1 - if self.optimizer.supports_preload: - # Complete trials that are pending or in-progress. - self._run_schedule(running=True) - # Load past trials data into the optimizer - last_trial_id = self._get_optimizer_suggestions(is_warm_up=True) - else: - _LOG.warning("Skip pending trials and warm-up: %s", self.optimizer) - - config_id = int(self.global_config.get("config_id", -1)) - if config_id > 0: - tunables = self._load_config(config_id) - self._schedule_trial(tunables) - - # Now run new trials until the optimizer is done. - while self.optimizer.not_converged(): - # TODO: In the future, _scheduler and _optimizer - # can be run in parallel in two independent loops. 
- self._run_schedule() - last_trial_id = self._get_optimizer_suggestions(last_trial_id) - - if self._do_teardown: - self.environment.teardown() + self._last_trial_id = -1 + if self.optimizer.supports_preload: + # Complete trials that are pending or in-progress. + self._run_schedule(running=True) + # Load past trials data into the optimizer + self._last_trial_id = self._get_optimizer_suggestions(is_warm_up=True) + else: + _LOG.warning("Skip pending trials and warm-up: %s", self.optimizer) + + config_id = int(self.global_config.get("config_id", -1)) + if config_id > 0: + tunables = self._load_config(config_id) + self._schedule_trial(tunables) + + # Now run new trials until the optimizer is done. + while self.optimizer.not_converged(): + # TODO: In the future, _scheduler and _optimizer + # can be run in parallel in two independent loops. + self._run_schedule() + self._last_trial_id = self._get_optimizer_suggestions(self._last_trial_id) + if self._do_teardown: + self.environment.teardown() + + def get_best_observation(self) -> Tuple[Optional[float], Optional[TunableGroups]]: (best_score, best_config) = self.optimizer.get_best_observation() _LOG.info("Env: %s best score: %s", self.environment, best_score) return (best_score, best_config) @@ -194,23 +196,9 @@ def _run_schedule(self, running: bool = False) -> None: for trial in self.experiment.pending_trials(datetime.utcnow(), running=running): self._run_trial(trial) - def _run_trial(self, trial: Storage.Trial) -> Tuple[Status, Optional[Dict[str, float]]]: + def _run_trial(self, trial: Storage.Trial) -> None: """ - Run a single trial. - - Parameters - ---------- - env : Environment - Benchmarking environment context to run the optimization on. - storage : Storage - A storage system to persist the experiment data. - global_config : dict - Global configuration parameters. - - Returns - ------- - (trial_status, trial_score) : (Status, Optional[Dict[str, float]]) - Status and results of the trial. + Set up and run a single trial. Save the results in the storage. """ _LOG.info("Trial: %s", trial) assert self.experiment is not None @@ -219,7 +207,7 @@ def _run_trial(self, trial: Storage.Trial) -> Tuple[Status, Optional[Dict[str, f _LOG.warning("Setup failed: %s :: %s", self.environment, trial.tunables) # FIXME: Use the actual timestamp from the environment. trial.update(Status.FAILED, datetime.utcnow()) - return (Status.FAILED, None) + return (status, timestamp, results) = self.environment.run() # Block and wait for the final result. _LOG.info("Results: %s :: %s\n%s", trial.tunables, status, results) @@ -233,7 +221,3 @@ def _run_trial(self, trial: Storage.Trial) -> Tuple[Status, Optional[Dict[str, f trial.update_telemetry(status, timestamp, telemetry) trial.update(status, timestamp, results) - # Filter out non-numeric scores from the optimizer. 
- scores = results if not isinstance(results, dict) \ - else {k: float(v) for (k, v) in results.items() if isinstance(v, (int, float))} - return (status, scores) From 990b01998b5180cc902a071b78eccce89abc45f2 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Tue, 27 Feb 2024 16:41:48 -0800 Subject: [PATCH 023/121] minor pylint fixes --- mlos_bench/mlos_bench/schedulers/base_scheduler.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index f8f950a6b3..85960fa357 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -26,6 +26,10 @@ class Scheduler(metaclass=ABCMeta): + # pylint: disable=too-many-instance-attributes + """ + Base class for the optimization loop scheduling policies. + """ def __init__(self, *, config: Dict[str, Any], @@ -76,7 +80,6 @@ def __enter__(self) -> 'Scheduler': opt_target=self.optimizer.target, opt_direction=self.optimizer.direction, ).__enter__() - self._in_context = True return self def __exit__(self, @@ -133,6 +136,9 @@ def start(self) -> None: self.environment.teardown() def get_best_observation(self) -> Tuple[Optional[float], Optional[TunableGroups]]: + """ + Get the best observation from the optimizer. + """ (best_score, best_config) = self.optimizer.get_best_observation() _LOG.info("Env: %s best score: %s", self.environment, best_score) return (best_score, best_config) From 2ac0520d60ff004d756844d4a3c5c456c582a4aa Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Tue, 27 Feb 2024 17:18:43 -0800 Subject: [PATCH 024/121] add _add_trial_to_queue() method --- .../mlos_bench/schedulers/base_scheduler.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 85960fa357..13f52d0a36 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -49,6 +49,7 @@ def __init__(self, *, self._experiment_id = config["experiment_id"].strip() self._trial_id = int(config["trial_id"]) + self._config_id = int(config.get("config_id", -1)) self._trial_config_repeat_count: int = config.get("trial_config_repeat_count", 1) self._do_teardown = bool(config.get("teardown", True)) self._last_trial_id = -1 @@ -120,9 +121,8 @@ def start(self) -> None: else: _LOG.warning("Skip pending trials and warm-up: %s", self.optimizer) - config_id = int(self.global_config.get("config_id", -1)) - if config_id > 0: - tunables = self._load_config(config_id) + if self._config_id > 0: + tunables = self._load_config(self._config_id) self._schedule_trial(tunables) # Now run new trials until the optimizer is done. @@ -175,9 +175,8 @@ def _schedule_trial(self, tunables: TunableGroups) -> None: """ Add a configuration to the queue of trials. """ - assert self.experiment is not None for repeat_i in range(1, self._trial_config_repeat_count + 1): - self.experiment.new_trial(tunables, config={ + self._add_trial_to_queue(tunables, config={ # Add some additional metadata to track for the trial such as the # optimizer config used. # Note: these values are unfortunately mutable at the moment. 
@@ -194,6 +193,16 @@ def _schedule_trial(self, tunables: TunableGroups) -> None: "is_defaults": tunables.is_defaults, }) + def _add_trial_to_queue(self, tunables: TunableGroups, + ts_start: Optional[datetime] = None, + config: Optional[Dict[str, Any]] = None) -> None: + """ + Add a configuration to the queue of trials. + A wrapper for the `Experiment.new_trial` method. + """ + assert self.experiment is not None + self.experiment.new_trial(tunables, ts_start, config) + def _run_schedule(self, running: bool = False) -> None: """ Scheduler part of the loop. Check for pending trials in the queue and run them. From b95100ae994b249f91d0840c7138c71d4c4511bb Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Tue, 27 Feb 2024 17:42:07 -0800 Subject: [PATCH 025/121] better handling of warm-up phase (no redundant code) --- .../mlos_bench/schedulers/base_scheduler.py | 23 +++++++------------ 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 13f52d0a36..9d95614ca5 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -52,7 +52,6 @@ def __init__(self, *, self._config_id = int(config.get("config_id", -1)) self._trial_config_repeat_count: int = config.get("trial_config_repeat_count", 1) self._do_teardown = bool(config.get("teardown", True)) - self._last_trial_id = -1 self.experiment: Optional[Storage.Experiment] = None self.environment = environment @@ -112,25 +111,19 @@ def start(self) -> None: if _LOG.isEnabledFor(logging.INFO): _LOG.info("Root Environment:\n%s", self.environment.pprint()) - self._last_trial_id = -1 - if self.optimizer.supports_preload: - # Complete trials that are pending or in-progress. - self._run_schedule(running=True) - # Load past trials data into the optimizer - self._last_trial_id = self._get_optimizer_suggestions(is_warm_up=True) - else: - _LOG.warning("Skip pending trials and warm-up: %s", self.optimizer) - if self._config_id > 0: tunables = self._load_config(self._config_id) self._schedule_trial(tunables) - # Now run new trials until the optimizer is done. + last_trial_id = -1 + is_warm_up = self.optimizer.supports_preload + if not is_warm_up: + _LOG.warning("Skip pending trials and warm-up: %s", self.optimizer) + while self.optimizer.not_converged(): - # TODO: In the future, _scheduler and _optimizer - # can be run in parallel in two independent loops. 
- self._run_schedule() - self._last_trial_id = self._get_optimizer_suggestions(self._last_trial_id) + self._run_schedule(is_warm_up) + last_trial_id = self._get_optimizer_suggestions(last_trial_id, is_warm_up) + is_warm_up = False if self._do_teardown: self.environment.teardown() From e15033d3294cc2163deae9a1ea6135e2378e8b8e Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Tue, 27 Feb 2024 17:51:22 -0800 Subject: [PATCH 026/121] split the sccheduler implementation into the base class and the sync scheduler implementation --- .../mlos_bench/schedulers/base_scheduler.py | 17 ++++----- .../mlos_bench/schedulers/sync_scheduler.py | 37 +++++++++++++++++++ 2 files changed, 44 insertions(+), 10 deletions(-) create mode 100644 mlos_bench/mlos_bench/schedulers/sync_scheduler.py diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 9d95614ca5..b985318246 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -101,6 +101,7 @@ def __exit__(self, self.experiment = None return False # Do not suppress exceptions + @abstractmethod def start(self) -> None: """ Start the optimization loop. @@ -115,16 +116,12 @@ def start(self) -> None: tunables = self._load_config(self._config_id) self._schedule_trial(tunables) - last_trial_id = -1 - is_warm_up = self.optimizer.supports_preload - if not is_warm_up: - _LOG.warning("Skip pending trials and warm-up: %s", self.optimizer) - - while self.optimizer.not_converged(): - self._run_schedule(is_warm_up) - last_trial_id = self._get_optimizer_suggestions(last_trial_id, is_warm_up) - is_warm_up = False - + def _teardown(self) -> None: + """ + Tear down the environment. + Call this method at the end of the `.start()` implementation (?). + """ + assert self.experiment is not None if self._do_teardown: self.environment.teardown() diff --git a/mlos_bench/mlos_bench/schedulers/sync_scheduler.py b/mlos_bench/mlos_bench/schedulers/sync_scheduler.py new file mode 100644 index 0000000000..5127d8a4c3 --- /dev/null +++ b/mlos_bench/mlos_bench/schedulers/sync_scheduler.py @@ -0,0 +1,37 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +""" + A simple single-threaded synchronous optimization loop implementation. +""" + +import logging + +from mlos_bench.schedulers.base_scheduler import Scheduler + +_LOG = logging.getLogger(__name__) + + +class SyncScheduler(Scheduler): + """ + A simple single-threaded synchronous optimization loop implementation. + """ + + def start(self) -> None: + """ + Start the optimization loop. 
+ """ + super().start() + + last_trial_id = -1 + is_warm_up = self.optimizer.supports_preload + if not is_warm_up: + _LOG.warning("Skip pending trials and warm-up: %s", self.optimizer) + + while self.optimizer.not_converged(): + self._run_schedule(is_warm_up) + last_trial_id = self._get_optimizer_suggestions(last_trial_id, is_warm_up) + is_warm_up = False + + self._teardown() From 6eab1b01a0d62b99e80b593a1fa641fa367d4508 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Tue, 27 Feb 2024 18:12:10 -0800 Subject: [PATCH 027/121] use the new scheduler in _main() --- mlos_bench/mlos_bench/run.py | 219 ++---------------- .../mlos_bench/schedulers/base_scheduler.py | 6 + .../tests/launcher_in_process_test.py | 14 +- 3 files changed, 24 insertions(+), 215 deletions(-) diff --git a/mlos_bench/mlos_bench/run.py b/mlos_bench/mlos_bench/run.py index 18bf779180..163b097e81 100755 --- a/mlos_bench/mlos_bench/run.py +++ b/mlos_bench/mlos_bench/run.py @@ -11,226 +11,39 @@ See `--help` output for details. """ -import json import logging -from datetime import datetime -from typing import Optional, Tuple, Dict, Any +from typing import List, Optional, Tuple from mlos_bench.launcher import Launcher -from mlos_bench.optimizers.base_optimizer import Optimizer -from mlos_bench.environments.base_environment import Environment -from mlos_bench.storage.base_storage import Storage -from mlos_bench.environments.status import Status from mlos_bench.tunables.tunable_groups import TunableGroups +from mlos_bench.schedulers.sync_scheduler import SyncScheduler _LOG = logging.getLogger(__name__) -def _main() -> Tuple[Optional[float], Optional[TunableGroups]]: +def _main(argv: Optional[List[str]] = None) -> Tuple[Optional[float], Optional[TunableGroups]]: - launcher = Launcher("mlos_bench", "Systems autotuning and benchmarking tool") + launcher = Launcher("mlos_bench", "Systems autotuning and benchmarking tool", argv=argv) - result = _optimization_loop( - env=launcher.environment, - opt=launcher.optimizer, + scheduler = SyncScheduler( + config={ + "teardown": launcher.teardown, + "trial_config_repeat_count": launcher.trial_config_repeat_count, + }, + global_config=launcher.global_config, + environment=launcher.environment, + optimizer=launcher.optimizer, storage=launcher.storage, root_env_config=launcher.root_env_config, - global_config=launcher.global_config, - do_teardown=launcher.teardown, - trial_config_repeat_count=launcher.trial_config_repeat_count, ) + with scheduler.context() as scheduler_context: + scheduler_context.start() + + result = scheduler.get_best_observation() _LOG.info("Final result: %s", result) return result -def _optimization_loop(*, - env: Environment, - opt: Optimizer, - storage: Storage, - root_env_config: str, - global_config: Dict[str, Any], - do_teardown: bool, - trial_config_repeat_count: int = 1, - ) -> Tuple[Optional[float], Optional[TunableGroups]]: - """ - Main optimization loop. - - Parameters - ---------- - env : Environment - benchmarking environment to run the optimization on. - opt : Optimizer - An interface to mlos_core optimizers. - storage : Storage - A storage system to persist the experiment data. - root_env_config : str - A path to the root JSON configuration file of the benchmarking environment. - global_config : dict - Global configuration parameters. - do_teardown : bool - If True, teardown the environment at the end of the experiment - trial_config_repeat_count : int - How many trials to repeat for the same configuration. 
- """ - if _LOG.isEnabledFor(logging.INFO): - _LOG.info("Root Environment:\n%s", env.pprint()) - - # Start new or resume the existing experiment. Verify that the - # experiment configuration is compatible with the previous runs. - # If the `merge` config parameter is present, merge in the data - # from other experiments and check for compatibility. - with env as env_context, \ - opt as opt_context, \ - storage.experiment( - experiment_id=global_config["experiment_id"].strip(), - trial_id=int(global_config["trial_id"]), - root_env_config=root_env_config, - description=env.name, - tunables=env.tunable_params, - opt_target=opt.target, - opt_direction=opt.direction, - ) as exp: - - _LOG.info("Experiment: %s Env: %s Optimizer: %s", exp, env, opt) - - last_trial_id = -1 - if opt_context.supports_preload: - # Complete trials that are pending or in-progress. - _run_schedule(exp, env_context, global_config, running=True) - # Load past trials data into the optimizer - last_trial_id = _get_optimizer_suggestions(exp, opt_context, is_warm_up=True) - else: - _LOG.warning("Skip pending trials and warm-up: %s", opt) - - config_id = int(global_config.get("config_id", -1)) - if config_id > 0: - tunables = _load_config(exp, env_context, config_id) - _schedule_trial(exp, opt_context, tunables, trial_config_repeat_count) - - # Now run new trials until the optimizer is done. - while opt_context.not_converged(): - # TODO: In the future, _scheduler and _optimizer - # can be run in parallel in two independent loops. - _run_schedule(exp, env_context, global_config) - last_trial_id = _get_optimizer_suggestions(exp, opt_context, last_trial_id, trial_config_repeat_count) - - if do_teardown: - env_context.teardown() - - (best_score, best_config) = opt.get_best_observation() - _LOG.info("Env: %s best score: %s", env, best_score) - return (best_score, best_config) - - -def _load_config(exp: Storage.Experiment, env_context: Environment, - config_id: int) -> TunableGroups: - """ - Load the existing tunable configuration from the storage. - """ - tunable_values = exp.load_tunable_config(config_id) - tunables = env_context.tunable_params.assign(tunable_values) - _LOG.info("Load config from storage: %d", config_id) - if _LOG.isEnabledFor(logging.DEBUG): - _LOG.debug("Config %d ::\n%s", - config_id, json.dumps(tunable_values, indent=2)) - return tunables - - -def _run_schedule(exp: Storage.Experiment, env_context: Environment, - global_config: Dict[str, Any], running: bool = False) -> None: - """ - Scheduler part of the loop. Check for pending trials in the queue and run them. - """ - for trial in exp.pending_trials(datetime.utcnow(), running=running): - _run_trial(env_context, trial, global_config) - - -def _get_optimizer_suggestions(exp: Storage.Experiment, opt_context: Optimizer, - last_trial_id: int = -1, trial_config_repeat_count: int = 1, - is_warm_up: bool = False) -> int: - """ - Optimizer part of the loop. Load the results of the executed trials - into the optimizer, suggest new configurations, and add them to the queue. - Return the last trial ID processed by the optimizer. 
- """ - (trial_ids, configs, scores, status) = exp.load(last_trial_id) - opt_context.bulk_register(configs, scores, status, is_warm_up) - - tunables = opt_context.suggest() - _schedule_trial(exp, opt_context, tunables, trial_config_repeat_count) - - return max(trial_ids, default=last_trial_id) - - -def _schedule_trial(exp: Storage.Experiment, opt: Optimizer, - tunables: TunableGroups, trial_config_repeat_count: int = 1) -> None: - """ - Add a configuration to the queue of trials. - """ - for repeat_i in range(1, trial_config_repeat_count + 1): - exp.new_trial(tunables, config={ - # Add some additional metadata to track for the trial such as the - # optimizer config used. - # Note: these values are unfortunately mutable at the moment. - # Consider them as hints of what the config was the trial *started*. - # It is possible that the experiment configs were changed - # between resuming the experiment (since that is not currently - # prevented). - # TODO: Improve for supporting multi-objective - # (e.g., opt_target_1, opt_target_2, ... and opt_direction_1, opt_direction_2, ...) - "optimizer": opt.name, - "opt_target": opt.target, - "opt_direction": opt.direction, - "repeat_i": repeat_i, - "is_defaults": tunables.is_defaults, - }) - - -def _run_trial(env: Environment, trial: Storage.Trial, - global_config: Dict[str, Any]) -> Tuple[Status, Optional[Dict[str, float]]]: - """ - Run a single trial. - - Parameters - ---------- - env : Environment - Benchmarking environment context to run the optimization on. - storage : Storage - A storage system to persist the experiment data. - global_config : dict - Global configuration parameters. - - Returns - ------- - (trial_status, trial_score) : (Status, Optional[Dict[str, float]]) - Status and results of the trial. - """ - _LOG.info("Trial: %s", trial) - - if not env.setup(trial.tunables, trial.config(global_config)): - _LOG.warning("Setup failed: %s :: %s", env, trial.tunables) - # FIXME: Use the actual timestamp from the environment. - trial.update(Status.FAILED, datetime.utcnow()) - return (Status.FAILED, None) - - (status, timestamp, results) = env.run() # Block and wait for the final result. - _LOG.info("Results: %s :: %s\n%s", trial.tunables, status, results) - - # In async mode (TODO), poll the environment for status and telemetry - # and update the storage with the intermediate results. - (_status, _timestamp, telemetry) = env.status() - - # Use the status and timestamp from `.run()` as it is the final status of the experiment. - # TODO: Use the `.status()` output in async mode. - trial.update_telemetry(status, timestamp, telemetry) - - trial.update(status, timestamp, results) - # Filter out non-numeric scores from the optimizer. - scores = results if not isinstance(results, dict) \ - else {k: float(v) for (k, v) in results.items() if isinstance(v, (int, float))} - return (status, scores) - - if __name__ == "__main__": _main() diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index b985318246..4548485c80 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -101,6 +101,12 @@ def __exit__(self, self.experiment = None return False # Do not suppress exceptions + def context(self) -> 'Scheduler': + """ + Return the current context. 
+ """ + return self + @abstractmethod def start(self) -> None: """ diff --git a/mlos_bench/mlos_bench/tests/launcher_in_process_test.py b/mlos_bench/mlos_bench/tests/launcher_in_process_test.py index e8a60ad29c..d3b7733872 100644 --- a/mlos_bench/mlos_bench/tests/launcher_in_process_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_in_process_test.py @@ -10,8 +10,7 @@ import pytest -from mlos_bench.launcher import Launcher -from mlos_bench.run import _optimization_loop +from mlos_bench.run import _main @pytest.mark.parametrize( @@ -30,14 +29,5 @@ def test_main_bench(argv: List[str], expected_score: float) -> None: """ Run mlos_bench optimization loop with given config and check the results. """ - launcher = Launcher("mlos_bench", "TEST RUN", argv=argv) - (score, _config) = _optimization_loop( - env=launcher.environment, - opt=launcher.optimizer, - storage=launcher.storage, - root_env_config=launcher.root_env_config, - global_config=launcher.global_config, - do_teardown=launcher.teardown, - trial_config_repeat_count=launcher.trial_config_repeat_count, - ) + (score, _config) = _main(argv) assert pytest.approx(score, 1e-6) == expected_score From 9c7f2ccd5311b02578eb57a10c8476337876ebff Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Wed, 28 Feb 2024 13:54:44 -0800 Subject: [PATCH 028/121] add scheduler config parameters that can be overridden from global config --- mlos_bench/mlos_bench/run.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/run.py b/mlos_bench/mlos_bench/run.py index 163b097e81..ac97422f8f 100755 --- a/mlos_bench/mlos_bench/run.py +++ b/mlos_bench/mlos_bench/run.py @@ -27,8 +27,11 @@ def _main(argv: Optional[List[str]] = None) -> Tuple[Optional[float], Optional[T scheduler = SyncScheduler( config={ - "teardown": launcher.teardown, + "experiment_id": "UNDEFINED - override from global config", + "trial_id": 0, # Override from global config + "config_id": -1, # Override from global config "trial_config_repeat_count": launcher.trial_config_repeat_count, + "teardown": launcher.teardown, }, global_config=launcher.global_config, environment=launcher.environment, From 479a5ed21c4910dcf98af2a360ff6dd3da46a3e6 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Wed, 28 Feb 2024 14:04:23 -0800 Subject: [PATCH 029/121] add todo comments --- mlos_bench/mlos_bench/run.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mlos_bench/mlos_bench/run.py b/mlos_bench/mlos_bench/run.py index ac97422f8f..61b3e7385a 100755 --- a/mlos_bench/mlos_bench/run.py +++ b/mlos_bench/mlos_bench/run.py @@ -25,6 +25,7 @@ def _main(argv: Optional[List[str]] = None) -> Tuple[Optional[float], Optional[T launcher = Launcher("mlos_bench", "Systems autotuning and benchmarking tool", argv=argv) + # TODO: Instantiate Scheduler from JSON config scheduler = SyncScheduler( config={ "experiment_id": "UNDEFINED - override from global config", From 50dad9fa55033f95fb8dc1e6cc284894f07ac7f9 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Wed, 28 Feb 2024 14:22:02 -0800 Subject: [PATCH 030/121] update the scores for launcher unit tests + fix teh regexps --- mlos_bench/mlos_bench/run.py | 5 +++-- mlos_bench/mlos_bench/tests/launcher_in_process_test.py | 2 +- mlos_bench/mlos_bench/tests/launcher_run_test.py | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/mlos_bench/mlos_bench/run.py b/mlos_bench/mlos_bench/run.py index 61b3e7385a..3cd146f286 100755 --- a/mlos_bench/mlos_bench/run.py +++ b/mlos_bench/mlos_bench/run.py @@ -44,8 +44,9 @@ def 
_main(argv: Optional[List[str]] = None) -> Tuple[Optional[float], Optional[T with scheduler.context() as scheduler_context: scheduler_context.start() - result = scheduler.get_best_observation() - _LOG.info("Final result: %s", result) + (score, _config) = result = scheduler.get_best_observation() + # NOTE: This log line is used in test_launch_main_app_* unit tests: + _LOG.info("Final score: %s", score) return result diff --git a/mlos_bench/mlos_bench/tests/launcher_in_process_test.py b/mlos_bench/mlos_bench/tests/launcher_in_process_test.py index d3b7733872..5f7ac7f437 100644 --- a/mlos_bench/mlos_bench/tests/launcher_in_process_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_in_process_test.py @@ -22,7 +22,7 @@ "--config", "mlos_bench/mlos_bench/tests/config/cli/mock-opt.jsonc", "--trial_config_repeat_count", "3", "--max_iterations", "3", - ], 64.2758), + ], 64.53897), ] ) def test_main_bench(argv: List[str], expected_score: float) -> None: diff --git a/mlos_bench/mlos_bench/tests/launcher_run_test.py b/mlos_bench/mlos_bench/tests/launcher_run_test.py index 021cead6ba..965be6ce5a 100644 --- a/mlos_bench/mlos_bench/tests/launcher_run_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_run_test.py @@ -81,7 +81,7 @@ def test_launch_main_app_bench(root_path: str, local_exec_service: LocalExecServ "--config mlos_bench/mlos_bench/tests/config/cli/mock-bench.jsonc", [ f"^{_RE_DATE} run\\.py:\\d+ " + - r"_optimization_loop INFO Env: Mock environment best score: 65\.67\d+\s*$", + r"_main INFO Final score: 65\.67\d+\s*$", ] ) @@ -97,7 +97,7 @@ def test_launch_main_app_opt(root_path: str, local_exec_service: LocalExecServic [ # Iteration 1: Expect first value to be the baseline f"^{_RE_DATE} mlos_core_optimizer\\.py:\\d+ " + - r"bulk_register DEBUG Warm-up end: score = 64\.88\d+$", + r"bulk_register DEBUG Warm-up end: score = 64\.53\d+$", # Iteration 2: The result may not always be deterministic f"^{_RE_DATE} mlos_core_optimizer\\.py:\\d+ " + r"bulk_register DEBUG Warm-up end: score = \d+\.\d+$", @@ -106,6 +106,6 @@ def test_launch_main_app_opt(root_path: str, local_exec_service: LocalExecServic r"bulk_register DEBUG Warm-up end: score = \d+\.\d+$", # Final result: baseline is the optimum for the mock environment f"^{_RE_DATE} run\\.py:\\d+ " + - r"_optimization_loop INFO Env: Mock environment best score: 64\.27\d+\s*$", + r"_main INFO Final score: 64\.53\d+\s*$", ] ) From 220ece1e7bc5df9a3850556916b5c157f188b5f0 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Wed, 28 Feb 2024 14:37:57 -0800 Subject: [PATCH 031/121] add logging to the sync optimization loop --- mlos_bench/mlos_bench/schedulers/sync_scheduler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mlos_bench/mlos_bench/schedulers/sync_scheduler.py b/mlos_bench/mlos_bench/schedulers/sync_scheduler.py index 5127d8a4c3..6d8da6d529 100644 --- a/mlos_bench/mlos_bench/schedulers/sync_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/sync_scheduler.py @@ -30,6 +30,8 @@ def start(self) -> None: _LOG.warning("Skip pending trials and warm-up: %s", self.optimizer) while self.optimizer.not_converged(): + _LOG.info("Optimization loop: %s Last trial ID: %d", + "Warm-up" if is_warm_up else "Run", last_trial_id) self._run_schedule(is_warm_up) last_trial_id = self._get_optimizer_suggestions(last_trial_id, is_warm_up) is_warm_up = False From 29cec19a444dc8e09007b706f64c26fd96c98c80 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Wed, 28 Feb 2024 14:58:33 -0800 Subject: [PATCH 032/121] add more logging to the scheduler class --- 
mlos_bench/mlos_bench/schedulers/base_scheduler.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 4548485c80..8345db1266 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -160,6 +160,7 @@ def _get_optimizer_suggestions(self, last_trial_id: int = -1, is_warm_up: bool = """ assert self.experiment is not None (trial_ids, configs, scores, status) = self.experiment.load(last_trial_id) + _LOG.info("QUEUE: Update the optimizer with trial results: %s", trial_ids) self.optimizer.bulk_register(configs, scores, status, is_warm_up) tunables = self.optimizer.suggest() @@ -197,7 +198,8 @@ def _add_trial_to_queue(self, tunables: TunableGroups, A wrapper for the `Experiment.new_trial` method. """ assert self.experiment is not None - self.experiment.new_trial(tunables, ts_start, config) + trial = self.experiment.new_trial(tunables, ts_start, config) + _LOG.info("QUEUE: Add new trial: %s", trial) def _run_schedule(self, running: bool = False) -> None: """ @@ -211,12 +213,13 @@ def _run_trial(self, trial: Storage.Trial) -> None: """ Set up and run a single trial. Save the results in the storage. """ - _LOG.info("Trial: %s", trial) assert self.experiment is not None + _LOG.info("QUEUE: Execute trial: %s", trial) if not self.environment.setup(trial.tunables, trial.config(self.global_config)): _LOG.warning("Setup failed: %s :: %s", self.environment, trial.tunables) # FIXME: Use the actual timestamp from the environment. + _LOG.info("QUEUE: Update trial results: %s :: %s", trial, Status.FAILED) trial.update(Status.FAILED, datetime.utcnow()) return @@ -232,3 +235,4 @@ def _run_trial(self, trial: Storage.Trial) -> None: trial.update_telemetry(status, timestamp, telemetry) trial.update(status, timestamp, results) + _LOG.info("QUEUE: Update trial results: %s :: %s %s", trial, status, results) From 6f8bb2cffc0ef5cdb61cfe81110d4aa7a5af727e Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Wed, 28 Feb 2024 16:12:46 -0800 Subject: [PATCH 033/121] move (sync) implementation of the run_trial() to SyncScheduler; other minor refactorings --- mlos_bench/mlos_bench/run.py | 3 +- .../mlos_bench/schedulers/base_scheduler.py | 47 ++++--------------- .../mlos_bench/schedulers/sync_scheduler.py | 30 +++++++++++- 3 files changed, 41 insertions(+), 39 deletions(-) diff --git a/mlos_bench/mlos_bench/run.py b/mlos_bench/mlos_bench/run.py index 3cd146f286..d58283c0a3 100755 --- a/mlos_bench/mlos_bench/run.py +++ b/mlos_bench/mlos_bench/run.py @@ -41,8 +41,9 @@ def _main(argv: Optional[List[str]] = None) -> Tuple[Optional[float], Optional[T root_env_config=launcher.root_env_config, ) - with scheduler.context() as scheduler_context: + with scheduler as scheduler_context: scheduler_context.start() + scheduler_context.teardown() (score, _config) = result = scheduler.get_best_observation() # NOTE: This log line is used in test_launch_main_app_* unit tests: diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 8345db1266..eda0a5e661 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -16,7 +16,6 @@ from typing_extensions import Literal from mlos_bench.environments.base_environment import Environment -from mlos_bench.environments.status import Status from mlos_bench.optimizers.base_optimizer 
import Optimizer from mlos_bench.storage.base_storage import Storage from mlos_bench.tunables.tunable_groups import TunableGroups @@ -101,12 +100,6 @@ def __exit__(self, self.experiment = None return False # Do not suppress exceptions - def context(self) -> 'Scheduler': - """ - Return the current context. - """ - return self - @abstractmethod def start(self) -> None: """ @@ -119,13 +112,13 @@ def start(self) -> None: _LOG.info("Root Environment:\n%s", self.environment.pprint()) if self._config_id > 0: - tunables = self._load_config(self._config_id) - self._schedule_trial(tunables) + tunables = self.load_config(self._config_id) + self.schedule_trial(tunables) - def _teardown(self) -> None: + def teardown(self) -> None: """ Tear down the environment. - Call this method at the end of the `.start()` implementation (?). + Call it after the completion of the `.start()` in the scheduler context. """ assert self.experiment is not None if self._do_teardown: @@ -139,7 +132,7 @@ def get_best_observation(self) -> Tuple[Optional[float], Optional[TunableGroups] _LOG.info("Env: %s best score: %s", self.environment, best_score) return (best_score, best_config) - def _load_config(self, config_id: int) -> TunableGroups: + def load_config(self, config_id: int) -> TunableGroups: """ Load the existing tunable configuration from the storage. """ @@ -164,11 +157,11 @@ def _get_optimizer_suggestions(self, last_trial_id: int = -1, is_warm_up: bool = self.optimizer.bulk_register(configs, scores, status, is_warm_up) tunables = self.optimizer.suggest() - self._schedule_trial(tunables) + self.schedule_trial(tunables) return max(trial_ids, default=last_trial_id) - def _schedule_trial(self, tunables: TunableGroups) -> None: + def schedule_trial(self, tunables: TunableGroups) -> None: """ Add a configuration to the queue of trials. """ @@ -207,32 +200,12 @@ def _run_schedule(self, running: bool = False) -> None: """ assert self.experiment is not None for trial in self.experiment.pending_trials(datetime.utcnow(), running=running): - self._run_trial(trial) + self.run_trial(trial) - def _run_trial(self, trial: Storage.Trial) -> None: + @abstractmethod + def run_trial(self, trial: Storage.Trial) -> None: """ Set up and run a single trial. Save the results in the storage. """ assert self.experiment is not None _LOG.info("QUEUE: Execute trial: %s", trial) - - if not self.environment.setup(trial.tunables, trial.config(self.global_config)): - _LOG.warning("Setup failed: %s :: %s", self.environment, trial.tunables) - # FIXME: Use the actual timestamp from the environment. - _LOG.info("QUEUE: Update trial results: %s :: %s", trial, Status.FAILED) - trial.update(Status.FAILED, datetime.utcnow()) - return - - (status, timestamp, results) = self.environment.run() # Block and wait for the final result. - _LOG.info("Results: %s :: %s\n%s", trial.tunables, status, results) - - # In async mode (TODO), poll the environment for status and telemetry - # and update the storage with the intermediate results. - (_status, _timestamp, telemetry) = self.environment.status() - - # Use the status and timestamp from `.run()` as it is the final status of the experiment. - # TODO: Use the `.status()` output in async mode. 
- trial.update_telemetry(status, timestamp, telemetry) - - trial.update(status, timestamp, results) - _LOG.info("QUEUE: Update trial results: %s :: %s %s", trial, status, results) diff --git a/mlos_bench/mlos_bench/schedulers/sync_scheduler.py b/mlos_bench/mlos_bench/schedulers/sync_scheduler.py index 6d8da6d529..5294d56dbd 100644 --- a/mlos_bench/mlos_bench/schedulers/sync_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/sync_scheduler.py @@ -7,8 +7,11 @@ """ import logging +from datetime import datetime +from mlos_bench.environments.status import Status from mlos_bench.schedulers.base_scheduler import Scheduler +from mlos_bench.storage.base_storage import Storage _LOG = logging.getLogger(__name__) @@ -36,4 +39,29 @@ def start(self) -> None: last_trial_id = self._get_optimizer_suggestions(last_trial_id, is_warm_up) is_warm_up = False - self._teardown() + def run_trial(self, trial: Storage.Trial) -> None: + """ + Set up and run a single trial. Save the results in the storage. + """ + super().run_trial(trial) + + if not self.environment.setup(trial.tunables, trial.config(self.global_config)): + _LOG.warning("Setup failed: %s :: %s", self.environment, trial.tunables) + # FIXME: Use the actual timestamp from the environment. + _LOG.info("QUEUE: Update trial results: %s :: %s", trial, Status.FAILED) + trial.update(Status.FAILED, datetime.utcnow()) + return + + (status, timestamp, results) = self.environment.run() # Block and wait for the final result. + _LOG.info("Results: %s :: %s\n%s", trial.tunables, status, results) + + # In async mode (TODO), poll the environment for status and telemetry + # and update the storage with the intermediate results. + (_status, _timestamp, telemetry) = self.environment.status() + + # Use the status and timestamp from `.run()` as it is the final status of the experiment. + # TODO: Use the `.status()` output in async mode. 
+ trial.update_telemetry(status, timestamp, telemetry) + + trial.update(status, timestamp, results) + _LOG.info("QUEUE: Update trial results: %s :: %s %s", trial, status, results) From 6adb2d061345dd62b67c7564a665bf6480de4da4 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 4 Mar 2024 22:17:49 +0000 Subject: [PATCH 034/121] wip --- .../environments/common-environment-subschemas.json | 8 ++++++++ .../mlos_bench/environments/base_environment.py | 13 +++++++++++++ .../mlos_bench/environments/local/local_env.py | 1 + 3 files changed, 22 insertions(+) diff --git a/mlos_bench/mlos_bench/config/schemas/environments/common-environment-subschemas.json b/mlos_bench/mlos_bench/config/schemas/environments/common-environment-subschemas.json index 2e801b6827..46ad1db67c 100644 --- a/mlos_bench/mlos_bench/config/schemas/environments/common-environment-subschemas.json +++ b/mlos_bench/mlos_bench/config/schemas/environments/common-environment-subschemas.json @@ -76,10 +76,18 @@ "description": "The command lines to execute for the run phase.", "$ref": "#/$defs/command_lines" }, + "status": { + "description": "The command lines (or scripts) to execute for (async) status polling checks.", + "$ref": "#/$defs/command_lines" + }, "teardown": { "description": "The command lines to execute for the teardown phase.", "$ref": "#/$defs/command_lines" }, + "status_stdout_pattern": { + "description": "A regex to parse the stdout of the status phase for results.", + "type": "string" + }, "results_stdout_pattern": { "description": "A regex to parse the stdout of the run phase for results.", "type": "string" diff --git a/mlos_bench/mlos_bench/environments/base_environment.py b/mlos_bench/mlos_bench/environments/base_environment.py index 38b7bd1142..1bab2a0d94 100644 --- a/mlos_bench/mlos_bench/environments/base_environment.py +++ b/mlos_bench/mlos_bench/environments/base_environment.py @@ -398,6 +398,19 @@ def run(self) -> Tuple[Status, datetime, Optional[Dict[str, TunableValue]]]: (status, timestamp, _) = self.status() return (status, timestamp, None) + async def async_status_poll(self) -> Tuple[Status, datetime, List[Tuple[datetime, str, Any]]]: + """ + Run a command to check the status of the benchmark environment. + + Returns + ------- + (benchmark_status, timestamp, telemetry) : (Status, datetime, list) + 3-tuple of (benchmark status, timestamp, telemetry) values. + `timestamp` is UTC time stamp of the status; it's current time by default. + `telemetry` is a list (maybe empty) of (timestamp, metric, value) triplets. + """ + raise NotImplementedError("TODO") + def status(self) -> Tuple[Status, datetime, List[Tuple[datetime, str, Any]]]: """ Check the status of the benchmark environment. 
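
Note: async_status_poll() is only a stub at this point (it raises NotImplementedError), but the intent is for a scheduler to await it periodically until the trial leaves the pending state. A rough sketch under that assumption; poll_until_done and the polling interval are illustrative only, and the is_pending() check is an assumption rather than something taken from these patches:

import asyncio

async def poll_until_done(env, interval_s: float = 5.0):
    """Illustrative polling loop over the async_status_poll() stub added above."""
    while True:
        (status, timestamp, telemetry) = await env.async_status_poll()
        if not status.is_pending():  # assumes Status provides an is_pending() predicate
            return (status, timestamp, telemetry)
        await asyncio.sleep(interval_s)
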
diff --git a/mlos_bench/mlos_bench/environments/local/local_env.py b/mlos_bench/mlos_bench/environments/local/local_env.py index 7ccf187a8c..375f5a393a 100644 --- a/mlos_bench/mlos_bench/environments/local/local_env.py +++ b/mlos_bench/mlos_bench/environments/local/local_env.py @@ -255,6 +255,7 @@ def status(self) -> Tuple[Status, datetime, List[Tuple[datetime, str, Any]]]: assert self._temp_dir is not None try: + # TODO: support multiple telemetry files fname = self._config_loader_service.resolve_path( self._read_telemetry_file, extra_paths=[self._temp_dir]) From 41a0c37da9a813e68076094c375e832bf945fb1e Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 5 Mar 2024 18:05:09 +0000 Subject: [PATCH 035/121] start tracking which trial runner a trial is assigned to --- mlos_bench/mlos_bench/storage/base_storage.py | 7 ++++ .../mlos_bench/storage/base_trial_data.py | 16 +++++++-- mlos_bench/mlos_bench/storage/sql/common.py | 36 ++++++++++++++++--- mlos_bench/mlos_bench/storage/sql/schema.py | 1 + .../mlos_bench/storage/sql/trial_data.py | 4 ++- 5 files changed, 56 insertions(+), 8 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index e8bc9cdcac..8f28b651fd 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -359,6 +359,13 @@ def tunable_config_id(self) -> int: """ return self._tunable_config_id + @property + def trial_runner_id(self) -> Optional[int]: + """ + ID of the TrialRunner this trial is assigned to. + """ + return self._config.get("trial_runner_id") + @property def opt_target(self) -> str: """ diff --git a/mlos_bench/mlos_bench/storage/base_trial_data.py b/mlos_bench/mlos_bench/storage/base_trial_data.py index d9aecd7b54..67cf47ae47 100644 --- a/mlos_bench/mlos_bench/storage/base_trial_data.py +++ b/mlos_bench/mlos_bench/storage/base_trial_data.py @@ -34,16 +34,19 @@ def __init__(self, *, tunable_config_id: int, ts_start: datetime, ts_end: Optional[datetime], - status: Status): + status: Status, + trial_runner_id: Optional[int] = None): self._experiment_id = experiment_id self._trial_id = trial_id self._tunable_config_id = tunable_config_id self._ts_start = ts_start self._ts_end = ts_end self._status = status + self._trial_runner_id = trial_runner_id def __repr__(self) -> str: - return f"Trial :: {self._experiment_id}:{self._trial_id} cid:{self._tunable_config_id} {self._status.name}" + return f"Trial :: {self._experiment_id}:{self._trial_id} cid:{self._tunable_config_id} " \ + + f"rid:{self._trial_runner_id} {self._status.name}" def __eq__(self, other: Any) -> bool: if not isinstance(other, self.__class__): @@ -64,6 +67,15 @@ def trial_id(self) -> int: """ return self._trial_id + @property + def trial_runner_id(self) -> Optional[int]: + """ + ID of the TrialRunner. + """ + if not self._trial_runner_id: + self._trial_runner_id = self.metadata_dict.get("trial_runner_id") + return self._trial_runner_id + @property def ts_start(self) -> datetime: """ diff --git a/mlos_bench/mlos_bench/storage/sql/common.py b/mlos_bench/mlos_bench/storage/sql/common.py index ce08e839b3..d4d813f2e0 100644 --- a/mlos_bench/mlos_bench/storage/sql/common.py +++ b/mlos_bench/mlos_bench/storage/sql/common.py @@ -29,7 +29,13 @@ def get_trials( from mlos_bench.storage.sql.trial_data import TrialSqlData # pylint: disable=import-outside-toplevel,cyclic-import with engine.connect() as conn: # Build up sql a statement for fetching trials. 
- stmt = schema.trial.select().where( + stmt = schema.trial.select().join( + schema.trial_param, + schema.trial.c.trial_id == schema.trial_param.c.trial_id + and schema.trial.c.exp_id == schema.trial_param.c.exp_id + and schema.trial_param.c.param_id == "trial_runner_id", + isouter=True, + ).where( schema.trial.c.exp_id == experiment_id, ).order_by( schema.trial.c.exp_id.asc(), @@ -51,6 +57,7 @@ def get_trials( ts_start=trial.ts_start, ts_end=trial.ts_end, status=Status[trial.status], + trial_runner_id=trial.param_value, ) for trial in trials.fetchall() } @@ -90,6 +97,12 @@ def get_results_df( cur_trials_stmt = select( schema.trial, tunable_config_trial_group_id_subquery, + ).join( + schema.trial_param, + schema.trial.c.trial_id == schema.trial_param.c.trial_id + and schema.trial.c.exp_id == schema.trial_param.c.exp_id + and schema.trial_param.c.param_id == "trial_runner_id", + isouter=True, ).where( schema.trial.c.exp_id == experiment_id, and_( @@ -106,10 +119,23 @@ def get_results_df( schema.trial.c.config_id == tunable_config_id, ) cur_trials = conn.execute(cur_trials_stmt) - trials_df = pandas.DataFrame( - [(row.trial_id, row.ts_start, row.ts_end, row.config_id, row.tunable_config_trial_group_id, row.status) - for row in cur_trials.fetchall()], - columns=['trial_id', 'ts_start', 'ts_end', 'tunable_config_id', 'tunable_config_trial_group_id', 'status']) + trials_df = pandas.DataFrame([ + ( + row.trial_id, + row.ts_start, row.ts_end, + row.config_id, row.tunable_config_trial_group_id, + row.status, + row.param_value, + ) + for row in cur_trials.fetchall() + ], + columns=[ + 'trial_id', + 'ts_start', 'ts_end', + 'tunable_config_id', 'tunable_config_trial_group_id', + 'status', + 'trial_runner_id', + ]) # Get each trial's config in wide format. configs_stmt = schema.trial.select().with_only_columns( diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 9fc801b3eb..96c67b3c6b 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -137,6 +137,7 @@ def __init__(self, engine: Engine): # Values of additional non-tunable parameters of the trial, # e.g., scheduled execution time, VM name / location, number of repeats, etc. + # In particular, the trial_runner_id is stored here (in part to avoid updating the trial table schema). 
self.trial_param = Table( "trial_param", self._meta, diff --git a/mlos_bench/mlos_bench/storage/sql/trial_data.py b/mlos_bench/mlos_bench/storage/sql/trial_data.py index e59664272e..adb267c652 100644 --- a/mlos_bench/mlos_bench/storage/sql/trial_data.py +++ b/mlos_bench/mlos_bench/storage/sql/trial_data.py @@ -34,7 +34,8 @@ def __init__(self, *, config_id: int, ts_start: datetime, ts_end: Optional[datetime], - status: Status): + status: Status, + trial_runner_id: Optional[int] = None): super().__init__( experiment_id=experiment_id, trial_id=trial_id, @@ -42,6 +43,7 @@ def __init__(self, *, ts_start=ts_start, ts_end=ts_end, status=status, + trial_runner_id=trial_runner_id, ) self._engine = engine self._schema = schema From 8a32e5ac0bf0436f8071f716c4c1d2f136d02c82 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 5 Mar 2024 23:09:12 +0000 Subject: [PATCH 036/121] wip: adding trial runner --- .../config/schemas/cli/cli-schema.json | 7 ++ mlos_bench/mlos_bench/launcher.py | 37 ++++-- mlos_bench/mlos_bench/run.py | 18 +-- mlos_bench/mlos_bench/trial_runner.py | 117 ++++++++++++++++++ 4 files changed, 164 insertions(+), 15 deletions(-) create mode 100644 mlos_bench/mlos_bench/trial_runner.py diff --git a/mlos_bench/mlos_bench/config/schemas/cli/cli-schema.json b/mlos_bench/mlos_bench/config/schemas/cli/cli-schema.json index 39d755151c..53d85ed378 100644 --- a/mlos_bench/mlos_bench/config/schemas/cli/cli-schema.json +++ b/mlos_bench/mlos_bench/config/schemas/cli/cli-schema.json @@ -74,6 +74,13 @@ "examples": [3, 5] }, + "trial_runners": { + "description": "Number of trial runners to run in parallel. Individual TrialRunners can be identified in configs with $trial_runner_id.", + "type": "integer", + "minimum": 1, + "examples": [1, 3, 5, 10] + }, + "storage": { "description": "Path to the json config describing the storage backend to use.", "$ref": "#/$defs/json_config_path" diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index e851581ec8..b87892141f 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -23,6 +23,7 @@ from mlos_bench.tunables.tunable import TunableValue from mlos_bench.tunables.tunable_groups import TunableGroups from mlos_bench.environments.base_environment import Environment +from mlos_bench.trial_runner import TrialRunner from mlos_bench.optimizers.base_optimizer import Optimizer from mlos_bench.optimizers.mock_optimizer import MockOptimizer @@ -82,6 +83,12 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st if self.trial_config_repeat_count <= 0: raise ValueError(f"Invalid trial_config_repeat_count: {self.trial_config_repeat_count}") + self.num_trial_runners: int = ( + args.trial_runners or config.get("trial_runners", 1) + ) + if self.num_trial_runners <= 0: + raise ValueError(f"Invalid trial_runners: {self.num_trial_runners}") + log_level = args.log_level or config.get("log_level", _LOG_LEVEL) try: log_level = int(log_level) @@ -127,12 +134,21 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st " Run `mlos_bench --help` and consult `README.md` for more info.") self.root_env_config = self._config_loader.resolve_path(env_path) - self.environment: Environment = self._config_loader.load_environment( - self.root_env_config, TunableGroups(), self.global_config, service=self._parent_service) - _LOG.info("Init environment: %s", self.environment) - - # NOTE: Init tunable values *after* the Environment, but *before* the Optimizer + 
self.trial_runners: List[TrialRunner] = [] + for trial_runner_id in range(0, self.num_trial_runners): + # Create a new global config for each Environment with a unique trial_runner_id for it. + global_config = self.global_config.copy() + global_config["trial_runner_id"] = trial_runner_id + env = self._config_loader.load_environment( + self.root_env_config, TunableGroups(), global_config, service=self._parent_service) + self.trial_runners[trial_runner_id] = TrialRunner(trial_runner_id, env) + _LOG.info("Init %d trial runners for environments: %s", + self.trial_runners, list(trial_runner.environment for trial_runner in self.trial_runners)) + + # NOTE: Init tunable values *after* the Environment(s), but *before* the Optimizer + # TODO: should we assign the same or different tunables for all TrialRunner Environments? self.tunables = self._init_tunable_values( + self.trial_runners[0].environment, args.random_init or config.get("random_init", False), config.get("random_seed") if args.random_seed is None else args.random_seed, config.get("tunable_values", []) + (args.tunable_values or []) @@ -206,6 +222,11 @@ def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> T '--trial_config_repeat_count', '--trial-config-repeat-count', required=False, type=int, default=1, help='Number of times to repeat each config. Default is 1 trial per config, though more may be advised.') + parser.add_argument( + '--trial_runners', '--trial-runners', required=False, type=int, default=1, + help='Number of trial runners to run in parallel. ' + + 'Individual TrialRunners can be identified in configs with $trial_runner_id.') + parser.add_argument( '--storage', required=False, help='Path to the storage configuration file.' + @@ -307,13 +328,13 @@ def _load_config(self, global_config["config_path"] = config_path return global_config - def _init_tunable_values(self, random_init: bool, seed: Optional[int], + def _init_tunable_values(self, env: Environment, random_init: bool, seed: Optional[int], args_tunables: Optional[str]) -> TunableGroups: """ Initialize the tunables and load key/value pairs of the tunable values from given JSON files, if specified. """ - tunables = self.environment.tunable_params + tunables = env.tunable_params _LOG.debug("Init tunables: default = %s", tunables) if random_init: @@ -322,6 +343,8 @@ def _init_tunable_values(self, random_init: bool, seed: Optional[int], config={"start_with_defaults": False, "seed": seed}).suggest() _LOG.debug("Init tunables: random = %s", tunables) + # TODO: should we assign the same or different tunables for all TrialRunner Environments? 
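For reference, the per-runner construction earlier in this patch can be read as the following standalone sketch (the helper name and its parameters are illustrative, not from the patch). Each Environment copy gets its own trial_runner_id injected into a copy of the global config, and the runners are collected with append() — indexing into the initially empty self.trial_runners list, as the hunk above does, would raise IndexError:

    from mlos_bench.tunables.tunable_groups import TunableGroups
    from mlos_bench.trial_runner import TrialRunner

    def make_trial_runners(config_loader, parent_service, root_env_config,
                           global_config, num_trial_runners):
        # Build one TrialRunner per id, each wrapping its own Environment instance
        # whose configs can reference $trial_runner_id.
        trial_runners = []
        for trial_runner_id in range(num_trial_runners):
            runner_global_config = global_config.copy()
            runner_global_config["trial_runner_id"] = trial_runner_id
            env = config_loader.load_environment(
                root_env_config, TunableGroups(), runner_global_config,
                service=parent_service)
            trial_runners.append(TrialRunner(trial_runner_id, env))
        return trial_runners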
+ if args_tunables is not None: for data_file in args_tunables: values = self._config_loader.load_config(data_file, ConfigSchema.TUNABLE_VALUES) diff --git a/mlos_bench/mlos_bench/run.py b/mlos_bench/mlos_bench/run.py index 18bf779180..1032d61896 100755 --- a/mlos_bench/mlos_bench/run.py +++ b/mlos_bench/mlos_bench/run.py @@ -14,13 +14,14 @@ import json import logging from datetime import datetime -from typing import Optional, Tuple, Dict, Any +from typing import Any, Dict, List, Optional, Tuple from mlos_bench.launcher import Launcher from mlos_bench.optimizers.base_optimizer import Optimizer from mlos_bench.environments.base_environment import Environment from mlos_bench.storage.base_storage import Storage from mlos_bench.environments.status import Status +from mlos_bench.trial_runner import TrialRunner from mlos_bench.tunables.tunable_groups import TunableGroups _LOG = logging.getLogger(__name__) @@ -31,7 +32,7 @@ def _main() -> Tuple[Optional[float], Optional[TunableGroups]]: launcher = Launcher("mlos_bench", "Systems autotuning and benchmarking tool") result = _optimization_loop( - env=launcher.environment, + trial_runners=launcher.trial_runners, opt=launcher.optimizer, storage=launcher.storage, root_env_config=launcher.root_env_config, @@ -45,7 +46,7 @@ def _main() -> Tuple[Optional[float], Optional[TunableGroups]]: def _optimization_loop(*, - env: Environment, + trial_runners: List[TrialRunner], opt: Optimizer, storage: Storage, root_env_config: str, @@ -58,10 +59,10 @@ def _optimization_loop(*, Parameters ---------- - env : Environment - benchmarking environment to run the optimization on. + trial_runners : List[TrialRunner] + TrialRunner (benchmarking Environment) to run the optimization Trials on. opt : Optimizer - An interface to mlos_core optimizers. + An interface to mlos_core or other optimizers. storage : Storage A storage system to persist the experiment data. root_env_config : str @@ -80,8 +81,9 @@ def _optimization_loop(*, # experiment configuration is compatible with the previous runs. # If the `merge` config parameter is present, merge in the data # from other experiments and check for compatibility. - with env as env_context, \ - opt as opt_context, \ + env = trial_runners[0].environment +# HERE: continue replacing env context with trial_runners ... + with opt as opt_context, \ storage.experiment( experiment_id=global_config["experiment_id"].strip(), trial_id=int(global_config["trial_id"]), diff --git a/mlos_bench/mlos_bench/trial_runner.py b/mlos_bench/mlos_bench/trial_runner.py new file mode 100644 index 0000000000..f21d604dac --- /dev/null +++ b/mlos_bench/mlos_bench/trial_runner.py @@ -0,0 +1,117 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +""" +Simple class to run an individual Trial on a given Environment. +""" + +from typing import Any, Dict, Literal, Optional, Tuple + +from datetime import datetime +import logging + +from mlos_bench.environments.base_environment import Environment +from mlos_bench.environments.status import Status +from mlos_bench.storage.base_storage import Storage +from mlos_bench.event_loop_context import EventLoopContext + + +_LOG = logging.getLogger(__name__) + + +class TrialRunner: + """ + Simple class to help run an individual Trial on an environment. + + TrialRunner manages the lifecycle of a single trial, including setup, run, teardown, + and async status polling via EventLoopContext background threads. 
+ + Multiple TrialRunners can be used in a multi-processing pool to run multiple trials + in parallel. + """ + + def __init__(self, trial_runner_id: int, env: Environment) -> None: + self._trial_runner_id = trial_runner_id + self._env = env + assert self._env.parameters["trial_runner_id"] == self._trial_runner_id + self._in_context = False + self._event_loop_context = EventLoopContext() + + @property + def trial_runner_id(self) -> int: + """ + Get the TrialRunner's id. + """ + return self._trial_runner_id + + @property + def environment(self) -> Environment: + """ + Get the Environment. + """ + return self._env + + # TODO: improve context mangement support + + def __enter__(self) -> "TrialRunner": + assert not self._in_context + # TODO: Improve logging. + self._event_loop_context.enter() + self._env.__enter__() + self._in_context = True + return self + + def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> Literal[False]: + assert self._in_context + # TODO: Improve logging. + self._env.__exit__(exc_type, exc_value, traceback) + self._event_loop_context.exit() + self._in_context = False + return False # Do not suppress exceptions + + def run(self, + trial: Storage.Trial, + global_config: Optional[Dict[str, Any]] = None) -> Tuple[Status, Optional[Dict[str, float]]]: + """ + Run a single trial on this TrialRunner's Environment. + + Parameters + ---------- + trial : Storage.Trial + A Storage class based Trial used to persist the experiment data. + global_config : dict + Global configuration parameters. + + Returns + ------- + (trial_status, trial_score) : (Status, Optional[Dict[str, float]]) + Status and results of the trial. + """ + assert self._in_context + _LOG.info("Trial: %s", trial) + + if not self._env.setup(trial.tunables, trial.config(global_config)): + _LOG.warning("Setup failed: %s :: %s", self._env, trial.tunables) + # FIXME: Use the actual timestamp from the environment. + trial.update(Status.FAILED, datetime.utcnow()) + return (Status.FAILED, None) + + # TODO: start background status polling of the environments in the event loop. + + (status, timestamp, results) = self._env.run() # Block and wait for the final result. + _LOG.info("Results: %s :: %s\n%s", trial.tunables, status, results) + + # In async mode (TODO), poll the environment for status and telemetry + # and update the storage with the intermediate results. + (_status, _timestamp, telemetry) = self._env.status() + + # Use the status and timestamp from `.run()` as it is the final status of the experiment. + # TODO: Use the `.status()` output in async mode. + trial.update_telemetry(status, timestamp, telemetry) + + trial.update(status, timestamp, results) + # Filter out non-numeric scores from the optimizer. 
+ scores = results if not isinstance(results, dict) \ + else {k: float(v) for (k, v) in results.items() if isinstance(v, (int, float))} + return (status, scores) From e55f33eba9408c797f0e7769cedf7cb896bc24f0 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 6 Mar 2024 22:00:59 +0000 Subject: [PATCH 037/121] wip: integrating trial runner to merged branch --- .../mlos_bench/schedulers/base_scheduler.py | 51 ++++++++++++------- .../{ => schedulers}/trial_runner.py | 8 +++ mlos_bench/mlos_bench/storage/base_storage.py | 2 +- 3 files changed, 41 insertions(+), 20 deletions(-) rename mlos_bench/mlos_bench/{ => schedulers}/trial_runner.py (94%) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 0f32899e47..a87d3eed26 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -12,14 +12,14 @@ from abc import ABCMeta, abstractmethod from types import TracebackType -from typing import Any, Dict, List, Optional, Tuple, Type +from typing import Any, Dict, List, Iterable, Optional, Tuple, Type from typing_extensions import Literal from mlos_bench.environments.base_environment import Environment from mlos_bench.optimizers.base_optimizer import Optimizer from mlos_bench.storage.base_storage import Storage from mlos_bench.tunables.tunable_groups import TunableGroups -from mlos_bench.trial_runner import TrialRunner +from mlos_bench.schedulers.trial_runner import TrialRunner from mlos_bench.util import merge_parameters _LOG = logging.getLogger(__name__) @@ -59,6 +59,7 @@ def __init__(self, *, self._optimizer = optimizer self._storage = storage self._root_env_config = root_env_config + self._current_trial_runner_idx = 0 @property def experiment(self) -> Optional[Storage.Experiment]: @@ -66,15 +67,20 @@ def experiment(self) -> Optional[Storage.Experiment]: return self._experiment @property - def environment(self) -> Environment: - """Gets the Environment.""" - return self._trial_runners[0] + def root_environment(self) -> Environment: + """Gets the root Environment from the first TrialRunner.""" + return self._trial_runners[0].environment @property def trial_runners(self) -> List[TrialRunner]: """Gets the list of Trial Runners.""" return self._trial_runners + @property + def environments(self) -> Iterable[Environment]: + """Gets the Environment from the TrialRunners.""" + return (trial_runner.environment for trial_runner in self._trial_runners) + @property def optimizer(self) -> Optimizer: """Gets the Optimizer.""" @@ -100,8 +106,8 @@ def __enter__(self) -> 'Scheduler': experiment_id=self._experiment_id, trial_id=self._trial_id, root_env_config=self._root_env_config, - description=self._trial_runners[0].environment.name, - tunables=self._trial_runners[0].environment.tunable_params, + description=self.root_environment.name, + tunables=self.root_environment.tunable_params, opt_target=self._optimizer.target, opt_direction=self._optimizer.direction, ).__enter__() @@ -132,43 +138,44 @@ def start(self) -> None: """ assert self.experiment is not None _LOG.info("START: Experiment: %s Env: %s Optimizer: %s", - self._experiment, self._environment, self.optimizer) + self._experiment, self.root_environment, self.optimizer) if _LOG.isEnabledFor(logging.INFO): - _LOG.info("Root Environment:\n%s", self.environment.pprint()) + _LOG.info("Root Environment:\n%s", self.root_environment.pprint()) if self._config_id > 0: - tunables = self.load_config(self._config_id) + tunables = 
self.load_tunable_config(self._config_id) self.schedule_trial(tunables) def teardown(self) -> None: """ - Tear down the environment. + Tear down the Environment(s). Call it after the completion of the `.start()` in the scheduler context. """ assert self.experiment is not None if self._do_teardown: - self.environment.teardown() + for trial_runner in self.trial_runners: + trial_runner.teardown() def get_best_observation(self) -> Tuple[Optional[float], Optional[TunableGroups]]: """ Get the best observation from the optimizer. """ (best_score, best_config) = self.optimizer.get_best_observation() - _LOG.info("Env: %s best score: %s", self.environment, best_score) + _LOG.info("Env: %s best score: %s", self.root_environment, best_score) return (best_score, best_config) - def load_config(self, config_id: int) -> TunableGroups: + def load_tunable_config(self, config_id: int) -> TunableGroups: """ Load the existing tunable configuration from the storage. """ assert self.experiment is not None tunable_values = self.experiment.load_tunable_config(config_id) - tunables = self.environment.tunable_params.assign(tunable_values) + for environment in self.environments: + tunables = environment.tunable_params.assign(tunable_values) _LOG.info("Load config from storage: %d", config_id) if _LOG.isEnabledFor(logging.DEBUG): - _LOG.debug("Config %d ::\n%s", - config_id, json.dumps(tunable_values, indent=2)) - return tunables + _LOG.debug("Config %d ::\n%s", config_id, json.dumps(tunable_values, indent=2)) + return tunables.copy() def _get_optimizer_suggestions(self, last_trial_id: int = -1, is_warm_up: bool = False) -> int: """ @@ -190,6 +197,9 @@ def schedule_trial(self, tunables: TunableGroups) -> None: """ Add a configuration to the queue of trials. """ + # TODO: Alternative scheduling policies may prefer to expand repeats over + # time as well as space, or adjust the number of repeats (budget) of a given + # trial based on whether initial results are promising. for repeat_i in range(1, self._trial_config_repeat_count + 1): self._add_trial_to_queue(tunables, config={ # Add some additional metadata to track for the trial such as the @@ -206,13 +216,16 @@ def schedule_trial(self, tunables: TunableGroups) -> None: "opt_direction": self.optimizer.direction, "repeat_i": repeat_i, "is_defaults": tunables.is_defaults, + "trial_runner_id": self._trial_runners[self._current_trial_runner_idx].trial_runner_id, }) + # Rotate which TrialRunner the Trial is assigned to. + self._current_trial_runner_idx = (self._current_trial_runner_idx + 1) % len(self._trial_runners) def _add_trial_to_queue(self, tunables: TunableGroups, ts_start: Optional[datetime] = None, config: Optional[Dict[str, Any]] = None) -> None: """ - Add a configuration to the queue of trials. + Add a configuration to the queue of trials in the Storage backend. A wrapper for the `Experiment.new_trial` method. """ assert self.experiment is not None diff --git a/mlos_bench/mlos_bench/trial_runner.py b/mlos_bench/mlos_bench/schedulers/trial_runner.py similarity index 94% rename from mlos_bench/mlos_bench/trial_runner.py rename to mlos_bench/mlos_bench/schedulers/trial_runner.py index f21d604dac..bffe109b2a 100644 --- a/mlos_bench/mlos_bench/trial_runner.py +++ b/mlos_bench/mlos_bench/schedulers/trial_runner.py @@ -115,3 +115,11 @@ def run(self, scores = results if not isinstance(results, dict) \ else {k: float(v) for (k, v) in results.items() if isinstance(v, (int, float))} return (status, scores) + + def teardown(self) -> None: + """ + Tear down the Environment. 
+ Call it after the completion of one (or more) `.run()` in the TrialRunner context. + """ + assert self._in_context + self._env.teardown() diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 8f28b651fd..91670cbe90 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -343,7 +343,7 @@ def __init__(self, *, self._config = config or {} def __repr__(self) -> str: - return f"{self._experiment_id}:{self._trial_id}:{self._tunable_config_id}" + return f"{self._experiment_id}:{self._trial_id}:{self._tunable_config_id}:{self.trial_runner_id}" @property def trial_id(self) -> int: From 7df0770e3d023b8529fbc43cade9d0955f60ab0d Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Thu, 7 Mar 2024 16:58:30 -0800 Subject: [PATCH 038/121] Roll back forceful assignment of PATH when invoking a local process --- mlos_bench/mlos_bench/services/local/local_exec.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/services/local/local_exec.py b/mlos_bench/mlos_bench/services/local/local_exec.py index f09ff67e05..2ca567dfd4 100644 --- a/mlos_bench/mlos_bench/services/local/local_exec.py +++ b/mlos_bench/mlos_bench/services/local/local_exec.py @@ -195,10 +195,8 @@ def _local_exec_script(self, script_line: str, cmd = [token for subcmd in subcmds for token in subcmd] env: Dict[str, str] = {} - # Need to include at least some basic environment variables to run the script. - env["PATH"] = environ["PATH"] if env_params: - env.update({key: str(val) for (key, val) in env_params.items()}) + env = {key: str(val) for (key, val) in env_params.items()} if sys.platform == 'win32': # A hack to run Python on Windows with env variables set: From da55c5ece2245c97bb5ec50576ebe713856669af Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 8 Mar 2024 14:58:36 -0800 Subject: [PATCH 039/121] instantiate Scheduler from JSON config in the launcher (no JSON schema yet) --- mlos_bench/mlos_bench/launcher.py | 55 ++++++++++++++----- mlos_bench/mlos_bench/run.py | 21 +------ .../mlos_bench/schedulers/base_scheduler.py | 4 ++ .../mlos_bench/services/config_persistence.py | 30 ++++++++++ mlos_bench/mlos_bench/util.py | 5 +- 5 files changed, 80 insertions(+), 35 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index e851581ec8..59061f8681 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -34,6 +34,8 @@ from mlos_bench.services.local.local_exec import LocalExecService from mlos_bench.services.config_persistence import ConfigPersistenceService +from mlos_bench.schedulers.base_scheduler import Scheduler + from mlos_bench.services.types.config_loader_type import SupportsConfigLoading @@ -76,12 +78,6 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st else: config = {} - self.trial_config_repeat_count: int = ( - args.trial_config_repeat_count or config.get("trial_config_repeat_count", 1) - ) - if self.trial_config_repeat_count <= 0: - raise ValueError(f"Invalid trial_config_repeat_count: {self.trial_config_repeat_count}") - log_level = args.log_level or config.get("log_level", _LOG_LEVEL) try: log_level = int(log_level) @@ -105,6 +101,9 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st args_rest, {key: val for (key, val) in config.items() if key not in vars(args)}, ) + self.global_config["teardown"] = bool( + args.teardown if 
args.teardown is not None else config.get("teardown", True) + ) # experiment_id is generally taken from --globals files, but we also allow overriding it on the CLI. # It's useful to keep it there explicitly mostly for the --help output. if args.experiment_id: @@ -145,7 +144,8 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self.storage = self._load_storage(args.storage or config.get("storage")) _LOG.info("Init storage: %s", self.storage) - self.teardown: bool = bool(args.teardown) if args.teardown is not None else bool(config.get("teardown", True)) + self.scheduler = self._load_scheduler(args.scheduler or config.get("scheduler")) + _LOG.info("Init scheduler: %s", self.scheduler) @property def config_loader(self) -> ConfigPersistenceService: @@ -203,8 +203,9 @@ def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> T ' a single trial with default (or specified in --tunable_values).') parser.add_argument( - '--trial_config_repeat_count', '--trial-config-repeat-count', required=False, type=int, default=1, - help='Number of times to repeat each config. Default is 1 trial per config, though more may be advised.') + '--scheduler', required=False, + help='Path to the scheduler configuration file. By default, use' + + ' a single-threaded synchronous scheduler.') parser.add_argument( '--storage', required=False, @@ -337,17 +338,13 @@ def _load_optimizer(self, args_optimizer: Optional[str]) -> Optimizer: in the --optimizer command line option. If config file not specified, create a one-shot optimizer to run a single benchmark trial. """ - if 'max_iterations' in self.global_config: - self.global_config['max_iterations'] *= self.trial_config_repeat_count if args_optimizer is None: # global_config may contain additional properties, so we need to - # strip those out before instantiating the basic oneshot optimizer. + # strip those out before instantiating the basic one-shot optimizer. config = {key: val for key, val in self.global_config.items() if key in OneShotOptimizer.BASE_SUPPORTED_CONFIG_PROPS} return OneShotOptimizer( self.tunables, config=config, service=self._parent_service) class_config = self._config_loader.load_config(args_optimizer, ConfigSchema.OPTIMIZER) - if 'max_iterations' in class_config: - class_config['max_iterations'] *= self.trial_config_repeat_count assert isinstance(class_config, Dict) optimizer = self._config_loader.build_optimizer(tunables=self.tunables, service=self._parent_service, @@ -376,3 +373,33 @@ def _load_storage(self, args_storage: Optional[str]) -> Storage: config=class_config, global_config=self.global_config) return storage + + def _load_scheduler(self, args_scheduler: Optional[str]) -> Scheduler: + """ + Instantiate the Scheduler object from JSON file provided in the --scheduler + command line parameter. + Create a simple synchronous single-threaded scheduler if omitted. 
+ """ + if args_scheduler is None: + # pylint: disable=import-outside-toplevel + from mlos_bench.schedulers.sync_scheduler import SyncScheduler + return SyncScheduler( + # All config values can be overridden from global config + config={ + "experiment_id": "UNDEFINED - override from global config", + "trial_id": 0, + "config_id": -1, + "trial_config_repeat_count": 1, + "teardown": False, + }, + global_config=self.global_config, + environment=self.environment, + optimizer=self.optimizer, + storage=self.storage, + root_env_config=self.root_env_config, + ) + class_config = self._config_loader.load_config(args_scheduler, ConfigSchema.STORAGE) + assert isinstance(class_config, Dict) + return self._config_loader.build_scheduler(service=self._parent_service, + config=class_config, + global_config=self.global_config) diff --git a/mlos_bench/mlos_bench/run.py b/mlos_bench/mlos_bench/run.py index d58283c0a3..a80b2a93d1 100755 --- a/mlos_bench/mlos_bench/run.py +++ b/mlos_bench/mlos_bench/run.py @@ -16,7 +16,6 @@ from mlos_bench.launcher import Launcher from mlos_bench.tunables.tunable_groups import TunableGroups -from mlos_bench.schedulers.sync_scheduler import SyncScheduler _LOG = logging.getLogger(__name__) @@ -25,27 +24,11 @@ def _main(argv: Optional[List[str]] = None) -> Tuple[Optional[float], Optional[T launcher = Launcher("mlos_bench", "Systems autotuning and benchmarking tool", argv=argv) - # TODO: Instantiate Scheduler from JSON config - scheduler = SyncScheduler( - config={ - "experiment_id": "UNDEFINED - override from global config", - "trial_id": 0, # Override from global config - "config_id": -1, # Override from global config - "trial_config_repeat_count": launcher.trial_config_repeat_count, - "teardown": launcher.teardown, - }, - global_config=launcher.global_config, - environment=launcher.environment, - optimizer=launcher.optimizer, - storage=launcher.storage, - root_env_config=launcher.root_env_config, - ) - - with scheduler as scheduler_context: + with launcher.scheduler as scheduler_context: scheduler_context.start() scheduler_context.teardown() - (score, _config) = result = scheduler.get_best_observation() + (score, _config) = result = launcher.scheduler.get_best_observation() # NOTE: This log line is used in test_launch_main_app_* unit tests: _LOG.info("Final score: %s", score) return result diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index c92ae22896..fefbac85f1 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -49,7 +49,11 @@ def __init__(self, *, self._experiment_id = config["experiment_id"].strip() self._trial_id = int(config["trial_id"]) self._config_id = int(config.get("config_id", -1)) + self._trial_config_repeat_count: int = config.get("trial_config_repeat_count", 1) + if self._trial_config_repeat_count <= 0: + raise ValueError(f"Invalid trial_config_repeat_count: {self._trial_config_repeat_count}") + self._do_teardown = bool(config.get("teardown", True)) self.experiment: Optional[Storage.Experiment] = None diff --git a/mlos_bench/mlos_bench/services/config_persistence.py b/mlos_bench/mlos_bench/services/config_persistence.py index 6c2dd19f7c..b75487ab38 100644 --- a/mlos_bench/mlos_bench/services/config_persistence.py +++ b/mlos_bench/mlos_bench/services/config_persistence.py @@ -22,6 +22,7 @@ from mlos_bench.config.schemas import ConfigSchema from mlos_bench.environments.base_environment import Environment from 
mlos_bench.optimizers.base_optimizer import Optimizer +from mlos_bench.schedulers.base_scheduler import Scheduler from mlos_bench.services.base_service import Service from mlos_bench.services.types.config_loader_type import SupportsConfigLoading from mlos_bench.tunables.tunable import TunableValue @@ -303,6 +304,35 @@ def build_storage(self, *, _LOG.info("Created: Storage %s", inst) return inst + def build_scheduler(self, *, + service: Service, + config: Dict[str, Any], + global_config: Optional[Dict[str, Any]] = None) -> "Scheduler": + """ + Instantiation of mlos_bench Scheduler. + + Parameters + ---------- + service: Service + An optional service object (e.g., providing methods to load config files, etc.) + config : dict + Configuration of the class to instantiate, as loaded from JSON. + global_config : dict + Global configuration parameters (optional). + + Returns + ------- + inst : Scheduler + A new instance of the Scheduler. + """ + (class_name, class_config) = self.prepare_class_load(config, global_config) + inst = instantiate_from_config(Scheduler, class_name, # type: ignore[type-abstract] + config=class_config, + global_config=global_config, + service=service) + _LOG.info("Created: Scheduler %s", inst) + return inst + def build_environment(self, # pylint: disable=too-many-arguments config: Dict[str, Any], tunables: TunableGroups, diff --git a/mlos_bench/mlos_bench/util.py b/mlos_bench/mlos_bench/util.py index 1c3ffb55f8..478722c889 100644 --- a/mlos_bench/mlos_bench/util.py +++ b/mlos_bench/mlos_bench/util.py @@ -20,12 +20,13 @@ if TYPE_CHECKING: from mlos_bench.environments.base_environment import Environment from mlos_bench.optimizers.base_optimizer import Optimizer + from mlos_bench.schedulers.base_scheduler import Scheduler from mlos_bench.services.base_service import Service from mlos_bench.storage.base_storage import Storage # BaseTypeVar is a generic with a constraint of the three base classes. -BaseTypeVar = TypeVar("BaseTypeVar", "Environment", "Optimizer", "Service", "Storage") -BaseTypes = Union["Environment", "Optimizer", "Service", "Storage"] +BaseTypeVar = TypeVar("BaseTypeVar", "Environment", "Optimizer", "Scheduler", "Service", "Storage") +BaseTypes = Union["Environment", "Optimizer", "Scheduler", "Service", "Storage"] def preprocess_dynamic_configs(*, dest: dict, source: Optional[dict] = None) -> dict: From f6eb5ef11eff46c4654fb3913f3e039005915aea Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 8 Mar 2024 15:29:28 -0800 Subject: [PATCH 040/121] fix unit tests --- .../tests/config/cli/test_load_cli_config_examples.py | 3 +-- .../mlos_bench/tests/launcher_parse_args_test.py | 11 ++++------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py b/mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py index 0e9d3eeb2c..0012eb783f 100644 --- a/mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py @@ -137,8 +137,7 @@ def test_load_cli_config_examples_via_launcher(config_loader_service: ConfigPers # TODO: Check that the log_file handler is set correctly. 
- expected_teardown = config.get('teardown', True) - assert launcher.teardown == expected_teardown + assert launcher.global_config["teardown"] == config.get('teardown', True) # Note: Testing of "globals" processing handled in launcher_parse_args_test.py diff --git a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py index 13b44790cf..0226fa3bae 100644 --- a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py @@ -95,7 +95,7 @@ def test_launcher_args_parse_1(config_paths: List[str]) -> None: assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) \ == path_join(os.getcwd(), "foo", abs_path=True) assert launcher.global_config["varWithEnvVarRef"] == f'user:{getuser()}' - assert launcher.teardown + assert launcher.global_config["teardown"] is True # Check that the environment that got loaded looks to be of the right type. env_config = launcher.config_loader.load_config(env_conf_path, ConfigSchema.ENVIRONMENT) assert check_class_name(launcher.environment, env_config['class']) @@ -103,7 +103,6 @@ def test_launcher_args_parse_1(config_paths: List[str]) -> None: assert isinstance(launcher.optimizer, OneShotOptimizer) # Check that the optimizer got initialized with defaults. assert launcher.optimizer.tunable_params.is_defaults() - assert launcher.trial_config_repeat_count == 1 # default when left unspecified assert launcher.optimizer.max_iterations == 1 # value for OneShotOptimizer @@ -150,7 +149,7 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) \ == path_join(os.getcwd(), "foo", abs_path=True) assert launcher.global_config["varWithEnvVarRef"] == f'user:{getuser()}' - assert not launcher.teardown + assert launcher.global_config["teardown"] is False config = launcher.config_loader.load_config(config_file, ConfigSchema.CLI) assert launcher.config_loader.config_paths == [path_join(path, abs_path=True) for path in config_paths + config['config_path']] @@ -165,13 +164,11 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: opt_config_file = config['optimizer'] opt_config = launcher.config_loader.load_config(opt_config_file, ConfigSchema.OPTIMIZER) globals_file_config = launcher.config_loader.load_config(globals_file, ConfigSchema.GLOBALS) - assert launcher.trial_config_repeat_count == 3 + assert launcher.global_config["trial_config_repeat_count"] == 3 # The actual global_config gets overwritten as a part of processing, so to test # this we read the original value out of the source files. orig_max_iters = globals_file_config.get('max_iterations', opt_config.get('config', {}).get('max_iterations', 100)) - assert launcher.optimizer.max_iterations \ - == launcher.trial_config_repeat_count * orig_max_iters \ - == launcher.global_config['max_iterations'] + assert orig_max_iters == launcher.optimizer.max_iterations # Check that the optimizer got initialized with random values instead of the defaults. 
# Note: the environment doesn't get updated until suggest() is called to From 97438e7d0d40caca063de5262dd7012464f942c9 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 8 Mar 2024 15:33:03 -0800 Subject: [PATCH 041/121] add test for Launcher scheduler load in test_load_cli_config_examples_via_launcher --- .../tests/config/cli/test_load_cli_config_examples.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py b/mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py index 0012eb783f..c7f621363a 100644 --- a/mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py @@ -20,6 +20,7 @@ from mlos_bench.environments import Environment from mlos_bench.optimizers import Optimizer from mlos_bench.storage import Storage +from mlos_bench.schedulers import Scheduler from mlos_bench.services.config_persistence import ConfigPersistenceService from mlos_bench.launcher import Launcher from mlos_bench.util import path_join @@ -158,4 +159,9 @@ def test_load_cli_config_examples_via_launcher(config_loader_service: ConfigPers storage_config = launcher.config_loader.load_config(config["storage"], ConfigSchema.STORAGE) assert check_class_name(launcher.storage, storage_config["class"]) + assert isinstance(launcher.scheduler, Scheduler) + if "scheduler" in config: + scheduler_config = launcher.config_loader.load_config(config["scheduler"], ConfigSchema.STORAGE) + assert check_class_name(launcher.scheduler, scheduler_config["class"]) + # TODO: Check that the launcher assigns the tunables values as expected. From 034aef9b198e06e58abf90a8f7a24433507b0b3d Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 8 Mar 2024 16:12:54 -0800 Subject: [PATCH 042/121] fix the way launcher handles trial_config_repeat_count --- mlos_bench/mlos_bench/launcher.py | 16 +++++++++++++--- .../mlos_bench/tests/launcher_parse_args_test.py | 5 ++++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 59061f8681..d8eec80077 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -101,9 +101,15 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st args_rest, {key: val for (key, val) in config.items() if key not in vars(args)}, ) - self.global_config["teardown"] = bool( - args.teardown if args.teardown is not None else config.get("teardown", True) - ) + + self.global_config.setdefault("teardown", bool( + args.teardown if args.teardown is not None + else config.get("teardown", True) + )) + self.global_config.setdefault("trial_config_repeat_count", int( + args.trial_config_repeat_count if args.trial_config_repeat_count is not None + else config.get("trial_config_repeat_count", 1) + )) # experiment_id is generally taken from --globals files, but we also allow overriding it on the CLI. # It's useful to keep it there explicitly mostly for the --help output. if args.experiment_id: @@ -237,6 +243,10 @@ def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> T dest='teardown', action='store_false', help='Disable teardown of the environment after the benchmark.') + parser.add_argument( + '--trial_config_repeat_count', '--trial-config-repeat-count', required=False, type=int, default=1, + help='Number of times to repeat each config. 
Default is 1 trial per config, though more may be advised.') + parser.add_argument( '--experiment_id', '--experiment-id', required=False, default=None, help=""" diff --git a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py index 0226fa3bae..911e105889 100644 --- a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py @@ -19,6 +19,7 @@ from mlos_bench.os_environ import environ from mlos_bench.config.schemas import ConfigSchema from mlos_bench.util import path_join +from mlos_bench.schedulers import SyncScheduler from mlos_bench.services.types import ( SupportsAuth, SupportsConfigLoading, @@ -164,7 +165,6 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: opt_config_file = config['optimizer'] opt_config = launcher.config_loader.load_config(opt_config_file, ConfigSchema.OPTIMIZER) globals_file_config = launcher.config_loader.load_config(globals_file, ConfigSchema.GLOBALS) - assert launcher.global_config["trial_config_repeat_count"] == 3 # The actual global_config gets overwritten as a part of processing, so to test # this we read the original value out of the source files. orig_max_iters = globals_file_config.get('max_iterations', opt_config.get('config', {}).get('max_iterations', 100)) @@ -179,6 +179,9 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: # values through the stack. # See Also: #495 + assert isinstance(launcher.scheduler, SyncScheduler) + assert launcher.scheduler._trial_config_repeat_count == 3 # pylint: disable:protected-access + # Check that the value from the file is overridden by the CLI arg. assert config['random_seed'] == 42 # TODO: This isn't actually respected yet because the `--random-init` only From 629236f4a534fde46007147c4793402c2cb26b4c Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 8 Mar 2024 16:24:03 -0800 Subject: [PATCH 043/121] minor type fixes --- mlos_bench/mlos_bench/schedulers/base_scheduler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index fefbac85f1..f09576763d 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -50,7 +50,7 @@ def __init__(self, *, self._trial_id = int(config["trial_id"]) self._config_id = int(config.get("config_id", -1)) - self._trial_config_repeat_count: int = config.get("trial_config_repeat_count", 1) + self._trial_config_repeat_count: int = int(config.get("trial_config_repeat_count", 1)) if self._trial_config_repeat_count <= 0: raise ValueError(f"Invalid trial_config_repeat_count: {self._trial_config_repeat_count}") From 049fdb677ed36e4cf2c74383988f9b479cfd5d6e Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 8 Mar 2024 16:26:07 -0800 Subject: [PATCH 044/121] add required_keys for base Scheduler --- mlos_bench/mlos_bench/schedulers/base_scheduler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index f09576763d..64ecb2c447 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -44,7 +44,8 @@ def __init__(self, *, the Environment and Optimizer are provided by the Launcher. 
""" self.global_config = global_config - config = merge_parameters(dest=config.copy(), source=global_config) + config = merge_parameters(dest=config.copy(), source=global_config, + required_keys=["experiment_id", "trial_id"]) self._experiment_id = config["experiment_id"].strip() self._trial_id = int(config["trial_id"]) From 094155c9351a460a3141d40006b0c8de437a73fc Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 8 Mar 2024 16:27:29 -0800 Subject: [PATCH 045/121] remove unnecessary type annotation --- mlos_bench/mlos_bench/schedulers/base_scheduler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 64ecb2c447..fa2897395f 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -51,7 +51,7 @@ def __init__(self, *, self._trial_id = int(config["trial_id"]) self._config_id = int(config.get("config_id", -1)) - self._trial_config_repeat_count: int = int(config.get("trial_config_repeat_count", 1)) + self._trial_config_repeat_count = int(config.get("trial_config_repeat_count", 1)) if self._trial_config_repeat_count <= 0: raise ValueError(f"Invalid trial_config_repeat_count: {self._trial_config_repeat_count}") From a6a7283f170845f50f5ad79b277a0b9ec4dee777 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 8 Mar 2024 16:29:00 -0800 Subject: [PATCH 046/121] typo in pylint exception --- mlos_bench/mlos_bench/tests/launcher_parse_args_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py index 911e105889..df53c95e60 100644 --- a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py @@ -180,7 +180,7 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: # See Also: #495 assert isinstance(launcher.scheduler, SyncScheduler) - assert launcher.scheduler._trial_config_repeat_count == 3 # pylint: disable:protected-access + assert launcher.scheduler._trial_config_repeat_count == 3 # pylint: disable=protected-access # Check that the value from the file is overridden by the CLI arg. assert config['random_seed'] == 42 From 0a94a3721e40fa764e950b04c2d7a5fc25ce708e Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 8 Mar 2024 16:43:18 -0800 Subject: [PATCH 047/121] make all unit tests run --- mlos_bench/mlos_bench/services/config_persistence.py | 4 ++-- mlos_bench/mlos_bench/tests/launcher_run_test.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/services/config_persistence.py b/mlos_bench/mlos_bench/services/config_persistence.py index b75487ab38..e18d33dd9a 100644 --- a/mlos_bench/mlos_bench/services/config_persistence.py +++ b/mlos_bench/mlos_bench/services/config_persistence.py @@ -22,7 +22,6 @@ from mlos_bench.config.schemas import ConfigSchema from mlos_bench.environments.base_environment import Environment from mlos_bench.optimizers.base_optimizer import Optimizer -from mlos_bench.schedulers.base_scheduler import Scheduler from mlos_bench.services.base_service import Service from mlos_bench.services.types.config_loader_type import SupportsConfigLoading from mlos_bench.tunables.tunable import TunableValue @@ -326,7 +325,8 @@ def build_scheduler(self, *, A new instance of the Scheduler. 
""" (class_name, class_config) = self.prepare_class_load(config, global_config) - inst = instantiate_from_config(Scheduler, class_name, # type: ignore[type-abstract] + from mlos_bench.schedulers.base_scheduler import Scheduler # pylint: disable=import-outside-toplevel + inst = instantiate_from_config(Scheduler, class_name, # type: ignore[type-abstract] config=class_config, global_config=global_config, service=service) diff --git a/mlos_bench/mlos_bench/tests/launcher_run_test.py b/mlos_bench/mlos_bench/tests/launcher_run_test.py index 965be6ce5a..3daa413a6f 100644 --- a/mlos_bench/mlos_bench/tests/launcher_run_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_run_test.py @@ -93,7 +93,7 @@ def test_launch_main_app_opt(root_path: str, local_exec_service: LocalExecServic """ _launch_main_app( root_path, local_exec_service, - "--config mlos_bench/mlos_bench/tests/config/cli/mock-opt.jsonc --trial_config_repeat_count 3 --max_iterations 3", + "--config mlos_bench/mlos_bench/tests/config/cli/mock-opt.jsonc --trial_config_repeat_count 3 --max_iterations 9", [ # Iteration 1: Expect first value to be the baseline f"^{_RE_DATE} mlos_core_optimizer\\.py:\\d+ " + From cf42730d4a0c69f75f22c3b46217628239e0c6e3 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 11 Mar 2024 11:26:41 -0700 Subject: [PATCH 048/121] add a missing import --- mlos_bench/mlos_bench/services/config_persistence.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mlos_bench/mlos_bench/services/config_persistence.py b/mlos_bench/mlos_bench/services/config_persistence.py index e18d33dd9a..ebb0be87c5 100644 --- a/mlos_bench/mlos_bench/services/config_persistence.py +++ b/mlos_bench/mlos_bench/services/config_persistence.py @@ -35,6 +35,7 @@ if TYPE_CHECKING: from mlos_bench.storage.base_storage import Storage + from mlos_bench.schedulers.base_scheduler import Scheduler _LOG = logging.getLogger(__name__) From 6f31a2db4c01fa32b830b891b3c77a3d1b9cf2d7 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 11 Mar 2024 11:44:10 -0700 Subject: [PATCH 049/121] add ConfigSchema.SCHEDULER (not defined yet) --- mlos_bench/mlos_bench/config/schemas/config_schemas.py | 1 + mlos_bench/mlos_bench/launcher.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/config/schemas/config_schemas.py b/mlos_bench/mlos_bench/config/schemas/config_schemas.py index 00f099023e..3508e472e6 100644 --- a/mlos_bench/mlos_bench/config/schemas/config_schemas.py +++ b/mlos_bench/mlos_bench/config/schemas/config_schemas.py @@ -106,6 +106,7 @@ class ConfigSchema(Enum): GLOBALS = path_join(CONFIG_SCHEMA_DIR, "cli/globals-schema.json") ENVIRONMENT = path_join(CONFIG_SCHEMA_DIR, "environments/environment-schema.json") OPTIMIZER = path_join(CONFIG_SCHEMA_DIR, "optimizers/optimizer-schema.json") + SCHEDULER = None # TODO: add scheduler schema SERVICE = path_join(CONFIG_SCHEMA_DIR, "services/service-schema.json") STORAGE = path_join(CONFIG_SCHEMA_DIR, "storage/storage-schema.json") TUNABLE_PARAMS = path_join(CONFIG_SCHEMA_DIR, "tunables/tunable-params-schema.json") diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index d8eec80077..48b54af580 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -408,7 +408,7 @@ def _load_scheduler(self, args_scheduler: Optional[str]) -> Scheduler: storage=self.storage, root_env_config=self.root_env_config, ) - class_config = self._config_loader.load_config(args_scheduler, ConfigSchema.STORAGE) + class_config = 
self._config_loader.load_config(args_scheduler, ConfigSchema.SCHEDULER) assert isinstance(class_config, Dict) return self._config_loader.build_scheduler(service=self._parent_service, config=class_config, From e6ceb5cd4c543fb3e6d11d7878d28a740283bc4f Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 11 Mar 2024 12:05:29 -0700 Subject: [PATCH 050/121] fix the teardown property propagation issue --- mlos_bench/mlos_bench/launcher.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 48b54af580..a0b72a3ee7 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -101,24 +101,19 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st args_rest, {key: val for (key, val) in config.items() if key not in vars(args)}, ) - - self.global_config.setdefault("teardown", bool( - args.teardown if args.teardown is not None - else config.get("teardown", True) - )) - self.global_config.setdefault("trial_config_repeat_count", int( - args.trial_config_repeat_count if args.trial_config_repeat_count is not None - else config.get("trial_config_repeat_count", 1) - )) # experiment_id is generally taken from --globals files, but we also allow overriding it on the CLI. # It's useful to keep it there explicitly mostly for the --help output. if args.experiment_id: self.global_config['experiment_id'] = args.experiment_id - self.global_config = DictTemplater(self.global_config).expand_vars(use_os_env=True) - assert isinstance(self.global_config, dict) # Ensure that the trial_id is present since it gets used by some other # configs but is typically controlled by the run optimize loop. self.global_config.setdefault('trial_id', 1) + # trial_config_repeat_count is a scheduler property but it's convenient to set it via command line + self.global_config.setdefault("trial_config_repeat_count", int( + args.trial_config_repeat_count or config.get("trial_config_repeat_count", 1) + )) + self.global_config = DictTemplater(self.global_config).expand_vars(use_os_env=True) + assert isinstance(self.global_config, dict) # --service cli args should override the config file values. service_files: List[str] = config.get("services", []) + (args.service or []) @@ -153,6 +148,10 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self.scheduler = self._load_scheduler(args.scheduler or config.get("scheduler")) _LOG.info("Init scheduler: %s", self.scheduler) + self.teardown = bool( + args.teardown if args.teardown is not None else config.get("teardown", True) + ) + @property def config_loader(self) -> ConfigPersistenceService: """ @@ -390,6 +389,9 @@ def _load_scheduler(self, args_scheduler: Optional[str]) -> Scheduler: command line parameter. Create a simple synchronous single-threaded scheduler if omitted. """ + # Set `teardown` for scheduler only to prevent conflicts with other configs. 
+ global_config = self.global_config.copy() + global_config.setdefault("teardown", self.teardown) if args_scheduler is None: # pylint: disable=import-outside-toplevel from mlos_bench.schedulers.sync_scheduler import SyncScheduler @@ -400,7 +402,7 @@ def _load_scheduler(self, args_scheduler: Optional[str]) -> Scheduler: "trial_id": 0, "config_id": -1, "trial_config_repeat_count": 1, - "teardown": False, + "teardown": self.teardown, }, global_config=self.global_config, environment=self.environment, From 3121fb0fcca27df5d5d8b32f88353135d42aa116 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 11 Mar 2024 12:12:28 -0700 Subject: [PATCH 051/121] proper ordering of launcher properties initialization --- mlos_bench/mlos_bench/launcher.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index a0b72a3ee7..0053d4f356 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -95,6 +95,9 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self._parent_service: Service = LocalExecService(parent=self._config_loader) + self.teardown = bool( + args.teardown if args.teardown is not None else config.get("teardown", True) + ) self.global_config = self._load_config( config.get("globals", []) + (args.globals or []), (args.config_path or []) + config.get("config_path", []), @@ -148,10 +151,6 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self.scheduler = self._load_scheduler(args.scheduler or config.get("scheduler")) _LOG.info("Init scheduler: %s", self.scheduler) - self.teardown = bool( - args.teardown if args.teardown is not None else config.get("teardown", True) - ) - @property def config_loader(self) -> ConfigPersistenceService: """ From 59515447ca5177916d8efb9b8aabf7596bd43c91 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 11 Mar 2024 12:55:10 -0700 Subject: [PATCH 052/121] fix last unit tests --- .../tests/config/cli/test_load_cli_config_examples.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py b/mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py index c7f621363a..6b0b6b12ea 100644 --- a/mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py @@ -138,7 +138,7 @@ def test_load_cli_config_examples_via_launcher(config_loader_service: ConfigPers # TODO: Check that the log_file handler is set correctly. 
- assert launcher.global_config["teardown"] == config.get('teardown', True) + assert launcher.teardown == config.get('teardown', True) # Note: Testing of "globals" processing handled in launcher_parse_args_test.py From e3f515c9406606670c271b0e15614a3d29735519 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 11 Mar 2024 12:59:36 -0700 Subject: [PATCH 053/121] more unit test fixes --- mlos_bench/mlos_bench/tests/launcher_parse_args_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py index df53c95e60..7ec9b6dad8 100644 --- a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py @@ -96,7 +96,7 @@ def test_launcher_args_parse_1(config_paths: List[str]) -> None: assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) \ == path_join(os.getcwd(), "foo", abs_path=True) assert launcher.global_config["varWithEnvVarRef"] == f'user:{getuser()}' - assert launcher.global_config["teardown"] is True + assert launcher.teardown is True # Check that the environment that got loaded looks to be of the right type. env_config = launcher.config_loader.load_config(env_conf_path, ConfigSchema.ENVIRONMENT) assert check_class_name(launcher.environment, env_config['class']) @@ -150,7 +150,7 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) \ == path_join(os.getcwd(), "foo", abs_path=True) assert launcher.global_config["varWithEnvVarRef"] == f'user:{getuser()}' - assert launcher.global_config["teardown"] is False + assert launcher.teardown is False config = launcher.config_loader.load_config(config_file, ConfigSchema.CLI) assert launcher.config_loader.config_paths == [path_join(path, abs_path=True) for path in config_paths + config['config_path']] From 86f155ebef9d1bbeb1f6bbed3143641cc4a28fd0 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 11 Mar 2024 13:20:18 -0700 Subject: [PATCH 054/121] add Scheduler JSON config schema --- .../config/schemas/config_schemas.py | 2 +- .../schemas/schedulers/scheduler-schema.json | 100 ++++++++++++++++++ 2 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json diff --git a/mlos_bench/mlos_bench/config/schemas/config_schemas.py b/mlos_bench/mlos_bench/config/schemas/config_schemas.py index 3508e472e6..9c4a066be5 100644 --- a/mlos_bench/mlos_bench/config/schemas/config_schemas.py +++ b/mlos_bench/mlos_bench/config/schemas/config_schemas.py @@ -106,7 +106,7 @@ class ConfigSchema(Enum): GLOBALS = path_join(CONFIG_SCHEMA_DIR, "cli/globals-schema.json") ENVIRONMENT = path_join(CONFIG_SCHEMA_DIR, "environments/environment-schema.json") OPTIMIZER = path_join(CONFIG_SCHEMA_DIR, "optimizers/optimizer-schema.json") - SCHEDULER = None # TODO: add scheduler schema + SCHEDULER = path_join(CONFIG_SCHEMA_DIR, "schedulers/scheduler-schema.json") SERVICE = path_join(CONFIG_SCHEMA_DIR, "services/service-schema.json") STORAGE = path_join(CONFIG_SCHEMA_DIR, "storage/storage-schema.json") TUNABLE_PARAMS = path_join(CONFIG_SCHEMA_DIR, "tunables/tunable-params-schema.json") diff --git a/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json new file mode 100644 index 0000000000..af2c36c457 --- 
/dev/null +++ b/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json @@ -0,0 +1,100 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json", + "title": "mlos_bench Scheduler config", + + "$defs": { + "comment": { + "$comment": "This section contains reusable partial schema bits (or just split out for readability)" + }, + + "config_base_scheduler": { + "$comment": "config properties common to all Scheduler types.", + "type": "object", + "properties": { + "experiment_id": { + "$ref": "./common-defs-subschemas.json#/$defs/experiment_id" + }, + "trial_id": { + "$ref": "./common-defs-subschemas.json#/$defs/trial_id" + }, + "config_id": { + "$ref": "./common-defs-subschemas.json#/$defs/config_id" + }, + "teardown": { + "description": "Whether to teardown the experiment after running it.", + "type": "boolean" + }, + "trial_config_repeat_count": { + "description": "Number of times to repeat a config.", + "type": "integer", + "minimum": 1, + "examples": [3, 5] + } + } + } + }, + + "description": "config for the mlos_bench scheduler", + "$comment": "top level schema document rules", + "type": "object", + "properties": { + "$schema": { + "description": "The schema to use for validating the scheduler config (accepts both URLs and local paths).", + "type": "string", + "$comment": "This is optional, but if provided, should match the name of this file.", + "pattern": "/schemas/schedulers/scheduler-schema.json$" + }, + + "description": { + "description": "Optional description of the config.", + "type": "string" + }, + + "class": { + "description": "The name of the scheduler class to use.", + "$comment": "required", + "enum": [ + "mlos_bench.schedulers.SyncScheduler", + "mlos_bench.schedulers.sync_scheduler.SyncScheduler" + ] + }, + + "config": { + "description": "The scheduler-specific config.", + "$comment": "Stub for scheduler-specific config appended with condition statements below", + "type": "object", + "minProperties": 1 + } + }, + "required": ["class"], + + "oneOf": [ + { + "$comment": "extensions to the 'config' object properties when synchronous scheduler is being used", + "if": { + "properties": { + "class": { + "enum": [ + "mlos_bench.schedulers.SyncScheduler", + "mlos_bench.schedulers.sync_scheduler.SyncScheduler" + ] + } + }, + "required": ["class"] + }, + "then": { + "properties": { + "config": { + "type": "object", + "allOf": [{ "$ref": "#/$defs/config_base_scheduler" }], + "$comment": "disallow other properties", + "unevaluatedProperties": false + } + } + }, + "else": false + } + ], + "unevaluatedProperties": false +} From 928ceffcf460385e132944344fca5e290d5e450f Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 11 Mar 2024 13:26:36 -0700 Subject: [PATCH 055/121] validate scheduler JSON schema --- .../tests/config/cli/test_load_cli_config_examples.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py b/mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py index 6b0b6b12ea..578e83157e 100644 --- a/mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py @@ -161,7 +161,7 @@ def test_load_cli_config_examples_via_launcher(config_loader_service: ConfigPers assert isinstance(launcher.scheduler, Scheduler) if "scheduler" in config: 
- scheduler_config = launcher.config_loader.load_config(config["scheduler"], ConfigSchema.STORAGE) + scheduler_config = launcher.config_loader.load_config(config["scheduler"], ConfigSchema.SCHEDULER) assert check_class_name(launcher.scheduler, scheduler_config["class"]) # TODO: Check that the launcher assigns the tunables values as expected. From 1511c6eddcd943e907dd1005ca81d2a67fefadd0 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 11 Mar 2024 13:45:20 -0700 Subject: [PATCH 056/121] add an example config for sync scheduler --- .../mlos_bench/config/schedulers/sync_scheduler.jsonc | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc diff --git a/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc b/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc new file mode 100644 index 0000000000..b4e691e9f9 --- /dev/null +++ b/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc @@ -0,0 +1,11 @@ +// Mock optimizer to test the benchmarking framework. +{ + "$schema": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json", + + "class": "mlos_bench.schedulers.SyncScheduler", + + "config": { + "trial_config_repeat_count": 1, + "teardown": false + } +} From 38ab4572165c5799d2c367a8720330640b41b5e1 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 11 Mar 2024 14:08:48 -0700 Subject: [PATCH 057/121] fix the instantiation of scheduler config from JSON file --- .../config/schedulers/sync_scheduler.jsonc | 2 +- mlos_bench/mlos_bench/launcher.py | 18 ++++++++------ .../mlos_bench/schedulers/base_scheduler.py | 15 ++++++++++++ .../mlos_bench/services/config_persistence.py | 24 ++++++++++++++----- 4 files changed, 45 insertions(+), 14 deletions(-) diff --git a/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc b/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc index b4e691e9f9..c3b6438caa 100644 --- a/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc +++ b/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc @@ -5,7 +5,7 @@ "class": "mlos_bench.schedulers.SyncScheduler", "config": { - "trial_config_repeat_count": 1, + "trial_config_repeat_count": 3, "teardown": false } } diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 0053d4f356..1f320c45ce 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -108,13 +108,12 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st # It's useful to keep it there explicitly mostly for the --help output. if args.experiment_id: self.global_config['experiment_id'] = args.experiment_id + # trial_config_repeat_count is a scheduler property but it's convenient to set it via command line + if args.trial_config_repeat_count: + self.global_config["trial_config_repeat_count"] = args.trial_config_repeat_count # Ensure that the trial_id is present since it gets used by some other # configs but is typically controlled by the run optimize loop. 
self.global_config.setdefault('trial_id', 1) - # trial_config_repeat_count is a scheduler property but it's convenient to set it via command line - self.global_config.setdefault("trial_config_repeat_count", int( - args.trial_config_repeat_count or config.get("trial_config_repeat_count", 1) - )) self.global_config = DictTemplater(self.global_config).expand_vars(use_os_env=True) assert isinstance(self.global_config, dict) @@ -411,6 +410,11 @@ def _load_scheduler(self, args_scheduler: Optional[str]) -> Scheduler: ) class_config = self._config_loader.load_config(args_scheduler, ConfigSchema.SCHEDULER) assert isinstance(class_config, Dict) - return self._config_loader.build_scheduler(service=self._parent_service, - config=class_config, - global_config=self.global_config) + return self._config_loader.build_scheduler( + config=class_config, + global_config=self.global_config, + environment=self.environment, + optimizer=self.optimizer, + storage=self.storage, + root_env_config=self.root_env_config, + ) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index fa2897395f..9f0f1b360c 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -42,6 +42,21 @@ def __init__(self, *, and the derived classes is called by the persistence service after reading the class JSON configuration. Other objects like the Environment and Optimizer are provided by the Launcher. + + Parameters + ---------- + config : dict + The configuration for the scheduler. + global_config : dict + he global configuration for the experiment. + environment : Environment + The environment to benchmark/optimize. + optimizer : Optimizer + The optimizer to use. + storage : Storage + The storage to use. + root_env_config : str + Path to the root environment configuration. """ self.global_config = global_config config = merge_parameters(dest=config.copy(), source=global_config, diff --git a/mlos_bench/mlos_bench/services/config_persistence.py b/mlos_bench/mlos_bench/services/config_persistence.py index ebb0be87c5..4329d8f7e3 100644 --- a/mlos_bench/mlos_bench/services/config_persistence.py +++ b/mlos_bench/mlos_bench/services/config_persistence.py @@ -305,20 +305,29 @@ def build_storage(self, *, return inst def build_scheduler(self, *, - service: Service, config: Dict[str, Any], - global_config: Optional[Dict[str, Any]] = None) -> "Scheduler": + global_config: Dict[str, Any], + environment: Environment, + optimizer: Optimizer, + storage: "Storage", + root_env_config: str) -> "Scheduler": """ Instantiation of mlos_bench Scheduler. Parameters ---------- - service: Service - An optional service object (e.g., providing methods to load config files, etc.) config : dict Configuration of the class to instantiate, as loaded from JSON. global_config : dict - Global configuration parameters (optional). + Global configuration parameters. + environment : Environment + The environment to benchmark/optimize. + optimizer : Optimizer + The optimizer to use. + storage : Storage + The storage to use. + root_env_config : str + Path to the root environment configuration. 
Returns ------- @@ -330,7 +339,10 @@ def build_scheduler(self, *, inst = instantiate_from_config(Scheduler, class_name, # type: ignore[type-abstract] config=class_config, global_config=global_config, - service=service) + environment=environment, + optimizer=optimizer, + storage=storage, + root_env_config=root_env_config) _LOG.info("Created: Scheduler %s", inst) return inst From 9323a1c30c8910296c9ece20fc5be6943662849c Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 11 Mar 2024 14:44:28 -0700 Subject: [PATCH 058/121] minor logging improvements in the Scheduler --- .../mlos_bench/schedulers/base_scheduler.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 9f0f1b360c..0cd30c0896 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -78,11 +78,24 @@ def __init__(self, *, self.storage = storage self._root_env_config = root_env_config + _LOG.debug("Scheduler instantiated: %s :: %s", self, config) + + def __repr__(self) -> str: + """ + Produce a human-readable version of the Scheduler (mostly for logging). + + Returns + ------- + string : str + A human-readable version of the Scheduler. + """ + return self.__class__.__name__ + def __enter__(self) -> 'Scheduler': """ Enter the scheduler's context. """ - _LOG.debug("Optimizer START :: %s", self) + _LOG.debug("Scheduler START :: %s", self) assert self.experiment is None self.environment.__enter__() self.optimizer.__enter__() From 6b35444a34ebb6d49ee76dedcb18f3a251d72405 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 11 Mar 2024 15:03:14 -0700 Subject: [PATCH 059/121] fix the trial_config_repeat_count default values for CLI --- mlos_bench/mlos_bench/launcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 1f320c45ce..942ea5a4b2 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -241,7 +241,7 @@ def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> T help='Disable teardown of the environment after the benchmark.') parser.add_argument( - '--trial_config_repeat_count', '--trial-config-repeat-count', required=False, type=int, default=1, + '--trial_config_repeat_count', '--trial-config-repeat-count', required=False, type=int, help='Number of times to repeat each config. 
Default is 1 trial per config, though more may be advised.') parser.add_argument( From b242f237df99fe5fac37caa9b9d0634de11e3db8 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 11 Mar 2024 16:13:10 -0700 Subject: [PATCH 060/121] roll back some unnecessary test fixes --- mlos_bench/mlos_bench/tests/launcher_parse_args_test.py | 8 +++++--- mlos_bench/mlos_bench/tests/launcher_run_test.py | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py index 7ec9b6dad8..b881e336e1 100644 --- a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py @@ -96,7 +96,7 @@ def test_launcher_args_parse_1(config_paths: List[str]) -> None: assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) \ == path_join(os.getcwd(), "foo", abs_path=True) assert launcher.global_config["varWithEnvVarRef"] == f'user:{getuser()}' - assert launcher.teardown is True + assert launcher.teardown # Check that the environment that got loaded looks to be of the right type. env_config = launcher.config_loader.load_config(env_conf_path, ConfigSchema.ENVIRONMENT) assert check_class_name(launcher.environment, env_config['class']) @@ -150,7 +150,7 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) \ == path_join(os.getcwd(), "foo", abs_path=True) assert launcher.global_config["varWithEnvVarRef"] == f'user:{getuser()}' - assert launcher.teardown is False + assert not launcher.teardown config = launcher.config_loader.load_config(config_file, ConfigSchema.CLI) assert launcher.config_loader.config_paths == [path_join(path, abs_path=True) for path in config_paths + config['config_path']] @@ -168,7 +168,9 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: # The actual global_config gets overwritten as a part of processing, so to test # this we read the original value out of the source files. orig_max_iters = globals_file_config.get('max_iterations', opt_config.get('config', {}).get('max_iterations', 100)) - assert orig_max_iters == launcher.optimizer.max_iterations + assert launcher.optimizer.max_iterations \ + == orig_max_iters \ + == launcher.global_config['max_iterations'] # Check that the optimizer got initialized with random values instead of the defaults. 
# Note: the environment doesn't get updated until suggest() is called to diff --git a/mlos_bench/mlos_bench/tests/launcher_run_test.py b/mlos_bench/mlos_bench/tests/launcher_run_test.py index 3daa413a6f..965be6ce5a 100644 --- a/mlos_bench/mlos_bench/tests/launcher_run_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_run_test.py @@ -93,7 +93,7 @@ def test_launch_main_app_opt(root_path: str, local_exec_service: LocalExecServic """ _launch_main_app( root_path, local_exec_service, - "--config mlos_bench/mlos_bench/tests/config/cli/mock-opt.jsonc --trial_config_repeat_count 3 --max_iterations 9", + "--config mlos_bench/mlos_bench/tests/config/cli/mock-opt.jsonc --trial_config_repeat_count 3 --max_iterations 3", [ # Iteration 1: Expect first value to be the baseline f"^{_RE_DATE} mlos_core_optimizer\\.py:\\d+ " + From 208c393cd0448de9e680ec9c2fff100dcef349ce Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 11 Mar 2024 16:25:06 -0700 Subject: [PATCH 061/121] temporarily rollback the --max_iterations 9 setting in unit test --- mlos_bench/mlos_bench/tests/launcher_run_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/launcher_run_test.py b/mlos_bench/mlos_bench/tests/launcher_run_test.py index 965be6ce5a..b088068a6d 100644 --- a/mlos_bench/mlos_bench/tests/launcher_run_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_run_test.py @@ -93,7 +93,8 @@ def test_launch_main_app_opt(root_path: str, local_exec_service: LocalExecServic """ _launch_main_app( root_path, local_exec_service, - "--config mlos_bench/mlos_bench/tests/config/cli/mock-opt.jsonc --trial_config_repeat_count 3 --max_iterations 3", + # TODO: Reset --max_iterations to 3 after fixing the optimizer + "--config mlos_bench/mlos_bench/tests/config/cli/mock-opt.jsonc --trial_config_repeat_count 3 --max_iterations 9", [ # Iteration 1: Expect first value to be the baseline f"^{_RE_DATE} mlos_core_optimizer\\.py:\\d+ " + From 303c25f67d81384516bc36adf177a77664697bcd Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 11 Mar 2024 16:30:40 -0700 Subject: [PATCH 062/121] roll back another small fix to minimize the diff --- .../tests/config/cli/test_load_cli_config_examples.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py b/mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py index 578e83157e..6fb341ff44 100644 --- a/mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py @@ -138,7 +138,8 @@ def test_load_cli_config_examples_via_launcher(config_loader_service: ConfigPers # TODO: Check that the log_file handler is set correctly. 
- assert launcher.teardown == config.get('teardown', True) + expected_teardown = config.get('teardown', True) + assert launcher.teardown == expected_teardown # Note: Testing of "globals" processing handled in launcher_parse_args_test.py From 16ea2cbc70cf79c700e9ef6ae903cc0aa0563740 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 11 Mar 2024 16:32:46 -0700 Subject: [PATCH 063/121] undo a fix to LocalExecService that is in a separate PR --- mlos_bench/mlos_bench/services/local/local_exec.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/services/local/local_exec.py b/mlos_bench/mlos_bench/services/local/local_exec.py index 2ca567dfd4..f09ff67e05 100644 --- a/mlos_bench/mlos_bench/services/local/local_exec.py +++ b/mlos_bench/mlos_bench/services/local/local_exec.py @@ -195,8 +195,10 @@ def _local_exec_script(self, script_line: str, cmd = [token for subcmd in subcmds for token in subcmd] env: Dict[str, str] = {} + # Need to include at least some basic environment variables to run the script. + env["PATH"] = environ["PATH"] if env_params: - env = {key: str(val) for (key, val) in env_params.items()} + env.update({key: str(val) for (key, val) in env_params.items()}) if sys.platform == 'win32': # A hack to run Python on Windows with env variables set: From 5ad4b740d7713df9265ce0bb14809bc37994ff75 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 11 Mar 2024 16:36:16 -0700 Subject: [PATCH 064/121] keep minimizing the diff --- mlos_bench/mlos_bench/launcher.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 942ea5a4b2..2898b78864 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -205,6 +205,10 @@ def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> T help='Path to the optimizer configuration file. If omitted, run' + ' a single trial with default (or specified in --tunable_values).') + parser.add_argument( + '--trial_config_repeat_count', '--trial-config-repeat-count', required=False, type=int, + help='Number of times to repeat each config. Default is 1 trial per config, though more may be advised.') + parser.add_argument( '--scheduler', required=False, help='Path to the scheduler configuration file. By default, use' + @@ -240,10 +244,6 @@ def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> T dest='teardown', action='store_false', help='Disable teardown of the environment after the benchmark.') - parser.add_argument( - '--trial_config_repeat_count', '--trial-config-repeat-count', required=False, type=int, - help='Number of times to repeat each config. Default is 1 trial per config, though more may be advised.') - parser.add_argument( '--experiment_id', '--experiment-id', required=False, default=None, help=""" @@ -347,7 +347,7 @@ def _load_optimizer(self, args_optimizer: Optional[str]) -> Optimizer: """ if args_optimizer is None: # global_config may contain additional properties, so we need to - # strip those out before instantiating the basic one-shot optimizer. + # strip those out before instantiating the basic oneshot optimizer. 
config = {key: val for key, val in self.global_config.items() if key in OneShotOptimizer.BASE_SUPPORTED_CONFIG_PROPS} return OneShotOptimizer( self.tunables, config=config, service=self._parent_service) From e0845ea9075630cb1ccdcc3a3da427e138ef7998 Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Mon, 11 Mar 2024 16:40:25 -0700 Subject: [PATCH 065/121] minimize diff --- mlos_bench/mlos_bench/launcher.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 2898b78864..fef5d14a84 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -95,9 +95,6 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self._parent_service: Service = LocalExecService(parent=self._config_loader) - self.teardown = bool( - args.teardown if args.teardown is not None else config.get("teardown", True) - ) self.global_config = self._load_config( config.get("globals", []) + (args.globals or []), (args.config_path or []) + config.get("config_path", []), @@ -114,6 +111,7 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st # Ensure that the trial_id is present since it gets used by some other # configs but is typically controlled by the run optimize loop. self.global_config.setdefault('trial_id', 1) + self.global_config = DictTemplater(self.global_config).expand_vars(use_os_env=True) assert isinstance(self.global_config, dict) @@ -147,6 +145,7 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self.storage = self._load_storage(args.storage or config.get("storage")) _LOG.info("Init storage: %s", self.storage) + self.teardown: bool = bool(args.teardown) if args.teardown is not None else bool(config.get("teardown", True)) self.scheduler = self._load_scheduler(args.scheduler or config.get("scheduler")) _LOG.info("Init scheduler: %s", self.scheduler) From b204ebc55ba925ec234bf79e3603a156fc2a9b16 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 15 Mar 2024 19:40:57 +0000 Subject: [PATCH 066/121] Fix some storage schema related tests --- mlos_bench/mlos_bench/storage/sql/__init__.py | 5 ++++ .../schemas/storage/test_storage_schemas.py | 30 +++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/mlos_bench/mlos_bench/storage/sql/__init__.py b/mlos_bench/mlos_bench/storage/sql/__init__.py index 51b5ba7fea..735e21bcaf 100644 --- a/mlos_bench/mlos_bench/storage/sql/__init__.py +++ b/mlos_bench/mlos_bench/storage/sql/__init__.py @@ -5,3 +5,8 @@ """ Interfaces to the SQL-based storage backends for OS Autotune. 
""" +from mlos_bench.storage.sql.storage import SqlStorage + +__all__ = [ + 'SqlStorage', +] diff --git a/mlos_bench/mlos_bench/tests/config/schemas/storage/test_storage_schemas.py b/mlos_bench/mlos_bench/tests/config/schemas/storage/test_storage_schemas.py index 51f068a28f..7c42b85c4b 100644 --- a/mlos_bench/mlos_bench/tests/config/schemas/storage/test_storage_schemas.py +++ b/mlos_bench/mlos_bench/tests/config/schemas/storage/test_storage_schemas.py @@ -10,8 +10,12 @@ import pytest +from mlos_core.tests import get_all_concrete_subclasses + from mlos_bench.config.schemas import ConfigSchema +from mlos_bench.storage.base_storage import Storage +from mlos_bench.tests import try_resolve_class_name from mlos_bench.tests.config.schemas import (get_schema_test_cases, check_test_case_against_schema, check_test_case_config_with_extra_param) @@ -23,6 +27,28 @@ TEST_CASES = get_schema_test_cases(path.join(path.dirname(__file__), "test-cases")) +# Dynamically enumerate some of the cases we want to make sure we cover. + +expected_mlos_bench_storage_class_names = [subclass.__module__ + "." + subclass.__name__ + for subclass in get_all_concrete_subclasses(Storage, # type: ignore[type-abstract] + pkg_name='mlos_bench')] +assert expected_mlos_bench_storage_class_names + +# Do the full cross product of all the test cases and all the storage types. + + +@pytest.mark.parametrize("test_case_subtype", sorted(TEST_CASES.by_subtype)) +@pytest.mark.parametrize("mlos_bench_storage_type", expected_mlos_bench_storage_class_names) +def test_case_coverage_mlos_bench_storage_type(test_case_subtype: str, mlos_bench_storage_type: str) -> None: + """ + Checks to see if there is a given type of test case for the given mlos_bench storage type. + """ + for test_case in TEST_CASES.by_subtype[test_case_subtype].values(): + if try_resolve_class_name(test_case.config.get("class")) == mlos_bench_storage_type: + return + raise NotImplementedError( + f"Missing test case for subtype {test_case_subtype} for Storage class {mlos_bench_storage_type}") + # Now we actually perform all of those validation tests. 
@@ -42,3 +68,7 @@ def test_storage_configs_with_extra_param(test_case_name: str) -> None: """ check_test_case_config_with_extra_param(TEST_CASES.by_type["good"][test_case_name], ConfigSchema.STORAGE) check_test_case_config_with_extra_param(TEST_CASES.by_type["good"][test_case_name], ConfigSchema.UNIFIED) + + +if __name__ == '__main__': + pytest.main([__file__, '-n0'],) From 63da0e0b04761b70466f7f0eab9a421565f40757 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 15 Mar 2024 20:03:21 +0000 Subject: [PATCH 067/121] make local edits scheduler schema aware --- .vscode/settings.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.vscode/settings.json b/.vscode/settings.json index 0d1efbd801..2c8098f9d9 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -68,6 +68,15 @@ ], "url": "./mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json" }, + { + "fileMatch": [ + "mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/**/*.jsonc", + "mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/**/*.json", + "mlos_bench/mlos_bench/config/schedulers/**/*.jsonc", + "mlos_bench/mlos_bench/config/schedulers/**/*.json" + ], + "url": "./mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json" + }, { "fileMatch": [ "mlos_bench/mlos_bench/tests/config/schemas/storage/test-cases/**/*.jsonc", From ba59035447ea80a0eb66d5d27b11b30ae1f138c6 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 15 Mar 2024 20:03:39 +0000 Subject: [PATCH 068/121] include the scheduler schema in the global config --- .../mlos_bench/config/schemas/mlos-bench-config-schema.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mlos_bench/mlos_bench/config/schemas/mlos-bench-config-schema.json b/mlos_bench/mlos_bench/config/schemas/mlos-bench-config-schema.json index e4fbb68e32..5032fa6dd5 100644 --- a/mlos_bench/mlos_bench/config/schemas/mlos-bench-config-schema.json +++ b/mlos_bench/mlos_bench/config/schemas/mlos-bench-config-schema.json @@ -47,6 +47,11 @@ "type": "object", "$ref": "./optimizers/optimizer-schema.json" }, + { + "description": "scheduler config", + "type": "object", + "$ref": "./schedulers/scheduler-schema.json" + }, { "description": "service config", "type": "object", From 2ca34cdfeec6e547035427aaaef5439e55363873 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 15 Mar 2024 20:03:47 +0000 Subject: [PATCH 069/121] fixup relative paths --- .../config/schemas/schedulers/scheduler-schema.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json index af2c36c457..53a3f02b09 100644 --- a/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json +++ b/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json @@ -13,13 +13,13 @@ "type": "object", "properties": { "experiment_id": { - "$ref": "./common-defs-subschemas.json#/$defs/experiment_id" + "$ref": "../cli/common-defs-subschemas.json#/$defs/experiment_id" }, "trial_id": { - "$ref": "./common-defs-subschemas.json#/$defs/trial_id" + "$ref": "../cli/common-defs-subschemas.json#/$defs/trial_id" }, "config_id": { - "$ref": "./common-defs-subschemas.json#/$defs/config_id" + "$ref": "../cli/common-defs-subschemas.json#/$defs/config_id" }, "teardown": { "description": "Whether to teardown the experiment after running it.", From 946b0c460e8c0a0710c3932ba9ef22e765c733c5 Mon Sep 17 00:00:00 2001 From: Brian Kroth 
Date: Fri, 15 Mar 2024 20:09:11 +0000 Subject: [PATCH 070/121] basic schema testing --- .../bad/invalid/sync_sched-bad-repeat.jsonc | 6 ++ .../bad/invalid/sync_sched-empty-config.jsonc | 5 ++ .../bad/unhandled/sync_sched-extra.jsonc | 6 ++ .../good/full/sync_sched-full.jsonc | 11 +++ .../good/partial/sync_sched-partial.jsonc | 7 ++ .../schedulers/test_scheduler_schemas.py | 77 +++++++++++++++++++ 6 files changed, 112 insertions(+) create mode 100644 mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/sync_sched-bad-repeat.jsonc create mode 100644 mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/sync_sched-empty-config.jsonc create mode 100644 mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/unhandled/sync_sched-extra.jsonc create mode 100644 mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/full/sync_sched-full.jsonc create mode 100644 mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/partial/sync_sched-partial.jsonc create mode 100644 mlos_bench/mlos_bench/tests/config/schemas/schedulers/test_scheduler_schemas.py diff --git a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/sync_sched-bad-repeat.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/sync_sched-bad-repeat.jsonc new file mode 100644 index 0000000000..c96f79f7a5 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/sync_sched-bad-repeat.jsonc @@ -0,0 +1,6 @@ +{ + "class": "mlos_bench.schedulers.SyncScheduler", + "config": { + "trial_config_repeat_count": 0 + } +} diff --git a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/sync_sched-empty-config.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/sync_sched-empty-config.jsonc new file mode 100644 index 0000000000..1407b4947a --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/sync_sched-empty-config.jsonc @@ -0,0 +1,5 @@ +{ + "class": "mlos_bench.schedulers.SyncScheduler", + "config": { + } +} diff --git a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/unhandled/sync_sched-extra.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/unhandled/sync_sched-extra.jsonc new file mode 100644 index 0000000000..ad8cf8e9e5 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/unhandled/sync_sched-extra.jsonc @@ -0,0 +1,6 @@ +{ + "class": "mlos_bench.schedulers.sync_scheduler.SyncScheduler", + "config": { + "extra": "unsupported" + } +} diff --git a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/full/sync_sched-full.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/full/sync_sched-full.jsonc new file mode 100644 index 0000000000..63694dac4a --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/full/sync_sched-full.jsonc @@ -0,0 +1,11 @@ +{ + "$schema": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json", + "class": "mlos_bench.schedulers.sync_scheduler.SyncScheduler", + "config": { + "trial_config_repeat_count": 3, + "teardown": false, + "experiment_id": "MyExperimentName", + "config_id": 1, + "trial_id": 1 + } +} diff --git a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/partial/sync_sched-partial.jsonc 
b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/partial/sync_sched-partial.jsonc new file mode 100644 index 0000000000..eee634800c --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/partial/sync_sched-partial.jsonc @@ -0,0 +1,7 @@ +{ + "class": "mlos_bench.schedulers.SyncScheduler", + "config": { + "trial_config_repeat_count": 3, + "teardown": false + } +} diff --git a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test_scheduler_schemas.py b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test_scheduler_schemas.py new file mode 100644 index 0000000000..b83e9cbde2 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test_scheduler_schemas.py @@ -0,0 +1,77 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +""" +Tests for schedulers schema validation. +""" + +from os import path +from typing import Optional + +import pytest + +from mlos_core.tests import get_all_concrete_subclasses + +from mlos_bench.config.schemas import ConfigSchema +from mlos_bench.schedulers.base_scheduler import Scheduler + +from mlos_bench.tests import try_resolve_class_name +from mlos_bench.tests.config.schemas import (get_schema_test_cases, + check_test_case_against_schema, + check_test_case_config_with_extra_param) + + +# General testing strategy: +# - hand code a set of good/bad configs (useful to test editor schema checking) +# - enumerate and try to check that we've covered all the cases +# - for each config, load and validate against expected schema + +TEST_CASES = get_schema_test_cases(path.join(path.dirname(__file__), "test-cases")) + + +# Dynamically enumerate some of the cases we want to make sure we cover. + +expected_mlos_bench_scheduler_class_names = [subclass.__module__ + "." + subclass.__name__ + for subclass in get_all_concrete_subclasses(Scheduler, # type: ignore[type-abstract] + pkg_name='mlos_bench')] +assert expected_mlos_bench_scheduler_class_names + +# Do the full cross product of all the test cases and all the scheduler types. + + +@pytest.mark.parametrize("test_case_subtype", sorted(TEST_CASES.by_subtype)) +@pytest.mark.parametrize("mlos_bench_scheduler_type", expected_mlos_bench_scheduler_class_names) +def test_case_coverage_mlos_bench_scheduler_type(test_case_subtype: str, mlos_bench_scheduler_type: str) -> None: + """ + Checks to see if there is a given type of test case for the given mlos_bench scheduler type. + """ + for test_case in TEST_CASES.by_subtype[test_case_subtype].values(): + if try_resolve_class_name(test_case.config.get("class")) == mlos_bench_scheduler_type: + return + raise NotImplementedError( + f"Missing test case for subtype {test_case_subtype} for Scheduler class {mlos_bench_scheduler_type}") + +# Now we actually perform all of those validation tests. + + +@pytest.mark.parametrize("test_case_name", sorted(TEST_CASES.by_path)) +def test_scheduler_configs_against_schema(test_case_name: str) -> None: + """ + Checks that the scheduler config validates against the schema. + """ + check_test_case_against_schema(TEST_CASES.by_path[test_case_name], ConfigSchema.SCHEDULER) + check_test_case_against_schema(TEST_CASES.by_path[test_case_name], ConfigSchema.UNIFIED) + + +@pytest.mark.parametrize("test_case_name", sorted(TEST_CASES.by_type["good"])) +def test_scheduler_configs_with_extra_param(test_case_name: str) -> None: + """ + Checks that the scheduler config fails to validate if extra params are present in certain places. 
+ """ + check_test_case_config_with_extra_param(TEST_CASES.by_type["good"][test_case_name], ConfigSchema.SCHEDULER) + check_test_case_config_with_extra_param(TEST_CASES.by_type["good"][test_case_name], ConfigSchema.UNIFIED) + + +if __name__ == "__main__": + pytest.main([__file__, "-n0"]) From 7985a3ea8b226856e0103cd69cae560550f60fc2 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 15 Mar 2024 20:13:27 +0000 Subject: [PATCH 071/121] add another test case --- .../test-cases/bad/invalid/invalid-scheduler-class.jsonc | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/invalid-scheduler-class.jsonc diff --git a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/invalid-scheduler-class.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/invalid-scheduler-class.jsonc new file mode 100644 index 0000000000..29150cabf8 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/invalid-scheduler-class.jsonc @@ -0,0 +1,7 @@ +{ + "class": "mlos_bench.schedulers.UnknownScheduler", + "config": { + "trial_config_repeat_count": 3, + "teardown": false + } +} From 8070c30b1e27f6c1fbdc2ccec8bb077d8f995feb Mon Sep 17 00:00:00 2001 From: Sergiy Matusevych Date: Fri, 15 Mar 2024 13:27:37 -0700 Subject: [PATCH 072/121] Update mlos_bench/mlos_bench/launcher.py Co-authored-by: Brian Kroth --- mlos_bench/mlos_bench/launcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index fef5d14a84..a9aa9e3f46 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -211,7 +211,7 @@ def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> T parser.add_argument( '--scheduler', required=False, help='Path to the scheduler configuration file. 
By default, use' + - ' a single-threaded synchronous scheduler.') + ' a single worker synchronous scheduler.') parser.add_argument( '--storage', required=False, From f3955313b541bd0dab69115ecce0707be1fc043e Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 15 Mar 2024 20:53:04 +0000 Subject: [PATCH 073/121] pylint --- .../tests/config/schemas/schedulers/test_scheduler_schemas.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test_scheduler_schemas.py b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test_scheduler_schemas.py index b83e9cbde2..6e625b8ef2 100644 --- a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test_scheduler_schemas.py +++ b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test_scheduler_schemas.py @@ -7,7 +7,6 @@ """ from os import path -from typing import Optional import pytest From 969e4963f5f7efcfd91138c8105877aead727f82 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 15 Mar 2024 21:40:43 +0000 Subject: [PATCH 074/121] remove async status changes for now - future PR --- .../environments/common-environment-subschemas.json | 8 -------- .../mlos_bench/environments/base_environment.py | 13 ------------- .../mlos_bench/environments/local/local_env.py | 1 - 3 files changed, 22 deletions(-) diff --git a/mlos_bench/mlos_bench/config/schemas/environments/common-environment-subschemas.json b/mlos_bench/mlos_bench/config/schemas/environments/common-environment-subschemas.json index 46ad1db67c..2e801b6827 100644 --- a/mlos_bench/mlos_bench/config/schemas/environments/common-environment-subschemas.json +++ b/mlos_bench/mlos_bench/config/schemas/environments/common-environment-subschemas.json @@ -76,18 +76,10 @@ "description": "The command lines to execute for the run phase.", "$ref": "#/$defs/command_lines" }, - "status": { - "description": "The command lines (or scripts) to execute for (async) status polling checks.", - "$ref": "#/$defs/command_lines" - }, "teardown": { "description": "The command lines to execute for the teardown phase.", "$ref": "#/$defs/command_lines" }, - "status_stdout_pattern": { - "description": "A regex to parse the stdout of the status phase for results.", - "type": "string" - }, "results_stdout_pattern": { "description": "A regex to parse the stdout of the run phase for results.", "type": "string" diff --git a/mlos_bench/mlos_bench/environments/base_environment.py b/mlos_bench/mlos_bench/environments/base_environment.py index 1bab2a0d94..38b7bd1142 100644 --- a/mlos_bench/mlos_bench/environments/base_environment.py +++ b/mlos_bench/mlos_bench/environments/base_environment.py @@ -398,19 +398,6 @@ def run(self) -> Tuple[Status, datetime, Optional[Dict[str, TunableValue]]]: (status, timestamp, _) = self.status() return (status, timestamp, None) - async def async_status_poll(self) -> Tuple[Status, datetime, List[Tuple[datetime, str, Any]]]: - """ - Run a command to check the status of the benchmark environment. - - Returns - ------- - (benchmark_status, timestamp, telemetry) : (Status, datetime, list) - 3-tuple of (benchmark status, timestamp, telemetry) values. - `timestamp` is UTC time stamp of the status; it's current time by default. - `telemetry` is a list (maybe empty) of (timestamp, metric, value) triplets. - """ - raise NotImplementedError("TODO") - def status(self) -> Tuple[Status, datetime, List[Tuple[datetime, str, Any]]]: """ Check the status of the benchmark environment. 
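Removing async_status_poll() leaves the synchronous status() contract shown above as the only polling surface: it returns a (Status, datetime, telemetry) triple, where telemetry is a list of (timestamp, metric, value) tuples. A hypothetical helper built only on that contract (poll_telemetry is illustrative and not part of this change) might look like:

import time
from datetime import datetime
from typing import Any, List, Tuple

from mlos_bench.environments.base_environment import Environment
from mlos_bench.environments.status import Status

def poll_telemetry(env: Environment, interval_s: float = 5.0, max_polls: int = 12) -> List[Tuple[datetime, str, Any]]:
    """Collect telemetry from env.status() until the run leaves the RUNNING state or we give up."""
    telemetry: List[Tuple[datetime, str, Any]] = []
    for _ in range(max_polls):
        (status, _timestamp, batch) = env.status()
        telemetry.extend(batch)
        if status != Status.RUNNING:
            break
        time.sleep(interval_s)
    return telemetry

An async variant could later wrap this same contract in the EventLoopContext without changing callers, which is presumably what the future PR mentioned in the commit subject is for.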
diff --git a/mlos_bench/mlos_bench/environments/local/local_env.py b/mlos_bench/mlos_bench/environments/local/local_env.py index 375f5a393a..7ccf187a8c 100644 --- a/mlos_bench/mlos_bench/environments/local/local_env.py +++ b/mlos_bench/mlos_bench/environments/local/local_env.py @@ -255,7 +255,6 @@ def status(self) -> Tuple[Status, datetime, List[Tuple[datetime, str, Any]]]: assert self._temp_dir is not None try: - # TODO: support multiple telemetry files fname = self._config_loader_service.resolve_path( self._read_telemetry_file, extra_paths=[self._temp_dir]) From 6f4928f901193562e36855a008553211bf79bca8 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 15 Mar 2024 22:27:17 +0000 Subject: [PATCH 075/121] wip --- mlos_bench/mlos_bench/launcher.py | 23 ++++++--------- .../mlos_bench/schedulers/base_scheduler.py | 28 ++++++++++++------- .../mlos_bench/schedulers/sync_scheduler.py | 2 ++ .../mlos_bench/schedulers/trial_runner.py | 20 +++++++------ 4 files changed, 40 insertions(+), 33 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 18f7ac5aa9..f274b607ed 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -55,6 +55,7 @@ class Launcher: def __init__(self, description: str, long_text: str = "", argv: Optional[List[str]] = None): # pylint: disable=too-many-statements + # pylint: disable=too-complex _LOG.info("Launch: %s", description) epilog = """ Additional --key=value pairs can be specified to augment or override values listed in --globals. @@ -79,18 +80,6 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st else: config = {} - self.trial_config_repeat_count: int = ( - args.trial_config_repeat_count or config.get("trial_config_repeat_count", 1) - ) - if self.trial_config_repeat_count <= 0: - raise ValueError(f"Invalid trial_config_repeat_count: {self.trial_config_repeat_count}") - - self.num_trial_runners: int = ( - args.trial_runners or config.get("trial_runners", 1) - ) - if self.num_trial_runners <= 0: - raise ValueError(f"Invalid trial_runners: {self.num_trial_runners}") - log_level = args.log_level or config.get("log_level", _LOG_LEVEL) try: log_level = int(log_level) @@ -112,15 +101,21 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st config.get("globals", []) + (args.globals or []), (args.config_path or []) + config.get("config_path", []), args_rest, + # Prime the global config with the command line args and the config file. {key: val for (key, val) in config.items() if key not in vars(args)}, ) # experiment_id is generally taken from --globals files, but we also allow overriding it on the CLI. # It's useful to keep it there explicitly mostly for the --help output. 
if args.experiment_id: - self.global_config['experiment_id'] = args.experiment_id + self.global_config["experiment_id"] = args.experiment_id # trial_config_repeat_count is a scheduler property but it's convenient to set it via command line if args.trial_config_repeat_count: self.global_config["trial_config_repeat_count"] = args.trial_config_repeat_count + self.global_config.setdefault("num_trial_runners", 1) + if args.num_trial_runners: + self.global_config["num_trial_runners"] = args.num_trial_runners + if self.global_config["num_trial_runners"] <= 0: + raise ValueError(f"Invalid num_trial_runners: {self.global_config['num_trial_runners']}") # Ensure that the trial_id is present since it gets used by some other # configs but is typically controlled by the run optimize loop. self.global_config.setdefault('trial_id', 1) @@ -141,7 +136,7 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self.root_env_config = self._config_loader.resolve_path(env_path) self.trial_runners: List[TrialRunner] = [] - for trial_runner_id in range(0, self.num_trial_runners): + for trial_runner_id in range(0, self.global_config["num_trial_runners"]): # Create a new global config for each Environment with a unique trial_runner_id for it. global_config = self.global_config.copy() global_config["trial_runner_id"] = trial_runner_id diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 9bbb1b92f6..36020abdf8 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -42,22 +42,23 @@ def __init__(self, *, Create a new instance of the scheduler. The constructor of this and the derived classes is called by the persistence service after reading the class JSON configuration. Other objects like - the Environment and Optimizer are provided by the Launcher. + the TrialRunner(s) and their Environment(s) and Optimizer are + provided by the Launcher. Parameters ---------- config : dict - The configuration for the scheduler. + The configuration for the Scheduler. global_config : dict he global configuration for the experiment. - environment : Environment - The environment to benchmark/optimize. + trial_runner : List[TrialRunner] + The set of TrialRunner(s) (and associated Environment(s)) to benchmark/optimize. optimizer : Optimizer - The optimizer to use. + The Optimizer to use. storage : Storage The storage to use. root_env_config : str - Path to the root environment configuration. + Path to the root Environment configuration. """ self.global_config = global_config config = merge_parameters(dest=config.copy(), source=global_config, @@ -81,6 +82,8 @@ def __init__(self, *, self._root_env_config = root_env_config self._current_trial_runner_idx = 0 + _LOG.debug("Scheduler instantiated: %s :: %s", self, config) + @property def experiment(self) -> Optional[Storage.Experiment]: """Gets the Experiment Storage.""" @@ -88,7 +91,13 @@ def experiment(self) -> Optional[Storage.Experiment]: @property def root_environment(self) -> Environment: - """Gets the root Environment from the first TrialRunner.""" + """ + Gets the root (prototypical) Environment from the first TrialRunner. + + Note: This all TrialRunners have the same Environment config and are made + unique by their use of the unique trial_runner_id assigned to each + TrialRunner's Environment's global_config. 
+ """ return self._trial_runners[0].environment @property @@ -111,8 +120,6 @@ def storage(self) -> Storage: """Gets the Storage.""" return self._storage - _LOG.debug("Scheduler instantiated: %s :: %s", self, config) - def __repr__(self) -> str: """ Produce a human-readable version of the Scheduler (mostly for logging). @@ -167,7 +174,7 @@ def __exit__(self, @abstractmethod def start(self) -> None: """ - Start the optimization loop. + Start the scheduling loop. """ assert self.experiment is not None _LOG.info("START: Experiment: %s Env: %s Optimizer: %s", @@ -270,6 +277,7 @@ def _run_schedule(self, running: bool = False) -> None: Scheduler part of the loop. Check for pending trials in the queue and run them. """ assert self.experiment is not None + # TODO: allow overriding in order to implement parallelized TrialRunners. for trial in self.experiment.pending_trials(datetime.utcnow(), running=running): self.run_trial(trial) diff --git a/mlos_bench/mlos_bench/schedulers/sync_scheduler.py b/mlos_bench/mlos_bench/schedulers/sync_scheduler.py index baa058bd57..48c49b278d 100644 --- a/mlos_bench/mlos_bench/schedulers/sync_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/sync_scheduler.py @@ -45,6 +45,8 @@ def run_trial(self, trial: Storage.Trial) -> None: """ super().run_trial(trial) + trial_runner = self._trial_runners[trial.trial_runner_id] + if not self.environment.setup(trial.tunables, trial.config(self.global_config)): _LOG.warning("Setup failed: %s :: %s", self.environment, trial.tunables) # FIXME: Use the actual timestamp from the environment. diff --git a/mlos_bench/mlos_bench/schedulers/trial_runner.py b/mlos_bench/mlos_bench/schedulers/trial_runner.py index bffe109b2a..fa87bb7771 100644 --- a/mlos_bench/mlos_bench/schedulers/trial_runner.py +++ b/mlos_bench/mlos_bench/schedulers/trial_runner.py @@ -6,7 +6,8 @@ Simple class to run an individual Trial on a given Environment. """ -from typing import Any, Dict, Literal, Optional, Tuple +from types import TracebackType +from typing import Any, Dict, Literal, Optional, Tuple, Type from datetime import datetime import logging @@ -52,21 +53,22 @@ def environment(self) -> Environment: """ return self._env - # TODO: improve context mangement support - def __enter__(self) -> "TrialRunner": assert not self._in_context - # TODO: Improve logging. - self._event_loop_context.enter() + _LOG.debug("TrialRunner START :: %s", self) + # TODO: self._event_loop_context.enter() self._env.__enter__() self._in_context = True return self - def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> Literal[False]: + def __exit__(self, + ex_type: Optional[Type[BaseException]], + ex_val: Optional[BaseException], + ex_tb: Optional[TracebackType]) -> Literal[False]: assert self._in_context - # TODO: Improve logging. 
- self._env.__exit__(exc_type, exc_value, traceback) - self._event_loop_context.exit() + _LOG.debug("TrialRunner END :: %s", self) + self._env.__exit__(ex_type, ex_val, ex_tb) + # TODO: self._event_loop_context.exit() self._in_context = False return False # Do not suppress exceptions From 92382dc5d7302e534f44385b2de52348242e4685 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 19 Mar 2024 20:53:49 +0000 Subject: [PATCH 076/121] wip: refactor running of a trial to a separate class so we can do them in parallel eventually --- mlos_bench/mlos_bench/launcher.py | 6 +-- .../mlos_bench/schedulers/base_scheduler.py | 22 ++++++++- .../mlos_bench/schedulers/sync_scheduler.py | 27 ++-------- .../mlos_bench/schedulers/trial_runner.py | 49 ++++++++++++------- mlos_bench/mlos_bench/storage/base_storage.py | 3 ++ .../mlos_bench/storage/sql/experiment.py | 2 - 6 files changed, 62 insertions(+), 47 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index f274b607ed..c4d9ddbf66 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -138,10 +138,10 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self.trial_runners: List[TrialRunner] = [] for trial_runner_id in range(0, self.global_config["num_trial_runners"]): # Create a new global config for each Environment with a unique trial_runner_id for it. - global_config = self.global_config.copy() - global_config["trial_runner_id"] = trial_runner_id + env_global_config = self.global_config.copy() + env_global_config["trial_runner_id"] = trial_runner_id env = self._config_loader.load_environment( - self.root_env_config, TunableGroups(), global_config, service=self._parent_service) + self.root_env_config, TunableGroups(), env_global_config, service=self._parent_service) self.trial_runners[trial_runner_id] = TrialRunner(trial_runner_id, env) _LOG.info("Init %d trial runners for environments: %s", self.trial_runners, list(trial_runner.environment for trial_runner in self.trial_runners)) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 36020abdf8..8a5e2a7533 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -50,7 +50,7 @@ def __init__(self, *, config : dict The configuration for the Scheduler. global_config : dict - he global configuration for the experiment. + The global configuration for the experiment. trial_runner : List[TrialRunner] The set of TrialRunner(s) (and associated Environment(s)) to benchmark/optimize. optimizer : Optimizer @@ -120,6 +120,23 @@ def storage(self) -> Storage: """Gets the Storage.""" return self._storage + def get_trial_runner(self, trial: Storage.Trial) -> TrialRunner: + """ + Gets the TrialRunner associated with the given Trial. + + Parameters + ---------- + trial : Storage.Trial + The trial to get the associated TrialRunner for. + + Returns + ------- + TrialRunner + """ + if trial.trial_runner_id is None: + raise ValueError(f"Trial {trial} has no trial_runner_id") + return self._trial_runners[trial.trial_runner_id] + def __repr__(self) -> str: """ Produce a human-readable version of the Scheduler (mostly for logging). 
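get_trial_runner() above assumes every persisted Trial already carries a trial_runner_id; how those ids get assigned when new trials are queued is left open by this commit. One plausible policy (a sketch only, not code from this change) is a round-robin over the available runners, which the _current_trial_runner_idx counter added in the previous commit hints at:

from itertools import cycle
from typing import Iterator, List

from mlos_bench.schedulers.trial_runner import TrialRunner

def round_robin_runner_ids(trial_runners: List[TrialRunner]) -> Iterator[int]:
    """Cycle through TrialRunner ids so successive trials land on successive runners."""
    return cycle(runner.trial_runner_id for runner in trial_runners)

A scheduler could then stamp the next value of this iterator into each new trial's metadata before persisting it, so that get_trial_runner() can route the trial back to the matching runner later.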
@@ -194,6 +211,7 @@ def teardown(self) -> None: assert self.experiment is not None if self._do_teardown: for trial_runner in self.trial_runners: + assert not trial_runner.is_running trial_runner.teardown() def get_best_observation(self) -> Tuple[Optional[float], Optional[TunableGroups]]: @@ -224,6 +242,8 @@ def _get_optimizer_suggestions(self, last_trial_id: int = -1, is_warm_up: bool = Return the last trial ID processed by the optimizer. """ assert self.experiment is not None + # FIXME: In async mode, trial_ids may be returned out of order, so we may + # need to adjust this fetching logic. (trial_ids, configs, scores, status) = self.experiment.load(last_trial_id) _LOG.info("QUEUE: Update the optimizer with trial results: %s", trial_ids) self.optimizer.bulk_register(configs, scores, status, is_warm_up) diff --git a/mlos_bench/mlos_bench/schedulers/sync_scheduler.py b/mlos_bench/mlos_bench/schedulers/sync_scheduler.py index 48c49b278d..ad9ced8c80 100644 --- a/mlos_bench/mlos_bench/schedulers/sync_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/sync_scheduler.py @@ -44,26 +44,7 @@ def run_trial(self, trial: Storage.Trial) -> None: Set up and run a single trial. Save the results in the storage. """ super().run_trial(trial) - - trial_runner = self._trial_runners[trial.trial_runner_id] - - if not self.environment.setup(trial.tunables, trial.config(self.global_config)): - _LOG.warning("Setup failed: %s :: %s", self.environment, trial.tunables) - # FIXME: Use the actual timestamp from the environment. - _LOG.info("QUEUE: Update trial results: %s :: %s", trial, Status.FAILED) - trial.update(Status.FAILED, datetime.utcnow()) - return - - (status, timestamp, results) = self.environment.run() # Block and wait for the final result. - _LOG.info("Results: %s :: %s\n%s", trial.tunables, status, results) - - # In async mode (TODO), poll the environment for status and telemetry - # and update the storage with the intermediate results. - (_status, _timestamp, telemetry) = self.environment.status() - - # Use the status and timestamp from `.run()` as it is the final status of the experiment. - # TODO: Use the `.status()` output in async mode. - trial.update_telemetry(status, timestamp, telemetry) - - trial.update(status, timestamp, results) - _LOG.info("QUEUE: Update trial results: %s :: %s %s", trial, status, results) + # In the sync scheduler we run each trial on its own TrialRunner in sequence. + trial_runner = self.get_trial_runner(trial) + trial_runner.run_trial(trial, self.global_config) + _LOG.info("QUEUE: Finished trial: %s on %s", trial, trial_runner) diff --git a/mlos_bench/mlos_bench/schedulers/trial_runner.py b/mlos_bench/mlos_bench/schedulers/trial_runner.py index fa87bb7771..1dd2454f08 100644 --- a/mlos_bench/mlos_bench/schedulers/trial_runner.py +++ b/mlos_bench/mlos_bench/schedulers/trial_runner.py @@ -29,7 +29,7 @@ class TrialRunner: and async status polling via EventLoopContext background threads. Multiple TrialRunners can be used in a multi-processing pool to run multiple trials - in parallel. + in parallel, for instance. 
""" def __init__(self, trial_runner_id: int, env: Environment) -> None: @@ -37,6 +37,7 @@ def __init__(self, trial_runner_id: int, env: Environment) -> None: self._env = env assert self._env.parameters["trial_runner_id"] == self._trial_runner_id self._in_context = False + self._is_running = False self._event_loop_context = EventLoopContext() @property @@ -72,16 +73,22 @@ def __exit__(self, self._in_context = False return False # Do not suppress exceptions - def run(self, - trial: Storage.Trial, - global_config: Optional[Dict[str, Any]] = None) -> Tuple[Status, Optional[Dict[str, float]]]: + @property + def is_running(self) -> bool: + """Get the running state of the current TrialRunner.""" + return self._is_running + + def run_trial(self, + trial: Storage.Trial, + global_config: Optional[Dict[str, Any]] = None) -> None: """ - Run a single trial on this TrialRunner's Environment. + Run a single trial on this TrialRunner's Environment and stores the results + in the backend Trial Storage. Parameters ---------- trial : Storage.Trial - A Storage class based Trial used to persist the experiment data. + A Storage class based Trial used to persist the experiment trial data. global_config : dict Global configuration parameters. @@ -91,32 +98,38 @@ def run(self, Status and results of the trial. """ assert self._in_context - _LOG.info("Trial: %s", trial) - if not self._env.setup(trial.tunables, trial.config(global_config)): - _LOG.warning("Setup failed: %s :: %s", self._env, trial.tunables) + assert not self._is_running + self._is_running = True + + assert trial.trial_runner_id == self.trial_runner_id, \ + f"TrialRunner {self} should not run trial {trial} with different trial_runner_id {trial.trial_runner_id}." + + if not self.environment.setup(trial.tunables, trial.config(global_config)): + _LOG.warning("Setup failed: %s :: %s", self.environment, trial.tunables) # FIXME: Use the actual timestamp from the environment. - trial.update(Status.FAILED, datetime.utcnow()) - return (Status.FAILED, None) + _LOG.info("TrialRunner: Update trial results: %s :: %s", trial, Status.FAILED) + status, timestamp = Status.FAILED, datetime.utcnow() + trial.update(status, timestamp) + return # TODO: start background status polling of the environments in the event loop. - (status, timestamp, results) = self._env.run() # Block and wait for the final result. - _LOG.info("Results: %s :: %s\n%s", trial.tunables, status, results) + (status, timestamp, results) = self.environment.run() # Block and wait for the final result. + _LOG.info("TrialRunner Results: %s :: %s\n%s", trial.tunables, status, results) # In async mode (TODO), poll the environment for status and telemetry # and update the storage with the intermediate results. - (_status, _timestamp, telemetry) = self._env.status() + (_status, _timestamp, telemetry) = self.environment.status() # Use the status and timestamp from `.run()` as it is the final status of the experiment. # TODO: Use the `.status()` output in async mode. trial.update_telemetry(status, timestamp, telemetry) trial.update(status, timestamp, results) - # Filter out non-numeric scores from the optimizer. 
- scores = results if not isinstance(results, dict) \ - else {k: float(v) for (k, v) in results.items() if isinstance(v, (int, float))} - return (status, scores) + _LOG.info("TrialRunner: Update trial results: %s :: %s %s", trial, status, results) + + self._is_running = False def teardown(self) -> None: """ diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 91670cbe90..6ae0d729c8 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -403,6 +403,9 @@ def config(self, global_config: Optional[Dict[str, Any]] = None) -> Dict[str, An config.update(global_config or {}) config["experiment_id"] = self._experiment_id config["trial_id"] = self._trial_id + trial_runner_id = self.trial_runner_id + if trial_runner_id is not None: + config.setdefault("trial_runner_id", trial_runner_id) return config @abstractmethod diff --git a/mlos_bench/mlos_bench/storage/sql/experiment.py b/mlos_bench/mlos_bench/storage/sql/experiment.py index 25998364d2..443a934640 100644 --- a/mlos_bench/mlos_bench/storage/sql/experiment.py +++ b/mlos_bench/mlos_bench/storage/sql/experiment.py @@ -151,8 +151,6 @@ def load(self, self._schema.trial.c.trial_id.asc(), ) ) - # Note: this iterative approach is somewhat expensive. - # TODO: Look into a better bulk fetch option. for trial in cur_trials.fetchall(): tunables = self._get_params( conn, self._schema.config_param, config_id=trial.config_id) From e91b7443cbbae570861bcd65b6f3060a7fede545 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 19 Mar 2024 22:06:57 +0000 Subject: [PATCH 077/121] comments --- mlos_bench/mlos_bench/config/schemas/cli/cli-schema.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/config/schemas/cli/cli-schema.json b/mlos_bench/mlos_bench/config/schemas/cli/cli-schema.json index 75b8105e1d..0d11769f22 100644 --- a/mlos_bench/mlos_bench/config/schemas/cli/cli-schema.json +++ b/mlos_bench/mlos_bench/config/schemas/cli/cli-schema.json @@ -79,8 +79,8 @@ "examples": [3, 5] }, - "trial_runners": { - "description": "Number of trial runners to run in parallel. Individual TrialRunners can be identified in configs with $trial_runner_id.", + "num_trial_runners": { + "description": "Number of trial runner instances to use to execute benchmark environments. Individual TrialRunners can be identified in configs with $trial_runner_id and optionally run in parallel.", "type": "integer", "minimum": 1, "examples": [1, 3, 5, 10] From 64e7575a09dedcd7427f034fa80fda675e0a56e3 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 19 Mar 2024 22:16:30 +0000 Subject: [PATCH 078/121] consistency --- mlos_bench/mlos_bench/launcher.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 9ea3b15cb4..76ce326b80 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -226,9 +226,9 @@ def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> T help='Number of times to repeat each config. Default is 1 trial per config, though more may be advised.') parser.add_argument( - '--trial_runners', '--trial-runners', required=False, type=int, default=1, - help='Number of trial runners to run in parallel. 
' - + 'Individual TrialRunners can be identified in configs with $trial_runner_id.') + '--num_trial_runners', '--num-trial-runners', required=False, type=int, default=1, + help='Number of TrialRunners to use for executing benchmark Environments. ' + + 'Individual TrialRunners can be identified in configs with $trial_runner_id and optionally run in parallel.') parser.add_argument( '--scheduler', required=False, From 32c01c0b7bc21846b1a10fb00c46969ebf2a8a15 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 20 Mar 2024 22:00:28 +0000 Subject: [PATCH 079/121] fixup --- mlos_bench/mlos_bench/services/config_persistence.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/services/config_persistence.py b/mlos_bench/mlos_bench/services/config_persistence.py index d8debd6c29..adb6e825aa 100644 --- a/mlos_bench/mlos_bench/services/config_persistence.py +++ b/mlos_bench/mlos_bench/services/config_persistence.py @@ -23,7 +23,6 @@ from mlos_bench.environments.base_environment import Environment from mlos_bench.optimizers.base_optimizer import Optimizer from mlos_bench.services.base_service import Service -from mlos_bench.schedulers.trial_runner import TrialRunner from mlos_bench.services.types.config_loader_type import SupportsConfigLoading from mlos_bench.tunables.tunable import TunableValue from mlos_bench.tunables.tunable_groups import TunableGroups @@ -37,6 +36,7 @@ if TYPE_CHECKING: from mlos_bench.storage.base_storage import Storage from mlos_bench.schedulers.base_scheduler import Scheduler + from mlos_bench.schedulers.trial_runner import TrialRunner _LOG = logging.getLogger(__name__) @@ -308,7 +308,7 @@ def build_storage(self, *, def build_scheduler(self, *, config: Dict[str, Any], global_config: Dict[str, Any], - trial_runners: List[TrialRunner], + trial_runners: List["TrialRunner"], optimizer: Optimizer, storage: "Storage", root_env_config: str) -> "Scheduler": From 0e89e255babd5db6beb51aa6cbeb96cfc23ec125 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 20 Mar 2024 22:01:12 +0000 Subject: [PATCH 080/121] schema tests --- .../cli/test-cases/bad/invalid/min-trial-runners-count.jsonc | 3 +++ .../config/schemas/cli/test-cases/good/full/full-cli.jsonc | 1 + 2 files changed, 4 insertions(+) create mode 100644 mlos_bench/mlos_bench/tests/config/schemas/cli/test-cases/bad/invalid/min-trial-runners-count.jsonc diff --git a/mlos_bench/mlos_bench/tests/config/schemas/cli/test-cases/bad/invalid/min-trial-runners-count.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/cli/test-cases/bad/invalid/min-trial-runners-count.jsonc new file mode 100644 index 0000000000..251a00c89e --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schemas/cli/test-cases/bad/invalid/min-trial-runners-count.jsonc @@ -0,0 +1,3 @@ +{ + "num_trial_runners": 0 // too small +} diff --git a/mlos_bench/mlos_bench/tests/config/schemas/cli/test-cases/good/full/full-cli.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/cli/test-cases/good/full/full-cli.jsonc index 256bd1d687..0373ec3a3e 100644 --- a/mlos_bench/mlos_bench/tests/config/schemas/cli/test-cases/good/full/full-cli.jsonc +++ b/mlos_bench/mlos_bench/tests/config/schemas/cli/test-cases/good/full/full-cli.jsonc @@ -16,6 +16,7 @@ "storage": "storage/sqlite.jsonc", "trial_config_repeat_count": 3, + "num_trial_runners": 3, "random_init": true, "random_seed": 42, From 5549925f6e7d6bff773710479eabac51b39999f2 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 20 Mar 2024 22:20:03 +0000 Subject: [PATCH 081/121] spelling --- 
.../config/schedulers/sync_scheduler.jsonc | 2 +- .../tests/config/cli/test-cli-config.jsonc | 1 + .../tests/launcher_parse_args_test.py | 18 ++++++++++++++++-- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc b/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc index daf95d56fa..3dc8ff167e 100644 --- a/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc +++ b/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc @@ -6,7 +6,7 @@ "config": { "trial_config_repeat_count": 3, - "max_trials": -1, // Limited only in hte Optimizer logic/config. + "max_trials": -1, // Limited only in the Optimizer logic/config. "teardown": false } } diff --git a/mlos_bench/mlos_bench/tests/config/cli/test-cli-config.jsonc b/mlos_bench/mlos_bench/tests/config/cli/test-cli-config.jsonc index 9ffaa51180..e6bedda9a8 100644 --- a/mlos_bench/mlos_bench/tests/config/cli/test-cli-config.jsonc +++ b/mlos_bench/mlos_bench/tests/config/cli/test-cli-config.jsonc @@ -18,6 +18,7 @@ ], "trial_config_repeat_count": 1, + "num_trial_runners": 3, "random_seed": 42, "random_init": true diff --git a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py index 1445998eb5..cfb64891c3 100644 --- a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py @@ -53,6 +53,9 @@ def config_paths() -> List[str]: ] +# TODO: def test_launcher_args_defaults(config_paths: List[str]) -> None: + + def test_launcher_args_parse_1(config_paths: List[str]) -> None: """ Test that using multiple --globals arguments works and that multiple space @@ -71,6 +74,7 @@ def test_launcher_args_parse_1(config_paths: List[str]) -> None: # This is part of the minimal required args by the Launcher. env_conf_path = 'environments/mock/mock_env.jsonc' cli_args = '--config-paths ' + ' '.join(config_paths) + \ + ' --num-trial-runners 5' + \ ' --service services/remote/mock/mock_auth_service.jsonc' + \ ' --service services/remote/mock/mock_remote_exec_service.jsonc' + \ ' --scheduler schedulers/sync_scheduler.jsonc' + \ @@ -98,9 +102,14 @@ def test_launcher_args_parse_1(config_paths: List[str]) -> None: == path_join(os.getcwd(), "foo", abs_path=True) assert launcher.global_config["varWithEnvVarRef"] == f'user:{getuser()}' assert launcher.teardown + # Make sure we have the right number of trial runners. + assert len(launcher.trial_runners) == 5 # from cli args # Check that the environment that got loaded looks to be of the right type. env_config = launcher.config_loader.load_config(env_conf_path, ConfigSchema.ENVIRONMENT) - assert check_class_name(launcher.trial_runners[0].environment, env_config['class']) + assert all(check_class_name(trial_runner.environment, env_config['class']) for trial_runner in launcher.trial_runners) + # Make sure that each trial runner has a unique ID. + assert set(trial_runner.environment.config["trial_runner_id"] for trial_runner in launcher.trial_runners) \ + == set(range(0, len(launcher.trial_runners))) # Check that the optimizer looks right. assert isinstance(launcher.optimizer, OneShotOptimizer) # Check that the optimizer got initialized with defaults. 
@@ -161,10 +170,15 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: config = launcher.config_loader.load_config(config_file, ConfigSchema.CLI) assert launcher.config_loader.config_paths == [path_join(path, abs_path=True) for path in config_paths + config['config_path']] + # Make sure we have the right number of trial runners. + assert len(launcher.trial_runners) == 3 # from test-cli-config.jsonc # Check that the environment that got loaded looks to be of the right type. env_config_file = config['environment'] env_config = launcher.config_loader.load_config(env_config_file, ConfigSchema.ENVIRONMENT) - assert check_class_name(launcher.trial_runners[0].environment, env_config['class']) + assert all(check_class_name(trial_runner.environment, env_config['class']) for trial_runner in launcher.trial_runners) + # Make sure that each trial runner has a unique ID. + assert set(trial_runner.environment.parameters["trial_runner_id"] for trial_runner in launcher.trial_runners) \ + == set(range(0, len(launcher.trial_runners))) # Check that the optimizer looks right. assert isinstance(launcher.optimizer, MlosCoreOptimizer) From 7feba3aed591a28240316876b5c1a638b86a0fd7 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 20 Mar 2024 22:20:28 +0000 Subject: [PATCH 082/121] make sure trial_runner_id shows up by default --- mlos_bench/mlos_bench/environments/base_environment.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/environments/base_environment.py b/mlos_bench/mlos_bench/environments/base_environment.py index 508d78589b..861ab635f4 100644 --- a/mlos_bench/mlos_bench/environments/base_environment.py +++ b/mlos_bench/mlos_bench/environments/base_environment.py @@ -113,6 +113,8 @@ def __init__(self, An optional service object (e.g., providing methods to deploy or reboot a VM/Host, etc.). 
""" + global_config = global_config or {} + global_config.setdefault("trial_runner_id", -1) self._validate_json_config(config, name) self.name = name self.config = config @@ -132,7 +134,7 @@ def __init__(self, groups = self._expand_groups( config.get("tunable_params", []), - (global_config or {}).get("tunable_params_map", {})) + global_config.get("tunable_params_map", {})) _LOG.debug("Tunable groups for: '%s' :: %s", name, groups) self._tunable_params = tunables.subgroup(groups) @@ -142,8 +144,9 @@ def __init__(self, set(config.get("required_args", [])) - set(self._tunable_params.get_param_values().keys()) ) + req_args.add("trial_runner_id") merge_parameters(dest=self._const_args, source=global_config, required_keys=req_args) - self._const_args = self._expand_vars(self._const_args, global_config or {}) + self._const_args = self._expand_vars(self._const_args, global_config) self._params = self._combine_tunables(self._tunable_params) _LOG.debug("Parameters for '%s' :: %s", name, self._params) From cc7ed4d207390e38569a9f078b3d02e28dbe062c Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 20 Mar 2024 22:20:45 +0000 Subject: [PATCH 083/121] wip: fixups --- mlos_bench/mlos_bench/launcher.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 76ce326b80..78b12a28f8 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -142,9 +142,9 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st env_global_config["trial_runner_id"] = trial_runner_id env = self._config_loader.load_environment( self.root_env_config, TunableGroups(), env_global_config, service=self._parent_service) - self.trial_runners[trial_runner_id] = TrialRunner(trial_runner_id, env) + self.trial_runners.append(TrialRunner(trial_runner_id, env)) _LOG.info("Init %d trial runners for environments: %s", - self.trial_runners, list(trial_runner.environment for trial_runner in self.trial_runners)) + len(self.trial_runners), list(trial_runner.environment for trial_runner in self.trial_runners)) # NOTE: Init tunable values *after* the Environment(s), but *before* the Optimizer # TODO: should we assign the same or different tunables for all TrialRunner Environments? @@ -226,7 +226,7 @@ def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> T help='Number of times to repeat each config. Default is 1 trial per config, though more may be advised.') parser.add_argument( - '--num_trial_runners', '--num-trial-runners', required=False, type=int, default=1, + '--num_trial_runners', '--num-trial-runners', required=False, type=int, help='Number of TrialRunners to use for executing benchmark Environments. 
' + 'Individual TrialRunners can be identified in configs with $trial_runner_id and optionally run in parallel.') From 8d794f1135e7de5f4fe7e7fde2bcefb11d0e2374 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 20 Mar 2024 22:20:51 +0000 Subject: [PATCH 084/121] fixme comments --- mlos_bench/mlos_bench/tests/launcher_parse_args_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py index cfb64891c3..a57ad43c90 100644 --- a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py @@ -54,6 +54,7 @@ def config_paths() -> List[str]: # TODO: def test_launcher_args_defaults(config_paths: List[str]) -> None: +# TODO: Test default trial config repeat count (also derived from config) def test_launcher_args_parse_1(config_paths: List[str]) -> None: From 967b6e29625ffdd08dfdc48f6bd806a736fc26a4 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 21 Mar 2024 20:40:13 +0000 Subject: [PATCH 085/121] Launcher args fixups --- .../environments/base_environment.py | 30 ++++- mlos_bench/mlos_bench/launcher.py | 4 +- .../mlos_bench/schedulers/base_scheduler.py | 10 ++ .../tests/launcher_parse_args_test.py | 122 +++++++++++------- 4 files changed, 117 insertions(+), 49 deletions(-) diff --git a/mlos_bench/mlos_bench/environments/base_environment.py b/mlos_bench/mlos_bench/environments/base_environment.py index 861ab635f4..d7281f725e 100644 --- a/mlos_bench/mlos_bench/environments/base_environment.py +++ b/mlos_bench/mlos_bench/environments/base_environment.py @@ -35,6 +35,14 @@ class Environment(metaclass=abc.ABCMeta): """ An abstract base of all benchmark environments. """ + # Should be provided by the runtime. + _COMMON_CONST_ARGS = { + "trial_runner_id", + } + _COMMON_REQ_ARGS = { + "experiment_id", + "trial_id", + } @classmethod def new(cls, @@ -114,7 +122,11 @@ def __init__(self, deploy or reboot a VM/Host, etc.). """ global_config = global_config or {} - global_config.setdefault("trial_runner_id", -1) + # Make some usual runtime arguments available for tests. + for arg in self._COMMON_CONST_ARGS: + global_config.setdefault(arg, None) + for arg in self._COMMON_REQ_ARGS: + global_config.setdefault(arg, None) self._validate_json_config(config, name) self.name = name self.config = config @@ -144,7 +156,7 @@ def __init__(self, set(config.get("required_args", [])) - set(self._tunable_params.get_param_values().keys()) ) - req_args.add("trial_runner_id") + req_args.update(self._COMMON_CONST_ARGS) merge_parameters(dest=self._const_args, source=global_config, required_keys=req_args) self._const_args = self._expand_vars(self._const_args, global_config) @@ -310,6 +322,18 @@ def tunable_params(self) -> TunableGroups: """ return self._tunable_params + @property + def const_args(self) -> Dict[str, TunableValue]: + """ + Get the constant arguments for this Environment. + + Returns + ------- + parameters : Dict[str, TunableValue] + Key/value pairs of all environment const_args parameters. + """ + return self._const_args.copy() + @property def parameters(self) -> Dict[str, TunableValue]: """ @@ -321,7 +345,7 @@ def parameters(self) -> Dict[str, TunableValue]: parameters : Dict[str, TunableValue] Key/value pairs of all environment parameters (i.e., `const_args` and `tunable_params`). 
""" - return self._params + return self._params.copy() def setup(self, tunables: TunableGroups, global_config: Optional[dict] = None) -> bool: """ diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 78b12a28f8..a86d630445 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -56,6 +56,7 @@ class Launcher: def __init__(self, description: str, long_text: str = "", argv: Optional[List[str]] = None): # pylint: disable=too-many-statements # pylint: disable=too-complex + # pylint: disable=too-many-locals _LOG.info("Launch: %s", description) epilog = """ Additional --key=value pairs can be specified to augment or override values listed in --globals. @@ -97,12 +98,13 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self._parent_service: Service = LocalExecService(parent=self._config_loader) + args_dict = vars(args) self.global_config = self._load_config( config.get("globals", []) + (args.globals or []), (args.config_path or []) + config.get("config_path", []), args_rest, # Prime the global config with the command line args and the config file. - {key: val for (key, val) in config.items() if key not in vars(args)}, + {key: val for (key, val) in config.items() if key not in args_dict or args_dict[key] is None}, ) # experiment_id is generally taken from --globals files, but we also allow overriding it on the CLI. # It's useful to keep it there explicitly mostly for the --help output. diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index c68da69ae3..ee1a0115b8 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -89,6 +89,16 @@ def __init__(self, *, _LOG.debug("Scheduler instantiated: %s :: %s", self, config) + @property + def trial_config_repeat_count(self) -> int: + """Gets the number of trials to run for a given config.""" + return self._trial_config_repeat_count + + @property + def max_trials(self) -> int: + """Gets the maximum number of trials to run for a given config, or -1 for no limit.""" + return self._max_trials + @property def experiment(self) -> Optional[Storage.Experiment]: """Gets the Experiment Storage.""" diff --git a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py index a57ad43c90..a736592438 100644 --- a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py @@ -53,16 +53,11 @@ def config_paths() -> List[str]: ] -# TODO: def test_launcher_args_defaults(config_paths: List[str]) -> None: -# TODO: Test default trial config repeat count (also derived from config) +# This is part of the minimal required args by the Launcher. +ENV_CONF_PATH = 'environments/mock/mock_env.jsonc' -def test_launcher_args_parse_1(config_paths: List[str]) -> None: - """ - Test that using multiple --globals arguments works and that multiple space - separated options to --config-paths works. - Check $var expansion and Environment loading. - """ +def _get_launcher(desc: str, cli_args: str) -> Launcher: # The VSCode pytest wrapper actually starts in a different directory before # changing into the code directory, but doesn't update the PWD environment # variable so we use a separate variable. 
@@ -71,24 +66,74 @@ def test_launcher_args_parse_1(config_paths: List[str]) -> None: if sys.platform == 'win32': # Some env tweaks for platform compatibility. environ['USER'] = environ['USERNAME'] + launcher = Launcher(description=desc, argv=cli_args.split()) + # Check the basic parent service + assert isinstance(launcher.service, SupportsConfigLoading) # built-in + assert isinstance(launcher.service, SupportsLocalExec) # built-in + # All trial runners should have the same Environment class. + assert len(set(trial_runner.environment.__class__ for trial_runner in launcher.trial_runners)) == 1 + # Make sure that each trial runner has a unique ID. + assert set(trial_runner.environment.const_args["trial_runner_id"] for trial_runner in launcher.trial_runners) \ + == set(range(0, len(launcher.trial_runners))) + return launcher + - # This is part of the minimal required args by the Launcher. - env_conf_path = 'environments/mock/mock_env.jsonc' +def test_launcher_args_parse_defaults(config_paths: List[str]) -> None: + """ + Test that we get the defaults we expect when using minimal config arg examples. + """ + cli_args = '--config-paths ' + ' '.join(config_paths) + \ + f' --environment {ENV_CONF_PATH}' + \ + ' --globals globals/global_test_config.jsonc' + launcher = _get_launcher(__name__, cli_args) + # Check that the first --globals file is loaded and $var expansion is handled. + assert launcher.global_config['experiment_id'] == 'MockExperiment' + assert launcher.global_config['testVmName'] == 'MockExperiment-vm' + # Check that secondary expansion also works. + assert launcher.global_config['testVnetName'] == 'MockExperiment-vm-vnet' + # Check that we can expand a $var in a config file that references an environment variable. + assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) \ + == path_join(os.getcwd(), "foo", abs_path=True) + assert launcher.global_config["varWithEnvVarRef"] == f'user:{getuser()}' + assert launcher.teardown # defaults + # Make sure we have the right number of trial runners. + assert len(launcher.trial_runners) == 1 # defaults + # Check that the environment that got loaded looks to be of the right type. + env_config = launcher.config_loader.load_config(ENV_CONF_PATH, ConfigSchema.ENVIRONMENT) + assert env_config["class"] == "mlos_bench.environments.mock_env.MockEnv" + # All TrialRunners should get the same Environment. + assert all(check_class_name(trial_runner.environment, env_config['class']) for trial_runner in launcher.trial_runners) + # Check that the optimizer looks right. + assert isinstance(launcher.optimizer, OneShotOptimizer) + # Check that the optimizer got initialized with defaults. + assert launcher.optimizer.tunable_params.is_defaults() + assert launcher.optimizer.max_iterations == 1 # value for OneShotOptimizer + # Check that we pick up the right scheduler config: + assert isinstance(launcher.scheduler, SyncScheduler) + assert launcher.scheduler.trial_config_repeat_count == 1 # default + assert launcher.scheduler.max_trials == -1 # default + + +def test_launcher_args_parse_1(config_paths: List[str]) -> None: + """ + Test that using multiple --globals arguments works and that multiple space + separated options to --config-paths works. + Check $var expansion and Environment loading. + """ + # Here we have multiple paths following --config-paths and --service. 
cli_args = '--config-paths ' + ' '.join(config_paths) + \ ' --num-trial-runners 5' + \ ' --service services/remote/mock/mock_auth_service.jsonc' + \ - ' --service services/remote/mock/mock_remote_exec_service.jsonc' + \ + ' services/remote/mock/mock_remote_exec_service.jsonc' + \ ' --scheduler schedulers/sync_scheduler.jsonc' + \ - f' --environment {env_conf_path}' + \ + f' --environment {ENV_CONF_PATH}' + \ ' --globals globals/global_test_config.jsonc' + \ ' --globals globals/global_test_extra_config.jsonc' \ ' --test_global_value_2 from-args' - launcher = Launcher(description=__name__, argv=cli_args.split()) - # Check that the parent service - assert isinstance(launcher.service, SupportsAuth) - assert isinstance(launcher.service, SupportsConfigLoading) - assert isinstance(launcher.service, SupportsLocalExec) - assert isinstance(launcher.service, SupportsRemoteExec) + launcher = _get_launcher(__name__, cli_args) + # Check some additional features of the the parent service + assert isinstance(launcher.service, SupportsAuth) # from --service + assert isinstance(launcher.service, SupportsRemoteExec) # from --service # Check that the first --globals file is loaded and $var expansion is handled. assert launcher.global_config['experiment_id'] == 'MockExperiment' assert launcher.global_config['testVmName'] == 'MockExperiment-vm' @@ -106,11 +151,10 @@ def test_launcher_args_parse_1(config_paths: List[str]) -> None: # Make sure we have the right number of trial runners. assert len(launcher.trial_runners) == 5 # from cli args # Check that the environment that got loaded looks to be of the right type. - env_config = launcher.config_loader.load_config(env_conf_path, ConfigSchema.ENVIRONMENT) + env_config = launcher.config_loader.load_config(ENV_CONF_PATH, ConfigSchema.ENVIRONMENT) + assert env_config["class"] == "mlos_bench.environments.mock_env.MockEnv" + # All TrialRunners should get the same Environment. assert all(check_class_name(trial_runner.environment, env_config['class']) for trial_runner in launcher.trial_runners) - # Make sure that each trial runner has a unique ID. - assert set(trial_runner.environment.config["trial_runner_id"] for trial_runner in launcher.trial_runners) \ - == set(range(0, len(launcher.trial_runners))) # Check that the optimizer looks right. assert isinstance(launcher.optimizer, OneShotOptimizer) # Check that the optimizer got initialized with defaults. @@ -118,8 +162,8 @@ def test_launcher_args_parse_1(config_paths: List[str]) -> None: assert launcher.optimizer.max_iterations == 1 # value for OneShotOptimizer # Check that we pick up the right scheduler config: assert isinstance(launcher.scheduler, SyncScheduler) - assert launcher.scheduler._trial_config_repeat_count == 3 # pylint: disable=protected-access - assert launcher.scheduler._max_trials == -1 # pylint: disable=protected-access + assert launcher.scheduler.trial_config_repeat_count == 3 # from the custom sync_scheduler.jsonc config + assert launcher.scheduler.max_trials == -1 def test_launcher_args_parse_2(config_paths: List[str]) -> None: @@ -127,17 +171,9 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: Test multiple --config-path instances, --config file vs --arg, --var=val overrides, $var templates, option args, --random-init, etc. """ - # The VSCode pytest wrapper actually starts in a different directory before - # changing into the code directory, but doesn't update the PWD environment - # variable so we use a separate variable. - # See global_test_config.jsonc for more details. 
- environ["CUSTOM_PATH_FROM_ENV"] = os.getcwd() - if sys.platform == 'win32': - # Some env tweaks for platform compatibility. - environ['USER'] = environ['USERNAME'] - config_file = 'cli/test-cli-config.jsonc' globals_file = 'globals/global_test_config.jsonc' + # Here we have multiple --config-path and --service args, each with their own path. cli_args = ' '.join([f"--config-path {config_path}" for config_path in config_paths]) + \ f' --config {config_file}' + \ ' --service services/remote/mock/mock_auth_service.jsonc' + \ @@ -149,13 +185,11 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: ' --random-seed 1234' + \ ' --trial-config-repeat-count 5' + \ ' --max_trials 200' - launcher = Launcher(description=__name__, argv=cli_args.split()) - # Check that the parent service - assert isinstance(launcher.service, SupportsAuth) - assert isinstance(launcher.service, SupportsConfigLoading) - assert isinstance(launcher.service, SupportsFileShareOps) - assert isinstance(launcher.service, SupportsLocalExec) - assert isinstance(launcher.service, SupportsRemoteExec) + launcher = _get_launcher(__name__, cli_args) + # Check some additional features of the the parent service + assert isinstance(launcher.service, SupportsAuth) # from --service + assert isinstance(launcher.service, SupportsFileShareOps) # from --config + assert isinstance(launcher.service, SupportsRemoteExec) # from --service # Check that the --globals file is loaded and $var expansion is handled # using the value provided on the CLI. assert launcher.global_config['experiment_id'] == 'MockeryExperiment' @@ -176,10 +210,8 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: # Check that the environment that got loaded looks to be of the right type. env_config_file = config['environment'] env_config = launcher.config_loader.load_config(env_config_file, ConfigSchema.ENVIRONMENT) + # All TrialRunners should get the same Environment. assert all(check_class_name(trial_runner.environment, env_config['class']) for trial_runner in launcher.trial_runners) - # Make sure that each trial runner has a unique ID. - assert set(trial_runner.environment.parameters["trial_runner_id"] for trial_runner in launcher.trial_runners) \ - == set(range(0, len(launcher.trial_runners))) # Check that the optimizer looks right. assert isinstance(launcher.optimizer, MlosCoreOptimizer) @@ -204,8 +236,8 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: # Check that CLI parameter overrides JSON config: assert isinstance(launcher.scheduler, SyncScheduler) - assert launcher.scheduler._trial_config_repeat_count == 5 # pylint: disable=protected-access - assert launcher.scheduler._max_trials == 200 # pylint: disable=protected-access + assert launcher.scheduler.trial_config_repeat_count == 5 + assert launcher.scheduler.max_trials == 200 # Check that the value from the file is overridden by the CLI arg. 
assert config['random_seed'] == 42 From f4b13487aec1ddccc08bca0e136bd15915241a32 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 21 Mar 2024 20:50:15 +0000 Subject: [PATCH 086/121] Fixups and testing for cli config file parsing --- .../config/schedulers/sync_scheduler.jsonc | 2 +- mlos_bench/mlos_bench/launcher.py | 5 +- .../mlos_bench/schedulers/base_scheduler.py | 10 ++ .../tests/launcher_parse_args_test.py | 107 +++++++++++------- 4 files changed, 84 insertions(+), 40 deletions(-) diff --git a/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc b/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc index daf95d56fa..3dc8ff167e 100644 --- a/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc +++ b/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc @@ -6,7 +6,7 @@ "config": { "trial_config_repeat_count": 3, - "max_trials": -1, // Limited only in hte Optimizer logic/config. + "max_trials": -1, // Limited only in the Optimizer logic/config. "teardown": false } } diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index a9aa9e3f46..23e256dcbd 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -95,11 +95,14 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self._parent_service: Service = LocalExecService(parent=self._config_loader) + args_dict = vars(args) self.global_config = self._load_config( config.get("globals", []) + (args.globals or []), (args.config_path or []) + config.get("config_path", []), args_rest, - {key: val for (key, val) in config.items() if key not in vars(args)}, + # Include any item from the cli config file that either isn't in the cli + # args at all or whose cli arg is missing. + {key: val for (key, val) in config.items() if key not in args_dict or args_dict[key] is None}, ) # experiment_id is generally taken from --globals files, but we also allow overriding it on the CLI. # It's useful to keep it there explicitly mostly for the --help output. diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 6e3da151e5..96c08b9bd0 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -85,6 +85,16 @@ def __init__(self, *, _LOG.debug("Scheduler instantiated: %s :: %s", self, config) + @property + def trial_config_repeat_count(self) -> int: + """Gets the number of trials to run for a given config.""" + return self._trial_config_repeat_count + + @property + def max_trials(self) -> int: + """Gets the maximum number of trials to run for a given config, or -1 for no limit.""" + return self._max_trials + def __repr__(self) -> str: """ Produce a human-readable version of the Scheduler (mostly for logging). diff --git a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py index 90e52bb880..53b6c955e4 100644 --- a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py @@ -53,12 +53,11 @@ def config_paths() -> List[str]: ] -def test_launcher_args_parse_1(config_paths: List[str]) -> None: - """ - Test that using multiple --globals arguments works and that multiple space - separated options to --config-paths works. - Check $var expansion and Environment loading. - """ +# This is part of the minimal required args by the Launcher. 
+ENV_CONF_PATH = 'environments/mock/mock_env.jsonc' + + +def _get_launcher(desc: str, cli_args: str) -> Launcher: # The VSCode pytest wrapper actually starts in a different directory before # changing into the code directory, but doesn't update the PWD environment # variable so we use a separate variable. @@ -67,23 +66,65 @@ def test_launcher_args_parse_1(config_paths: List[str]) -> None: if sys.platform == 'win32': # Some env tweaks for platform compatibility. environ['USER'] = environ['USERNAME'] + launcher = Launcher(description=desc, argv=cli_args.split()) + # Check the basic parent service + assert isinstance(launcher.service, SupportsConfigLoading) # built-in + assert isinstance(launcher.service, SupportsLocalExec) # built-in + return launcher + - # This is part of the minimal required args by the Launcher. - env_conf_path = 'environments/mock/mock_env.jsonc' +def test_launcher_args_parse_defaults(config_paths: List[str]) -> None: + """ + Test that we get the defaults we expect when using minimal config arg examples. + """ + cli_args = '--config-paths ' + ' '.join(config_paths) + \ + f' --environment {ENV_CONF_PATH}' + \ + ' --globals globals/global_test_config.jsonc' + launcher = _get_launcher(__name__, cli_args) + # Check that the first --globals file is loaded and $var expansion is handled. + assert launcher.global_config['experiment_id'] == 'MockExperiment' + assert launcher.global_config['testVmName'] == 'MockExperiment-vm' + # Check that secondary expansion also works. + assert launcher.global_config['testVnetName'] == 'MockExperiment-vm-vnet' + # Check that we can expand a $var in a config file that references an environment variable. + assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) \ + == path_join(os.getcwd(), "foo", abs_path=True) + assert launcher.global_config["varWithEnvVarRef"] == f'user:{getuser()}' + assert launcher.teardown # defaults + # Check that the environment that got loaded looks to be of the right type. + env_config = launcher.config_loader.load_config(ENV_CONF_PATH, ConfigSchema.ENVIRONMENT) + assert env_config["class"] == "mlos_bench.environments.mock_env.MockEnv" + assert check_class_name(launcher.environment, env_config['class']) + # Check that the optimizer looks right. + assert isinstance(launcher.optimizer, OneShotOptimizer) + # Check that the optimizer got initialized with defaults. + assert launcher.optimizer.tunable_params.is_defaults() + assert launcher.optimizer.max_iterations == 1 # value for OneShotOptimizer + # Check that we pick up the right scheduler config: + assert isinstance(launcher.scheduler, SyncScheduler) + assert launcher.scheduler.trial_config_repeat_count == 1 # default + assert launcher.scheduler.max_trials == -1 # default + + +def test_launcher_args_parse_1(config_paths: List[str]) -> None: + """ + Test that using multiple --globals arguments works and that multiple space + separated options to --config-paths works. + Check $var expansion and Environment loading. + """ + # Here we have multiple paths following --config-paths and --service. 
cli_args = '--config-paths ' + ' '.join(config_paths) + \ ' --service services/remote/mock/mock_auth_service.jsonc' + \ - ' --service services/remote/mock/mock_remote_exec_service.jsonc' + \ + ' services/remote/mock/mock_remote_exec_service.jsonc' + \ ' --scheduler schedulers/sync_scheduler.jsonc' + \ - f' --environment {env_conf_path}' + \ + f' --environment {ENV_CONF_PATH}' + \ ' --globals globals/global_test_config.jsonc' + \ ' --globals globals/global_test_extra_config.jsonc' \ ' --test_global_value_2 from-args' - launcher = Launcher(description=__name__, argv=cli_args.split()) - # Check that the parent service - assert isinstance(launcher.service, SupportsAuth) - assert isinstance(launcher.service, SupportsConfigLoading) - assert isinstance(launcher.service, SupportsLocalExec) - assert isinstance(launcher.service, SupportsRemoteExec) + launcher = _get_launcher(__name__, cli_args) + # Check some additional features of the the parent service + assert isinstance(launcher.service, SupportsAuth) # from --service + assert isinstance(launcher.service, SupportsRemoteExec) # from --service # Check that the first --globals file is loaded and $var expansion is handled. assert launcher.global_config['experiment_id'] == 'MockExperiment' assert launcher.global_config['testVmName'] == 'MockExperiment-vm' @@ -99,8 +140,8 @@ def test_launcher_args_parse_1(config_paths: List[str]) -> None: assert launcher.global_config["varWithEnvVarRef"] == f'user:{getuser()}' assert launcher.teardown # Check that the environment that got loaded looks to be of the right type. - env_config = launcher.config_loader.load_config(env_conf_path, ConfigSchema.ENVIRONMENT) - assert check_class_name(launcher.environment, env_config['class']) + env_config = launcher.config_loader.load_config(ENV_CONF_PATH, ConfigSchema.ENVIRONMENT) + assert env_config["class"] == "mlos_bench.environments.mock_env.MockEnv" # Check that the optimizer looks right. assert isinstance(launcher.optimizer, OneShotOptimizer) # Check that the optimizer got initialized with defaults. @@ -108,8 +149,8 @@ def test_launcher_args_parse_1(config_paths: List[str]) -> None: assert launcher.optimizer.max_iterations == 1 # value for OneShotOptimizer # Check that we pick up the right scheduler config: assert isinstance(launcher.scheduler, SyncScheduler) - assert launcher.scheduler._trial_config_repeat_count == 3 # pylint: disable=protected-access - assert launcher.scheduler._max_trials == -1 # pylint: disable=protected-access + assert launcher.scheduler.trial_config_repeat_count == 3 # from the custom sync_scheduler.jsonc config + assert launcher.scheduler.max_trials == -1 def test_launcher_args_parse_2(config_paths: List[str]) -> None: @@ -117,17 +158,9 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: Test multiple --config-path instances, --config file vs --arg, --var=val overrides, $var templates, option args, --random-init, etc. """ - # The VSCode pytest wrapper actually starts in a different directory before - # changing into the code directory, but doesn't update the PWD environment - # variable so we use a separate variable. - # See global_test_config.jsonc for more details. - environ["CUSTOM_PATH_FROM_ENV"] = os.getcwd() - if sys.platform == 'win32': - # Some env tweaks for platform compatibility. - environ['USER'] = environ['USERNAME'] - config_file = 'cli/test-cli-config.jsonc' globals_file = 'globals/global_test_config.jsonc' + # Here we have multiple --config-path and --service args, each with their own path. 
cli_args = ' '.join([f"--config-path {config_path}" for config_path in config_paths]) + \ f' --config {config_file}' + \ ' --service services/remote/mock/mock_auth_service.jsonc' + \ @@ -139,13 +172,11 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: ' --random-seed 1234' + \ ' --trial-config-repeat-count 5' + \ ' --max_trials 200' - launcher = Launcher(description=__name__, argv=cli_args.split()) - # Check that the parent service - assert isinstance(launcher.service, SupportsAuth) - assert isinstance(launcher.service, SupportsConfigLoading) - assert isinstance(launcher.service, SupportsFileShareOps) - assert isinstance(launcher.service, SupportsLocalExec) - assert isinstance(launcher.service, SupportsRemoteExec) + launcher = _get_launcher(__name__, cli_args) + # Check some additional features of the the parent service + assert isinstance(launcher.service, SupportsAuth) # from --service + assert isinstance(launcher.service, SupportsFileShareOps) # from --config + assert isinstance(launcher.service, SupportsRemoteExec) # from --service # Check that the --globals file is loaded and $var expansion is handled # using the value provided on the CLI. assert launcher.global_config['experiment_id'] == 'MockeryExperiment' @@ -189,8 +220,8 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: # Check that CLI parameter overrides JSON config: assert isinstance(launcher.scheduler, SyncScheduler) - assert launcher.scheduler._trial_config_repeat_count == 5 # pylint: disable=protected-access - assert launcher.scheduler._max_trials == 200 # pylint: disable=protected-access + assert launcher.scheduler.trial_config_repeat_count == 5 # from cli args + assert launcher.scheduler.max_trials == 200 # Check that the value from the file is overridden by the CLI arg. assert config['random_seed'] == 42 From a08bf724febc3dc2dc4bb3f122055ffda4180700 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 21 Mar 2024 20:57:50 +0000 Subject: [PATCH 087/121] more tests --- .../tests/config/cli/test-cli-config.jsonc | 2 +- .../tests/launcher_parse_args_test.py | 20 ++++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/config/cli/test-cli-config.jsonc b/mlos_bench/mlos_bench/tests/config/cli/test-cli-config.jsonc index 9ffaa51180..436507ce84 100644 --- a/mlos_bench/mlos_bench/tests/config/cli/test-cli-config.jsonc +++ b/mlos_bench/mlos_bench/tests/config/cli/test-cli-config.jsonc @@ -17,7 +17,7 @@ "services/remote/mock/mock_fileshare_service.jsonc" ], - "trial_config_repeat_count": 1, + "trial_config_repeat_count": 2, "random_seed": 42, "random_init": true diff --git a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py index 53b6c955e4..455dc4ad06 100644 --- a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py @@ -231,5 +231,23 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: # assert launcher.optimizer.seed == 1234 +def test_launcher_args_parse_3(config_paths: List[str]) -> None: + """ + Check that cli file values take precedence over other values. + """ + config_file = 'cli/test-cli-config.jsonc' + globals_file = 'globals/global_test_config.jsonc' + # Here we have multiple --config-path and --service args, each with their own path. 
+ cli_args = ' '.join([f"--config-path {config_path}" for config_path in config_paths]) + \ + f' --config {config_file}' + \ + f' --globals {globals_file}' + launcher = _get_launcher(__name__, cli_args) + + # Check that CLI file parameter overrides JSON config: + assert isinstance(launcher.scheduler, SyncScheduler) + # from test-cli-config.jsonc (should override scheduler config file) + assert launcher.scheduler.trial_config_repeat_count == 2 + + if __name__ == '__main__': - pytest.main([__file__, "-n1"]) + pytest.main([__file__, "-n0"]) From 0a22b783fe1e47bb7ded0709c0a28e7252e61fb0 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 21 Mar 2024 21:00:57 +0000 Subject: [PATCH 088/121] comments --- mlos_bench/mlos_bench/tests/launcher_parse_args_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py index 455dc4ad06..d5a92fc30f 100644 --- a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py @@ -237,7 +237,8 @@ def test_launcher_args_parse_3(config_paths: List[str]) -> None: """ config_file = 'cli/test-cli-config.jsonc' globals_file = 'globals/global_test_config.jsonc' - # Here we have multiple --config-path and --service args, each with their own path. + # Here we don't override values in test-cli-config with cli args but ensure that + # those take precedence over other config files. cli_args = ' '.join([f"--config-path {config_path}" for config_path in config_paths]) + \ f' --config {config_file}' + \ f' --globals {globals_file}' From c09b427e1c774519624ff83d1f33d030922f4283 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 21 Mar 2024 21:14:31 +0000 Subject: [PATCH 089/121] wip --- mlos_bench/mlos_bench/launcher.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 23e256dcbd..81d72b0be0 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -102,8 +102,12 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st args_rest, # Include any item from the cli config file that either isn't in the cli # args at all or whose cli arg is missing. - {key: val for (key, val) in config.items() if key not in args_dict or args_dict[key] is None}, + # {key: val for (key, val) in config.items() if key not in args_dict or args_dict[key] is None}, + {key: val for (key, val) in config.items() if key not in args_dict}, ) + # FIXME: Something's changed: + # pytest -n0 mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py -k azure-redis-bench + raise ValueError(f"global_config: {self.global_config}\nargs_dict: {args_dict}") # experiment_id is generally taken from --globals files, but we also allow overriding it on the CLI. # It's useful to keep it there explicitly mostly for the --help output. 
if args.experiment_id: From bcf05f93253df1a9f65f2f0bcb5038ac2d66b428 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 13 May 2024 21:55:37 +0000 Subject: [PATCH 090/121] fixups --- mlos_bench/mlos_bench/launcher.py | 47 +++++++++++++------ .../mlos_bench/optimizers/base_optimizer.py | 11 +++-- .../mlos_bench/schedulers/base_scheduler.py | 17 ++++++- 3 files changed, 55 insertions(+), 20 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 81d72b0be0..7b2bf50623 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -64,7 +64,7 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st """ parser = argparse.ArgumentParser(description=f"{description} : {long_text}", epilog=epilog) - (args, args_rest) = self._parse_args(parser, argv) + (args, path_args, args_rest) = self._parse_args(parser, argv) # Bootstrap config loader: command line takes priority. config_path = args.config_path or [] @@ -95,19 +95,23 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self._parent_service: Service = LocalExecService(parent=self._config_loader) + # Prepare global_config from a combination of global config files, cli configs, and cli args. args_dict = vars(args) + # teardown (bool) conflicts with Environment configs that use it for shell + # commands (list), so we exclude it from copying over + excluded_cli_args = path_args + ["teardown"] + # Include (almost) any item from the cli config file that either isn't in the cli + # args at all or whose cli arg is missing. + cli_config_args = {key: val for (key, val) in config.items() + if (key not in args_dict or args_dict[key] is None) and key not in excluded_cli_args} + self.global_config = self._load_config( - config.get("globals", []) + (args.globals or []), - (args.config_path or []) + config.get("config_path", []), - args_rest, - # Include any item from the cli config file that either isn't in the cli - # args at all or whose cli arg is missing. - # {key: val for (key, val) in config.items() if key not in args_dict or args_dict[key] is None}, - {key: val for (key, val) in config.items() if key not in args_dict}, + args_globals=config.get("globals", []) + (args.globals or []), + config_path=(args.config_path or []) + config.get("config_path", []), + args_rest=args_rest, + global_config=cli_config_args, ) - # FIXME: Something's changed: - # pytest -n0 mlos_bench/mlos_bench/tests/config/cli/test_load_cli_config_examples.py -k azure-redis-bench - raise ValueError(f"global_config: {self.global_config}\nargs_dict: {args_dict}") + # TODO: Can we generalize these two rules using excluded_cli_args? # experiment_id is generally taken from --globals files, but we also allow overriding it on the CLI. # It's useful to keep it there explicitly mostly for the --help output. if args.experiment_id: @@ -171,10 +175,12 @@ def service(self) -> Service: return self._parent_service @staticmethod - def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> Tuple[argparse.Namespace, List[str]]: + def _parse_args(parser: argparse.ArgumentParser, + argv: Optional[List[str]]) -> Tuple[argparse.Namespace, List[str], List[str]]: """ Parse the command line arguments. """ + path_args = [] parser.add_argument( '--config', required=False, help='Main JSON5 configuration file. 
Its keys are the same as the' + @@ -182,10 +188,12 @@ def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> T '\n' + ' See the `mlos_bench/config/` tree at https://github.com/microsoft/MLOS/ ' + ' for additional config examples for this and other arguments.') + path_args.append('config') parser.add_argument( '--log_file', '--log-file', required=False, help='Path to the log file. Use stdout if omitted.') + path_args.append('log_file') parser.add_argument( '--log_level', '--log-level', required=False, type=str, @@ -196,20 +204,26 @@ def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> T '--config_path', '--config-path', '--config-paths', '--config_paths', nargs="+", action='extend', required=False, help='One or more locations of JSON config files.') + path_args.append('config_path') + path_args.append('config_paths') parser.add_argument( '--service', '--services', nargs='+', action='extend', required=False, help='Path to JSON file with the configuration of the service(s) for environment(s) to use.') + path_args.append('service') + path_args.append('services') parser.add_argument( '--environment', required=False, help='Path to JSON file with the configuration of the benchmarking environment(s).') + path_args.append('environment') parser.add_argument( '--optimizer', required=False, help='Path to the optimizer configuration file. If omitted, run' + ' a single trial with default (or specified in --tunable_values).') + path_args.append('optimizer') parser.add_argument( '--trial_config_repeat_count', '--trial-config-repeat-count', required=False, type=int, @@ -219,11 +233,13 @@ def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> T '--scheduler', required=False, help='Path to the scheduler configuration file. By default, use' + ' a single worker synchronous scheduler.') + path_args.append('scheduler') parser.add_argument( '--storage', required=False, help='Path to the storage configuration file.' + ' If omitted, use the ephemeral in-memory SQL storage.') + path_args.append('storage') parser.add_argument( '--random_init', '--random-init', required=False, default=False, @@ -239,11 +255,13 @@ def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> T help='Path to one or more JSON files that contain values of the tunable' + ' parameters. 
This can be used for a single trial (when no --optimizer' + ' is specified) or as default values for the first run in optimization.') + path_args.append('tunable_values') parser.add_argument( '--globals', nargs="+", action='extend', required=False, help='Path to one or more JSON files that contain additional' + ' [private] parameters of the benchmarking environment.') + path_args.append('globals') parser.add_argument( '--no_teardown', '--no-teardown', required=False, default=None, @@ -270,7 +288,7 @@ def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> T argv = sys.argv[1:].copy() (args, args_rest) = parser.parse_known_args(argv) - return (args, args_rest) + return (args, path_args, args_rest) @staticmethod def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]: @@ -303,7 +321,7 @@ def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]: _LOG.debug("Parsed config: %s", config) return config - def _load_config(self, + def _load_config(self, *, args_globals: Iterable[str], config_path: Iterable[str], args_rest: Iterable[str], @@ -404,7 +422,6 @@ def _load_scheduler(self, args_scheduler: Optional[str]) -> Scheduler: config={ "experiment_id": "UNDEFINED - override from global config", "trial_id": 0, - "config_id": -1, "trial_config_repeat_count": 1, "teardown": self.teardown, }, diff --git a/mlos_bench/mlos_bench/optimizers/base_optimizer.py b/mlos_bench/mlos_bench/optimizers/base_optimizer.py index 51cbf9694f..89ee6c9fd1 100644 --- a/mlos_bench/mlos_bench/optimizers/base_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/base_optimizer.py @@ -135,20 +135,23 @@ def __exit__(self, ex_type: Optional[Type[BaseException]], @property def current_iteration(self) -> int: """ - The current number of iterations (trials) registered. + The current number of iterations (suggestions) registered. Note: this may or may not be the same as the number of configurations. - See Also: Launcher.trial_config_repeat_count. + See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials. """ return self._iter + # TODO: finish renaming iterations to suggestions. + # See Also: https://github.com/microsoft/MLOS/pull/713 + @property def max_iterations(self) -> int: """ - The maximum number of iterations (trials) to run. + The maximum number of iterations (suggestions) to run. Note: this may or may not be the same as the number of configurations. - See Also: Launcher.trial_config_repeat_count. + See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials. 
""" return self._max_iter diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 7d510c1e20..1c974da957 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -17,6 +17,7 @@ from pytz import UTC +from mlos_bench.config.schemas import ConfigSchema from mlos_bench.environments.base_environment import Environment from mlos_bench.optimizers.base_optimizer import Optimizer from mlos_bench.storage.base_storage import Storage @@ -63,6 +64,7 @@ def __init__(self, *, self.global_config = global_config config = merge_parameters(dest=config.copy(), source=global_config, required_keys=["experiment_id", "trial_id"]) + self._validate_json_config(config) self._experiment_id = config["experiment_id"].strip() self._trial_id = int(config["trial_id"]) @@ -85,6 +87,19 @@ def __init__(self, *, _LOG.debug("Scheduler instantiated: %s :: %s", self, config) + def _validate_json_config(self, config: dict) -> None: + """ + Reconstructs a basic json config that this class might have been + instantiated from in order to validate configs provided outside the + file loading mechanism. + """ + json_config: dict = { + "class": self.__class__.__module__ + "." + self.__class__.__name__, + } + if config: + json_config["config"] = config + ConfigSchema.SCHEDULER.validate(json_config) + @property def trial_config_repeat_count(self) -> int: """Gets the number of trials to run for a given config.""" @@ -92,7 +107,7 @@ def trial_config_repeat_count(self) -> int: @property def max_trials(self) -> int: - """Gets the maximum number of trials to run for a given config, or -1 for no limit.""" + """Gets the maximum number of trials to run for a given experiment, or -1 for no limit.""" return self._max_trials def __repr__(self) -> str: From 021db593ef85696291c5f7f43ad1c6cca210cc96 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 15 Jul 2024 20:45:10 +0000 Subject: [PATCH 091/121] cherry picking some files from main --- Makefile | 121 +++++++++++++++++++++++++++++++++++++------- conda-envs/mlos.yml | 2 +- pyproject.toml | 11 +++- 3 files changed, 114 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index 128b3dc849..145b641607 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ conda-env: build/conda-env.${CONDA_ENV_NAME}.build-stamp MLOS_CORE_CONF_FILES := mlos_core/pyproject.toml mlos_core/setup.py mlos_core/MANIFEST.in MLOS_BENCH_CONF_FILES := mlos_bench/pyproject.toml mlos_bench/setup.py mlos_bench/MANIFEST.in MLOS_VIZ_CONF_FILES := mlos_viz/pyproject.toml mlos_viz/setup.py mlos_viz/MANIFEST.in -MLOS_GLOBAL_CONF_FILES := setup.cfg # pyproject.toml +MLOS_GLOBAL_CONF_FILES := setup.cfg pyproject.toml MLOS_PKGS := mlos_core mlos_bench mlos_viz MLOS_PKG_CONF_FILES := $(MLOS_CORE_CONF_FILES) $(MLOS_BENCH_CONF_FILES) $(MLOS_VIZ_CONF_FILES) $(MLOS_GLOBAL_CONF_FILES) @@ -69,9 +69,9 @@ ifneq (,$(filter format,$(MAKECMDGOALS))) endif build/format.${CONDA_ENV_NAME}.build-stamp: build/licenseheaders.${CONDA_ENV_NAME}.build-stamp -# TODO: enable isort and black formatters -#build/format.${CONDA_ENV_NAME}.build-stamp: build/isort.${CONDA_ENV_NAME}.build-stamp -#build/format.${CONDA_ENV_NAME}.build-stamp: build/black.${CONDA_ENV_NAME}.build-stamp +build/format.${CONDA_ENV_NAME}.build-stamp: build/isort.${CONDA_ENV_NAME}.build-stamp +build/format.${CONDA_ENV_NAME}.build-stamp: build/black.${CONDA_ENV_NAME}.build-stamp +build/format.${CONDA_ENV_NAME}.build-stamp: 
build/docformatter.${CONDA_ENV_NAME}.build-stamp build/format.${CONDA_ENV_NAME}.build-stamp: touch $@ @@ -111,8 +111,8 @@ build/isort.${CONDA_ENV_NAME}.build-stamp: # NOTE: when using pattern rules (involving %) we can only add one line of # prerequisities, so we use this pattern to compose the list as variables. -# Both isort and licenseheaders alter files, so only run one at a time, by -# making licenseheaders an order-only prerequisite. +# black, licenseheaders, isort, and docformatter all alter files, so only run +# one at a time, by adding prerequisites, but only as necessary. ISORT_COMMON_PREREQS := ifneq (,$(filter format licenseheaders,$(MAKECMDGOALS))) ISORT_COMMON_PREREQS += build/licenseheaders.${CONDA_ENV_NAME}.build-stamp @@ -126,7 +126,7 @@ build/isort.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) build/isort.%.${CONDA_ENV_NAME}.build-stamp: $(ISORT_COMMON_PREREQS) # Reformat python file imports with isort. - conda run -n ${CONDA_ENV_NAME} isort --verbose --only-modified --atomic -j0 $(filter %.py,$?) + conda run -n ${CONDA_ENV_NAME} isort --verbose --only-modified --atomic -j0 $(filter %.py,$+) touch $@ .PHONY: black @@ -142,8 +142,8 @@ build/black.${CONDA_ENV_NAME}.build-stamp: build/black.mlos_viz.${CONDA_ENV_NAME build/black.${CONDA_ENV_NAME}.build-stamp: touch $@ -# Both black, licenseheaders, and isort all alter files, so only run one at a time, by -# making licenseheaders and isort an order-only prerequisite. +# black, licenseheaders, isort, and docformatter all alter files, so only run +# one at a time, by adding prerequisites, but only as necessary. BLACK_COMMON_PREREQS := ifneq (,$(filter format licenseheaders,$(MAKECMDGOALS))) BLACK_COMMON_PREREQS += build/licenseheaders.${CONDA_ENV_NAME}.build-stamp @@ -160,13 +160,52 @@ build/black.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) build/black.%.${CONDA_ENV_NAME}.build-stamp: $(BLACK_COMMON_PREREQS) # Reformat python files with black. - conda run -n ${CONDA_ENV_NAME} black $(filter %.py,$?) + conda run -n ${CONDA_ENV_NAME} black $(filter %.py,$+) touch $@ +.PHONY: docformatter +docformatter: build/docformatter.${CONDA_ENV_NAME}.build-stamp + +ifneq (,$(filter docformatter,$(MAKECMDGOALS))) + FORMAT_PREREQS += build/docformatter.${CONDA_ENV_NAME}.build-stamp +endif + +build/docformatter.${CONDA_ENV_NAME}.build-stamp: build/docformatter.mlos_core.${CONDA_ENV_NAME}.build-stamp +build/docformatter.${CONDA_ENV_NAME}.build-stamp: build/docformatter.mlos_bench.${CONDA_ENV_NAME}.build-stamp +build/docformatter.${CONDA_ENV_NAME}.build-stamp: build/docformatter.mlos_viz.${CONDA_ENV_NAME}.build-stamp +build/docformatter.${CONDA_ENV_NAME}.build-stamp: + touch $@ + +# black, licenseheaders, isort, and docformatter all alter files, so only run +# one at a time, by adding prerequisites, but only as necessary. 
+DOCFORMATTER_COMMON_PREREQS := +ifneq (,$(filter format licenseheaders,$(MAKECMDGOALS))) +DOCFORMATTER_COMMON_PREREQS += build/licenseheaders.${CONDA_ENV_NAME}.build-stamp +endif +ifneq (,$(filter format isort,$(MAKECMDGOALS))) +DOCFORMATTER_COMMON_PREREQS += build/isort.${CONDA_ENV_NAME}.build-stamp +endif +ifneq (,$(filter format black,$(MAKECMDGOALS))) +DOCFORMATTER_COMMON_PREREQS += build/black.${CONDA_ENV_NAME}.build-stamp +endif +DOCFORMATTER_COMMON_PREREQS += build/conda-env.${CONDA_ENV_NAME}.build-stamp +DOCFORMATTER_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) + +build/docformatter.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) +build/docformatter.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) +build/docformatter.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) + +# docformatter returns non-zero when it changes anything so instead we ignore that +# return code and just have it recheck itself immediately +build/docformatter.%.${CONDA_ENV_NAME}.build-stamp: $(DOCFORMATTER_COMMON_PREREQS) + # Reformat python file docstrings with docformatter. + conda run -n ${CONDA_ENV_NAME} docformatter --in-place $(filter %.py,$+) || true + conda run -n ${CONDA_ENV_NAME} docformatter --check --diff $(filter %.py,$+) + touch $@ + + .PHONY: check -check: pycodestyle pydocstyle pylint mypy # cspell markdown-link-check -# TODO: Enable isort and black checks -#check: isort-check black-check pycodestyle pydocstyle pylint mypy # cspell markdown-link-check +check: isort-check black-check docformatter-check pycodestyle pydocstyle pylint mypy # cspell markdown-link-check .PHONY: black-check black-check: build/black-check.mlos_core.${CONDA_ENV_NAME}.build-stamp @@ -185,7 +224,27 @@ BLACK_CHECK_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) build/black-check.%.${CONDA_ENV_NAME}.build-stamp: $(BLACK_CHECK_COMMON_PREREQS) # Check for import sort order. # Note: if this fails use "make format" or "make black" to fix it. - conda run -n ${CONDA_ENV_NAME} black --verbose --check --diff $(filter %.py,$?) + conda run -n ${CONDA_ENV_NAME} black --verbose --check --diff $(filter %.py,$+) + touch $@ + +.PHONY: docformatter-check +docformatter-check: build/docformatter-check.mlos_core.${CONDA_ENV_NAME}.build-stamp +docformatter-check: build/docformatter-check.mlos_bench.${CONDA_ENV_NAME}.build-stamp +docformatter-check: build/docformatter-check.mlos_viz.${CONDA_ENV_NAME}.build-stamp + +# Make sure docformatter format rules run before docformatter-check rules. +build/docformatter-check.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) +build/docformatter-check.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) +build/docformatter-check.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) + +BLACK_CHECK_COMMON_PREREQS := build/conda-env.${CONDA_ENV_NAME}.build-stamp +BLACK_CHECK_COMMON_PREREQS += $(FORMAT_PREREQS) +BLACK_CHECK_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) + +build/docformatter-check.%.${CONDA_ENV_NAME}.build-stamp: $(BLACK_CHECK_COMMON_PREREQS) + # Check for import sort order. + # Note: if this fails use "make format" or "make docformatter" to fix it. + conda run -n ${CONDA_ENV_NAME} docformatter --check --diff $(filter %.py,$+) touch $@ .PHONY: isort-check @@ -204,7 +263,7 @@ ISORT_CHECK_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) build/isort-check.%.${CONDA_ENV_NAME}.build-stamp: $(ISORT_CHECK_COMMON_PREREQS) # Note: if this fails use "make format" or "make isort" to fix it. 
- conda run -n ${CONDA_ENV_NAME} isort --only-modified --check --diff -j0 $(filter %.py,$?) + conda run -n ${CONDA_ENV_NAME} isort --only-modified --check --diff -j0 $(filter %.py,$+) touch $@ .PHONY: pycodestyle @@ -444,7 +503,8 @@ mlos_viz/dist/tmp/mlos_viz-latest.tar.gz: PACKAGE_NAME := mlos_viz cd $(MODULE_NAME)/ && conda run -n ${CONDA_ENV_NAME} python3 -m build --sdist # Do some sanity checks on the sdist tarball output. BASE_VERS=`conda run -n ${CONDA_ENV_NAME} python3 $(MODULE_NAME)/$(MODULE_NAME)/version.py | cut -d. -f-2 | egrep -x '[0-9.]+' || echo err-unknown-base-version` \ - && ls $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -F $$BASE_VERS + && TAG_VERS=`git tag -l --sort=-version:refname | egrep -x '^v[0-9.]+' | head -n1 | sed 's/^v//' | cut -d. -f-2 | egrep -x '[0-9.]+' || echo err-unknown-tag-version` \ + && ls $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -F -e $$BASE_VERS -e $$TAG_VERS # Make sure tests were excluded. ! ( tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 tests/ ) # Make sure the py.typed marker file exists. @@ -462,7 +522,8 @@ mlos_viz/dist/tmp/mlos_viz-latest.tar.gz: PACKAGE_NAME := mlos_viz cd $(MODULE_NAME)/ && conda run -n ${CONDA_ENV_NAME} python3 -m build --wheel # Do some sanity checks on the wheel output. BASE_VERS=`conda run -n ${CONDA_ENV_NAME} python3 $(MODULE_NAME)/$(MODULE_NAME)/version.py | cut -d. -f-2 | egrep -o '^[0-9.]+' || echo err-unknown-base-version` \ - && ls $(MODULE_NAME)/dist/$(MODULE_NAME)-*-py3-none-any.whl | grep -F $$BASE_VERS + && TAG_VERS=`git tag -l --sort=-version:refname | egrep -x '^v[0-9.]+' | head -n1 | sed 's/^v//' | cut -d. -f-2 | egrep -x '[0-9.]+' || echo err-unknown-tag-version` \ + && ls $(MODULE_NAME)/dist/$(MODULE_NAME)-*-py3-none-any.whl | grep -F -e $$BASE_VERS -e $$TAG_VERS # Check to make sure the tests were excluded from the wheel. ! ( unzip -t $(MODULE_NAME)/dist/$(MODULE_NAME)-*-py3-none-any.whl | grep -m1 tests/ ) # Make sure the py.typed marker file exists. @@ -547,15 +608,27 @@ PUBLISH_DEPS += build/pytest.${CONDA_ENV_NAME}.build-stamp PUBLISH_DEPS += mlos_core/dist/tmp/mlos_core-latest.tar.gz PUBLISH_DEPS += mlos_bench/dist/tmp/mlos_bench-latest.tar.gz PUBLISH_DEPS += mlos_viz/dist/tmp/mlos_viz-latest.tar.gz +PUBLISH_DEPS += mlos_core/dist/tmp/mlos_core-latest-py3-none-any.whl +PUBLISH_DEPS += mlos_bench/dist/tmp/mlos_bench-latest-py3-none-any.whl +PUBLISH_DEPS += mlos_viz/dist/tmp/mlos_viz-latest-py3-none-any.whl PUBLISH_DEPS += build/dist-test.$(PYTHON_VERSION).build-stamp PUBLISH_DEPS += build/check-doc.build-stamp PUBLISH_DEPS += build/linklint-doc.build-stamp build/publish.${CONDA_ENV_NAME}.%.py.build-stamp: $(PUBLISH_DEPS) + # Basic sanity checks on files about to be published. + # Run "make clean-dist && make dist" if these fail. + # Check the tar count. test `ls -1 mlos_core/dist/*.tar.gz | wc -l` -eq 1 test `ls -1 mlos_bench/dist/*.tar.gz | wc -l` -eq 1 test `ls -1 mlos_viz/dist/*.tar.gz | wc -l` -eq 1 test `ls -1 mlos_*/dist/*.tar.gz | wc -l` -eq 3 + # Check the whl count. + test `ls -1 mlos_core/dist/*.whl | wc -l` -eq 1 + test `ls -1 mlos_bench/dist/*.whl | wc -l` -eq 1 + test `ls -1 mlos_viz/dist/*.whl | wc -l` -eq 1 + test `ls -1 mlos_*/dist/*.whl | wc -l` -eq 3 + # Publish the files to the specified repository. 
repo_name=`echo "$@" | sed -r -e 's|build/publish\.[^.]+\.||' -e 's|\.py\.build-stamp||'` \ && conda run -n ${CONDA_ENV_NAME} python3 -m twine upload --repository $$repo_name \ mlos_*/dist/mlos*-*.tar.gz mlos_*/dist/mlos*-*.whl @@ -723,7 +796,12 @@ clean-doc: .PHONY: clean-format clean-format: - # TODO: add black and isort rules + rm -f build/black.${CONDA_ENV_NAME}.build-stamp + rm -f build/black.mlos_*.${CONDA_ENV_NAME}.build-stamp + rm -f build/docformatter.${CONDA_ENV_NAME}.build-stamp + rm -f build/docformatter.mlos_*.${CONDA_ENV_NAME}.build-stamp + rm -f build/isort.${CONDA_ENV_NAME}.build-stamp + rm -f build/isort.mlos_*.${CONDA_ENV_NAME}.build-stamp rm -f build/licenseheaders.${CONDA_ENV_NAME}.build-stamp rm -f build/licenseheaders-prereqs.${CONDA_ENV_NAME}.build-stamp @@ -733,6 +811,13 @@ clean-check: rm -f build/pylint.${CONDA_ENV_NAME}.build-stamp rm -f build/pylint.mlos_*.${CONDA_ENV_NAME}.build-stamp rm -f build/mypy.mlos_*.${CONDA_ENV_NAME}.build-stamp + rm -f build/black-check.build-stamp + rm -f build/black-check.${CONDA_ENV_NAME}.build-stamp + rm -f build/black-check.mlos_*.${CONDA_ENV_NAME}.build-stamp + rm -f build/docformatter-check.${CONDA_ENV_NAME}.build-stamp + rm -f build/docformatter-check.mlos_*.${CONDA_ENV_NAME}.build-stamp + rm -f build/isort-check.${CONDA_ENV_NAME}.build-stamp + rm -f build/isort-check.mlos_*.${CONDA_ENV_NAME}.build-stamp rm -f build/pycodestyle.build-stamp rm -f build/pycodestyle.${CONDA_ENV_NAME}.build-stamp rm -f build/pycodestyle.mlos_*.${CONDA_ENV_NAME}.build-stamp diff --git a/conda-envs/mlos.yml b/conda-envs/mlos.yml index a257197761..5cd35fdbba 100644 --- a/conda-envs/mlos.yml +++ b/conda-envs/mlos.yml @@ -24,10 +24,10 @@ dependencies: # FIXME: https://github.com/microsoft/MLOS/issues/727 - python<3.12 - pip: - - autopep8>=1.7.0 - bump2version - check-jsonschema - isort + - docformatter - licenseheaders - mypy - pandas-stubs diff --git a/pyproject.toml b/pyproject.toml index 65f1e5a02c..f70030a576 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [tool.black] -line-length = 88 +line-length = 99 target-version = ["py38", "py39", "py310", "py311", "py312"] include = '\.pyi?$' @@ -7,3 +7,12 @@ include = '\.pyi?$' profile = "black" py_version = 311 src_paths = ["mlos_core", "mlos_bench", "mlos_viz"] + +[tool.docformatter] +recursive = true +black = true +style = "numpy" +pre-summary-newline = true +close-quotes-on-newline = true + +# TODO: move pylintrc and some setup.cfg configs here From 1b96ca2323fb01499a7cd65f37867ee5c76fb512 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 15 Jul 2024 20:53:44 +0000 Subject: [PATCH 092/121] selected reformats --- .../environments/base_environment.py | 167 ++++---- mlos_bench/mlos_bench/launcher.py | 357 +++++++++++------- .../mlos_bench/schedulers/base_scheduler.py | 177 +++++---- .../mlos_bench/schedulers/sync_scheduler.py | 16 +- .../mlos_bench/schedulers/trial_runner.py | 53 ++- .../mlos_bench/services/config_persistence.py | 351 ++++++++++------- mlos_bench/mlos_bench/storage/base_storage.py | 185 +++++---- .../mlos_bench/storage/base_trial_data.py | 71 ++-- mlos_bench/mlos_bench/storage/sql/common.py | 277 ++++++++------ mlos_bench/mlos_bench/storage/sql/schema.py | 87 +++-- .../mlos_bench/storage/sql/trial_data.py | 108 +++--- .../tests/launcher_parse_args_test.py | 211 ++++++----- 12 files changed, 1181 insertions(+), 879 deletions(-) diff --git a/mlos_bench/mlos_bench/environments/base_environment.py b/mlos_bench/mlos_bench/environments/base_environment.py index 
d7281f725e..1b37d2b6c6 100644 --- a/mlos_bench/mlos_bench/environments/base_environment.py +++ b/mlos_bench/mlos_bench/environments/base_environment.py @@ -2,19 +2,28 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -""" -A hierarchy of benchmark environments. -""" +"""A hierarchy of benchmark environments.""" import abc import json import logging from datetime import datetime from types import TracebackType -from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Type, TYPE_CHECKING, Union -from typing_extensions import Literal +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Iterable, + List, + Optional, + Sequence, + Tuple, + Type, + Union, +) from pytz import UTC +from typing_extensions import Literal from mlos_bench.config.schemas import ConfigSchema from mlos_bench.dict_templater import DictTemplater @@ -32,9 +41,7 @@ class Environment(metaclass=abc.ABCMeta): # pylint: disable=too-many-instance-attributes - """ - An abstract base of all benchmark environments. - """ + """An abstract base of all benchmark environments.""" # Should be provided by the runtime. _COMMON_CONST_ARGS = { "trial_runner_id", @@ -45,15 +52,16 @@ class Environment(metaclass=abc.ABCMeta): } @classmethod - def new(cls, - *, - env_name: str, - class_name: str, - config: dict, - global_config: Optional[dict] = None, - tunables: Optional[TunableGroups] = None, - service: Optional[Service] = None, - ) -> "Environment": + def new( + cls, + *, + env_name: str, + class_name: str, + config: dict, + global_config: Optional[dict] = None, + tunables: Optional[TunableGroups] = None, + service: Optional[Service] = None, + ) -> "Environment": """ Factory method for a new environment with a given config. @@ -91,16 +99,18 @@ def new(cls, config=config, global_config=global_config, tunables=tunables, - service=service + service=service, ) - def __init__(self, - *, - name: str, - config: dict, - global_config: Optional[dict] = None, - tunables: Optional[TunableGroups] = None, - service: Optional[Service] = None): + def __init__( + self, + *, + name: str, + config: dict, + global_config: Optional[dict] = None, + tunables: Optional[TunableGroups] = None, + service: Optional[Service] = None, + ): """ Create a new environment with a given config. @@ -137,24 +147,29 @@ def __init__(self, self._const_args: Dict[str, TunableValue] = config.get("const_args", {}) if _LOG.isEnabledFor(logging.DEBUG): - _LOG.debug("Environment: '%s' Service: %s", name, - self._service.pprint() if self._service else None) + _LOG.debug( + "Environment: '%s' Service: %s", + name, + self._service.pprint() if self._service else None, + ) if tunables is None: - _LOG.warning("No tunables provided for %s. Tunable inheritance across composite environments may be broken.", name) + _LOG.warning( + "No tunables provided for %s. 
Tunable inheritance across composite environments may be broken.", + name, + ) tunables = TunableGroups() groups = self._expand_groups( - config.get("tunable_params", []), - global_config.get("tunable_params_map", {})) + config.get("tunable_params", []), global_config.get("tunable_params_map", {}) + ) _LOG.debug("Tunable groups for: '%s' :: %s", name, groups) self._tunable_params = tunables.subgroup(groups) # If a parameter comes from the tunables, do not require it in the const_args or globals - req_args = ( - set(config.get("required_args", [])) - - set(self._tunable_params.get_param_values().keys()) + req_args = set(config.get("required_args", [])) - set( + self._tunable_params.get_param_values().keys() ) req_args.update(self._COMMON_CONST_ARGS) merge_parameters(dest=self._const_args, source=global_config, required_keys=req_args) @@ -164,14 +179,12 @@ def __init__(self, _LOG.debug("Parameters for '%s' :: %s", name, self._params) if _LOG.isEnabledFor(logging.DEBUG): - _LOG.debug("Config for: '%s'\n%s", - name, json.dumps(self.config, indent=2)) + _LOG.debug("Config for: '%s'\n%s", name, json.dumps(self.config, indent=2)) def _validate_json_config(self, config: dict, name: str) -> None: - """ - Reconstructs a basic json config that this class might have been - instantiated from in order to validate configs provided outside the - file loading mechanism. + """Reconstructs a basic json config that this class might have been instantiated + from in order to validate configs provided outside the file loading + mechanism. """ json_config: dict = { "class": self.__class__.__module__ + "." + self.__class__.__name__, @@ -183,8 +196,9 @@ def _validate_json_config(self, config: dict, name: str) -> None: ConfigSchema.ENVIRONMENT.validate(json_config) @staticmethod - def _expand_groups(groups: Iterable[str], - groups_exp: Dict[str, Union[str, Sequence[str]]]) -> List[str]: + def _expand_groups( + groups: Iterable[str], groups_exp: Dict[str, Union[str, Sequence[str]]] + ) -> List[str]: """ Expand `$tunable_group` into actual names of the tunable groups. @@ -206,7 +220,9 @@ def _expand_groups(groups: Iterable[str], if grp[:1] == "$": tunable_group_name = grp[1:] if tunable_group_name not in groups_exp: - raise KeyError(f"Expected tunable group name ${tunable_group_name} undefined in {groups_exp}") + raise KeyError( + f"Expected tunable group name ${tunable_group_name} undefined in {groups_exp}" + ) add_groups = groups_exp[tunable_group_name] res += [add_groups] if isinstance(add_groups, str) else add_groups else: @@ -214,10 +230,10 @@ def _expand_groups(groups: Iterable[str], return res @staticmethod - def _expand_vars(params: Dict[str, TunableValue], global_config: Dict[str, TunableValue]) -> dict: - """ - Expand `$var` into actual values of the variables. - """ + def _expand_vars( + params: Dict[str, TunableValue], global_config: Dict[str, TunableValue] + ) -> dict: + """Expand `$var` into actual values of the variables.""" return DictTemplater(params).expand_vars(extra_source_dict=global_config) @property @@ -225,10 +241,8 @@ def _config_loader_service(self) -> "SupportsConfigLoading": assert self._service is not None return self._service.config_loader_service - def __enter__(self) -> 'Environment': - """ - Enter the environment's benchmarking context. 
- """ + def __enter__(self) -> "Environment": + """Enter the environment's benchmarking context.""" _LOG.debug("Environment START :: %s", self) assert not self._in_context if self._service: @@ -236,12 +250,13 @@ def __enter__(self) -> 'Environment': self._in_context = True return self - def __exit__(self, ex_type: Optional[Type[BaseException]], - ex_val: Optional[BaseException], - ex_tb: Optional[TracebackType]) -> Literal[False]: - """ - Exit the context of the benchmarking environment. - """ + def __exit__( + self, + ex_type: Optional[Type[BaseException]], + ex_val: Optional[BaseException], + ex_tb: Optional[TracebackType], + ) -> Literal[False]: + """Exit the context of the benchmarking environment.""" ex_throw = None if ex_val is None: _LOG.debug("Environment END :: %s", self) @@ -271,8 +286,8 @@ def __repr__(self) -> str: def pprint(self, indent: int = 4, level: int = 0) -> str: """ - Pretty-print the environment configuration. - For composite environments, print all children environments as well. + Pretty-print the environment configuration. For composite environments, print + all children environments as well. Parameters ---------- @@ -292,8 +307,8 @@ def pprint(self, indent: int = 4, level: int = 0) -> str: def _combine_tunables(self, tunables: TunableGroups) -> Dict[str, TunableValue]: """ Plug tunable values into the base config. If the tunable group is unknown, - ignore it (it might belong to another environment). This method should - never mutate the original config or the tunables. + ignore it (it might belong to another environment). This method should never + mutate the original config or the tunables. Parameters ---------- @@ -308,7 +323,8 @@ def _combine_tunables(self, tunables: TunableGroups) -> Dict[str, TunableValue]: """ return tunables.get_param_values( group_names=list(self._tunable_params.get_covariant_group_names()), - into_params=self._const_args.copy()) + into_params=self._const_args.copy(), + ) @property def tunable_params(self) -> TunableGroups: @@ -337,8 +353,9 @@ def const_args(self) -> Dict[str, TunableValue]: @property def parameters(self) -> Dict[str, TunableValue]: """ - Key/value pairs of all environment parameters (i.e., `const_args` and `tunable_params`). - Note that before `.setup()` is called, all tunables will be set to None. + Key/value pairs of all environment parameters (i.e., `const_args` and + `tunable_params`). Note that before `.setup()` is called, all tunables will be + set to None. Returns ------- @@ -350,8 +367,8 @@ def parameters(self) -> Dict[str, TunableValue]: def setup(self, tunables: TunableGroups, global_config: Optional[dict] = None) -> bool: """ Set up a new benchmark environment, if necessary. This method must be - idempotent, i.e., calling it several times in a row should be - equivalent to a single call. + idempotent, i.e., calling it several times in a row should be equivalent to a + single call. Parameters ---------- @@ -380,10 +397,15 @@ def setup(self, tunables: TunableGroups, global_config: Optional[dict] = None) - # (Derived classes still have to check `self._tunable_params.is_updated()`). 
is_updated = self._tunable_params.is_updated() if _LOG.isEnabledFor(logging.DEBUG): - _LOG.debug("Env '%s': Tunable groups reset = %s :: %s", self, is_updated, { - name: self._tunable_params.is_updated([name]) - for name in self._tunable_params.get_covariant_group_names() - }) + _LOG.debug( + "Env '%s': Tunable groups reset = %s :: %s", + self, + is_updated, + { + name: self._tunable_params.is_updated([name]) + for name in self._tunable_params.get_covariant_group_names() + }, + ) else: _LOG.info("Env '%s': Tunable groups reset = %s", self, is_updated) @@ -398,9 +420,10 @@ def setup(self, tunables: TunableGroups, global_config: Optional[dict] = None) - def teardown(self) -> None: """ - Tear down the benchmark environment. This method must be idempotent, - i.e., calling it several times in a row should be equivalent to a - single call. + Tear down the benchmark environment. + + This method must be idempotent, i.e., calling it several times in a row should + be equivalent to a single call. """ _LOG.info("Teardown %s", self) # Make sure we create a context before invoking setup/run/status/teardown diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index a86d630445..5f8fd975ab 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -3,8 +3,8 @@ # Licensed under the MIT License. # """ -A helper class to load the configuration files, parse the command line parameters, -and instantiate the main components of mlos_bench system. +A helper class to load the configuration files, parse the command line parameters, and +instantiate the main components of mlos_bench system. It is used in `mlos_bench.run` module to run the benchmark/optimizer from the command line. @@ -13,35 +13,27 @@ import argparse import logging import sys - from typing import Any, Dict, Iterable, List, Optional, Tuple from mlos_bench.config.schemas import ConfigSchema from mlos_bench.dict_templater import DictTemplater -from mlos_bench.util import try_parse_val - -from mlos_bench.tunables.tunable import TunableValue -from mlos_bench.tunables.tunable_groups import TunableGroups from mlos_bench.environments.base_environment import Environment -from mlos_bench.schedulers.trial_runner import TrialRunner - from mlos_bench.optimizers.base_optimizer import Optimizer from mlos_bench.optimizers.mock_optimizer import MockOptimizer from mlos_bench.optimizers.one_shot_optimizer import OneShotOptimizer - -from mlos_bench.storage.base_storage import Storage - +from mlos_bench.schedulers.base_scheduler import Scheduler +from mlos_bench.schedulers.trial_runner import TrialRunner from mlos_bench.services.base_service import Service -from mlos_bench.services.local.local_exec import LocalExecService from mlos_bench.services.config_persistence import ConfigPersistenceService - -from mlos_bench.schedulers.base_scheduler import Scheduler - +from mlos_bench.services.local.local_exec import LocalExecService from mlos_bench.services.types.config_loader_type import SupportsConfigLoading - +from mlos_bench.storage.base_storage import Storage +from mlos_bench.tunables.tunable import TunableValue +from mlos_bench.tunables.tunable_groups import TunableGroups +from mlos_bench.util import try_parse_val _LOG_LEVEL = logging.INFO -_LOG_FORMAT = '%(asctime)s %(filename)s:%(lineno)d %(funcName)s %(levelname)s %(message)s' +_LOG_FORMAT = "%(asctime)s %(filename)s:%(lineno)d %(funcName)s %(levelname)s %(message)s" logging.basicConfig(level=_LOG_LEVEL, format=_LOG_FORMAT) _LOG = logging.getLogger(__name__) 
@@ -49,9 +41,7 @@ class Launcher: # pylint: disable=too-few-public-methods,too-many-instance-attributes - """ - Command line launcher for mlos_bench and mlos_core. - """ + """Command line launcher for mlos_bench and mlos_core.""" def __init__(self, description: str, long_text: str = "", argv: Optional[List[str]] = None): # pylint: disable=too-many-statements @@ -65,8 +55,7 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st For additional details, please see the website or the README.md files in the source tree: """ - parser = argparse.ArgumentParser(description=f"{description} : {long_text}", - epilog=epilog) + parser = argparse.ArgumentParser(description=f"{description} : {long_text}", epilog=epilog) (args, args_rest) = self._parse_args(parser, argv) # Bootstrap config loader: command line takes priority. @@ -104,12 +93,16 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st (args.config_path or []) + config.get("config_path", []), args_rest, # Prime the global config with the command line args and the config file. - {key: val for (key, val) in config.items() if key not in args_dict or args_dict[key] is None}, + { + key: val + for (key, val) in config.items() + if key not in args_dict or args_dict[key] is None + }, ) # experiment_id is generally taken from --globals files, but we also allow overriding it on the CLI. # It's useful to keep it there explicitly mostly for the --help output. if args.experiment_id: - self.global_config['experiment_id'] = args.experiment_id + self.global_config["experiment_id"] = args.experiment_id # trial_config_repeat_count is a scheduler property but it's convenient to set it via command line if args.trial_config_repeat_count: self.global_config["trial_config_repeat_count"] = args.trial_config_repeat_count @@ -117,10 +110,12 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st if args.num_trial_runners: self.global_config["num_trial_runners"] = args.num_trial_runners if self.global_config["num_trial_runners"] <= 0: - raise ValueError(f"Invalid num_trial_runners: {self.global_config['num_trial_runners']}") + raise ValueError( + f"Invalid num_trial_runners: {self.global_config['num_trial_runners']}" + ) # Ensure that the trial_id is present since it gets used by some other # configs but is typically controlled by the run optimize loop. - self.global_config.setdefault('trial_id', 1) + self.global_config.setdefault("trial_id", 1) self.global_config = DictTemplater(self.global_config).expand_vars(use_os_env=True) assert isinstance(self.global_config, dict) @@ -128,13 +123,17 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st # --service cli args should override the config file values. service_files: List[str] = config.get("services", []) + (args.service or []) assert isinstance(self._parent_service, SupportsConfigLoading) - self._parent_service = self._parent_service.load_services(service_files, self.global_config, self._parent_service) + self._parent_service = self._parent_service.load_services( + service_files, self.global_config, self._parent_service + ) env_path = args.environment or config.get("environment") if not env_path: _LOG.error("No environment config specified.") - parser.error("At least the Environment config must be specified." + - " Run `mlos_bench --help` and consult `README.md` for more info.") + parser.error( + "At least the Environment config must be specified." 
+ + " Run `mlos_bench --help` and consult `README.md` for more info." + ) self.root_env_config = self._config_loader.resolve_path(env_path) self.trial_runners: List[TrialRunner] = [] @@ -143,10 +142,17 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st env_global_config = self.global_config.copy() env_global_config["trial_runner_id"] = trial_runner_id env = self._config_loader.load_environment( - self.root_env_config, TunableGroups(), env_global_config, service=self._parent_service) + self.root_env_config, + TunableGroups(), + env_global_config, + service=self._parent_service, + ) self.trial_runners.append(TrialRunner(trial_runner_id, env)) - _LOG.info("Init %d trial runners for environments: %s", - len(self.trial_runners), list(trial_runner.environment for trial_runner in self.trial_runners)) + _LOG.info( + "Init %d trial runners for environments: %s", + len(self.trial_runners), + list(trial_runner.environment for trial_runner in self.trial_runners), + ) # NOTE: Init tunable values *after* the Environment(s), but *before* the Optimizer # TODO: should we assign the same or different tunables for all TrialRunner Environments? @@ -154,7 +160,7 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self.trial_runners[0].environment, args.random_init or config.get("random_init", False), config.get("random_seed") if args.random_seed is None else args.random_seed, - config.get("tunable_values", []) + (args.tunable_values or []) + config.get("tunable_values", []) + (args.tunable_values or []), ) _LOG.info("Init tunables: %s", self.tunables) @@ -164,111 +170,172 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self.storage = self._load_storage(args.storage or config.get("storage")) _LOG.info("Init storage: %s", self.storage) - self.teardown: bool = bool(args.teardown) if args.teardown is not None else bool(config.get("teardown", True)) + self.teardown: bool = ( + bool(args.teardown) + if args.teardown is not None + else bool(config.get("teardown", True)) + ) self.scheduler = self._load_scheduler(args.scheduler or config.get("scheduler")) _LOG.info("Init scheduler: %s", self.scheduler) @property def config_loader(self) -> ConfigPersistenceService: - """ - Get the config loader service. - """ + """Get the config loader service.""" return self._config_loader @property def service(self) -> Service: - """ - Get the parent service. - """ + """Get the parent service.""" return self._parent_service @staticmethod - def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> Tuple[argparse.Namespace, List[str]]: - """ - Parse the command line arguments. - """ + def _parse_args( + parser: argparse.ArgumentParser, argv: Optional[List[str]] + ) -> Tuple[argparse.Namespace, List[str]]: + """Parse the command line arguments.""" parser.add_argument( - '--config', required=False, - help='Main JSON5 configuration file. Its keys are the same as the' + - ' command line options and can be overridden by the latter.\n' + - '\n' + - ' See the `mlos_bench/config/` tree at https://github.com/microsoft/MLOS/ ' + - ' for additional config examples for this and other arguments.') + "--config", + required=False, + help="Main JSON5 configuration file. 
Its keys are the same as the" + + " command line options and can be overridden by the latter.\n" + + "\n" + + " See the `mlos_bench/config/` tree at https://github.com/microsoft/MLOS/ " + + " for additional config examples for this and other arguments.", + ) parser.add_argument( - '--log_file', '--log-file', required=False, - help='Path to the log file. Use stdout if omitted.') + "--log_file", + "--log-file", + required=False, + help="Path to the log file. Use stdout if omitted.", + ) parser.add_argument( - '--log_level', '--log-level', required=False, type=str, - help=f'Logging level. Default is {logging.getLevelName(_LOG_LEVEL)}.' + - ' Set to DEBUG for debug, WARNING for warnings only.') + "--log_level", + "--log-level", + required=False, + type=str, + help=f"Logging level. Default is {logging.getLevelName(_LOG_LEVEL)}." + + " Set to DEBUG for debug, WARNING for warnings only.", + ) parser.add_argument( - '--config_path', '--config-path', '--config-paths', '--config_paths', - nargs="+", action='extend', required=False, - help='One or more locations of JSON config files.') + "--config_path", + "--config-path", + "--config-paths", + "--config_paths", + nargs="+", + action="extend", + required=False, + help="One or more locations of JSON config files.", + ) parser.add_argument( - '--service', '--services', - nargs='+', action='extend', required=False, - help='Path to JSON file with the configuration of the service(s) for environment(s) to use.') + "--service", + "--services", + nargs="+", + action="extend", + required=False, + help="Path to JSON file with the configuration of the service(s) for environment(s) to use.", + ) parser.add_argument( - '--environment', required=False, - help='Path to JSON file with the configuration of the benchmarking environment(s).') + "--environment", + required=False, + help="Path to JSON file with the configuration of the benchmarking environment(s).", + ) parser.add_argument( - '--optimizer', required=False, - help='Path to the optimizer configuration file. If omitted, run' + - ' a single trial with default (or specified in --tunable_values).') + "--optimizer", + required=False, + help="Path to the optimizer configuration file. If omitted, run" + + " a single trial with default (or specified in --tunable_values).", + ) parser.add_argument( - '--trial_config_repeat_count', '--trial-config-repeat-count', required=False, type=int, - help='Number of times to repeat each config. Default is 1 trial per config, though more may be advised.') + "--trial_config_repeat_count", + "--trial-config-repeat-count", + required=False, + type=int, + help="Number of times to repeat each config. Default is 1 trial per config, though more may be advised.", + ) parser.add_argument( - '--num_trial_runners', '--num-trial-runners', required=False, type=int, - help='Number of TrialRunners to use for executing benchmark Environments. ' - + 'Individual TrialRunners can be identified in configs with $trial_runner_id and optionally run in parallel.') + "--num_trial_runners", + "--num-trial-runners", + required=False, + type=int, + help="Number of TrialRunners to use for executing benchmark Environments. " + + "Individual TrialRunners can be identified in configs with $trial_runner_id and optionally run in parallel.", + ) parser.add_argument( - '--scheduler', required=False, - help='Path to the scheduler configuration file. By default, use' + - ' a single worker synchronous scheduler.') + "--scheduler", + required=False, + help="Path to the scheduler configuration file. 
By default, use" + + " a single worker synchronous scheduler.", + ) parser.add_argument( - '--storage', required=False, - help='Path to the storage configuration file.' + - ' If omitted, use the ephemeral in-memory SQL storage.') + "--storage", + required=False, + help="Path to the storage configuration file." + + " If omitted, use the ephemeral in-memory SQL storage.", + ) parser.add_argument( - '--random_init', '--random-init', required=False, default=False, - dest='random_init', action='store_true', - help='Initialize tunables with random values. (Before applying --tunable_values).') + "--random_init", + "--random-init", + required=False, + default=False, + dest="random_init", + action="store_true", + help="Initialize tunables with random values. (Before applying --tunable_values).", + ) parser.add_argument( - '--random_seed', '--random-seed', required=False, type=int, - help='Seed to use with --random_init') + "--random_seed", + "--random-seed", + required=False, + type=int, + help="Seed to use with --random_init", + ) parser.add_argument( - '--tunable_values', '--tunable-values', nargs="+", action='extend', required=False, - help='Path to one or more JSON files that contain values of the tunable' + - ' parameters. This can be used for a single trial (when no --optimizer' + - ' is specified) or as default values for the first run in optimization.') + "--tunable_values", + "--tunable-values", + nargs="+", + action="extend", + required=False, + help="Path to one or more JSON files that contain values of the tunable" + + " parameters. This can be used for a single trial (when no --optimizer" + + " is specified) or as default values for the first run in optimization.", + ) parser.add_argument( - '--globals', nargs="+", action='extend', required=False, - help='Path to one or more JSON files that contain additional' + - ' [private] parameters of the benchmarking environment.') + "--globals", + nargs="+", + action="extend", + required=False, + help="Path to one or more JSON files that contain additional" + + " [private] parameters of the benchmarking environment.", + ) parser.add_argument( - '--no_teardown', '--no-teardown', required=False, default=None, - dest='teardown', action='store_false', - help='Disable teardown of the environment after the benchmark.') + "--no_teardown", + "--no-teardown", + required=False, + default=None, + dest="teardown", + action="store_false", + help="Disable teardown of the environment after the benchmark.", + ) parser.add_argument( - '--experiment_id', '--experiment-id', required=False, default=None, + "--experiment_id", + "--experiment-id", + required=False, + default=None, help=""" Experiment ID to use for the benchmark. If omitted, the value from the --cli config or --globals is used. @@ -278,7 +345,7 @@ def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> T changes are made to config files, scripts, versions, etc. This is left as a manual operation as detection of what is "incompatible" is not easily automatable across systems. - """ + """, ) # By default we use the command line arguments, but allow the caller to @@ -291,9 +358,7 @@ def _parse_args(parser: argparse.ArgumentParser, argv: Optional[List[str]]) -> T @staticmethod def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]: - """ - Helper function to parse global key/value pairs from the command line. 
- """ + """Helper function to parse global key/value pairs from the command line.""" _LOG.debug("Extra args: %s", cmdline) config: Dict[str, TunableValue] = {} @@ -320,16 +385,17 @@ def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]: _LOG.debug("Parsed config: %s", config) return config - def _load_config(self, - args_globals: Iterable[str], - config_path: Iterable[str], - args_rest: Iterable[str], - global_config: Dict[str, Any]) -> Dict[str, Any]: + def _load_config( + self, + args_globals: Iterable[str], + config_path: Iterable[str], + args_rest: Iterable[str], + global_config: Dict[str, Any], + ) -> Dict[str, Any]: + """Get key/value pairs of the global configuration parameters from the specified + config files (if any) and command line arguments. """ - Get key/value pairs of the global configuration parameters - from the specified config files (if any) and command line arguments. - """ - for config_file in (args_globals or []): + for config_file in args_globals or []: conf = self._config_loader.load_config(config_file, ConfigSchema.GLOBALS) assert isinstance(conf, dict) global_config.update(conf) @@ -338,19 +404,25 @@ def _load_config(self, global_config["config_path"] = config_path return global_config - def _init_tunable_values(self, env: Environment, random_init: bool, seed: Optional[int], - args_tunables: Optional[str]) -> TunableGroups: - """ - Initialize the tunables and load key/value pairs of the tunable values - from given JSON files, if specified. + def _init_tunable_values( + self, + env: Environment, + random_init: bool, + seed: Optional[int], + args_tunables: Optional[str], + ) -> TunableGroups: + """Initialize the tunables and load key/value pairs of the tunable values from + given JSON files, if specified. """ tunables = env.tunable_params _LOG.debug("Init tunables: default = %s", tunables) if random_init: tunables = MockOptimizer( - tunables=tunables, service=None, - config={"start_with_defaults": False, "seed": seed}).suggest() + tunables=tunables, + service=None, + config={"start_with_defaults": False, "seed": seed}, + ).suggest() _LOG.debug("Init tunables: random = %s", tunables) # TODO: should we assign the same or different tunables for all TrialRunner Environments? @@ -366,50 +438,62 @@ def _init_tunable_values(self, env: Environment, random_init: bool, seed: Option def _load_optimizer(self, args_optimizer: Optional[str]) -> Optimizer: """ - Instantiate the Optimizer object from JSON config file, if specified - in the --optimizer command line option. If config file not specified, - create a one-shot optimizer to run a single benchmark trial. + Instantiate the Optimizer object from JSON config file, if specified in the + --optimizer command line option. + + If config file not specified, create a one-shot optimizer to run a single + benchmark trial. """ if args_optimizer is None: # global_config may contain additional properties, so we need to # strip those out before instantiating the basic oneshot optimizer. 
- config = {key: val for key, val in self.global_config.items() if key in OneShotOptimizer.BASE_SUPPORTED_CONFIG_PROPS} - return OneShotOptimizer( - self.tunables, config=config, service=self._parent_service) + config = { + key: val + for key, val in self.global_config.items() + if key in OneShotOptimizer.BASE_SUPPORTED_CONFIG_PROPS + } + return OneShotOptimizer(self.tunables, config=config, service=self._parent_service) class_config = self._config_loader.load_config(args_optimizer, ConfigSchema.OPTIMIZER) assert isinstance(class_config, Dict) - optimizer = self._config_loader.build_optimizer(tunables=self.tunables, - service=self._parent_service, - config=class_config, - global_config=self.global_config) + optimizer = self._config_loader.build_optimizer( + tunables=self.tunables, + service=self._parent_service, + config=class_config, + global_config=self.global_config, + ) return optimizer def _load_storage(self, args_storage: Optional[str]) -> Storage: """ - Instantiate the Storage object from JSON file provided in the --storage - command line parameter. If omitted, create an ephemeral in-memory SQL - storage instead. + Instantiate the Storage object from JSON file provided in the --storage command + line parameter. + + If omitted, create an ephemeral in-memory SQL storage instead. """ if args_storage is None: # pylint: disable=import-outside-toplevel from mlos_bench.storage.sql.storage import SqlStorage - return SqlStorage(service=self._parent_service, - config={ - "drivername": "sqlite", - "database": ":memory:", - "lazy_schema_create": True, - }) + + return SqlStorage( + service=self._parent_service, + config={ + "drivername": "sqlite", + "database": ":memory:", + "lazy_schema_create": True, + }, + ) class_config = self._config_loader.load_config(args_storage, ConfigSchema.STORAGE) assert isinstance(class_config, Dict) - storage = self._config_loader.build_storage(service=self._parent_service, - config=class_config, - global_config=self.global_config) + storage = self._config_loader.build_storage( + service=self._parent_service, config=class_config, global_config=self.global_config + ) return storage def _load_scheduler(self, args_scheduler: Optional[str]) -> Scheduler: """ Instantiate the Scheduler object from JSON file provided in the --scheduler command line parameter. + Create a simple synchronous single-threaded scheduler if omitted. """ # Set `teardown` for scheduler only to prevent conflicts with other configs. @@ -418,6 +502,7 @@ def _load_scheduler(self, args_scheduler: Optional[str]) -> Scheduler: if args_scheduler is None: # pylint: disable=import-outside-toplevel from mlos_bench.schedulers.sync_scheduler import SyncScheduler + return SyncScheduler( # All config values can be overridden from global config config={ diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 467fd9e02a..5bb819e724 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -2,26 +2,23 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -""" -Base class for the optimization loop scheduling policies. 
-""" +"""Base class for the optimization loop scheduling policies.""" import json import logging -from datetime import datetime - from abc import ABCMeta, abstractmethod +from datetime import datetime from types import TracebackType -from typing import Any, Dict, List, Iterable, Optional, Tuple, Type -from typing_extensions import Literal +from typing import Any, Dict, Iterable, List, Optional, Tuple, Type from pytz import UTC +from typing_extensions import Literal from mlos_bench.environments.base_environment import Environment from mlos_bench.optimizers.base_optimizer import Optimizer +from mlos_bench.schedulers.trial_runner import TrialRunner from mlos_bench.storage.base_storage import Storage from mlos_bench.tunables.tunable_groups import TunableGroups -from mlos_bench.schedulers.trial_runner import TrialRunner from mlos_bench.util import merge_parameters _LOG = logging.getLogger(__name__) @@ -29,23 +26,23 @@ class Scheduler(metaclass=ABCMeta): # pylint: disable=too-many-instance-attributes - """ - Base class for the optimization loop scheduling policies. - """ - - def __init__(self, *, - config: Dict[str, Any], - global_config: Dict[str, Any], - trial_runners: List[TrialRunner], - optimizer: Optimizer, - storage: Storage, - root_env_config: str): - """ - Create a new instance of the scheduler. The constructor of this - and the derived classes is called by the persistence service - after reading the class JSON configuration. Other objects like - the TrialRunner(s) and their Environment(s) and Optimizer are - provided by the Launcher. + """Base class for the optimization loop scheduling policies.""" + + def __init__( + self, + *, + config: Dict[str, Any], + global_config: Dict[str, Any], + trial_runners: List[TrialRunner], + optimizer: Optimizer, + storage: Storage, + root_env_config: str, + ): + """ + Create a new instance of the scheduler. The constructor of this and the derived + classes is called by the persistence service after reading the class JSON + configuration. Other objects like the TrialRunner(s) and their Environment(s) + and Optimizer are provided by the Launcher. Parameters ---------- @@ -63,8 +60,9 @@ def __init__(self, *, Path to the root Environment configuration. """ self.global_config = global_config - config = merge_parameters(dest=config.copy(), source=global_config, - required_keys=["experiment_id", "trial_id"]) + config = merge_parameters( + dest=config.copy(), source=global_config, required_keys=["experiment_id", "trial_id"] + ) self._experiment_id = config["experiment_id"].strip() self._trial_id = int(config["trial_id"]) @@ -74,7 +72,9 @@ def __init__(self, *, self._trial_config_repeat_count = int(config.get("trial_config_repeat_count", 1)) if self._trial_config_repeat_count <= 0: - raise ValueError(f"Invalid trial_config_repeat_count: {self._trial_config_repeat_count}") + raise ValueError( + f"Invalid trial_config_repeat_count: {self._trial_config_repeat_count}" + ) self._do_teardown = bool(config.get("teardown", True)) @@ -96,7 +96,9 @@ def trial_config_repeat_count(self) -> int: @property def max_trials(self) -> int: - """Gets the maximum number of trials to run for a given config, or -1 for no limit.""" + """Gets the maximum number of trials to run for a given config, or -1 for no + limit. + """ return self._max_trials @property @@ -163,10 +165,8 @@ def __repr__(self) -> str: """ return self.__class__.__name__ - def __enter__(self) -> 'Scheduler': - """ - Enter the scheduler's context. 
- """ + def __enter__(self) -> "Scheduler": + """Enter the scheduler's context.""" _LOG.debug("Scheduler START :: %s", self) assert self.experiment is None self._optimizer.__enter__() @@ -184,13 +184,13 @@ def __enter__(self) -> 'Scheduler': ).__enter__() return self - def __exit__(self, - ex_type: Optional[Type[BaseException]], - ex_val: Optional[BaseException], - ex_tb: Optional[TracebackType]) -> Literal[False]: - """ - Exit the context of the scheduler. - """ + def __exit__( + self, + ex_type: Optional[Type[BaseException]], + ex_val: Optional[BaseException], + ex_tb: Optional[TracebackType], + ) -> Literal[False]: + """Exit the context of the scheduler.""" if ex_val is None: _LOG.debug("Scheduler END :: %s", self) else: @@ -204,12 +204,14 @@ def __exit__(self, @abstractmethod def start(self) -> None: - """ - Start the scheduling loop. - """ + """Start the scheduling loop.""" assert self.experiment is not None - _LOG.info("START: Experiment: %s Env: %s Optimizer: %s", - self._experiment, self.root_environment, self.optimizer) + _LOG.info( + "START: Experiment: %s Env: %s Optimizer: %s", + self._experiment, + self.root_environment, + self.optimizer, + ) if _LOG.isEnabledFor(logging.INFO): _LOG.info("Root Environment:\n%s", self.root_environment.pprint()) @@ -220,6 +222,7 @@ def start(self) -> None: def teardown(self) -> None: """ Tear down the TrialRunners/Environment(s). + Call it after the completion of the `.start()` in the scheduler context. """ assert self.experiment is not None @@ -229,17 +232,13 @@ def teardown(self) -> None: trial_runner.teardown() def get_best_observation(self) -> Tuple[Optional[Dict[str, float]], Optional[TunableGroups]]: - """ - Get the best observation from the optimizer. - """ + """Get the best observation from the optimizer.""" (best_score, best_config) = self.optimizer.get_best_observation() _LOG.info("Env: %s best score: %s", self.root_environment, best_score) return (best_score, best_config) def load_tunable_config(self, config_id: int) -> TunableGroups: - """ - Load the existing tunable configuration from the storage. - """ + """Load the existing tunable configuration from the storage.""" assert self.experiment is not None tunable_values = self.experiment.load_tunable_config(config_id) for environment in self.environments: @@ -251,9 +250,11 @@ def load_tunable_config(self, config_id: int) -> TunableGroups: def _schedule_new_optimizer_suggestions(self) -> bool: """ - Optimizer part of the loop. Load the results of the executed trials - into the optimizer, suggest new configurations, and add them to the queue. - Return True if optimization is not over, False otherwise. + Optimizer part of the loop. + + Load the results of the executed trials into the optimizer, suggest new + configurations, and add them to the queue. Return True if optimization is not + over, False otherwise. """ assert self.experiment is not None # FIXME: In async mode, trial_ids may be returned out of order, so we may @@ -271,39 +272,48 @@ def _schedule_new_optimizer_suggestions(self) -> bool: return not_done def schedule_trial(self, tunables: TunableGroups) -> None: - """ - Add a configuration to the queue of trials. - """ + """Add a configuration to the queue of trials.""" # TODO: Alternative scheduling policies may prefer to expand repeats over # time as well as space, or adjust the number of repeats (budget) of a given # trial based on whether initial results are promising. 
for repeat_i in range(1, self._trial_config_repeat_count + 1): - self._add_trial_to_queue(tunables, config={ - # Add some additional metadata to track for the trial such as the - # optimizer config used. - # Note: these values are unfortunately mutable at the moment. - # Consider them as hints of what the config was the trial *started*. - # It is possible that the experiment configs were changed - # between resuming the experiment (since that is not currently - # prevented). - "optimizer": self.optimizer.name, - "repeat_i": repeat_i, - "trial_runner_id": self._trial_runners[self._current_trial_runner_idx].trial_runner_id, - "is_defaults": tunables.is_defaults(), - **{ - f"opt_{key}_{i}": val - for (i, opt_target) in enumerate(self.optimizer.targets.items()) - for (key, val) in zip(["target", "direction"], opt_target) - } - }) + self._add_trial_to_queue( + tunables, + config={ + # Add some additional metadata to track for the trial such as the + # optimizer config used. + # Note: these values are unfortunately mutable at the moment. + # Consider them as hints of what the config was the trial *started*. + # It is possible that the experiment configs were changed + # between resuming the experiment (since that is not currently + # prevented). + "optimizer": self.optimizer.name, + "repeat_i": repeat_i, + "trial_runner_id": self._trial_runners[ + self._current_trial_runner_idx + ].trial_runner_id, + "is_defaults": tunables.is_defaults(), + **{ + f"opt_{key}_{i}": val + for (i, opt_target) in enumerate(self.optimizer.targets.items()) + for (key, val) in zip(["target", "direction"], opt_target) + }, + }, + ) # Rotate which TrialRunner the Trial is assigned to. - self._current_trial_runner_idx = (self._current_trial_runner_idx + 1) % len(self._trial_runners) + self._current_trial_runner_idx = (self._current_trial_runner_idx + 1) % len( + self._trial_runners + ) - def _add_trial_to_queue(self, tunables: TunableGroups, - ts_start: Optional[datetime] = None, - config: Optional[Dict[str, Any]] = None) -> None: + def _add_trial_to_queue( + self, + tunables: TunableGroups, + ts_start: Optional[datetime] = None, + config: Optional[Dict[str, Any]] = None, + ) -> None: """ Add a configuration to the queue of trials in the Storage backend. + A wrapper for the `Experiment.new_trial` method. """ assert self.experiment is not None @@ -312,7 +322,9 @@ def _add_trial_to_queue(self, tunables: TunableGroups, def _run_schedule(self, running: bool = False) -> None: """ - Scheduler part of the loop. Check for pending trials in the queue and run them. + Scheduler part of the loop. + + Check for pending trials in the queue and run them. """ assert self.experiment is not None for trial in self.experiment.pending_trials(datetime.now(UTC), running=running): @@ -321,6 +333,7 @@ def _run_schedule(self, running: bool = False) -> None: def not_done(self) -> bool: """ Check the stopping conditions. + By default, stop when the optimizer converges or max limit of trials reached. """ return self.optimizer.not_converged() and ( @@ -330,7 +343,9 @@ def not_done(self) -> bool: @abstractmethod def run_trial(self, trial: Storage.Trial) -> None: """ - Set up and run a single trial. Save the results in the storage. + Set up and run a single trial. + + Save the results in the storage. 
""" assert self.experiment is not None self._trial_count += 1 diff --git a/mlos_bench/mlos_bench/schedulers/sync_scheduler.py b/mlos_bench/mlos_bench/schedulers/sync_scheduler.py index 867f4f9f0d..4b864942dc 100644 --- a/mlos_bench/mlos_bench/schedulers/sync_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/sync_scheduler.py @@ -2,9 +2,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -""" -A simple single-threaded synchronous optimization loop implementation. -""" +"""A simple single-threaded synchronous optimization loop implementation.""" import logging @@ -15,14 +13,10 @@ class SyncScheduler(Scheduler): - """ - A simple single-threaded synchronous optimization loop implementation. - """ + """A simple single-threaded synchronous optimization loop implementation.""" def start(self) -> None: - """ - Start the optimization loop. - """ + """Start the optimization loop.""" super().start() is_warm_up = self.optimizer.supports_preload @@ -38,7 +32,9 @@ def start(self) -> None: def run_trial(self, trial: Storage.Trial) -> None: """ - Set up and run a single trial. Save the results in the storage. + Set up and run a single trial. + + Save the results in the storage. """ super().run_trial(trial) # In the sync scheduler we run each trial on its own TrialRunner in sequence. diff --git a/mlos_bench/mlos_bench/schedulers/trial_runner.py b/mlos_bench/mlos_bench/schedulers/trial_runner.py index b1d93f982f..a6b16f5ead 100644 --- a/mlos_bench/mlos_bench/schedulers/trial_runner.py +++ b/mlos_bench/mlos_bench/schedulers/trial_runner.py @@ -2,23 +2,19 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -""" -Simple class to run an individual Trial on a given Environment. -""" +"""Simple class to run an individual Trial on a given Environment.""" +import logging +from datetime import datetime from types import TracebackType from typing import Any, Dict, Literal, Optional, Type -from datetime import datetime -import logging - from pytz import UTC from mlos_bench.environments.base_environment import Environment from mlos_bench.environments.status import Status -from mlos_bench.storage.base_storage import Storage from mlos_bench.event_loop_context import EventLoopContext - +from mlos_bench.storage.base_storage import Storage _LOG = logging.getLogger(__name__) @@ -44,16 +40,12 @@ def __init__(self, trial_runner_id: int, env: Environment) -> None: @property def trial_runner_id(self) -> int: - """ - Get the TrialRunner's id. - """ + """Get the TrialRunner's id.""" return self._trial_runner_id @property def environment(self) -> Environment: - """ - Get the Environment. 
- """ + """Get the Environment.""" return self._env def __enter__(self) -> "TrialRunner": @@ -64,10 +56,12 @@ def __enter__(self) -> "TrialRunner": self._in_context = True return self - def __exit__(self, - ex_type: Optional[Type[BaseException]], - ex_val: Optional[BaseException], - ex_tb: Optional[TracebackType]) -> Literal[False]: + def __exit__( + self, + ex_type: Optional[Type[BaseException]], + ex_val: Optional[BaseException], + ex_tb: Optional[TracebackType], + ) -> Literal[False]: assert self._in_context _LOG.debug("TrialRunner END :: %s", self) self._env.__exit__(ex_type, ex_val, ex_tb) @@ -80,12 +74,12 @@ def is_running(self) -> bool: """Get the running state of the current TrialRunner.""" return self._is_running - def run_trial(self, - trial: Storage.Trial, - global_config: Optional[Dict[str, Any]] = None) -> None: + def run_trial( + self, trial: Storage.Trial, global_config: Optional[Dict[str, Any]] = None + ) -> None: """ - Run a single trial on this TrialRunner's Environment and stores the results - in the backend Trial Storage. + Run a single trial on this TrialRunner's Environment and stores the results in + the backend Trial Storage. Parameters ---------- @@ -104,8 +98,9 @@ def run_trial(self, assert not self._is_running self._is_running = True - assert trial.trial_runner_id == self.trial_runner_id, \ - f"TrialRunner {self} should not run trial {trial} with different trial_runner_id {trial.trial_runner_id}." + assert ( + trial.trial_runner_id == self.trial_runner_id + ), f"TrialRunner {self} should not run trial {trial} with different trial_runner_id {trial.trial_runner_id}." if not self.environment.setup(trial.tunables, trial.config(global_config)): _LOG.warning("Setup failed: %s :: %s", self.environment, trial.tunables) @@ -116,7 +111,9 @@ def run_trial(self, # TODO: start background status polling of the environments in the event loop. - (status, timestamp, results) = self.environment.run() # Block and wait for the final result. + (status, timestamp, results) = ( + self.environment.run() + ) # Block and wait for the final result. _LOG.info("TrialRunner Results: %s :: %s\n%s", trial.tunables, status, results) # In async mode (TODO), poll the environment for status and telemetry @@ -135,7 +132,9 @@ def run_trial(self, def teardown(self) -> None: """ Tear down the Environment. - Call it after the completion of one (or more) `.run()` in the TrialRunner context. + + Call it after the completion of one (or more) `.run()` in the TrialRunner + context. """ assert self._in_context self._env.teardown() diff --git a/mlos_bench/mlos_bench/services/config_persistence.py b/mlos_bench/mlos_bench/services/config_persistence.py index adb6e825aa..cbfbf2df17 100644 --- a/mlos_bench/mlos_bench/services/config_persistence.py +++ b/mlos_bench/mlos_bench/services/config_persistence.py @@ -2,22 +2,28 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -""" -Helper functions to load, instantiate, and serialize Python objects -that encapsulate benchmark environments, tunable parameters, and -service functions. +"""Helper functions to load, instantiate, and serialize Python objects that encapsulate +benchmark environments, tunable parameters, and service functions. 
""" +import json # For logging only +import logging import os import sys - -import json # For logging only -import logging - -from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union, TYPE_CHECKING - -import json5 # To read configs with comments and other JSON5 syntax features -from jsonschema import ValidationError, SchemaError +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + Iterable, + List, + Optional, + Tuple, + Union, +) + +import json5 # To read configs with comments and other JSON5 syntax features +from jsonschema import SchemaError, ValidationError from mlos_bench.config.schemas import ConfigSchema from mlos_bench.environments.base_environment import Environment @@ -26,7 +32,12 @@ from mlos_bench.services.types.config_loader_type import SupportsConfigLoading from mlos_bench.tunables.tunable import TunableValue from mlos_bench.tunables.tunable_groups import TunableGroups -from mlos_bench.util import instantiate_from_config, merge_parameters, path_join, preprocess_dynamic_configs +from mlos_bench.util import ( + instantiate_from_config, + merge_parameters, + path_join, + preprocess_dynamic_configs, +) if sys.version_info < (3, 10): from importlib_resources import files @@ -34,26 +45,28 @@ from importlib.resources import files if TYPE_CHECKING: - from mlos_bench.storage.base_storage import Storage from mlos_bench.schedulers.base_scheduler import Scheduler from mlos_bench.schedulers.trial_runner import TrialRunner + from mlos_bench.storage.base_storage import Storage _LOG = logging.getLogger(__name__) class ConfigPersistenceService(Service, SupportsConfigLoading): - """ - Collection of methods to deserialize the Environment, Service, and TunableGroups objects. + """Collection of methods to deserialize the Environment, Service, and TunableGroups + objects. """ BUILTIN_CONFIG_PATH = str(files("mlos_bench.config").joinpath("")).replace("\\", "/") - def __init__(self, - config: Optional[Dict[str, Any]] = None, - global_config: Optional[Dict[str, Any]] = None, - parent: Optional[Service] = None, - methods: Union[Dict[str, Callable], List[Callable], None] = None): + def __init__( + self, + config: Optional[Dict[str, Any]] = None, + global_config: Optional[Dict[str, Any]] = None, + parent: Optional[Service] = None, + methods: Union[Dict[str, Callable], List[Callable], None] = None, + ): """ Create a new instance of config persistence service. @@ -70,17 +83,22 @@ def __init__(self, New methods to register with the service. """ super().__init__( - config, global_config, parent, - self.merge_methods(methods, [ - self.resolve_path, - self.load_config, - self.prepare_class_load, - self.build_service, - self.build_environment, - self.load_services, - self.load_environment, - self.load_environment_list, - ]) + config, + global_config, + parent, + self.merge_methods( + methods, + [ + self.resolve_path, + self.load_config, + self.prepare_class_load, + self.build_service, + self.build_environment, + self.load_services, + self.load_environment, + self.load_environment_list, + ], + ), ) self._config_loader_service = self @@ -108,11 +126,10 @@ def config_paths(self) -> List[str]: """ return list(self._config_path) # make a copy to avoid modifications - def resolve_path(self, file_path: str, - extra_paths: Optional[Iterable[str]] = None) -> str: + def resolve_path(self, file_path: str, extra_paths: Optional[Iterable[str]] = None) -> str: """ - Prepend the suitable `_config_path` to `path` if the latter is not absolute. 
- If `_config_path` is `None` or `path` is absolute, return `path` as is. + Prepend the suitable `_config_path` to `path` if the latter is not absolute. If + `_config_path` is `None` or `path` is absolute, return `path` as is. Parameters ---------- @@ -139,14 +156,14 @@ def resolve_path(self, file_path: str, _LOG.debug("Path not resolved: %s", file_path) return file_path - def load_config(self, - json_file_name: str, - schema_type: Optional[ConfigSchema], - ) -> Dict[str, Any]: + def load_config( + self, + json_file_name: str, + schema_type: Optional[ConfigSchema], + ) -> Dict[str, Any]: """ - Load JSON config file. Search for a file relative to `_config_path` - if the input path is not absolute. - This method is exported to be used as a service. + Load JSON config file. Search for a file relative to `_config_path` if the input + path is not absolute. This method is exported to be used as a service. Parameters ---------- @@ -162,16 +179,22 @@ def load_config(self, """ json_file_name = self.resolve_path(json_file_name) _LOG.info("Load config: %s", json_file_name) - with open(json_file_name, mode='r', encoding='utf-8') as fh_json: + with open(json_file_name, mode="r", encoding="utf-8") as fh_json: config = json5.load(fh_json) if schema_type is not None: try: schema_type.validate(config) except (ValidationError, SchemaError) as ex: - _LOG.error("Failed to validate config %s against schema type %s at %s", - json_file_name, schema_type.name, schema_type.value) - raise ValueError(f"Failed to validate config {json_file_name} against " + - f"schema type {schema_type.name} at {schema_type.value}") from ex + _LOG.error( + "Failed to validate config %s against schema type %s at %s", + json_file_name, + schema_type.name, + schema_type.value, + ) + raise ValueError( + f"Failed to validate config {json_file_name} against " + + f"schema type {schema_type.name} at {schema_type.value}" + ) from ex if isinstance(config, dict) and config.get("$schema"): # Remove $schema attributes from the config after we've validated # them to avoid passing them on to other objects @@ -182,15 +205,17 @@ def load_config(self, del config["$schema"] else: _LOG.warning("Config %s is not validated against a schema.", json_file_name) - return config # type: ignore[no-any-return] + return config # type: ignore[no-any-return] - def prepare_class_load(self, config: Dict[str, Any], - global_config: Optional[Dict[str, Any]] = None, - parent_args: Optional[Dict[str, TunableValue]] = None) -> Tuple[str, Dict[str, Any]]: + def prepare_class_load( + self, + config: Dict[str, Any], + global_config: Optional[Dict[str, Any]] = None, + parent_args: Optional[Dict[str, TunableValue]] = None, + ) -> Tuple[str, Dict[str, Any]]: """ - Extract the class instantiation parameters from the configuration. - Mix-in the global parameters and resolve the local file system paths, - where it is required. + Extract the class instantiation parameters from the configuration. Mix-in the + global parameters and resolve the local file system paths, where it is required. 
Parameters ---------- @@ -229,19 +254,24 @@ def prepare_class_load(self, config: Dict[str, Any], raise ValueError(f"Parameter {key} must be a string or a list") if _LOG.isEnabledFor(logging.DEBUG): - _LOG.debug("Instantiating: %s with config:\n%s", - class_name, json.dumps(class_config, indent=2)) + _LOG.debug( + "Instantiating: %s with config:\n%s", + class_name, + json.dumps(class_config, indent=2), + ) return (class_name, class_config) - def build_optimizer(self, *, - tunables: TunableGroups, - service: Service, - config: Dict[str, Any], - global_config: Optional[Dict[str, Any]] = None) -> Optimizer: + def build_optimizer( + self, + *, + tunables: TunableGroups, + service: Service, + config: Dict[str, Any], + global_config: Optional[Dict[str, Any]] = None, + ) -> Optimizer: """ - Instantiation of mlos_bench Optimizer - that depend on Service and TunableGroups. + Instantiation of mlos_bench Optimizer that depend on Service and TunableGroups. A class *MUST* have a constructor that takes four named arguments: (tunables, config, global_config, service) @@ -267,18 +297,24 @@ def build_optimizer(self, *, if tunables_path is not None: tunables = self._load_tunables(tunables_path, tunables) (class_name, class_config) = self.prepare_class_load(config, global_config) - inst = instantiate_from_config(Optimizer, class_name, # type: ignore[type-abstract] - tunables=tunables, - config=class_config, - global_config=global_config, - service=service) + inst = instantiate_from_config( + Optimizer, + class_name, # type: ignore[type-abstract] + tunables=tunables, + config=class_config, + global_config=global_config, + service=service, + ) _LOG.info("Created: Optimizer %s", inst) return inst - def build_storage(self, *, - service: Service, - config: Dict[str, Any], - global_config: Optional[Dict[str, Any]] = None) -> "Storage": + def build_storage( + self, + *, + service: Service, + config: Dict[str, Any], + global_config: Optional[Dict[str, Any]] = None, + ) -> "Storage": """ Instantiation of mlos_bench Storage objects. @@ -297,21 +333,30 @@ def build_storage(self, *, A new instance of the Storage class. """ (class_name, class_config) = self.prepare_class_load(config, global_config) - from mlos_bench.storage.base_storage import Storage # pylint: disable=import-outside-toplevel - inst = instantiate_from_config(Storage, class_name, # type: ignore[type-abstract] - config=class_config, - global_config=global_config, - service=service) + from mlos_bench.storage.base_storage import ( + Storage, # pylint: disable=import-outside-toplevel + ) + + inst = instantiate_from_config( + Storage, + class_name, # type: ignore[type-abstract] + config=class_config, + global_config=global_config, + service=service, + ) _LOG.info("Created: Storage %s", inst) return inst - def build_scheduler(self, *, - config: Dict[str, Any], - global_config: Dict[str, Any], - trial_runners: List["TrialRunner"], - optimizer: Optimizer, - storage: "Storage", - root_env_config: str) -> "Scheduler": + def build_scheduler( + self, + *, + config: Dict[str, Any], + global_config: Dict[str, Any], + trial_runners: List["TrialRunner"], + optimizer: Optimizer, + storage: "Storage", + root_env_config: str, + ) -> "Scheduler": """ Instantiation of mlos_bench Scheduler. @@ -336,23 +381,31 @@ def build_scheduler(self, *, A new instance of the Scheduler. 
""" (class_name, class_config) = self.prepare_class_load(config, global_config) - from mlos_bench.schedulers.base_scheduler import Scheduler # pylint: disable=import-outside-toplevel - inst = instantiate_from_config(Scheduler, class_name, # type: ignore[type-abstract] - config=class_config, - global_config=global_config, - trial_runners=trial_runners, - optimizer=optimizer, - storage=storage, - root_env_config=root_env_config) + from mlos_bench.schedulers.base_scheduler import ( + Scheduler, # pylint: disable=import-outside-toplevel + ) + + inst = instantiate_from_config( + Scheduler, + class_name, # type: ignore[type-abstract] + config=class_config, + global_config=global_config, + trial_runners=trial_runners, + optimizer=optimizer, + storage=storage, + root_env_config=root_env_config, + ) _LOG.info("Created: Scheduler %s", inst) return inst - def build_environment(self, # pylint: disable=too-many-arguments - config: Dict[str, Any], - tunables: TunableGroups, - global_config: Optional[Dict[str, Any]] = None, - parent_args: Optional[Dict[str, TunableValue]] = None, - service: Optional[Service] = None) -> Environment: + def build_environment( + self, # pylint: disable=too-many-arguments + config: Dict[str, Any], + tunables: TunableGroups, + global_config: Optional[Dict[str, Any]] = None, + parent_args: Optional[Dict[str, TunableValue]] = None, + service: Optional[Service] = None, + ) -> Environment: """ Factory method for a new environment with a given config. @@ -392,16 +445,24 @@ def build_environment(self, # pylint: disable=too-many-arguments tunables = self._load_tunables(env_tunables_path, tunables) _LOG.debug("Creating env: %s :: %s", env_name, env_class) - env = Environment.new(env_name=env_name, class_name=env_class, - config=env_config, global_config=global_config, - tunables=tunables, service=service) + env = Environment.new( + env_name=env_name, + class_name=env_class, + config=env_config, + global_config=global_config, + tunables=tunables, + service=service, + ) _LOG.info("Created env: %s :: %s", env_name, env) return env - def _build_standalone_service(self, config: Dict[str, Any], - global_config: Optional[Dict[str, Any]] = None, - parent: Optional[Service] = None) -> Service: + def _build_standalone_service( + self, + config: Dict[str, Any], + global_config: Optional[Dict[str, Any]] = None, + parent: Optional[Service] = None, + ) -> Service: """ Factory method for a new service with a given config. @@ -426,9 +487,12 @@ def _build_standalone_service(self, config: Dict[str, Any], _LOG.info("Created service: %s", service) return service - def _build_composite_service(self, config_list: Iterable[Dict[str, Any]], - global_config: Optional[Dict[str, Any]] = None, - parent: Optional[Service] = None) -> Service: + def _build_composite_service( + self, + config_list: Iterable[Dict[str, Any]], + global_config: Optional[Dict[str, Any]] = None, + parent: Optional[Service] = None, + ) -> Service: """ Factory method for a new service with a given config. 
@@ -454,18 +518,21 @@ def _build_composite_service(self, config_list: Iterable[Dict[str, Any]], service.register(parent.export()) for config in config_list: - service.register(self._build_standalone_service( - config, global_config, service).export()) + service.register( + self._build_standalone_service(config, global_config, service).export() + ) if _LOG.isEnabledFor(logging.DEBUG): _LOG.debug("Created mix-in service: %s", service) return service - def build_service(self, - config: Dict[str, Any], - global_config: Optional[Dict[str, Any]] = None, - parent: Optional[Service] = None) -> Service: + def build_service( + self, + config: Dict[str, Any], + global_config: Optional[Dict[str, Any]] = None, + parent: Optional[Service] = None, + ) -> Service: """ Factory method for a new service with a given config. @@ -487,8 +554,7 @@ def build_service(self, services from the list plus the parent mix-in. """ if _LOG.isEnabledFor(logging.DEBUG): - _LOG.debug("Build service from config:\n%s", - json.dumps(config, indent=2)) + _LOG.debug("Build service from config:\n%s", json.dumps(config, indent=2)) assert isinstance(config, dict) config_list: List[Dict[str, Any]] @@ -503,12 +569,14 @@ def build_service(self, return self._build_composite_service(config_list, global_config, parent) - def load_environment(self, # pylint: disable=too-many-arguments - json_file_name: str, - tunables: TunableGroups, - global_config: Optional[Dict[str, Any]] = None, - parent_args: Optional[Dict[str, TunableValue]] = None, - service: Optional[Service] = None) -> Environment: + def load_environment( + self, # pylint: disable=too-many-arguments + json_file_name: str, + tunables: TunableGroups, + global_config: Optional[Dict[str, Any]] = None, + parent_args: Optional[Dict[str, TunableValue]] = None, + service: Optional[Service] = None, + ) -> Environment: """ Load and build new environment from the config file. @@ -535,12 +603,14 @@ def load_environment(self, # pylint: disable=too-many-arguments assert isinstance(config, dict) return self.build_environment(config, tunables, global_config, parent_args, service) - def load_environment_list(self, # pylint: disable=too-many-arguments - json_file_name: str, - tunables: TunableGroups, - global_config: Optional[Dict[str, Any]] = None, - parent_args: Optional[Dict[str, TunableValue]] = None, - service: Optional[Service] = None) -> List[Environment]: + def load_environment_list( + self, # pylint: disable=too-many-arguments + json_file_name: str, + tunables: TunableGroups, + global_config: Optional[Dict[str, Any]] = None, + parent_args: Optional[Dict[str, TunableValue]] = None, + service: Optional[Service] = None, + ) -> List[Environment]: """ Load and build a list of environments from the config file. @@ -565,16 +635,17 @@ def load_environment_list(self, # pylint: disable=too-many-arguments A list of new benchmarking environments. 
""" config = self.load_config(json_file_name, ConfigSchema.ENVIRONMENT) - return [ - self.build_environment(config, tunables, global_config, parent_args, service) - ] + return [self.build_environment(config, tunables, global_config, parent_args, service)] - def load_services(self, json_file_names: Iterable[str], - global_config: Optional[Dict[str, Any]] = None, - parent: Optional[Service] = None) -> Service: + def load_services( + self, + json_file_names: Iterable[str], + global_config: Optional[Dict[str, Any]] = None, + parent: Optional[Service] = None, + ) -> Service: """ - Read the configuration files and bundle all service methods - from those configs into a single Service object. + Read the configuration files and bundle all service methods from those configs + into a single Service object. Parameters ---------- @@ -590,16 +661,16 @@ def load_services(self, json_file_names: Iterable[str], service : Service A collection of service methods. """ - _LOG.info("Load services: %s parent: %s", - json_file_names, parent.__class__.__name__) + _LOG.info("Load services: %s parent: %s", json_file_names, parent.__class__.__name__) service = Service({}, global_config, parent) for fname in json_file_names: config = self.load_config(fname, ConfigSchema.SERVICE) service.register(self.build_service(config, global_config, service).export()) return service - def _load_tunables(self, json_file_names: Iterable[str], - parent: TunableGroups) -> TunableGroups: + def _load_tunables( + self, json_file_names: Iterable[str], parent: TunableGroups + ) -> TunableGroups: """ Load a collection of tunable parameters from JSON files into the parent TunableGroup. diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 5d275010f0..9dfd15f236 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -2,15 +2,14 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -""" -Base interface for saving and restoring the benchmark data. -""" +"""Base interface for saving and restoring the benchmark data.""" import logging from abc import ABCMeta, abstractmethod from datetime import datetime from types import TracebackType -from typing import Optional, List, Tuple, Dict, Iterator, Type, Any +from typing import Any, Dict, Iterator, List, Optional, Tuple, Type + from typing_extensions import Literal from mlos_bench.config.schemas import ConfigSchema @@ -24,15 +23,16 @@ class Storage(metaclass=ABCMeta): - """ - An abstract interface between the benchmarking framework - and storage systems (e.g., SQLite or MLFLow). + """An abstract interface between the benchmarking framework and storage systems + (e.g., SQLite or MLFLow). """ - def __init__(self, - config: Dict[str, Any], - global_config: Optional[dict] = None, - service: Optional[Service] = None): + def __init__( + self, + config: Dict[str, Any], + global_config: Optional[dict] = None, + service: Optional[Service] = None, + ): """ Create a new storage object. @@ -48,10 +48,9 @@ def __init__(self, self._global_config = global_config or {} def _validate_json_config(self, config: dict) -> None: - """ - Reconstructs a basic json config that this class might have been - instantiated from in order to validate configs provided outside the - file loading mechanism. + """Reconstructs a basic json config that this class might have been instantiated + from in order to validate configs provided outside the file loading + mechanism. 
""" json_config: dict = { "class": self.__class__.__module__ + "." + self.__class__.__name__, @@ -73,13 +72,16 @@ def experiments(self) -> Dict[str, ExperimentData]: """ @abstractmethod - def experiment(self, *, - experiment_id: str, - trial_id: int, - root_env_config: str, - description: str, - tunables: TunableGroups, - opt_targets: Dict[str, Literal['min', 'max']]) -> 'Storage.Experiment': + def experiment( + self, + *, + experiment_id: str, + trial_id: int, + root_env_config: str, + description: str, + tunables: TunableGroups, + opt_targets: Dict[str, Literal["min", "max"]], + ) -> "Storage.Experiment": """ Create a new experiment in the storage. @@ -112,26 +114,31 @@ class Experiment(metaclass=ABCMeta): # pylint: disable=too-many-instance-attributes """ Base interface for storing the results of the experiment. + This class is instantiated in the `Storage.experiment()` method. """ - def __init__(self, - *, - tunables: TunableGroups, - experiment_id: str, - trial_id: int, - root_env_config: str, - description: str, - opt_targets: Dict[str, Literal['min', 'max']]): + def __init__( + self, + *, + tunables: TunableGroups, + experiment_id: str, + trial_id: int, + root_env_config: str, + description: str, + opt_targets: Dict[str, Literal["min", "max"]], + ): self._tunables = tunables.copy() self._trial_id = trial_id self._experiment_id = experiment_id - (self._git_repo, self._git_commit, self._root_env_config) = get_git_info(root_env_config) + (self._git_repo, self._git_commit, self._root_env_config) = get_git_info( + root_env_config + ) self._description = description self._opt_targets = opt_targets self._in_context = False - def __enter__(self) -> 'Storage.Experiment': + def __enter__(self) -> "Storage.Experiment": """ Enter the context of the experiment. @@ -143,9 +150,12 @@ def __enter__(self) -> 'Storage.Experiment': self._in_context = True return self - def __exit__(self, exc_type: Optional[Type[BaseException]], - exc_val: Optional[BaseException], - exc_tb: Optional[TracebackType]) -> Literal[False]: + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> Literal[False]: """ End the context of the experiment. @@ -156,8 +166,9 @@ def __exit__(self, exc_type: Optional[Type[BaseException]], _LOG.debug("Finishing experiment: %s", self) else: assert exc_type and exc_val - _LOG.warning("Finishing experiment: %s", self, - exc_info=(exc_type, exc_val, exc_tb)) + _LOG.warning( + "Finishing experiment: %s", self, exc_info=(exc_type, exc_val, exc_tb) + ) assert self._in_context self._teardown(is_ok) self._in_context = False @@ -168,7 +179,8 @@ def __repr__(self) -> str: def _setup(self) -> None: """ - Create a record of the new experiment or find an existing one in the storage. + Create a record of the new experiment or find an existing one in the + storage. This method is called by `Storage.Experiment.__enter__()`. 
""" @@ -187,36 +199,34 @@ def _teardown(self, is_ok: bool) -> None: @property def experiment_id(self) -> str: - """Get the Experiment's ID""" + """Get the Experiment's ID.""" return self._experiment_id @property def trial_id(self) -> int: - """Get the current Trial ID""" + """Get the current Trial ID.""" return self._trial_id @property def description(self) -> str: - """Get the Experiment's description""" + """Get the Experiment's description.""" return self._description @property def tunables(self) -> TunableGroups: - """Get the Experiment's tunables""" + """Get the Experiment's tunables.""" return self._tunables @property def opt_targets(self) -> Dict[str, Literal["min", "max"]]: - """ - Get the Experiment's optimization targets and directions - """ + """Get the Experiment's optimization targets and directions.""" return self._opt_targets @abstractmethod def merge(self, experiment_ids: List[str]) -> None: """ - Merge in the results of other (compatible) experiments trials. - Used to help warm up the optimizer for this experiment. + Merge in the results of other (compatible) experiments trials. Used to help + warm up the optimizer for this experiment. Parameters ---------- @@ -226,9 +236,7 @@ def merge(self, experiment_ids: List[str]) -> None: @abstractmethod def load_tunable_config(self, config_id: int) -> Dict[str, Any]: - """ - Load tunable values for a given config ID. - """ + """Load tunable values for a given config ID.""" @abstractmethod def load_telemetry(self, trial_id: int) -> List[Tuple[datetime, str, Any]]: @@ -247,8 +255,10 @@ def load_telemetry(self, trial_id: int) -> List[Tuple[datetime, str, Any]]: """ @abstractmethod - def load(self, last_trial_id: int = -1, - ) -> Tuple[List[int], List[dict], List[Optional[Dict[str, Any]]], List[Status]]: + def load( + self, + last_trial_id: int = -1, + ) -> Tuple[List[int], List[dict], List[Optional[Dict[str, Any]]], List[Status]]: """ Load (tunable values, benchmark scores, status) to warm-up the optimizer. @@ -268,10 +278,12 @@ def load(self, last_trial_id: int = -1, """ @abstractmethod - def pending_trials(self, timestamp: datetime, *, running: bool) -> Iterator['Storage.Trial']: + def pending_trials( + self, timestamp: datetime, *, running: bool + ) -> Iterator["Storage.Trial"]: """ - Return an iterator over the pending trials that are scheduled to run - on or before the specified timestamp. + Return an iterator over the pending trials that are scheduled to run on or + before the specified timestamp. Parameters ---------- @@ -288,8 +300,12 @@ def pending_trials(self, timestamp: datetime, *, running: bool) -> Iterator['Sto """ @abstractmethod - def new_trial(self, tunables: TunableGroups, ts_start: Optional[datetime] = None, - config: Optional[Dict[str, Any]] = None) -> 'Storage.Trial': + def new_trial( + self, + tunables: TunableGroups, + ts_start: Optional[datetime] = None, + config: Optional[Dict[str, Any]] = None, + ) -> "Storage.Trial": """ Create a new experiment run in the storage. @@ -313,13 +329,20 @@ class Trial(metaclass=ABCMeta): # pylint: disable=too-many-instance-attributes """ Base interface for storing the results of a single run of the experiment. + This class is instantiated in the `Storage.Experiment.trial()` method. 
""" - def __init__(self, *, - tunables: TunableGroups, experiment_id: str, trial_id: int, - tunable_config_id: int, opt_targets: Dict[str, Literal['min', 'max']], - config: Optional[Dict[str, Any]] = None): + def __init__( + self, + *, + tunables: TunableGroups, + experiment_id: str, + trial_id: int, + tunable_config_id: int, + opt_targets: Dict[str, Literal["min", "max"]], + config: Optional[Dict[str, Any]] = None, + ): self._tunables = tunables self._experiment_id = experiment_id self._trial_id = trial_id @@ -332,36 +355,28 @@ def __repr__(self) -> str: @property def trial_id(self) -> int: - """ - ID of the current trial. - """ + """ID of the current trial.""" return self._trial_id @property def tunable_config_id(self) -> int: - """ - ID of the current trial (tunable) configuration. - """ + """ID of the current trial (tunable) configuration.""" return self._tunable_config_id @property def trial_runner_id(self) -> Optional[int]: - """ - ID of the TrialRunner this trial is assigned to. - """ + """ID of the TrialRunner this trial is assigned to.""" return self._config.get("trial_runner_id") @property def opt_targets(self) -> Dict[str, Literal["min", "max"]]: - """ - Get the Trial's optimization targets and directions. - """ + """Get the Trial's optimization targets and directions.""" return self._opt_targets @property def tunables(self) -> TunableGroups: """ - Tunable parameters of the current trial + Tunable parameters of the current trial. (e.g., application Environment's "config") """ @@ -369,8 +384,8 @@ def tunables(self) -> TunableGroups: def config(self, global_config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: """ - Produce a copy of the global configuration updated - with the parameters of the current trial. + Produce a copy of the global configuration updated with the parameters of + the current trial. Note: this is not the target Environment's "config" (i.e., tunable params), but rather the internal "config" which consists of a @@ -387,9 +402,9 @@ def config(self, global_config: Optional[Dict[str, Any]] = None) -> Dict[str, An return config @abstractmethod - def update(self, status: Status, timestamp: datetime, - metrics: Optional[Dict[str, Any]] = None - ) -> Optional[Dict[str, Any]]: + def update( + self, status: Status, timestamp: datetime, metrics: Optional[Dict[str, Any]] = None + ) -> Optional[Dict[str, Any]]: """ Update the storage with the results of the experiment. @@ -413,14 +428,18 @@ def update(self, status: Status, timestamp: datetime, assert metrics is not None opt_targets = set(self._opt_targets.keys()) if not opt_targets.issubset(metrics.keys()): - _LOG.warning("Trial %s :: opt.targets missing: %s", - self, opt_targets.difference(metrics.keys())) + _LOG.warning( + "Trial %s :: opt.targets missing: %s", + self, + opt_targets.difference(metrics.keys()), + ) # raise ValueError() return metrics @abstractmethod - def update_telemetry(self, status: Status, timestamp: datetime, - metrics: List[Tuple[datetime, str, Any]]) -> None: + def update_telemetry( + self, status: Status, timestamp: datetime, metrics: List[Tuple[datetime, str, Any]] + ) -> None: """ Save the experiment's telemetry data and intermediate status. diff --git a/mlos_bench/mlos_bench/storage/base_trial_data.py b/mlos_bench/mlos_bench/storage/base_trial_data.py index 2c6e315f0f..39dfbe14af 100644 --- a/mlos_bench/mlos_bench/storage/base_trial_data.py +++ b/mlos_bench/mlos_bench/storage/base_trial_data.py @@ -2,41 +2,44 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
# -""" -Base interface for accessing the stored benchmark trial data. -""" +"""Base interface for accessing the stored benchmark trial data.""" from abc import ABCMeta, abstractmethod from datetime import datetime -from typing import Any, Dict, Optional, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Dict, Optional import pandas from pytz import UTC from mlos_bench.environments.status import Status -from mlos_bench.tunables.tunable import TunableValue from mlos_bench.storage.base_tunable_config_data import TunableConfigData from mlos_bench.storage.util import kv_df_to_dict +from mlos_bench.tunables.tunable import TunableValue if TYPE_CHECKING: - from mlos_bench.storage.base_tunable_config_trial_group_data import TunableConfigTrialGroupData + from mlos_bench.storage.base_tunable_config_trial_group_data import ( + TunableConfigTrialGroupData, + ) class TrialData(metaclass=ABCMeta): """ Base interface for accessing the stored experiment benchmark trial data. - A trial is a single run of an experiment with a given configuration (e.g., set - of tunable parameters). + A trial is a single run of an experiment with a given configuration (e.g., set of + tunable parameters). """ - def __init__(self, *, - experiment_id: str, - trial_id: int, - tunable_config_id: int, - ts_start: datetime, - ts_end: Optional[datetime], - status: Status, - trial_runner_id: Optional[int] = None): + def __init__( + self, + *, + experiment_id: str, + trial_id: int, + tunable_config_id: int, + ts_start: datetime, + ts_end: Optional[datetime], + status: Status, + trial_runner_id: Optional[int] = None, + ): self._experiment_id = experiment_id self._trial_id = trial_id self._tunable_config_id = tunable_config_id @@ -48,8 +51,10 @@ def __init__(self, *, self._trial_runner_id = trial_runner_id def __repr__(self) -> str: - return f"Trial :: {self._experiment_id}:{self._trial_id} cid:{self._tunable_config_id} " \ + return ( + f"Trial :: {self._experiment_id}:{self._trial_id} cid:{self._tunable_config_id} " + f"rid:{self._trial_runner_id} {self._status.name}" + ) def __eq__(self, other: Any) -> bool: if not isinstance(other, self.__class__): @@ -58,53 +63,39 @@ def __eq__(self, other: Any) -> bool: @property def experiment_id(self) -> str: - """ - ID of the experiment this trial belongs to. - """ + """ID of the experiment this trial belongs to.""" return self._experiment_id @property def trial_id(self) -> int: - """ - ID of the trial. - """ + """ID of the trial.""" return self._trial_id @property def trial_runner_id(self) -> Optional[int]: - """ - ID of the TrialRunner. - """ + """ID of the TrialRunner.""" if not self._trial_runner_id: self._trial_runner_id = self.metadata_dict.get("trial_runner_id") return self._trial_runner_id @property def ts_start(self) -> datetime: - """ - Start timestamp of the trial (UTC). - """ + """Start timestamp of the trial (UTC).""" return self._ts_start @property def ts_end(self) -> Optional[datetime]: - """ - End timestamp of the trial (UTC). - """ + """End timestamp of the trial (UTC).""" return self._ts_end @property def status(self) -> Status: - """ - Status of the trial. - """ + """Status of the trial.""" return self._status @property def tunable_config_id(self) -> int: - """ - ID of the (tunable) configuration of the trial. 
- """ + """ID of the (tunable) configuration of the trial.""" return self._tunable_config_id @property @@ -124,9 +115,7 @@ def tunable_config(self) -> TunableConfigData: @property @abstractmethod def tunable_config_trial_group(self) -> "TunableConfigTrialGroupData": - """ - Retrieve the trial's (tunable) config trial group data from the storage. - """ + """Retrieve the trial's (tunable) config trial group data from the storage.""" @property @abstractmethod diff --git a/mlos_bench/mlos_bench/storage/sql/common.py b/mlos_bench/mlos_bench/storage/sql/common.py index 2dcf3bb458..5c14ac562e 100644 --- a/mlos_bench/mlos_bench/storage/sql/common.py +++ b/mlos_bench/mlos_bench/storage/sql/common.py @@ -2,45 +2,50 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -""" -Common SQL methods for accessing the stored benchmark data. -""" +"""Common SQL methods for accessing the stored benchmark data.""" from typing import Dict, Optional import pandas -from sqlalchemy import Engine, Integer, func, and_, select +from sqlalchemy import Engine, Integer, and_, func, select from mlos_bench.environments.status import Status from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.storage.base_trial_data import TrialData from mlos_bench.storage.sql.schema import DbSchema -from mlos_bench.util import utcify_timestamp, utcify_nullable_timestamp +from mlos_bench.util import utcify_nullable_timestamp, utcify_timestamp def get_trials( - engine: Engine, - schema: DbSchema, - experiment_id: str, - tunable_config_id: Optional[int] = None) -> Dict[int, TrialData]: + engine: Engine, schema: DbSchema, experiment_id: str, tunable_config_id: Optional[int] = None +) -> Dict[int, TrialData]: """ - Gets TrialData for the given experiment_data and optionally additionally - restricted by tunable_config_id. + Gets TrialData for the given experiment_data and optionally additionally restricted + by tunable_config_id. + Used by both TunableConfigTrialGroupSqlData and ExperimentSqlData. """ - from mlos_bench.storage.sql.trial_data import TrialSqlData # pylint: disable=import-outside-toplevel,cyclic-import + from mlos_bench.storage.sql.trial_data import ( + TrialSqlData, # pylint: disable=import-outside-toplevel,cyclic-import + ) + with engine.connect() as conn: # Build up sql a statement for fetching trials. - stmt = schema.trial.select().join( - schema.trial_param, - schema.trial.c.trial_id == schema.trial_param.c.trial_id - and schema.trial.c.exp_id == schema.trial_param.c.exp_id - and schema.trial_param.c.param_id == "trial_runner_id", - isouter=True, - ).where( - schema.trial.c.exp_id == experiment_id, - ).order_by( - schema.trial.c.exp_id.asc(), - schema.trial.c.trial_id.asc(), + stmt = ( + schema.trial.select() + .join( + schema.trial_param, + schema.trial.c.trial_id == schema.trial_param.c.trial_id + and schema.trial.c.exp_id == schema.trial_param.c.exp_id + and schema.trial_param.c.param_id == "trial_runner_id", + isouter=True, + ) + .where( + schema.trial.c.exp_id == experiment_id, + ) + .order_by( + schema.trial.c.exp_id.asc(), + schema.trial.c.trial_id.asc(), + ) ) # Optionally restrict to those using a particular tunable config. 
if tunable_config_id is not None: @@ -65,27 +70,33 @@ def get_trials( def get_results_df( - engine: Engine, - schema: DbSchema, - experiment_id: str, - tunable_config_id: Optional[int] = None) -> pandas.DataFrame: + engine: Engine, schema: DbSchema, experiment_id: str, tunable_config_id: Optional[int] = None +) -> pandas.DataFrame: """ - Gets TrialData for the given experiment_data and optionally additionally - restricted by tunable_config_id. + Gets TrialData for the given experiment_data and optionally additionally restricted + by tunable_config_id. + Used by both TunableConfigTrialGroupSqlData and ExperimentSqlData. """ # pylint: disable=too-many-locals with engine.connect() as conn: # Compose a subquery to fetch the tunable_config_trial_group_id for each tunable config. - tunable_config_group_id_stmt = schema.trial.select().with_only_columns( - schema.trial.c.exp_id, - schema.trial.c.config_id, - func.min(schema.trial.c.trial_id).cast(Integer).label('tunable_config_trial_group_id'), - ).where( - schema.trial.c.exp_id == experiment_id, - ).group_by( - schema.trial.c.exp_id, - schema.trial.c.config_id, + tunable_config_group_id_stmt = ( + schema.trial.select() + .with_only_columns( + schema.trial.c.exp_id, + schema.trial.c.config_id, + func.min(schema.trial.c.trial_id) + .cast(Integer) + .label("tunable_config_trial_group_id"), + ) + .where( + schema.trial.c.exp_id == experiment_id, + ) + .group_by( + schema.trial.c.exp_id, + schema.trial.c.config_id, + ) ) # Optionally restrict to those using a particular tunable config. if tunable_config_id is not None: @@ -95,24 +106,29 @@ def get_results_df( tunable_config_trial_group_id_subquery = tunable_config_group_id_stmt.subquery() # Get each trial's metadata. - cur_trials_stmt = select( - schema.trial, - tunable_config_trial_group_id_subquery, - ).join( - schema.trial_param, - schema.trial.c.trial_id == schema.trial_param.c.trial_id - and schema.trial.c.exp_id == schema.trial_param.c.exp_id - and schema.trial_param.c.param_id == "trial_runner_id", - isouter=True, - ).where( - schema.trial.c.exp_id == experiment_id, - and_( - tunable_config_trial_group_id_subquery.c.exp_id == schema.trial.c.exp_id, - tunable_config_trial_group_id_subquery.c.config_id == schema.trial.c.config_id, - ), - ).order_by( - schema.trial.c.exp_id.asc(), - schema.trial.c.trial_id.asc(), + cur_trials_stmt = ( + select( + schema.trial, + tunable_config_trial_group_id_subquery, + ) + .join( + schema.trial_param, + schema.trial.c.trial_id == schema.trial_param.c.trial_id + and schema.trial.c.exp_id == schema.trial_param.c.exp_id + and schema.trial_param.c.param_id == "trial_runner_id", + isouter=True, + ) + .where( + schema.trial.c.exp_id == experiment_id, + and_( + tunable_config_trial_group_id_subquery.c.exp_id == schema.trial.c.exp_id, + tunable_config_trial_group_id_subquery.c.config_id == schema.trial.c.config_id, + ), + ) + .order_by( + schema.trial.c.exp_id.asc(), + schema.trial.c.trial_id.asc(), + ) ) # Optionally restrict to those using a particular tunable config. 
if tunable_config_id is not None: @@ -121,41 +137,50 @@ def get_results_df( ) cur_trials = conn.execute(cur_trials_stmt) trials_df = pandas.DataFrame( - [( - row.trial_id, - utcify_timestamp(row.ts_start, origin="utc"), - utcify_nullable_timestamp(row.ts_end, origin="utc"), - row.config_id, - row.tunable_config_trial_group_id, - row.status, - row.param_value, - ) for row in cur_trials.fetchall()], + [ + ( + row.trial_id, + utcify_timestamp(row.ts_start, origin="utc"), + utcify_nullable_timestamp(row.ts_end, origin="utc"), + row.config_id, + row.tunable_config_trial_group_id, + row.status, + row.param_value, + ) + for row in cur_trials.fetchall() + ], columns=[ - 'trial_id', - 'ts_start', - 'ts_end', - 'tunable_config_id', - 'tunable_config_trial_group_id', - 'status', - 'trial_runner_id', - ] + "trial_id", + "ts_start", + "ts_end", + "tunable_config_id", + "tunable_config_trial_group_id", + "status", + "trial_runner_id", + ], ) # Get each trial's config in wide format. - configs_stmt = schema.trial.select().with_only_columns( - schema.trial.c.trial_id, - schema.trial.c.config_id, - schema.config_param.c.param_id, - schema.config_param.c.param_value, - ).where( - schema.trial.c.exp_id == experiment_id, - ).join( - schema.config_param, - schema.config_param.c.config_id == schema.trial.c.config_id, - isouter=True - ).order_by( - schema.trial.c.trial_id, - schema.config_param.c.param_id, + configs_stmt = ( + schema.trial.select() + .with_only_columns( + schema.trial.c.trial_id, + schema.trial.c.config_id, + schema.config_param.c.param_id, + schema.config_param.c.param_value, + ) + .where( + schema.trial.c.exp_id == experiment_id, + ) + .join( + schema.config_param, + schema.config_param.c.config_id == schema.trial.c.config_id, + isouter=True, + ) + .order_by( + schema.trial.c.trial_id, + schema.config_param.c.param_id, + ) ) if tunable_config_id is not None: configs_stmt = configs_stmt.where( @@ -163,41 +188,67 @@ def get_results_df( ) configs = conn.execute(configs_stmt) configs_df = pandas.DataFrame( - [(row.trial_id, row.config_id, ExperimentData.CONFIG_COLUMN_PREFIX + row.param_id, row.param_value) - for row in configs.fetchall()], - columns=['trial_id', 'tunable_config_id', 'param', 'value'] + [ + ( + row.trial_id, + row.config_id, + ExperimentData.CONFIG_COLUMN_PREFIX + row.param_id, + row.param_value, + ) + for row in configs.fetchall() + ], + columns=["trial_id", "tunable_config_id", "param", "value"], ).pivot( - index=["trial_id", "tunable_config_id"], columns="param", values="value", + index=["trial_id", "tunable_config_id"], + columns="param", + values="value", ) - configs_df = configs_df.apply(pandas.to_numeric, errors='coerce').fillna(configs_df) # type: ignore[assignment] # (fp) + configs_df = configs_df.apply(pandas.to_numeric, errors="coerce").fillna(configs_df) # type: ignore[assignment] # (fp) # Get each trial's results in wide format. 
- results_stmt = schema.trial_result.select().with_only_columns( - schema.trial_result.c.trial_id, - schema.trial_result.c.metric_id, - schema.trial_result.c.metric_value, - ).where( - schema.trial_result.c.exp_id == experiment_id, - ).order_by( - schema.trial_result.c.trial_id, - schema.trial_result.c.metric_id, + results_stmt = ( + schema.trial_result.select() + .with_only_columns( + schema.trial_result.c.trial_id, + schema.trial_result.c.metric_id, + schema.trial_result.c.metric_value, + ) + .where( + schema.trial_result.c.exp_id == experiment_id, + ) + .order_by( + schema.trial_result.c.trial_id, + schema.trial_result.c.metric_id, + ) ) if tunable_config_id is not None: - results_stmt = results_stmt.join(schema.trial, and_( - schema.trial.c.exp_id == schema.trial_result.c.exp_id, - schema.trial.c.trial_id == schema.trial_result.c.trial_id, - schema.trial.c.config_id == tunable_config_id, - )) + results_stmt = results_stmt.join( + schema.trial, + and_( + schema.trial.c.exp_id == schema.trial_result.c.exp_id, + schema.trial.c.trial_id == schema.trial_result.c.trial_id, + schema.trial.c.config_id == tunable_config_id, + ), + ) results = conn.execute(results_stmt) results_df = pandas.DataFrame( - [(row.trial_id, ExperimentData.RESULT_COLUMN_PREFIX + row.metric_id, row.metric_value) - for row in results.fetchall()], - columns=['trial_id', 'metric', 'value'] + [ + ( + row.trial_id, + ExperimentData.RESULT_COLUMN_PREFIX + row.metric_id, + row.metric_value, + ) + for row in results.fetchall() + ], + columns=["trial_id", "metric", "value"], ).pivot( - index="trial_id", columns="metric", values="value", + index="trial_id", + columns="metric", + values="value", ) - results_df = results_df.apply(pandas.to_numeric, errors='coerce').fillna(results_df) # type: ignore[assignment] # (fp) + results_df = results_df.apply(pandas.to_numeric, errors="coerce").fillna(results_df) # type: ignore[assignment] # (fp) # Concat the trials, configs, and results. - return trials_df.merge(configs_df, on=["trial_id", "tunable_config_id"], how="left") \ - .merge(results_df, on="trial_id", how="left") + return trials_df.merge(configs_df, on=["trial_id", "tunable_config_id"], how="left").merge( + results_df, on="trial_id", how="left" + ) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 736c8251e9..cf911482e8 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -2,17 +2,26 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -""" -DB schema definition. -""" +"""DB schema definition.""" import logging -from typing import List, Any +from typing import Any, List from sqlalchemy import ( - Engine, MetaData, Dialect, create_mock_engine, - Table, Column, Sequence, Integer, Float, String, DateTime, - PrimaryKeyConstraint, ForeignKeyConstraint, UniqueConstraint, + Column, + DateTime, + Dialect, + Engine, + Float, + ForeignKeyConstraint, + Integer, + MetaData, + PrimaryKeyConstraint, + Sequence, + String, + Table, + UniqueConstraint, + create_mock_engine, ) _LOG = logging.getLogger(__name__) @@ -38,9 +47,7 @@ def __repr__(self) -> str: class DbSchema: - """ - A class to define and create the DB schema. - """ + """A class to define and create the DB schema.""" # This class is internal to SqlStorage and is mostly a struct # for all DB tables, so it's ok to disable the warnings. 
@@ -53,9 +60,7 @@ class DbSchema: _STATUS_LEN = 16 def __init__(self, engine: Engine): - """ - Declare the SQLAlchemy schema for the database. - """ + """Declare the SQLAlchemy schema for the database.""" _LOG.info("Create the DB schema for: %s", engine) self._engine = engine # TODO: bind for automatic schema updates? (#649) @@ -69,7 +74,6 @@ def __init__(self, engine: Engine): Column("root_env_config", String(1024), nullable=False), Column("git_repo", String(1024), nullable=False), Column("git_commit", String(40), nullable=False), - PrimaryKeyConstraint("exp_id"), ) @@ -84,20 +88,25 @@ def __init__(self, engine: Engine): # Will need to adjust the insert and return values to support this # eventually. Column("weight", Float, nullable=True), - PrimaryKeyConstraint("exp_id", "optimization_target"), ForeignKeyConstraint(["exp_id"], [self.experiment.c.exp_id]), ) # A workaround for SQLAlchemy issue with autoincrement in DuckDB: if engine.dialect.name == "duckdb": - seq_config_id = Sequence('seq_config_id') - col_config_id = Column("config_id", Integer, seq_config_id, - server_default=seq_config_id.next_value(), - nullable=False, primary_key=True) + seq_config_id = Sequence("seq_config_id") + col_config_id = Column( + "config_id", + Integer, + seq_config_id, + server_default=seq_config_id.next_value(), + nullable=False, + primary_key=True, + ) else: - col_config_id = Column("config_id", Integer, nullable=False, - primary_key=True, autoincrement=True) + col_config_id = Column( + "config_id", Integer, nullable=False, primary_key=True, autoincrement=True + ) self.config = Table( "config", @@ -116,7 +125,6 @@ def __init__(self, engine: Engine): Column("ts_end", DateTime), # Should match the text IDs of `mlos_bench.environments.Status` enum: Column("status", String(self._STATUS_LEN), nullable=False), - PrimaryKeyConstraint("exp_id", "trial_id"), ForeignKeyConstraint(["exp_id"], [self.experiment.c.exp_id]), ForeignKeyConstraint(["config_id"], [self.config.c.config_id]), @@ -130,7 +138,6 @@ def __init__(self, engine: Engine): Column("config_id", Integer, nullable=False), Column("param_id", String(self._ID_LEN), nullable=False), Column("param_value", String(self._PARAM_VALUE_LEN)), - PrimaryKeyConstraint("config_id", "param_id"), ForeignKeyConstraint(["config_id"], [self.config.c.config_id]), ) @@ -145,10 +152,10 @@ def __init__(self, engine: Engine): Column("trial_id", Integer, nullable=False), Column("param_id", String(self._ID_LEN), nullable=False), Column("param_value", String(self._PARAM_VALUE_LEN)), - PrimaryKeyConstraint("exp_id", "trial_id", "param_id"), - ForeignKeyConstraint(["exp_id", "trial_id"], - [self.trial.c.exp_id, self.trial.c.trial_id]), + ForeignKeyConstraint( + ["exp_id", "trial_id"], [self.trial.c.exp_id, self.trial.c.trial_id] + ), ) self.trial_status = Table( @@ -158,10 +165,10 @@ def __init__(self, engine: Engine): Column("trial_id", Integer, nullable=False), Column("ts", DateTime(timezone=True), nullable=False, default="now"), Column("status", String(self._STATUS_LEN), nullable=False), - UniqueConstraint("exp_id", "trial_id", "ts"), - ForeignKeyConstraint(["exp_id", "trial_id"], - [self.trial.c.exp_id, self.trial.c.trial_id]), + ForeignKeyConstraint( + ["exp_id", "trial_id"], [self.trial.c.exp_id, self.trial.c.trial_id] + ), ) self.trial_result = Table( @@ -171,10 +178,10 @@ def __init__(self, engine: Engine): Column("trial_id", Integer, nullable=False), Column("metric_id", String(self._ID_LEN), nullable=False), Column("metric_value", String(self._METRIC_VALUE_LEN)), - 
PrimaryKeyConstraint("exp_id", "trial_id", "metric_id"), - ForeignKeyConstraint(["exp_id", "trial_id"], - [self.trial.c.exp_id, self.trial.c.trial_id]), + ForeignKeyConstraint( + ["exp_id", "trial_id"], [self.trial.c.exp_id, self.trial.c.trial_id] + ), ) self.trial_telemetry = Table( @@ -185,26 +192,24 @@ def __init__(self, engine: Engine): Column("ts", DateTime(timezone=True), nullable=False, default="now"), Column("metric_id", String(self._ID_LEN), nullable=False), Column("metric_value", String(self._METRIC_VALUE_LEN)), - UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), - ForeignKeyConstraint(["exp_id", "trial_id"], - [self.trial.c.exp_id, self.trial.c.trial_id]), + ForeignKeyConstraint( + ["exp_id", "trial_id"], [self.trial.c.exp_id, self.trial.c.trial_id] + ), ) _LOG.debug("Schema: %s", self._meta) - def create(self) -> 'DbSchema': - """ - Create the DB schema. - """ + def create(self) -> "DbSchema": + """Create the DB schema.""" _LOG.info("Create the DB schema") self._meta.create_all(self._engine) return self def __repr__(self) -> str: """ - Produce a string with all SQL statements required to create the schema - from scratch in current SQL dialect. + Produce a string with all SQL statements required to create the schema from + scratch in current SQL dialect. That is, return a collection of CREATE TABLE statements and such. NOTE: this method is quite heavy! We use it only once at startup diff --git a/mlos_bench/mlos_bench/storage/sql/trial_data.py b/mlos_bench/mlos_bench/storage/sql/trial_data.py index 83615af902..9510dd9752 100644 --- a/mlos_bench/mlos_bench/storage/sql/trial_data.py +++ b/mlos_bench/mlos_bench/storage/sql/trial_data.py @@ -2,41 +2,42 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -""" -An interface to access the benchmark trial data stored in SQL DB. -""" +"""An interface to access the benchmark trial data stored in SQL DB.""" from datetime import datetime -from typing import Optional, TYPE_CHECKING +from typing import TYPE_CHECKING, Optional import pandas from sqlalchemy import Engine +from mlos_bench.environments.status import Status from mlos_bench.storage.base_trial_data import TrialData from mlos_bench.storage.base_tunable_config_data import TunableConfigData -from mlos_bench.environments.status import Status from mlos_bench.storage.sql.schema import DbSchema from mlos_bench.storage.sql.tunable_config_data import TunableConfigSqlData from mlos_bench.util import utcify_timestamp if TYPE_CHECKING: - from mlos_bench.storage.base_tunable_config_trial_group_data import TunableConfigTrialGroupData + from mlos_bench.storage.base_tunable_config_trial_group_data import ( + TunableConfigTrialGroupData, + ) class TrialSqlData(TrialData): - """ - An interface to access the trial data stored in the SQL DB. 
- """ + """An interface to access the trial data stored in the SQL DB.""" - def __init__(self, *, - engine: Engine, - schema: DbSchema, - experiment_id: str, - trial_id: int, - config_id: int, - ts_start: datetime, - ts_end: Optional[datetime], - status: Status, - trial_runner_id: Optional[int] = None): + def __init__( + self, + *, + engine: Engine, + schema: DbSchema, + experiment_id: str, + trial_id: int, + config_id: int, + ts_start: datetime, + ts_end: Optional[datetime], + status: Status, + trial_runner_id: Optional[int] = None, + ): super().__init__( experiment_id=experiment_id, trial_id=trial_id, @@ -56,49 +57,57 @@ def tunable_config(self) -> TunableConfigData: Note: this corresponds to the Trial object's "tunables" property. """ - return TunableConfigSqlData(engine=self._engine, schema=self._schema, - tunable_config_id=self._tunable_config_id) + return TunableConfigSqlData( + engine=self._engine, schema=self._schema, tunable_config_id=self._tunable_config_id + ) @property def tunable_config_trial_group(self) -> "TunableConfigTrialGroupData": - """ - Retrieve the trial's tunable config group configuration data from the storage. + """Retrieve the trial's tunable config group configuration data from the + storage. """ # pylint: disable=import-outside-toplevel - from mlos_bench.storage.sql.tunable_config_trial_group_data import TunableConfigTrialGroupSqlData - return TunableConfigTrialGroupSqlData(engine=self._engine, schema=self._schema, - experiment_id=self._experiment_id, - tunable_config_id=self._tunable_config_id) + from mlos_bench.storage.sql.tunable_config_trial_group_data import ( + TunableConfigTrialGroupSqlData, + ) + + return TunableConfigTrialGroupSqlData( + engine=self._engine, + schema=self._schema, + experiment_id=self._experiment_id, + tunable_config_id=self._tunable_config_id, + ) @property def results_df(self) -> pandas.DataFrame: - """ - Retrieve the trials' results from the storage. - """ + """Retrieve the trials' results from the storage.""" with self._engine.connect() as conn: cur_results = conn.execute( - self._schema.trial_result.select().where( + self._schema.trial_result.select() + .where( self._schema.trial_result.c.exp_id == self._experiment_id, - self._schema.trial_result.c.trial_id == self._trial_id - ).order_by( + self._schema.trial_result.c.trial_id == self._trial_id, + ) + .order_by( self._schema.trial_result.c.metric_id, ) ) return pandas.DataFrame( [(row.metric_id, row.metric_value) for row in cur_results.fetchall()], - columns=['metric', 'value']) + columns=["metric", "value"], + ) @property def telemetry_df(self) -> pandas.DataFrame: - """ - Retrieve the trials' telemetry from the storage. - """ + """Retrieve the trials' telemetry from the storage.""" with self._engine.connect() as conn: cur_telemetry = conn.execute( - self._schema.trial_telemetry.select().where( + self._schema.trial_telemetry.select() + .where( self._schema.trial_telemetry.c.exp_id == self._experiment_id, - self._schema.trial_telemetry.c.trial_id == self._trial_id - ).order_by( + self._schema.trial_telemetry.c.trial_id == self._trial_id, + ) + .order_by( self._schema.trial_telemetry.c.ts, self._schema.trial_telemetry.c.metric_id, ) @@ -106,8 +115,12 @@ def telemetry_df(self) -> pandas.DataFrame: # Not all storage backends store the original zone info. # We try to ensure data is entered in UTC and augment it on return again here. 
return pandas.DataFrame( - [(utcify_timestamp(row.ts, origin="utc"), row.metric_id, row.metric_value) for row in cur_telemetry.fetchall()], - columns=['ts', 'metric', 'value']) + [ + (utcify_timestamp(row.ts, origin="utc"), row.metric_id, row.metric_value) + for row in cur_telemetry.fetchall() + ], + columns=["ts", "metric", "value"], + ) @property def metadata_df(self) -> pandas.DataFrame: @@ -118,13 +131,16 @@ def metadata_df(self) -> pandas.DataFrame: """ with self._engine.connect() as conn: cur_params = conn.execute( - self._schema.trial_param.select().where( + self._schema.trial_param.select() + .where( self._schema.trial_param.c.exp_id == self._experiment_id, - self._schema.trial_param.c.trial_id == self._trial_id - ).order_by( + self._schema.trial_param.c.trial_id == self._trial_id, + ) + .order_by( self._schema.trial_param.c.param_id, ) ) return pandas.DataFrame( [(row.param_id, row.param_value) for row in cur_params.fetchall()], - columns=['parameter', 'value']) + columns=["parameter", "value"], + ) diff --git a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py index a736592438..be91ca7721 100644 --- a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py @@ -14,11 +14,10 @@ import pytest +from mlos_bench.config.schemas import ConfigSchema from mlos_bench.launcher import Launcher -from mlos_bench.optimizers import OneShotOptimizer, MlosCoreOptimizer +from mlos_bench.optimizers import MlosCoreOptimizer, OneShotOptimizer from mlos_bench.os_environ import environ -from mlos_bench.config.schemas import ConfigSchema -from mlos_bench.util import path_join from mlos_bench.schedulers import SyncScheduler from mlos_bench.services.types import ( SupportsAuth, @@ -28,6 +27,7 @@ SupportsRemoteExec, ) from mlos_bench.tests import check_class_name +from mlos_bench.util import path_join if sys.version_info < (3, 10): from importlib_resources import files @@ -48,13 +48,13 @@ def config_paths() -> List[str]: """ return [ path_join(os.getcwd(), abs_path=True), - str(files('mlos_bench.config')), - str(files('mlos_bench.tests.config')), + str(files("mlos_bench.config")), + str(files("mlos_bench.tests.config")), ] # This is part of the minimal required args by the Launcher. -ENV_CONF_PATH = 'environments/mock/mock_env.jsonc' +ENV_CONF_PATH = "environments/mock/mock_env.jsonc" def _get_launcher(desc: str, cli_args: str) -> Launcher: @@ -63,167 +63,200 @@ def _get_launcher(desc: str, cli_args: str) -> Launcher: # variable so we use a separate variable. # See global_test_config.jsonc for more details. environ["CUSTOM_PATH_FROM_ENV"] = os.getcwd() - if sys.platform == 'win32': + if sys.platform == "win32": # Some env tweaks for platform compatibility. - environ['USER'] = environ['USERNAME'] + environ["USER"] = environ["USERNAME"] launcher = Launcher(description=desc, argv=cli_args.split()) # Check the basic parent service assert isinstance(launcher.service, SupportsConfigLoading) # built-in - assert isinstance(launcher.service, SupportsLocalExec) # built-in + assert isinstance(launcher.service, SupportsLocalExec) # built-in # All trial runners should have the same Environment class. - assert len(set(trial_runner.environment.__class__ for trial_runner in launcher.trial_runners)) == 1 + assert ( + len(set(trial_runner.environment.__class__ for trial_runner in launcher.trial_runners)) + == 1 + ) # Make sure that each trial runner has a unique ID. 
- assert set(trial_runner.environment.const_args["trial_runner_id"] for trial_runner in launcher.trial_runners) \ - == set(range(0, len(launcher.trial_runners))) + assert set( + trial_runner.environment.const_args["trial_runner_id"] + for trial_runner in launcher.trial_runners + ) == set(range(0, len(launcher.trial_runners))) return launcher def test_launcher_args_parse_defaults(config_paths: List[str]) -> None: + """Test that we get the defaults we expect when using minimal config arg + examples. """ - Test that we get the defaults we expect when using minimal config arg examples. - """ - cli_args = '--config-paths ' + ' '.join(config_paths) + \ - f' --environment {ENV_CONF_PATH}' + \ - ' --globals globals/global_test_config.jsonc' + cli_args = ( + "--config-paths " + + " ".join(config_paths) + + f" --environment {ENV_CONF_PATH}" + + " --globals globals/global_test_config.jsonc" + ) launcher = _get_launcher(__name__, cli_args) # Check that the first --globals file is loaded and $var expansion is handled. - assert launcher.global_config['experiment_id'] == 'MockExperiment' - assert launcher.global_config['testVmName'] == 'MockExperiment-vm' + assert launcher.global_config["experiment_id"] == "MockExperiment" + assert launcher.global_config["testVmName"] == "MockExperiment-vm" # Check that secondary expansion also works. - assert launcher.global_config['testVnetName'] == 'MockExperiment-vm-vnet' + assert launcher.global_config["testVnetName"] == "MockExperiment-vm-vnet" # Check that we can expand a $var in a config file that references an environment variable. - assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) \ - == path_join(os.getcwd(), "foo", abs_path=True) - assert launcher.global_config["varWithEnvVarRef"] == f'user:{getuser()}' - assert launcher.teardown # defaults + assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) == path_join( + os.getcwd(), "foo", abs_path=True + ) + assert launcher.global_config["varWithEnvVarRef"] == f"user:{getuser()}" + assert launcher.teardown # defaults # Make sure we have the right number of trial runners. - assert len(launcher.trial_runners) == 1 # defaults + assert len(launcher.trial_runners) == 1 # defaults # Check that the environment that got loaded looks to be of the right type. env_config = launcher.config_loader.load_config(ENV_CONF_PATH, ConfigSchema.ENVIRONMENT) assert env_config["class"] == "mlos_bench.environments.mock_env.MockEnv" # All TrialRunners should get the same Environment. - assert all(check_class_name(trial_runner.environment, env_config['class']) for trial_runner in launcher.trial_runners) + assert all( + check_class_name(trial_runner.environment, env_config["class"]) + for trial_runner in launcher.trial_runners + ) # Check that the optimizer looks right. assert isinstance(launcher.optimizer, OneShotOptimizer) # Check that the optimizer got initialized with defaults. 
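The is_defaults() check below asserts that no tunable has been moved off its default value. Roughly, such a predicate amounts to the comparison sketched here (an assumed shape only — the real TunableGroups class tracks covariant groups and much more state):

from typing import Dict, Optional, Union

TunableValue = Union[int, float, str, None]

def is_defaults(current: Dict[str, TunableValue], defaults: Dict[str, TunableValue]) -> bool:
    """True if every tunable still has its default value (illustrative only)."""
    return all(current[name] == default for (name, default) in defaults.items())

assert is_defaults({"vm_size": "Standard_B2s"}, {"vm_size": "Standard_B2s"})
assert not is_defaults({"vm_size": "Standard_B4ms"}, {"vm_size": "Standard_B2s"})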
assert launcher.optimizer.tunable_params.is_defaults() - assert launcher.optimizer.max_iterations == 1 # value for OneShotOptimizer + assert launcher.optimizer.max_iterations == 1 # value for OneShotOptimizer # Check that we pick up the right scheduler config: assert isinstance(launcher.scheduler, SyncScheduler) - assert launcher.scheduler.trial_config_repeat_count == 1 # default - assert launcher.scheduler.max_trials == -1 # default + assert launcher.scheduler.trial_config_repeat_count == 1 # default + assert launcher.scheduler.max_trials == -1 # default def test_launcher_args_parse_1(config_paths: List[str]) -> None: """ - Test that using multiple --globals arguments works and that multiple space - separated options to --config-paths works. + Test that using multiple --globals arguments works and that multiple space separated + options to --config-paths works. + Check $var expansion and Environment loading. """ # Here we have multiple paths following --config-paths and --service. - cli_args = '--config-paths ' + ' '.join(config_paths) + \ - ' --num-trial-runners 5' + \ - ' --service services/remote/mock/mock_auth_service.jsonc' + \ - ' services/remote/mock/mock_remote_exec_service.jsonc' + \ - ' --scheduler schedulers/sync_scheduler.jsonc' + \ - f' --environment {ENV_CONF_PATH}' + \ - ' --globals globals/global_test_config.jsonc' + \ - ' --globals globals/global_test_extra_config.jsonc' \ - ' --test_global_value_2 from-args' + cli_args = ( + "--config-paths " + + " ".join(config_paths) + + " --num-trial-runners 5" + + " --service services/remote/mock/mock_auth_service.jsonc" + + " services/remote/mock/mock_remote_exec_service.jsonc" + + " --scheduler schedulers/sync_scheduler.jsonc" + + f" --environment {ENV_CONF_PATH}" + + " --globals globals/global_test_config.jsonc" + + " --globals globals/global_test_extra_config.jsonc" + " --test_global_value_2 from-args" + ) launcher = _get_launcher(__name__, cli_args) # Check some additional features of the the parent service - assert isinstance(launcher.service, SupportsAuth) # from --service - assert isinstance(launcher.service, SupportsRemoteExec) # from --service + assert isinstance(launcher.service, SupportsAuth) # from --service + assert isinstance(launcher.service, SupportsRemoteExec) # from --service # Check that the first --globals file is loaded and $var expansion is handled. - assert launcher.global_config['experiment_id'] == 'MockExperiment' - assert launcher.global_config['testVmName'] == 'MockExperiment-vm' + assert launcher.global_config["experiment_id"] == "MockExperiment" + assert launcher.global_config["testVmName"] == "MockExperiment-vm" # Check that secondary expansion also works. - assert launcher.global_config['testVnetName'] == 'MockExperiment-vm-vnet' + assert launcher.global_config["testVnetName"] == "MockExperiment-vm-vnet" # Check that the second --globals file is loaded. - assert launcher.global_config['test_global_value'] == 'from-file' + assert launcher.global_config["test_global_value"] == "from-file" # Check overriding values in a file from the command line. - assert launcher.global_config['test_global_value_2'] == 'from-args' + assert launcher.global_config["test_global_value_2"] == "from-args" # Check that we can expand a $var in a config file that references an environment variable. 
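The assertions below exercise $var expansion where the variable's value itself comes from the environment (CUSTOM_PATH_FROM_ENV is set to os.getcwd() in _get_launcher above). A small standard-library sketch of that two-step substitution, assuming the config value references $CUSTOM_PATH_FROM_ENV — mlos_bench's own DictTemplater handles nested structures and more:

import os
from string import Template

os.environ["CUSTOM_PATH_FROM_ENV"] = os.getcwd()

global_config = {
    # Assumed shape of the config value: a $var naming an environment variable.
    "pathVarWithEnvVarRef": "$CUSTOM_PATH_FROM_ENV/foo",
}
expanded = {
    key: Template(value).safe_substitute(os.environ)
    for (key, value) in global_config.items()
}
assert expanded["pathVarWithEnvVarRef"] == os.getcwd() + "/foo"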
- assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) \ - == path_join(os.getcwd(), "foo", abs_path=True) - assert launcher.global_config["varWithEnvVarRef"] == f'user:{getuser()}' + assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) == path_join( + os.getcwd(), "foo", abs_path=True + ) + assert launcher.global_config["varWithEnvVarRef"] == f"user:{getuser()}" assert launcher.teardown # Make sure we have the right number of trial runners. - assert len(launcher.trial_runners) == 5 # from cli args + assert len(launcher.trial_runners) == 5 # from cli args # Check that the environment that got loaded looks to be of the right type. env_config = launcher.config_loader.load_config(ENV_CONF_PATH, ConfigSchema.ENVIRONMENT) assert env_config["class"] == "mlos_bench.environments.mock_env.MockEnv" # All TrialRunners should get the same Environment. - assert all(check_class_name(trial_runner.environment, env_config['class']) for trial_runner in launcher.trial_runners) + assert all( + check_class_name(trial_runner.environment, env_config["class"]) + for trial_runner in launcher.trial_runners + ) # Check that the optimizer looks right. assert isinstance(launcher.optimizer, OneShotOptimizer) # Check that the optimizer got initialized with defaults. assert launcher.optimizer.tunable_params.is_defaults() - assert launcher.optimizer.max_iterations == 1 # value for OneShotOptimizer + assert launcher.optimizer.max_iterations == 1 # value for OneShotOptimizer # Check that we pick up the right scheduler config: assert isinstance(launcher.scheduler, SyncScheduler) - assert launcher.scheduler.trial_config_repeat_count == 3 # from the custom sync_scheduler.jsonc config + assert ( + launcher.scheduler.trial_config_repeat_count == 3 + ) # from the custom sync_scheduler.jsonc config assert launcher.scheduler.max_trials == -1 def test_launcher_args_parse_2(config_paths: List[str]) -> None: - """ - Test multiple --config-path instances, --config file vs --arg, --var=val + """Test multiple --config-path instances, --config file vs --arg, --var=val overrides, $var templates, option args, --random-init, etc. """ - config_file = 'cli/test-cli-config.jsonc' - globals_file = 'globals/global_test_config.jsonc' + config_file = "cli/test-cli-config.jsonc" + globals_file = "globals/global_test_config.jsonc" # Here we have multiple --config-path and --service args, each with their own path. 
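Before the second test builds its CLI arguments below, note the precedence the first test relied on: later --globals files layer on top of earlier ones, and bare --key value arguments win over both. A toy illustration of that layering (a sketch of the idea, not the Launcher's actual merge code):

first_globals = {"test_global_value": "from-file", "test_global_value_2": "also-from-file"}
extra_globals = {"test_global_value_2": "from-extra-file"}
cli_overrides = {"test_global_value_2": "from-args"}

merged = {}
for layer in (first_globals, extra_globals, cli_overrides):
    merged.update(layer)  # later layers win

assert merged["test_global_value"] == "from-file"
assert merged["test_global_value_2"] == "from-args"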
- cli_args = ' '.join([f"--config-path {config_path}" for config_path in config_paths]) + \ - f' --config {config_file}' + \ - ' --service services/remote/mock/mock_auth_service.jsonc' + \ - ' --service services/remote/mock/mock_remote_exec_service.jsonc' + \ - f' --globals {globals_file}' + \ - ' --experiment_id MockeryExperiment' + \ - ' --no-teardown' + \ - ' --random-init' + \ - ' --random-seed 1234' + \ - ' --trial-config-repeat-count 5' + \ - ' --max_trials 200' + cli_args = ( + " ".join([f"--config-path {config_path}" for config_path in config_paths]) + + f" --config {config_file}" + + " --service services/remote/mock/mock_auth_service.jsonc" + + " --service services/remote/mock/mock_remote_exec_service.jsonc" + + f" --globals {globals_file}" + + " --experiment_id MockeryExperiment" + + " --no-teardown" + + " --random-init" + + " --random-seed 1234" + + " --trial-config-repeat-count 5" + + " --max_trials 200" + ) launcher = _get_launcher(__name__, cli_args) # Check some additional features of the the parent service - assert isinstance(launcher.service, SupportsAuth) # from --service - assert isinstance(launcher.service, SupportsFileShareOps) # from --config - assert isinstance(launcher.service, SupportsRemoteExec) # from --service + assert isinstance(launcher.service, SupportsAuth) # from --service + assert isinstance(launcher.service, SupportsFileShareOps) # from --config + assert isinstance(launcher.service, SupportsRemoteExec) # from --service # Check that the --globals file is loaded and $var expansion is handled # using the value provided on the CLI. - assert launcher.global_config['experiment_id'] == 'MockeryExperiment' - assert launcher.global_config['testVmName'] == 'MockeryExperiment-vm' + assert launcher.global_config["experiment_id"] == "MockeryExperiment" + assert launcher.global_config["testVmName"] == "MockeryExperiment-vm" # Check that secondary expansion also works. - assert launcher.global_config['testVnetName'] == 'MockeryExperiment-vm-vnet' + assert launcher.global_config["testVnetName"] == "MockeryExperiment-vm-vnet" # Check that we can expand a $var in a config file that references an environment variable. - assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) \ - == path_join(os.getcwd(), "foo", abs_path=True) - assert launcher.global_config["varWithEnvVarRef"] == f'user:{getuser()}' + assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) == path_join( + os.getcwd(), "foo", abs_path=True + ) + assert launcher.global_config["varWithEnvVarRef"] == f"user:{getuser()}" assert not launcher.teardown config = launcher.config_loader.load_config(config_file, ConfigSchema.CLI) - assert launcher.config_loader.config_paths == [path_join(path, abs_path=True) for path in config_paths + config['config_path']] + assert launcher.config_loader.config_paths == [ + path_join(path, abs_path=True) for path in config_paths + config["config_path"] + ] # Make sure we have the right number of trial runners. - assert len(launcher.trial_runners) == 3 # from test-cli-config.jsonc + assert len(launcher.trial_runners) == 3 # from test-cli-config.jsonc # Check that the environment that got loaded looks to be of the right type. - env_config_file = config['environment'] + env_config_file = config["environment"] env_config = launcher.config_loader.load_config(env_config_file, ConfigSchema.ENVIRONMENT) # All TrialRunners should get the same Environment. 
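The assertions just below use the check_class_name test helper to compare a live object against the fully-qualified class name taken from its JSON config. A minimal sketch of such a helper, assuming it matches on module path plus class name (the real helper in mlos_bench.tests may differ in details):

from typing import Any

def check_class_name_sketch(obj: Any, expected_class_name: str) -> bool:
    """True if obj's fully-qualified class name matches (illustrative only)."""
    full_name = f"{obj.__class__.__module__}.{obj.__class__.__name__}"
    return full_name == expected_class_name

# e.g., a MockEnv instance would match "mlos_bench.environments.mock_env.MockEnv".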
- assert all(check_class_name(trial_runner.environment, env_config['class']) for trial_runner in launcher.trial_runners) + assert all( + check_class_name(trial_runner.environment, env_config["class"]) + for trial_runner in launcher.trial_runners + ) # Check that the optimizer looks right. assert isinstance(launcher.optimizer, MlosCoreOptimizer) - opt_config_file = config['optimizer'] + opt_config_file = config["optimizer"] opt_config = launcher.config_loader.load_config(opt_config_file, ConfigSchema.OPTIMIZER) globals_file_config = launcher.config_loader.load_config(globals_file, ConfigSchema.GLOBALS) # The actual global_config gets overwritten as a part of processing, so to test # this we read the original value out of the source files. - orig_max_iters = globals_file_config.get('max_suggestions', opt_config.get('config', {}).get('max_suggestions', 100)) - assert launcher.optimizer.max_iterations \ - == orig_max_iters \ - == launcher.global_config['max_suggestions'] + orig_max_iters = globals_file_config.get( + "max_suggestions", opt_config.get("config", {}).get("max_suggestions", 100) + ) + assert ( + launcher.optimizer.max_iterations + == orig_max_iters + == launcher.global_config["max_suggestions"] + ) # Check that the optimizer got initialized with random values instead of the defaults. # Note: the environment doesn't get updated until suggest() is called to @@ -240,12 +273,12 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: assert launcher.scheduler.max_trials == 200 # Check that the value from the file is overridden by the CLI arg. - assert config['random_seed'] == 42 + assert config["random_seed"] == 42 # TODO: This isn't actually respected yet because the `--random-init` only # applies to a temporary Optimizer used to populate the initial values via # random sampling. # assert launcher.optimizer.seed == 1234 -if __name__ == '__main__': +if __name__ == "__main__": pytest.main([__file__, "-n1"]) From 9023eb73a876a4e714bfdbde8744206c37343a0b Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 22 Jul 2024 19:49:12 +0000 Subject: [PATCH 093/121] slurp some files from main --- .editorconfig | 3 + .pylintrc | 52 ----- Makefile | 535 +++++++++++++++++++++++++++++++++++++------------ pyproject.toml | 73 +++++++ setup.cfg | 20 +- 5 files changed, 498 insertions(+), 185 deletions(-) delete mode 100644 .pylintrc create mode 100644 pyproject.toml diff --git a/.editorconfig b/.editorconfig index e984d47595..7e753174de 100644 --- a/.editorconfig +++ b/.editorconfig @@ -12,6 +12,9 @@ charset = utf-8 # Note: this is not currently supported by all editors or their editorconfig plugins. max_line_length = 132 +[*.py] +max_line_length = 99 + # Makefiles need tab indentation [{Makefile,*.mk}] indent_style = tab diff --git a/.pylintrc b/.pylintrc deleted file mode 100644 index 6b308d1966..0000000000 --- a/.pylintrc +++ /dev/null @@ -1,52 +0,0 @@ -# vim: set ft=dosini: - -[MAIN] -# Specify a score threshold to be exceeded before program exits with error. -fail-under=9.9 - -# Make sure public methods are documented. -# See Also: https://github.com/PyCQA/pydocstyle/issues/309#issuecomment-1426642147 -# Also fail on unused imports. -fail-on= - missing-function-docstring, - unused-import - -# Ignore pylint complaints about an upstream dependency. -ignored-modules=ConfigSpace.hyperparameters - -# Help inform pylint where to find the project's source code without needing to relyon PYTHONPATH. 
-#init-hook="from pylint.config import find_pylintrc; import os, sys; sys.path.append(os.path.dirname(find_pylintrc())); from logging import warning; warning(sys.path)" -init-hook="from logging import warning; warning(sys.path)" - -# Load some extra checkers. -load-plugins= - pylint.extensions.bad_builtin, - pylint.extensions.code_style, - pylint.extensions.docparams, - pylint.extensions.docstyle, - pylint.extensions.for_any_all, - pylint.extensions.mccabe, - pylint.extensions.no_self_use, - pylint.extensions.private_import, - pylint.extensions.redefined_loop_name, - pylint.extensions.redefined_variable_type, - pylint.extensions.set_membership, - pylint.extensions.typing - -[FORMAT] -# Maximum number of characters on a single line. -max-line-length=132 - -[MESSAGE CONTROL] -disable= - fixme, - no-else-return, - consider-using-assignment-expr, - deprecated-typing-alias, # disable for now - only deprecated recently - docstring-first-line-empty, - consider-alternative-union-syntax, # disable for now - still supporting python 3.8 - missing-raises-doc - -[STRING] -#check-quote-consistency=yes -check-str-concat-over-line-jumps=yes diff --git a/Makefile b/Makefile index a62cd8daea..297ba39303 100644 --- a/Makefile +++ b/Makefile @@ -26,18 +26,26 @@ MAKEFLAGS += -j$(shell nproc) #MAKEFLAGS += -Oline .PHONY: all -all: check test dist dist-test doc licenseheaders +all: format check test dist dist-test doc | conda-env .PHONY: conda-env conda-env: build/conda-env.${CONDA_ENV_NAME}.build-stamp -build/conda-env.${CONDA_ENV_NAME}.build-stamp: ${ENV_YML} mlos_core/setup.py mlos_bench/setup.py mlos_viz/setup.py +MLOS_CORE_CONF_FILES := mlos_core/pyproject.toml mlos_core/setup.py mlos_core/MANIFEST.in +MLOS_BENCH_CONF_FILES := mlos_bench/pyproject.toml mlos_bench/setup.py mlos_bench/MANIFEST.in +MLOS_VIZ_CONF_FILES := mlos_viz/pyproject.toml mlos_viz/setup.py mlos_viz/MANIFEST.in +MLOS_GLOBAL_CONF_FILES := setup.cfg pyproject.toml + +MLOS_PKGS := mlos_core mlos_bench mlos_viz +MLOS_PKG_CONF_FILES := $(MLOS_CORE_CONF_FILES) $(MLOS_BENCH_CONF_FILES) $(MLOS_VIZ_CONF_FILES) $(MLOS_GLOBAL_CONF_FILES) + +build/conda-env.${CONDA_ENV_NAME}.build-stamp: ${ENV_YML} $(MLOS_PKG_CONF_FILES) @echo "CONDA_SOLVER: ${CONDA_SOLVER}" @echo "CONDA_EXPERIMENTAL_SOLVER: ${CONDA_EXPERIMENTAL_SOLVER}" @echo "CONDA_INFO_LEVEL: ${CONDA_INFO_LEVEL}" conda env list -q | grep -q "^${CONDA_ENV_NAME} " || conda env create ${CONDA_INFO_LEVEL} -n ${CONDA_ENV_NAME} -f ${ENV_YML} conda env update ${CONDA_INFO_LEVEL} -n ${CONDA_ENV_NAME} --prune -f ${ENV_YML} - $(MAKE) clean-check clean-test clean-doc + $(MAKE) clean-format clean-check clean-test clean-doc clean-dist touch $@ .PHONY: clean-conda-env @@ -45,51 +53,254 @@ clean-conda-env: conda env remove -y ${CONDA_INFO_LEVEL} -n ${CONDA_ENV_NAME} rm -f build/conda-env.${CONDA_ENV_NAME}.build-stamp + +# Since these targets potentially change the files we need to run them in sequence. +# In future versions of make we can do that by marking each as a .NOTPARALLEL psuedo target. +# But with make 4.3 that changes the entire Makefile to be serial. + +# Here we make dynamic prereqs to apply to other targets that need to run in sequence. 
+FORMAT_PREREQS := + +.PHONY: format +format: build/format.${CONDA_ENV_NAME}.build-stamp + +ifneq (,$(filter format,$(MAKECMDGOALS))) + FORMAT_PREREQS += build/format.${CONDA_ENV_NAME}.build-stamp +endif + +build/format.${CONDA_ENV_NAME}.build-stamp: build/licenseheaders.${CONDA_ENV_NAME}.build-stamp +build/format.${CONDA_ENV_NAME}.build-stamp: build/isort.${CONDA_ENV_NAME}.build-stamp +build/format.${CONDA_ENV_NAME}.build-stamp: build/black.${CONDA_ENV_NAME}.build-stamp +build/format.${CONDA_ENV_NAME}.build-stamp: build/docformatter.${CONDA_ENV_NAME}.build-stamp +build/format.${CONDA_ENV_NAME}.build-stamp: + touch $@ + +.PHONY: licenseheaders +licenseheaders: build/licenseheaders.${CONDA_ENV_NAME}.build-stamp + +ifneq (,$(filter licenseheaders,$(MAKECMDGOALS))) + FORMAT_PREREQS += build/licenseheaders.${CONDA_ENV_NAME}.build-stamp +endif + +build/licenseheaders.${CONDA_ENV_NAME}.build-stamp: build/conda-env.${CONDA_ENV_NAME}.build-stamp +build/licenseheaders.${CONDA_ENV_NAME}.build-stamp: $(PYTHON_FILES) +build/licenseheaders.${CONDA_ENV_NAME}.build-stamp: $(SCRIPT_FILES) +build/licenseheaders.${CONDA_ENV_NAME}.build-stamp: $(SQL_FILES) doc/mit-license.tmpl +build/licenseheaders.${CONDA_ENV_NAME}.build-stamp: doc/mit-license.tmpl +build/licenseheaders.${CONDA_ENV_NAME}.build-stamp: + # Note: to avoid makefile dependency loops, we don't touch the setup.py + # files as that would force the conda-env to be rebuilt. + conda run -n ${CONDA_ENV_NAME} licenseheaders -t doc/mit-license.tmpl \ + -E .py .sh .ps1 .sql .cmd \ + -x mlos_bench/setup.py mlos_core/setup.py mlos_viz/setup.py + touch $@ + +.PHONY: isort +isort: build/isort.${CONDA_ENV_NAME}.build-stamp + +ifneq (,$(filter isort,$(MAKECMDGOALS))) + FORMAT_PREREQS += build/isort.${CONDA_ENV_NAME}.build-stamp +endif + +build/isort.${CONDA_ENV_NAME}.build-stamp: build/isort.mlos_core.${CONDA_ENV_NAME}.build-stamp +build/isort.${CONDA_ENV_NAME}.build-stamp: build/isort.mlos_bench.${CONDA_ENV_NAME}.build-stamp +build/isort.${CONDA_ENV_NAME}.build-stamp: build/isort.mlos_viz.${CONDA_ENV_NAME}.build-stamp +build/isort.${CONDA_ENV_NAME}.build-stamp: + touch $@ + +# NOTE: when using pattern rules (involving %) we can only add one line of +# prerequisities, so we use this pattern to compose the list as variables. + +# black, licenseheaders, isort, and docformatter all alter files, so only run +# one at a time, by adding prerequisites, but only as necessary. +ISORT_COMMON_PREREQS := +ifneq (,$(filter format licenseheaders,$(MAKECMDGOALS))) +ISORT_COMMON_PREREQS += build/licenseheaders.${CONDA_ENV_NAME}.build-stamp +endif +ISORT_COMMON_PREREQS += build/conda-env.${CONDA_ENV_NAME}.build-stamp +ISORT_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) + +build/isort.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) +build/isort.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) +build/isort.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) + +build/isort.%.${CONDA_ENV_NAME}.build-stamp: $(ISORT_COMMON_PREREQS) + # Reformat python file imports with isort. 
+ conda run -n ${CONDA_ENV_NAME} isort --verbose --only-modified --atomic -j0 $(filter %.py,$+) + touch $@ + +.PHONY: black +black: build/black.${CONDA_ENV_NAME}.build-stamp + +ifneq (,$(filter black,$(MAKECMDGOALS))) + FORMAT_PREREQS += build/black.${CONDA_ENV_NAME}.build-stamp +endif + +build/black.${CONDA_ENV_NAME}.build-stamp: build/black.mlos_core.${CONDA_ENV_NAME}.build-stamp +build/black.${CONDA_ENV_NAME}.build-stamp: build/black.mlos_bench.${CONDA_ENV_NAME}.build-stamp +build/black.${CONDA_ENV_NAME}.build-stamp: build/black.mlos_viz.${CONDA_ENV_NAME}.build-stamp +build/black.${CONDA_ENV_NAME}.build-stamp: + touch $@ + +# black, licenseheaders, isort, and docformatter all alter files, so only run +# one at a time, by adding prerequisites, but only as necessary. +BLACK_COMMON_PREREQS := +ifneq (,$(filter format licenseheaders,$(MAKECMDGOALS))) +BLACK_COMMON_PREREQS += build/licenseheaders.${CONDA_ENV_NAME}.build-stamp +endif +ifneq (,$(filter format isort,$(MAKECMDGOALS))) +BLACK_COMMON_PREREQS += build/isort.${CONDA_ENV_NAME}.build-stamp +endif +BLACK_COMMON_PREREQS += build/conda-env.${CONDA_ENV_NAME}.build-stamp +BLACK_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) + +build/black.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) +build/black.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) +build/black.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) + +build/black.%.${CONDA_ENV_NAME}.build-stamp: $(BLACK_COMMON_PREREQS) + # Reformat python files with black. + conda run -n ${CONDA_ENV_NAME} black $(filter %.py,$+) + touch $@ + +.PHONY: docformatter +docformatter: build/docformatter.${CONDA_ENV_NAME}.build-stamp + +ifneq (,$(filter docformatter,$(MAKECMDGOALS))) + FORMAT_PREREQS += build/docformatter.${CONDA_ENV_NAME}.build-stamp +endif + +build/docformatter.${CONDA_ENV_NAME}.build-stamp: build/docformatter.mlos_core.${CONDA_ENV_NAME}.build-stamp +build/docformatter.${CONDA_ENV_NAME}.build-stamp: build/docformatter.mlos_bench.${CONDA_ENV_NAME}.build-stamp +build/docformatter.${CONDA_ENV_NAME}.build-stamp: build/docformatter.mlos_viz.${CONDA_ENV_NAME}.build-stamp +build/docformatter.${CONDA_ENV_NAME}.build-stamp: + touch $@ + +# black, licenseheaders, isort, and docformatter all alter files, so only run +# one at a time, by adding prerequisites, but only as necessary. +DOCFORMATTER_COMMON_PREREQS := +ifneq (,$(filter format licenseheaders,$(MAKECMDGOALS))) +DOCFORMATTER_COMMON_PREREQS += build/licenseheaders.${CONDA_ENV_NAME}.build-stamp +endif +ifneq (,$(filter format isort,$(MAKECMDGOALS))) +DOCFORMATTER_COMMON_PREREQS += build/isort.${CONDA_ENV_NAME}.build-stamp +endif +ifneq (,$(filter format black,$(MAKECMDGOALS))) +DOCFORMATTER_COMMON_PREREQS += build/black.${CONDA_ENV_NAME}.build-stamp +endif +DOCFORMATTER_COMMON_PREREQS += build/conda-env.${CONDA_ENV_NAME}.build-stamp +DOCFORMATTER_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) + +build/docformatter.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) +build/docformatter.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) +build/docformatter.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) + +# docformatter returns non-zero when it changes anything so instead we ignore that +# return code and just have it recheck itself immediately +build/docformatter.%.${CONDA_ENV_NAME}.build-stamp: $(DOCFORMATTER_COMMON_PREREQS) + # Reformat python file docstrings with docformatter. 
+ conda run -n ${CONDA_ENV_NAME} docformatter --in-place $(filter %.py,$+) || true + conda run -n ${CONDA_ENV_NAME} docformatter --check --diff $(filter %.py,$+) + touch $@ + + .PHONY: check -check: pycodestyle pydocstyle pylint mypy # cspell licenseheaders markdown-link-check +check: isort-check black-check docformatter-check pycodestyle pydocstyle pylint mypy # cspell markdown-link-check + +.PHONY: black-check +black-check: build/black-check.mlos_core.${CONDA_ENV_NAME}.build-stamp +black-check: build/black-check.mlos_bench.${CONDA_ENV_NAME}.build-stamp +black-check: build/black-check.mlos_viz.${CONDA_ENV_NAME}.build-stamp + +# Make sure black format rules run before black-check rules. +build/black-check.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) +build/black-check.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) +build/black-check.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) + +BLACK_CHECK_COMMON_PREREQS := build/conda-env.${CONDA_ENV_NAME}.build-stamp +BLACK_CHECK_COMMON_PREREQS += $(FORMAT_PREREQS) +BLACK_CHECK_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) + +build/black-check.%.${CONDA_ENV_NAME}.build-stamp: $(BLACK_CHECK_COMMON_PREREQS) + # Check for import sort order. + # Note: if this fails use "make format" or "make black" to fix it. + conda run -n ${CONDA_ENV_NAME} black --verbose --check --diff $(filter %.py,$+) + touch $@ + +.PHONY: docformatter-check +docformatter-check: build/docformatter-check.mlos_core.${CONDA_ENV_NAME}.build-stamp +docformatter-check: build/docformatter-check.mlos_bench.${CONDA_ENV_NAME}.build-stamp +docformatter-check: build/docformatter-check.mlos_viz.${CONDA_ENV_NAME}.build-stamp + +# Make sure docformatter format rules run before docformatter-check rules. +build/docformatter-check.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) +build/docformatter-check.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) +build/docformatter-check.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) + +BLACK_CHECK_COMMON_PREREQS := build/conda-env.${CONDA_ENV_NAME}.build-stamp +BLACK_CHECK_COMMON_PREREQS += $(FORMAT_PREREQS) +BLACK_CHECK_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) + +build/docformatter-check.%.${CONDA_ENV_NAME}.build-stamp: $(BLACK_CHECK_COMMON_PREREQS) + # Check for import sort order. + # Note: if this fails use "make format" or "make docformatter" to fix it. + conda run -n ${CONDA_ENV_NAME} docformatter --check --diff $(filter %.py,$+) + touch $@ + +.PHONY: isort-check +isort-check: build/isort-check.mlos_core.${CONDA_ENV_NAME}.build-stamp +isort-check: build/isort-check.mlos_bench.${CONDA_ENV_NAME}.build-stamp +isort-check: build/isort-check.mlos_viz.${CONDA_ENV_NAME}.build-stamp + +# Make sure isort format rules run before isort-check rules. +build/isort-check.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) +build/isort-check.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) +build/isort-check.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) + +ISORT_CHECK_COMMON_PREREQS := build/conda-env.${CONDA_ENV_NAME}.build-stamp +ISORT_CHECK_COMMON_PREREQS += $(FORMAT_PREREQS) +ISORT_CHECK_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) + +build/isort-check.%.${CONDA_ENV_NAME}.build-stamp: $(ISORT_CHECK_COMMON_PREREQS) + # Note: if this fails use "make format" or "make isort" to fix it. 
+ conda run -n ${CONDA_ENV_NAME} isort --only-modified --check --diff -j0 $(filter %.py,$+) + touch $@ .PHONY: pycodestyle -pycodestyle: conda-env pycodestyle: build/pycodestyle.mlos_core.${CONDA_ENV_NAME}.build-stamp pycodestyle: build/pycodestyle.mlos_bench.${CONDA_ENV_NAME}.build-stamp pycodestyle: build/pycodestyle.mlos_viz.${CONDA_ENV_NAME}.build-stamp - build/pycodestyle.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) build/pycodestyle.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) build/pycodestyle.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) -build/pycodestyle.%.${CONDA_ENV_NAME}.build-stamp: build/conda-env.${CONDA_ENV_NAME}.build-stamp setup.cfg +PYCODESTYLE_COMMON_PREREQS := build/conda-env.${CONDA_ENV_NAME}.build-stamp +PYCODESTYLE_COMMON_PREREQS += $(FORMAT_PREREQS) +PYCODESTYLE_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) + +build/pycodestyle.%.${CONDA_ENV_NAME}.build-stamp: $(PYCODESTYLE_COMMON_PREREQS) # Check for decent pep8 code style with pycodestyle. - # Note: if this fails, try using autopep8 to fix it. - conda run -n ${CONDA_ENV_NAME} pycodestyle $(filter-out setup.cfg,$+) + # Note: if this fails, try using 'make format' to fix it. + conda run -n ${CONDA_ENV_NAME} pycodestyle $(filter %.py,$+) touch $@ .PHONY: pydocstyle -pydocstyle: conda-env pydocstyle: build/pydocstyle.mlos_core.${CONDA_ENV_NAME}.build-stamp pydocstyle: build/pydocstyle.mlos_bench.${CONDA_ENV_NAME}.build-stamp pydocstyle: build/pydocstyle.mlos_viz.${CONDA_ENV_NAME}.build-stamp - build/pydocstyle.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) build/pydocstyle.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) build/pydocstyle.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) -build/pydocstyle.%.${CONDA_ENV_NAME}.build-stamp: build/conda-env.${CONDA_ENV_NAME}.build-stamp setup.cfg - # Check for decent pep8 doc style with pydocstyle. - conda run -n ${CONDA_ENV_NAME} pydocstyle $(filter-out setup.cfg,$+) - touch $@ +PYDOCSTYLE_COMMON_PREREQS := build/conda-env.${CONDA_ENV_NAME}.build-stamp +PYDOCSTYLE_COMMON_PREREQS += $(FORMAT_PREREQS) +PYDOCSTYLE_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) -.PHONY: licenseheaders -licenseheaders: build/licenseheaders.${CONDA_ENV_NAME}.build-stamp - -build/licenseheaders.${CONDA_ENV_NAME}.build-stamp: $(PYTHON_FILES) $(SCRIPT_FILES) $(SQL_FILES) doc/mit-license.tmpl - # Note: to avoid makefile dependency loops, we don't touch the setup.py - # files as that would force the conda-env to be rebuilt. - conda run -n ${CONDA_ENV_NAME} licenseheaders -t doc/mit-license.tmpl \ - -E .py .sh .ps1 .sql .cmd \ - -x mlos_bench/setup.py mlos_core/setup.py mlos_viz/setup.py +build/pydocstyle.%.${CONDA_ENV_NAME}.build-stamp: $(PYDOCSTYLE_COMMON_PREREQS) + # Check for decent pep8 doc style with pydocstyle. + conda run -n ${CONDA_ENV_NAME} pydocstyle $(filter %.py,$+) touch $@ .PHONY: cspell @@ -101,7 +312,7 @@ cspell: build/cspell-container.build-stamp ./.devcontainer/scripts/run-cspell.sh endif -build/cspell-container.build-stamp: +build/cspell-container.build-stamp: $(FORMAT_PREREQS) # Build the docker image with cspell in it. 
$(MAKE) -C .devcontainer/build cspell touch $@ @@ -115,13 +326,12 @@ markdown-link-check: build/markdown-link-check-container.build-stamp ./.devcontainer/scripts/run-markdown-link-check.sh endif -build/markdown-link-check-container.build-stamp: +build/markdown-link-check-container.build-stamp: $(FORMAT_PREREQS) # Build the docker image with markdown-link-check in it. $(MAKE) -C .devcontainer/build markdown-link-check touch $@ .PHONY: pylint -pylint: conda-env pylint: build/pylint.mlos_core.${CONDA_ENV_NAME}.build-stamp pylint: build/pylint.mlos_bench.${CONDA_ENV_NAME}.build-stamp pylint: build/pylint.mlos_viz.${CONDA_ENV_NAME}.build-stamp @@ -131,12 +341,15 @@ build/pylint.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) build/pylint.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) build/pylint.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) -build/pylint.%.${CONDA_ENV_NAME}.build-stamp: build/conda-env.${CONDA_ENV_NAME}.build-stamp .pylintrc - conda run -n ${CONDA_ENV_NAME} pylint -j0 $(filter-out .pylintrc,$+) +PYLINT_COMMON_PREREQS := build/conda-env.${CONDA_ENV_NAME}.build-stamp +PYLINT_COMMON_PREREQS += $(FORMAT_PREREQS) +PYLINT_COMMON_PREREQS += pyproject.toml + +build/pylint.%.${CONDA_ENV_NAME}.build-stamp: $(PYLINT_COMMON_PREREQS) + conda run -n ${CONDA_ENV_NAME} pylint -j0 $(filter %.py,$+) touch $@ .PHONY: flake8 -flake8: conda-env flake8: build/flake8.mlos_core.${CONDA_ENV_NAME}.build-stamp flake8: build/flake8.mlos_bench.${CONDA_ENV_NAME}.build-stamp flake8: build/flake8.mlos_viz.${CONDA_ENV_NAME}.build-stamp @@ -145,65 +358,65 @@ build/flake8.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) build/flake8.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) build/flake8.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) -build/flake8.%.${CONDA_ENV_NAME}.build-stamp: build/conda-env.${CONDA_ENV_NAME}.build-stamp setup.cfg - conda run -n ${CONDA_ENV_NAME} flake8 -j0 $(filter-out setup.cfg,$+) +FLAKE8_COMMON_PREREQS := build/conda-env.${CONDA_ENV_NAME}.build-stamp +FLAKE8_COMMON_PREREQS += $(FORMAT_PREREQS) +FLAKE8_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) + +build/flake8.%.${CONDA_ENV_NAME}.build-stamp: $(FLAKE8_COMMON_PREREQS) + conda run -n ${CONDA_ENV_NAME} flake8 -j0 $(filter %.py,$+) touch $@ .PHONY: mypy -mypy: conda-env mypy: build/mypy.mlos_core.${CONDA_ENV_NAME}.build-stamp mypy: build/mypy.mlos_bench.${CONDA_ENV_NAME}.build-stamp mypy: build/mypy.mlos_viz.${CONDA_ENV_NAME}.build-stamp +# Run these in order. 
build/mypy.mlos_core.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) build/mypy.mlos_bench.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) build/mypy.mlos_core.${CONDA_ENV_NAME}.build-stamp build/mypy.mlos_viz.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) build/mypy.mlos_bench.${CONDA_ENV_NAME}.build-stamp -NON_MYPY_FILES := scripts/dmypy-wrapper.sh setup.cfg -NON_MYPY_FILES += build/conda-env.${CONDA_ENV_NAME}.build-stamp -NON_MYPY_FILES += build/mypy.mlos_core.${CONDA_ENV_NAME}.build-stamp -NON_MYPY_FILES += build/mypy.mlos_bench.${CONDA_ENV_NAME}.build-stamp -build/mypy.%.${CONDA_ENV_NAME}.build-stamp: scripts/dmypy-wrapper.sh build/conda-env.${CONDA_ENV_NAME}.build-stamp setup.cfg +MYPY_COMMON_PREREQS := build/conda-env.${CONDA_ENV_NAME}.build-stamp +MYPY_COMMON_PREREQS += $(FORMAT_PREREQS) +MYPY_COMMON_PREREQS += $(MLOS_GLOBAL_CONF_FILES) +MYPY_COMMON_PREREQS += scripts/dmypy-wrapper.sh + +build/mypy.%.${CONDA_ENV_NAME}.build-stamp: $(MYPY_COMMON_PREREQS) conda run -n ${CONDA_ENV_NAME} scripts/dmypy-wrapper.sh \ - $(filter-out $(NON_MYPY_FILES),$+) + $(filter %.py,$+) touch $@ .PHONY: test test: pytest -PYTEST_MODULES := +PYTEST_CONF_FILES := $(MLOS_GLOBAL_CONF_FILES) conftest.py .PHONY: pytest pytest: conda-env build/pytest.${CONDA_ENV_NAME}.build-stamp -build/pytest.mlos_core.${CONDA_ENV_NAME}.needs-build-stamp: build/conda-env.${CONDA_ENV_NAME}.build-stamp -build/pytest.mlos_core.${CONDA_ENV_NAME}.needs-build-stamp: $(MLOS_CORE_PYTHON_FILES) conftest.py setup.cfg -build/pytest.mlos_core.${CONDA_ENV_NAME}.needs-build-stamp: - # Update the PYTEST_MODULES list to include mlos_core. - $(eval PYTEST_MODULES += mlos_core) - echo "PYTEST_MODULES: $(PYTEST_MODULES)" - touch $@ +pytest-mlos-core: build/pytest.mlos_core.${CONDA_ENV_NAME}.needs-build-stamp +pytest-mlos-bench: build/pytest.mlos_bench.${CONDA_ENV_NAME}.needs-build-stamp +pytest-mlos-viz: build/pytest.mlos_viz.${CONDA_ENV_NAME}.needs-build-stamp -# Run the mlos_bench target update after mlos_core target update. -build/pytest.mlos_bench.${CONDA_ENV_NAME}.needs-build-stamp: build/pytest.mlos_core.${CONDA_ENV_NAME}.needs-build-stamp -build/pytest.mlos_bench.${CONDA_ENV_NAME}.needs-build-stamp: build/conda-env.${CONDA_ENV_NAME}.build-stamp -build/pytest.mlos_bench.${CONDA_ENV_NAME}.needs-build-stamp: $(MLOS_BENCH_PYTHON_FILES) conftest.py setup.cfg -build/pytest.mlos_bench.${CONDA_ENV_NAME}.needs-build-stamp: - # Update the PYTEST_MODULES list to include mlos_bench. - $(eval PYTEST_MODULES += mlos_bench) - echo "PYTEST_MODULES: $(PYTEST_MODULES)" - touch $@ +build/pytest.mlos_core.${CONDA_ENV_NAME}.needs-build-stamp: $(MLOS_CORE_PYTHON_FILES) $(MLOS_CORE_CONF_FILES) +build/pytest.mlos_core.${CONDA_ENV_NAME}.needs-build-stamp: PYTEST_MODULE := mlos_core + +build/pytest.mlos_bench.${CONDA_ENV_NAME}.needs-build-stamp: $(MLOS_BENCH_PYTHON_FILES) $(MLOS_BENCH_CONF_FILES) +build/pytest.mlos_bench.${CONDA_ENV_NAME}.needs-build-stamp: PYTEST_MODULE := mlos_bench -# Run the mlos_viz target update after mlos_bench target update. -build/pytest.mlos_viz.${CONDA_ENV_NAME}.needs-build-stamp: build/pytest.mlos_bench.${CONDA_ENV_NAME}.needs-build-stamp -build/pytest.mlos_viz.${CONDA_ENV_NAME}.needs-build-stamp: build/conda-env.${CONDA_ENV_NAME}.build-stamp -build/pytest.mlos_viz.${CONDA_ENV_NAME}.needs-build-stamp: $(MLOS_VIZ_PYTHON_FILES) conftest.py setup.cfg -build/pytest.mlos_viz.${CONDA_ENV_NAME}.needs-build-stamp: - # Update the PYTEST_MODULES list to include mlos_viz. 
- $(eval PYTEST_MODULES += mlos_viz) - echo "PYTEST_MODULES: $(PYTEST_MODULES)" +build/pytest.mlos_viz.${CONDA_ENV_NAME}.needs-build-stamp: $(MLOS_VIZ_PYTHON_FILES) $(MLOS_VIZ_CONF_FILES) +build/pytest.mlos_viz.${CONDA_ENV_NAME}.needs-build-stamp: PYTEST_MODULE := mlos_viz + +# Invividual package test rules (for tight loop dev work). +# Skip code coverage tests for these. +PYTEST_COMMON_PREREQS := build/conda-env.${CONDA_ENV_NAME}.build-stamp +PYTEST_COMMON_PREREQS += $(FORMAT_PREREQS) +PYTEST_COMMON_PREREQS += $(PYTEST_CONF_FILES) + +build/pytest.%.${CONDA_ENV_NAME}.needs-build-stamp: $(PYTEST_COMMON_PREREQS) + conda run -n ${CONDA_ENV_NAME} pytest $(PYTEST_EXTRA_OPTIONS) $(PYTEST_MODULE) touch $@ PYTEST_OPTIONS := @@ -212,85 +425,120 @@ PYTEST_OPTIONS := SKIP_COVERAGE := $(shell echo $${SKIP_COVERAGE:-} | grep -i -x -e 1 -e true) ifeq ($(SKIP_COVERAGE),) - PYTEST_OPTIONS += --cov=. --cov-append --cov-fail-under=91.5 --cov-report=xml --cov-report=html --junitxml=junit/test-results.xml --local-badge-output-dir=doc/source/badges/ + PYTEST_OPTIONS += --cov=. --cov-append --cov-fail-under=92 --cov-report=xml --cov-report=html --junitxml=junit/test-results.xml --local-badge-output-dir=doc/source/badges/ endif -# Run the pytest target on only the modules that have changed recently, but -# make sure the coverage report is for both of them when used in the pipeline. +# Global pytest rule that also produces code coverage for the pipeline. # NOTE: When run locally, the junit/test-results.xml will only include the # tests from the latest run, but this file is only used for upstream reporting, # so probably shouldn't matter. -build/pytest.${CONDA_ENV_NAME}.build-stamp: build/pytest.mlos_core.${CONDA_ENV_NAME}.needs-build-stamp -build/pytest.${CONDA_ENV_NAME}.build-stamp: build/pytest.mlos_bench.${CONDA_ENV_NAME}.needs-build-stamp -build/pytest.${CONDA_ENV_NAME}.build-stamp: build/pytest.mlos_viz.${CONDA_ENV_NAME}.needs-build-stamp +build/pytest.${CONDA_ENV_NAME}.build-stamp: $(PYTEST_COMMON_PREREQS) +build/pytest.${CONDA_ENV_NAME}.build-stamp: $(MLOS_CORE_PYTHON_FILES) $(MLOS_CORE_CONF_FILES) +build/pytest.${CONDA_ENV_NAME}.build-stamp: $(MLOS_BENCH_PYTHON_FILES) $(MLOS_BENCH_CONF_FILES) +build/pytest.${CONDA_ENV_NAME}.build-stamp: $(MLOS_VIZ_PYTHON_FILES) $(MLOS_VIZ_CONF_FILES) build/pytest.${CONDA_ENV_NAME}.build-stamp: - # Make sure to update the list of modules needed everytime in case the test fails and we need to rerun it. - for pytest_module in $(PYTEST_MODULES); do rm -f build/pytest.$${pytest_module}.${CONDA_ENV_NAME}.needs-build-stamp; done - # Run pytest for the modules: $(PYTEST_MODULES) + # Remove the markers for individual targets (above). + for pytest_module in $(MLOS_PKGS); do rm -f build/pytest.$${pytest_module}.${CONDA_ENV_NAME}.build-stamp; done + # Run pytest for the modules: $(MLOS_PKGS) mkdir -p doc/source/badges/ - conda run -n ${CONDA_ENV_NAME} pytest $(PYTEST_OPTIONS) $(PYTEST_EXTRA_OPTIONS) $(PYTEST_MODULES) - # Mark those as done again. - for pytest_module in $(PYTEST_MODULES); do touch build/pytest.$${pytest_module}.${CONDA_ENV_NAME}.needs-build-stamp; done + conda run -n ${CONDA_ENV_NAME} pytest $(PYTEST_OPTIONS) $(PYTEST_EXTRA_OPTIONS) $(MLOS_PKGS) + # Global success. Mark the individual targets as done again. + for pytest_module in $(MLOS_PKGS); do touch build/pytest.$${pytest_module}.${CONDA_ENV_NAME}.build-stamp; done touch $@ +# setuptools-scm needs a longer history than Github CI workers have by default. 
+.PHONY: unshallow +unshallow: build/unshallow.build-stamp + +build/unshallow.build-stamp: + git rev-parse --is-shallow-repository | grep -x -q false || git fetch --unshallow --quiet + touch $@ + .PHONY: dist -dist: bdist_wheel +dist: sdist bdist_wheel + +.PHONY: sdist +sdist: conda-env unshallow +sdist: mlos_core/dist/tmp/mlos_core-latest.tar.gz +sdist: mlos_bench/dist/tmp/mlos_bench-latest.tar.gz +sdist: mlos_viz/dist/tmp/mlos_viz-latest.tar.gz .PHONY: bdist_wheel -bdist_wheel: conda-env +bdist_wheel: conda-env unshallow bdist_wheel: mlos_core/dist/tmp/mlos_core-latest-py3-none-any.whl bdist_wheel: mlos_bench/dist/tmp/mlos_bench-latest-py3-none-any.whl bdist_wheel: mlos_viz/dist/tmp/mlos_viz-latest-py3-none-any.whl -mlos_core/dist/tmp/mlos_core-latest-py3-none-any.whl: mlos_core/dist/tmp/mlos_core-latest.tar +# Make the whl files depend on the .tar.gz files, mostly to prevent conflicts +# with shared use of the their build/ trees. + mlos_core/dist/tmp/mlos_core-latest-py3-none-any.whl: MODULE_NAME := mlos_core mlos_core/dist/tmp/mlos_core-latest-py3-none-any.whl: PACKAGE_NAME := mlos_core -mlos_core/dist/tmp/mlos_core-latest.tar: mlos_core/setup.py mlos_core/MANIFEST.in $(MLOS_CORE_PYTHON_FILES) -mlos_core/dist/tmp/mlos_core-latest.tar: MODULE_NAME := mlos_core -mlos_core/dist/tmp/mlos_core-latest.tar: PACKAGE_NAME := mlos_core +mlos_core/dist/tmp/mlos_core-latest-py3-none-any.whl: mlos_core/dist/tmp/mlos_core-latest.tar.gz +mlos_core/dist/tmp/mlos_core-latest.tar.gz: $(MLOS_CORE_CONF_FILES) $(MLOS_CORE_PYTHON_FILES) +mlos_core/dist/tmp/mlos_core-latest.tar.gz: MODULE_NAME := mlos_core +mlos_core/dist/tmp/mlos_core-latest.tar.gz: PACKAGE_NAME := mlos_core -mlos_bench/dist/tmp/mlos_bench-latest-py3-none-any.whl: mlos_bench/dist/tmp/mlos_bench-latest.tar mlos_bench/dist/tmp/mlos_bench-latest-py3-none-any.whl: MODULE_NAME := mlos_bench mlos_bench/dist/tmp/mlos_bench-latest-py3-none-any.whl: PACKAGE_NAME := mlos_bench -mlos_bench/dist/tmp/mlos_bench-latest.tar: mlos_bench/setup.py mlos_bench/MANIFEST.in $(MLOS_BENCH_PYTHON_FILES) -mlos_bench/dist/tmp/mlos_bench-latest.tar: MODULE_NAME := mlos_bench -mlos_bench/dist/tmp/mlos_bench-latest.tar: PACKAGE_NAME := mlos_bench +mlos_bench/dist/tmp/mlos_bench-latest-py3-none-any.whl: mlos_bench/dist/tmp/mlos_bench-latest.tar.gz +mlos_bench/dist/tmp/mlos_bench-latest.tar.gz: $(MLOS_BENCH_CONF_FILES) $(MLOS_BENCH_PYTHON_FILES) +mlos_bench/dist/tmp/mlos_bench-latest.tar.gz: MODULE_NAME := mlos_bench +mlos_bench/dist/tmp/mlos_bench-latest.tar.gz: PACKAGE_NAME := mlos_bench -mlos_viz/dist/tmp/mlos_viz-latest-py3-none-any.whl: mlos_viz/dist/tmp/mlos_viz-latest.tar mlos_viz/dist/tmp/mlos_viz-latest-py3-none-any.whl: MODULE_NAME := mlos_viz mlos_viz/dist/tmp/mlos_viz-latest-py3-none-any.whl: PACKAGE_NAME := mlos_viz -mlos_viz/dist/tmp/mlos_viz-latest.tar: mlos_viz/setup.py mlos_viz/MANIFEST.in $(mlos_viz_PYTHON_FILES) -mlos_viz/dist/tmp/mlos_viz-latest.tar: MODULE_NAME := mlos_viz -mlos_viz/dist/tmp/mlos_viz-latest.tar: PACKAGE_NAME := mlos_viz +mlos_viz/dist/tmp/mlos_viz-latest-py3-none-any.whl: mlos_viz/dist/tmp/mlos_viz-latest.tar.gz +mlos_viz/dist/tmp/mlos_viz-latest.tar.gz: $(MLOS_VIZ_CONF_FILES) $(MLOS_VIZ_PYTHON_FILES) +mlos_viz/dist/tmp/mlos_viz-latest.tar.gz: MODULE_NAME := mlos_viz +mlos_viz/dist/tmp/mlos_viz-latest.tar.gz: PACKAGE_NAME := mlos_viz + +%-latest.tar.gz: build/conda-env.${CONDA_ENV_NAME}.build-stamp build/unshallow.build-stamp $(FORMAT_PREREQS) + mkdir -p $(MODULE_NAME)/dist/tmp + rm -f $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar{,.gz} + 
rm -f $(MODULE_NAME)/dist/tmp/$(PACKAGE_NAME)-latest.tar{,.gz} + rm -rf $(MODULE_NAME)/build/ + rm -rf $(MODULE_NAME)/$(MODULE_NAME).egg-info/ + cd $(MODULE_NAME)/ && conda run -n ${CONDA_ENV_NAME} python3 -m build --sdist + # Do some sanity checks on the sdist tarball output. + BASE_VERS=`conda run -n ${CONDA_ENV_NAME} python3 $(MODULE_NAME)/$(MODULE_NAME)/version.py | cut -d. -f-2 | egrep -x '[0-9.]+' || echo err-unknown-base-version` \ + && TAG_VERS=`git tag -l --sort=-version:refname | egrep -x '^v[0-9.]+' | head -n1 | sed 's/^v//' | cut -d. -f-2 | egrep -x '[0-9.]+' || echo err-unknown-tag-version` \ + && ls $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -F -e $$BASE_VERS -e $$TAG_VERS + # Make sure tests were excluded. + ! ( tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 tests/ ) + # Make sure the py.typed marker file exists. + tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 /py.typed + # Check to make sure the mlos_bench module has the config directory. + [ "$(MODULE_NAME)" != "mlos_bench" ] || tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 mlos_bench/config/ + cd $(MODULE_NAME)/dist/tmp && ln -s ../$(PACKAGE_NAME)-*.tar.gz $(PACKAGE_NAME)-latest.tar.gz -%-latest.tar: build/conda-env.${CONDA_ENV_NAME}.build-stamp -%-latest.tar: +%-latest-py3-none-any.whl: build/conda-env.${CONDA_ENV_NAME}.build-stamp build/unshallow.build-stamp $(FORMAT_PREREQS) mkdir -p $(MODULE_NAME)/dist/tmp - rm -f $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar - rm -f $(MODULE_NAME)/dist/tmp/$(PACKAGE_NAME)-latest.tar - cd $(MODULE_NAME)/ && conda run -n ${CONDA_ENV_NAME} python3 setup.py sdist --formats tar - ls $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar - ! ( tar tf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar | grep -m1 tests/ ) - [ "$(MODULE_NAME)" != "mlos_bench" ] || tar tf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar | grep -m1 mlos_bench/config/ - cd $(MODULE_NAME)/dist/tmp && ln -s ../$(PACKAGE_NAME)-*.tar $(PACKAGE_NAME)-latest.tar - -%-latest-py3-none-any.whl: build/conda-env.${CONDA_ENV_NAME}.build-stamp -%-latest-py3-none-any.whl: rm -f $(MODULE_NAME)/dist/$(MODULE_NAME)-*-py3-none-any.whl rm -f $(MODULE_NAME)/dist/tmp/$(MODULE_NAME)-latest-py3-none-any.whl - cd $(MODULE_NAME)/ && conda run -n ${CONDA_ENV_NAME} pip wheel --no-index --no-deps --wheel-dir dist dist/tmp/$(PACKAGE_NAME)-latest.tar - ls $(MODULE_NAME)/dist/$(MODULE_NAME)-*-py3-none-any.whl + rm -rf $(MODULE_NAME)/build/ + rm -rf $(MODULE_NAME)/$(MODULE_NAME).egg-info/ + cd $(MODULE_NAME)/ && conda run -n ${CONDA_ENV_NAME} python3 -m build --wheel + # Do some sanity checks on the wheel output. + BASE_VERS=`conda run -n ${CONDA_ENV_NAME} python3 $(MODULE_NAME)/$(MODULE_NAME)/version.py | cut -d. -f-2 | egrep -o '^[0-9.]+' || echo err-unknown-base-version` \ + && TAG_VERS=`git tag -l --sort=-version:refname | egrep -x '^v[0-9.]+' | head -n1 | sed 's/^v//' | cut -d. -f-2 | egrep -x '[0-9.]+' || echo err-unknown-tag-version` \ + && ls $(MODULE_NAME)/dist/$(MODULE_NAME)-*-py3-none-any.whl | grep -F -e $$BASE_VERS -e $$TAG_VERS # Check to make sure the tests were excluded from the wheel. ! ( unzip -t $(MODULE_NAME)/dist/$(MODULE_NAME)-*-py3-none-any.whl | grep -m1 tests/ ) + # Make sure the py.typed marker file exists. + unzip -t $(MODULE_NAME)/dist/$(MODULE_NAME)-*-py3-none-any.whl | grep -m1 /py.typed # Check to make sure the mlos_bench module has the config directory. 
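The BASE_VERS sanity checks in these packaging rules shell out to each package's version.py and expect it to print the version when executed directly (so `cut -d. -f-2` can extract "major.minor"). A sketch of the minimal module shape those checks assume — the real files may carry additional metadata and a release-managed version string:

# mlos_core/mlos_core/version.py (illustrative shape only)

VERSION = "0.0.0"  # placeholder; the actual value is set by the release process

if __name__ == "__main__":
    # Print the version so shell pipelines in the Makefile can parse it.
    print(VERSION)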
[ "$(MODULE_NAME)" != "mlos_bench" ] || unzip -t $(MODULE_NAME)/dist/$(MODULE_NAME)-*-py3-none-any.whl | grep -m1 mlos_bench/config/ - cd $(MODULE_NAME)/dist/tmp && ln -s ../$(MODULE_NAME)-*-py3-none-any.whl $(MODULE_NAME)-latest-py3-none-any.whl # Check to make sure the README contents made it into the package metadata. - unzip -p $(MODULE_NAME)/dist/tmp/$(MODULE_NAME)-latest-py3-none-any.whl */METADATA | egrep -v '^[A-Z][a-zA-Z-]+:' | grep -q -i '^# mlos' + unzip -p $(MODULE_NAME)/dist/$(MODULE_NAME)-*-py3-none-any.whl */METADATA | egrep -v '^[A-Z][a-zA-Z-]+:' | grep -q -i '^# mlos' + # Also check that the they include the URL + unzip -p $(MODULE_NAME)/dist/$(MODULE_NAME)-*-py3-none-any.whl */METADATA | grep -q -e '](https://github.com/microsoft/MLOS/' + # Link it into place + cd $(MODULE_NAME)/dist/tmp && ln -s ../$(MODULE_NAME)-*-py3-none-any.whl $(MODULE_NAME)-latest-py3-none-any.whl -.PHONY: dist-test-env-clean -dist-test-env-clean: +.PHONY: clean-dist-test-env +clean-dist-test-env: # Remove any existing mlos-dist-test environment so we can start clean. conda env remove -y ${CONDA_INFO_LEVEL} -n mlos-dist-test-$(PYTHON_VERSION) 2>/dev/null || true rm -f build/dist-test-env.$(PYTHON_VERSION).build-stamp @@ -305,7 +553,7 @@ build/dist-test-env.$(PYTHON_VERSION).build-stamp: mlos_core/dist/tmp/mlos_core- build/dist-test-env.$(PYTHON_VERSION).build-stamp: mlos_bench/dist/tmp/mlos_bench-latest-py3-none-any.whl build/dist-test-env.$(PYTHON_VERSION).build-stamp: mlos_viz/dist/tmp/mlos_viz-latest-py3-none-any.whl # Create a clean test environment for checking the wheel files. - $(MAKE) dist-test-env-clean + $(MAKE) clean-dist-test-env conda create -y ${CONDA_INFO_LEVEL} -n mlos-dist-test-$(PYTHON_VERSION) python=$(PYTHON_VERS_REQ) # Install some additional dependencies necessary for clean building some of the wheels. conda install -y ${CONDA_INFO_LEVEL} -n mlos-dist-test-$(PYTHON_VERSION) swig libpq @@ -320,7 +568,7 @@ build/dist-test-env.$(PYTHON_VERSION).build-stamp: mlos_viz/dist/tmp/mlos_viz-la touch $@ .PHONY: dist-test -#dist-test: dist-clean +#dist-test: clean-dist dist-test: dist-test-env build/dist-test.$(PYTHON_VERSION).build-stamp # Unnecessary if we invoke it as "python3 -m pytest ..." @@ -334,12 +582,14 @@ build/dist-test.$(PYTHON_VERSION).build-stamp: $(PYTHON_FILES) build/dist-test-e conda run -n mlos-dist-test-$(PYTHON_VERSION) python3 -m pytest mlos_core/mlos_core/tests/spaces/spaces_test.py # Run a simple test that uses the mlos_bench wheel (full tests can be checked with `make test`). conda run -n mlos-dist-test-$(PYTHON_VERSION) python3 -m pytest mlos_bench/mlos_bench/tests/environments/mock_env_test.py + # Run a basic cli tool check. + conda run -n mlos-dist-test-$(PYTHON_VERSION) mlos_bench --help 2>&1 | grep '^usage: mlos_bench ' # Run a simple test that uses the mlos_viz wheel (full tests can be checked with `make test`). # To do that, we need the fixtures from mlos_bench, so make those available too. 
PYTHONPATH=mlos_bench conda run -n mlos-dist-test-$(PYTHON_VERSION) python3 -m pytest mlos_viz/mlos_viz/tests/test_dabl_plot.py touch $@ -dist-test-clean: dist-test-env-clean +clean-dist-test: clean-dist-test-env rm -f build/dist-test-env.$(PYTHON_VERSION).build-stamp @@ -355,13 +605,30 @@ build/publish-pypi-deps.${CONDA_ENV_NAME}.build-stamp: build/conda-env.${CONDA_E PUBLISH_DEPS := build/publish-pypi-deps.${CONDA_ENV_NAME}.build-stamp PUBLISH_DEPS += build/pytest.${CONDA_ENV_NAME}.build-stamp +PUBLISH_DEPS += mlos_core/dist/tmp/mlos_core-latest.tar.gz +PUBLISH_DEPS += mlos_bench/dist/tmp/mlos_bench-latest.tar.gz +PUBLISH_DEPS += mlos_viz/dist/tmp/mlos_viz-latest.tar.gz +PUBLISH_DEPS += mlos_core/dist/tmp/mlos_core-latest-py3-none-any.whl +PUBLISH_DEPS += mlos_bench/dist/tmp/mlos_bench-latest-py3-none-any.whl +PUBLISH_DEPS += mlos_viz/dist/tmp/mlos_viz-latest-py3-none-any.whl PUBLISH_DEPS += build/dist-test.$(PYTHON_VERSION).build-stamp PUBLISH_DEPS += build/check-doc.build-stamp PUBLISH_DEPS += build/linklint-doc.build-stamp build/publish.${CONDA_ENV_NAME}.%.py.build-stamp: $(PUBLISH_DEPS) - rm -f mlos_*/dist/*.tar.gz - ls mlos_*/dist/*.tar | xargs -I% gzip -k % + # Basic sanity checks on files about to be published. + # Run "make clean-dist && make dist" if these fail. + # Check the tar count. + test `ls -1 mlos_core/dist/*.tar.gz | wc -l` -eq 1 + test `ls -1 mlos_bench/dist/*.tar.gz | wc -l` -eq 1 + test `ls -1 mlos_viz/dist/*.tar.gz | wc -l` -eq 1 + test `ls -1 mlos_*/dist/*.tar.gz | wc -l` -eq 3 + # Check the whl count. + test `ls -1 mlos_core/dist/*.whl | wc -l` -eq 1 + test `ls -1 mlos_bench/dist/*.whl | wc -l` -eq 1 + test `ls -1 mlos_viz/dist/*.whl | wc -l` -eq 1 + test `ls -1 mlos_*/dist/*.whl | wc -l` -eq 3 + # Publish the files to the specified repository. 
repo_name=`echo "$@" | sed -r -e 's|build/publish\.[^.]+\.||' -e 's|\.py\.build-stamp||'` \ && conda run -n ${CONDA_ENV_NAME} python3 -m twine upload --repository $$repo_name \ mlos_*/dist/mlos*-*.tar.gz mlos_*/dist/mlos*-*.whl @@ -370,13 +637,14 @@ build/publish.${CONDA_ENV_NAME}.%.py.build-stamp: $(PUBLISH_DEPS) publish-pypi: build/publish.${CONDA_ENV_NAME}.pypi.py.build-stamp publish-test-pypi: build/publish.${CONDA_ENV_NAME}.testpypi.py.build-stamp + build/doc-prereqs.${CONDA_ENV_NAME}.build-stamp: build/conda-env.${CONDA_ENV_NAME}.build-stamp build/doc-prereqs.${CONDA_ENV_NAME}.build-stamp: doc/requirements.txt conda run -n ${CONDA_ENV_NAME} pip install -U -r doc/requirements.txt touch $@ .PHONY: doc-prereqs -doc-prereqs: build/doc-prereqs.${CONDA_ENV_NAME}.build-stamp +doc-prereqs: build/doc-prereqs.${CONDA_ENV_NAME}.build-stamp build/unshallow.build-stamp .PHONY: clean-doc-env clean-doc-env: @@ -385,14 +653,16 @@ clean-doc-env: COMMON_DOC_FILES := build/doc-prereqs.${CONDA_ENV_NAME}.build-stamp doc/source/*.rst doc/source/_templates/*.rst doc/source/conf.py -doc/source/api/mlos_core/modules.rst: $(MLOS_CORE_PYTHON_FILES) $(COMMON_DOC_FILES) +doc/source/api/mlos_core/modules.rst: $(FORMAT_PREREQS) $(COMMON_DOC_FILES) +doc/source/api/mlos_core/modules.rst: $(MLOS_CORE_PYTHON_FILES) rm -rf doc/source/api/mlos_core cd doc/ && conda run -n ${CONDA_ENV_NAME} sphinx-apidoc -f -e -M \ -o source/api/mlos_core/ \ ../mlos_core/ \ ../mlos_core/setup.py ../mlos_core/mlos_core/tests/ -doc/source/api/mlos_bench/modules.rst: $(MLOS_BENCH_PYTHON_FILES) $(COMMON_DOC_FILES) +doc/source/api/mlos_bench/modules.rst: $(FORMAT_PREREQS) $(COMMON_DOC_FILES) +doc/source/api/mlos_bench/modules.rst: $(MLOS_BENCH_PYTHON_FILES) rm -rf doc/source/api/mlos_bench cd doc/ && conda run -n ${CONDA_ENV_NAME} sphinx-apidoc -f -e -M \ -o source/api/mlos_bench/ \ @@ -405,7 +675,8 @@ doc/source/api/mlos_bench/modules.rst: $(MLOS_BENCH_PYTHON_FILES) $(COMMON_DOC_F echo ".. 
literalinclude:: mlos_bench.run.usage.txt" >> doc/source/api/mlos_bench/mlos_bench.run.rst echo " :language: none" >> doc/source/api/mlos_bench/mlos_bench.run.rst -doc/source/api/mlos_viz/modules.rst: $(MLOS_VIZ_PYTHON_FILES) $(COMMON_DOC_FILES) +doc/source/api/mlos_viz/modules.rst: $(FORMAT_PREREQS) $(COMMON_DOC_FILES) +doc/source/api/mlos_viz/modules.rst: $(MLOS_VIZ_PYTHON_FILES) rm -rf doc/source/api/mlos_viz cd doc/ && conda run -n ${CONDA_ENV_NAME} sphinx-apidoc -f -e -M \ -o source/api/mlos_viz/ \ @@ -517,25 +788,42 @@ build/linklint-doc.build-stamp: doc/build/html/index.html doc/build/html/htmlcov @echo "OK" touch $@ + .PHONY: clean-doc clean-doc: rm -rf doc/build/ doc/global/ doc/source/api/ doc/source/generated rm -rf doc/source/source_tree_docs/* +.PHONY: clean-format +clean-format: + rm -f build/black.${CONDA_ENV_NAME}.build-stamp + rm -f build/black.mlos_*.${CONDA_ENV_NAME}.build-stamp + rm -f build/docformatter.${CONDA_ENV_NAME}.build-stamp + rm -f build/docformatter.mlos_*.${CONDA_ENV_NAME}.build-stamp + rm -f build/isort.${CONDA_ENV_NAME}.build-stamp + rm -f build/isort.mlos_*.${CONDA_ENV_NAME}.build-stamp + rm -f build/licenseheaders.${CONDA_ENV_NAME}.build-stamp + rm -f build/licenseheaders-prereqs.${CONDA_ENV_NAME}.build-stamp + .PHONY: clean-check clean-check: rm -f build/pylint.build-stamp rm -f build/pylint.${CONDA_ENV_NAME}.build-stamp rm -f build/pylint.mlos_*.${CONDA_ENV_NAME}.build-stamp rm -f build/mypy.mlos_*.${CONDA_ENV_NAME}.build-stamp + rm -f build/black-check.build-stamp + rm -f build/black-check.${CONDA_ENV_NAME}.build-stamp + rm -f build/black-check.mlos_*.${CONDA_ENV_NAME}.build-stamp + rm -f build/docformatter-check.${CONDA_ENV_NAME}.build-stamp + rm -f build/docformatter-check.mlos_*.${CONDA_ENV_NAME}.build-stamp + rm -f build/isort-check.${CONDA_ENV_NAME}.build-stamp + rm -f build/isort-check.mlos_*.${CONDA_ENV_NAME}.build-stamp rm -f build/pycodestyle.build-stamp rm -f build/pycodestyle.${CONDA_ENV_NAME}.build-stamp rm -f build/pycodestyle.mlos_*.${CONDA_ENV_NAME}.build-stamp rm -f build/pydocstyle.build-stamp rm -f build/pydocstyle.${CONDA_ENV_NAME}.build-stamp rm -f build/pydocstyle.mlos_*.${CONDA_ENV_NAME}.build-stamp - rm -f build/licenseheaders.${CONDA_ENV_NAME}.build-stamp - rm -f build/licenseheaders-prereqs.${CONDA_ENV_NAME}.build-stamp .PHONY: clean-test clean-test: @@ -549,15 +837,16 @@ clean-test: rm -rf junit/ rm -rf test-output.xml -.PHONY: dist-clean -dist-clean: - rm -rf build dist +.PHONY: clean-dist +clean-dist: + rm -rf dist rm -rf mlos_core/build mlos_core/dist rm -rf mlos_bench/build mlos_bench/dist rm -rf mlos_viz/build mlos_viz/dist .PHONY: clean -clean: clean-check clean-test dist-clean clean-doc clean-doc-env dist-test-clean +clean: clean-format clean-check clean-test clean-dist clean-doc clean-doc-env clean-dist-test + rm -f build/unshallow.build-stamp rm -f .*.build-stamp rm -f build/conda-env.build-stamp build/conda-env.*.build-stamp rm -rf mlos_core.egg-info diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000..4380245f79 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,73 @@ +[tool.black] +line-length = 99 +target-version = ["py38", "py39", "py310", "py311", "py312"] +include = '\.pyi?$' + +[tool.isort] +profile = "black" +py_version = 311 +src_paths = ["mlos_core", "mlos_bench", "mlos_viz"] + +# TODO: Consider switching to pydocstringformatter +[tool.docformatter] +recursive = true +black = true +style = "numpy" +pre-summary-newline = true +close-quotes-on-newline = true + +# TODO: move some 
other setup.cfg configs here + +[tool.pylint.main] +# Specify a score threshold to be exceeded before program exits with error. +fail-under = 9.9 + +# Make sure public methods are documented. +# See Also: https://github.com/PyCQA/pydocstyle/issues/309#issuecomment-1426642147 +# Also fail on unused imports. +fail-on = [ + "missing-function-docstring", + "unused-import", +] + +# Ignore pylint complaints about an upstream dependency. +ignored-modules = ["ConfigSpace.hyperparameters"] + +# Help inform pylint where to find the project's source code without needing to relyon PYTHONPATH. +#init-hook="from pylint.config import find_pylintrc; import os, sys; sys.path.append(os.path.dirname(find_pylintrc())); from logging import warning; warning(sys.path)" +init-hook = "from logging import warning; warning(sys.path)" + +# Load some extra checkers. +load-plugins = [ + "pylint.extensions.bad_builtin", + "pylint.extensions.code_style", + "pylint.extensions.docparams", + "pylint.extensions.docstyle", + "pylint.extensions.for_any_all", + "pylint.extensions.mccabe", + "pylint.extensions.no_self_use", + "pylint.extensions.private_import", + "pylint.extensions.redefined_loop_name", + "pylint.extensions.redefined_variable_type", + "pylint.extensions.set_membership", + "pylint.extensions.typing", +] + +[tool.pylint.format] +# Maximum number of characters on a single line. +max-line-length = 99 + +[tool.pylint."messages control"] +disable = [ + "fixme", + "no-else-return", + "consider-using-assignment-expr", + "deprecated-typing-alias", # disable for now - only deprecated recently + "docstring-first-line-empty", + "consider-alternative-union-syntax", # disable for now - still supporting python 3.8 + "missing-raises-doc", +] + +[tool.pylint.string] +check-quote-consistency = true +check-str-concat-over-line-jumps = true diff --git a/setup.cfg b/setup.cfg index be6ee9c9c6..6f948f523a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,33 +1,33 @@ # vim: set ft=dosini: -[bdist_wheel] -universal = 1 [pycodestyle] count = True +# E203: Whitespace before : (black incompatibility) # W503: Line break occurred before a binary operator # W504: Line break occurred after a binary operator -ignore = W503,W504 +ignore = E203,W503,W504 format = pylint # See Also: .editorconfig, .pylintrc -max-line-length = 132 +max-line-length = 99 show-source = True statistics = True [pydocstyle] -# D102: Missing docstring in public method (Avoids inheritence bug. Force checked in .pylintrc instead.) +# D102: Missing docstring in public method (Avoids inheritence bug. Force checked in pylint instead.) # D105: Missing docstring in magic method # D107: Missing docstring in __init__ -# D200: One-line docstring should fit on one line with quotes # D401: First line should be in imperative mood # We have many docstrings that are too long to fit on one line, so we ignore both of these two rules: # D205: 1 blank line required between summary line and description # D400: First line should end with a period -add_ignore = D102,D105,D107,D200,D401,D205,D400 +add_ignore = D102,D105,D107,D401,D205,D400 match = .+(? 
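With the pyproject.toml and setup.cfg settings above in place, a rough sketch of how the formatters could be run by hand (a minimal example that assumes the conda environment name and the package directories used elsewhere in the Makefile; the real build drives these through per-package build stamps):

    conda run -n ${CONDA_ENV_NAME} black mlos_core/ mlos_bench/ mlos_viz/
    conda run -n ${CONDA_ENV_NAME} isort mlos_core/ mlos_bench/ mlos_viz/
    conda run -n ${CONDA_ENV_NAME} docformatter --in-place --recursive mlos_core/ mlos_bench/ mlos_viz/

Each tool reads its settings (99-column line length, the black-compatible isort profile, numpy docstring style) from the [tool.*] sections of pyproject.toml, so a manual run like this and the make targets should produce the same result.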
Date: Mon, 22 Jul 2024 19:56:03 +0000 Subject: [PATCH 094/121] apply formatters selectively --- mlos_bench/mlos_bench/launcher.py | 354 +++++++++++------- .../mlos_bench/optimizers/base_optimizer.py | 146 ++++---- .../mlos_bench/schedulers/base_scheduler.py | 171 +++++---- .../tests/launcher_parse_args_test.py | 199 +++++----- 4 files changed, 490 insertions(+), 380 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 7b2bf50623..e928a983d1 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -3,8 +3,8 @@ # Licensed under the MIT License. # """ -A helper class to load the configuration files, parse the command line parameters, -and instantiate the main components of mlos_bench system. +A helper class to load the configuration files, parse the command line parameters, and +instantiate the main components of mlos_bench system. It is used in `mlos_bench.run` module to run the benchmark/optimizer from the command line. @@ -13,34 +13,26 @@ import argparse import logging import sys - from typing import Any, Dict, Iterable, List, Optional, Tuple from mlos_bench.config.schemas import ConfigSchema from mlos_bench.dict_templater import DictTemplater -from mlos_bench.util import try_parse_val - -from mlos_bench.tunables.tunable import TunableValue -from mlos_bench.tunables.tunable_groups import TunableGroups from mlos_bench.environments.base_environment import Environment - from mlos_bench.optimizers.base_optimizer import Optimizer from mlos_bench.optimizers.mock_optimizer import MockOptimizer from mlos_bench.optimizers.one_shot_optimizer import OneShotOptimizer - -from mlos_bench.storage.base_storage import Storage - +from mlos_bench.schedulers.base_scheduler import Scheduler from mlos_bench.services.base_service import Service -from mlos_bench.services.local.local_exec import LocalExecService from mlos_bench.services.config_persistence import ConfigPersistenceService - -from mlos_bench.schedulers.base_scheduler import Scheduler - +from mlos_bench.services.local.local_exec import LocalExecService from mlos_bench.services.types.config_loader_type import SupportsConfigLoading - +from mlos_bench.storage.base_storage import Storage +from mlos_bench.tunables.tunable import TunableValue +from mlos_bench.tunables.tunable_groups import TunableGroups +from mlos_bench.util import try_parse_val _LOG_LEVEL = logging.INFO -_LOG_FORMAT = '%(asctime)s %(filename)s:%(lineno)d %(funcName)s %(levelname)s %(message)s' +_LOG_FORMAT = "%(asctime)s %(filename)s:%(lineno)d %(funcName)s %(levelname)s %(message)s" logging.basicConfig(level=_LOG_LEVEL, format=_LOG_FORMAT) _LOG = logging.getLogger(__name__) @@ -48,9 +40,7 @@ class Launcher: # pylint: disable=too-few-public-methods,too-many-instance-attributes - """ - Command line launcher for mlos_bench and mlos_core. 
- """ + """Command line launcher for mlos_bench and mlos_core.""" def __init__(self, description: str, long_text: str = "", argv: Optional[List[str]] = None): # pylint: disable=too-many-statements @@ -62,8 +52,7 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st For additional details, please see the website or the README.md files in the source tree: """ - parser = argparse.ArgumentParser(description=f"{description} : {long_text}", - epilog=epilog) + parser = argparse.ArgumentParser(description=f"{description} : {long_text}", epilog=epilog) (args, path_args, args_rest) = self._parse_args(parser, argv) # Bootstrap config loader: command line takes priority. @@ -102,8 +91,11 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st excluded_cli_args = path_args + ["teardown"] # Include (almost) any item from the cli config file that either isn't in the cli # args at all or whose cli arg is missing. - cli_config_args = {key: val for (key, val) in config.items() - if (key not in args_dict or args_dict[key] is None) and key not in excluded_cli_args} + cli_config_args = { + key: val + for (key, val) in config.items() + if (key not in args_dict or args_dict[key] is None) and key not in excluded_cli_args + } self.global_config = self._load_config( args_globals=config.get("globals", []) + (args.globals or []), @@ -115,13 +107,13 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st # experiment_id is generally taken from --globals files, but we also allow overriding it on the CLI. # It's useful to keep it there explicitly mostly for the --help output. if args.experiment_id: - self.global_config['experiment_id'] = args.experiment_id + self.global_config["experiment_id"] = args.experiment_id # trial_config_repeat_count is a scheduler property but it's convenient to set it via command line if args.trial_config_repeat_count: self.global_config["trial_config_repeat_count"] = args.trial_config_repeat_count # Ensure that the trial_id is present since it gets used by some other # configs but is typically controlled by the run optimize loop. - self.global_config.setdefault('trial_id', 1) + self.global_config.setdefault("trial_id", 1) self.global_config = DictTemplater(self.global_config).expand_vars(use_os_env=True) assert isinstance(self.global_config, dict) @@ -129,24 +121,29 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st # --service cli args should override the config file values. service_files: List[str] = config.get("services", []) + (args.service or []) assert isinstance(self._parent_service, SupportsConfigLoading) - self._parent_service = self._parent_service.load_services(service_files, self.global_config, self._parent_service) + self._parent_service = self._parent_service.load_services( + service_files, self.global_config, self._parent_service + ) env_path = args.environment or config.get("environment") if not env_path: _LOG.error("No environment config specified.") - parser.error("At least the Environment config must be specified." + - " Run `mlos_bench --help` and consult `README.md` for more info.") + parser.error( + "At least the Environment config must be specified." + + " Run `mlos_bench --help` and consult `README.md` for more info." 
+ ) self.root_env_config = self._config_loader.resolve_path(env_path) self.environment: Environment = self._config_loader.load_environment( - self.root_env_config, TunableGroups(), self.global_config, service=self._parent_service) + self.root_env_config, TunableGroups(), self.global_config, service=self._parent_service + ) _LOG.info("Init environment: %s", self.environment) # NOTE: Init tunable values *after* the Environment, but *before* the Optimizer self.tunables = self._init_tunable_values( args.random_init or config.get("random_init", False), config.get("random_seed") if args.random_seed is None else args.random_seed, - config.get("tunable_values", []) + (args.tunable_values or []) + config.get("tunable_values", []) + (args.tunable_values or []), ) _LOG.info("Init tunables: %s", self.tunables) @@ -156,120 +153,176 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self.storage = self._load_storage(args.storage or config.get("storage")) _LOG.info("Init storage: %s", self.storage) - self.teardown: bool = bool(args.teardown) if args.teardown is not None else bool(config.get("teardown", True)) + self.teardown: bool = ( + bool(args.teardown) + if args.teardown is not None + else bool(config.get("teardown", True)) + ) self.scheduler = self._load_scheduler(args.scheduler or config.get("scheduler")) _LOG.info("Init scheduler: %s", self.scheduler) @property def config_loader(self) -> ConfigPersistenceService: - """ - Get the config loader service. - """ + """Get the config loader service.""" return self._config_loader @property def service(self) -> Service: - """ - Get the parent service. - """ + """Get the parent service.""" return self._parent_service @staticmethod - def _parse_args(parser: argparse.ArgumentParser, - argv: Optional[List[str]]) -> Tuple[argparse.Namespace, List[str], List[str]]: - """ - Parse the command line arguments. - """ + def _parse_args( + parser: argparse.ArgumentParser, argv: Optional[List[str]] + ) -> Tuple[argparse.Namespace, List[str], List[str]]: + """Parse the command line arguments.""" path_args = [] parser.add_argument( - '--config', required=False, - help='Main JSON5 configuration file. Its keys are the same as the' + - ' command line options and can be overridden by the latter.\n' + - '\n' + - ' See the `mlos_bench/config/` tree at https://github.com/microsoft/MLOS/ ' + - ' for additional config examples for this and other arguments.') - path_args.append('config') + "--config", + required=False, + help="Main JSON5 configuration file. Its keys are the same as the" + + " command line options and can be overridden by the latter.\n" + + "\n" + + " See the `mlos_bench/config/` tree at https://github.com/microsoft/MLOS/ " + + " for additional config examples for this and other arguments.", + ) + path_args.append("config") parser.add_argument( - '--log_file', '--log-file', required=False, - help='Path to the log file. Use stdout if omitted.') - path_args.append('log_file') + "--log_file", + "--log-file", + required=False, + help="Path to the log file. Use stdout if omitted.", + ) + path_args.append("log_file") parser.add_argument( - '--log_level', '--log-level', required=False, type=str, - help=f'Logging level. Default is {logging.getLevelName(_LOG_LEVEL)}.' + - ' Set to DEBUG for debug, WARNING for warnings only.') + "--log_level", + "--log-level", + required=False, + type=str, + help=f"Logging level. Default is {logging.getLevelName(_LOG_LEVEL)}." 
+ + " Set to DEBUG for debug, WARNING for warnings only.", + ) parser.add_argument( - '--config_path', '--config-path', '--config-paths', '--config_paths', - nargs="+", action='extend', required=False, - help='One or more locations of JSON config files.') - path_args.append('config_path') - path_args.append('config_paths') + "--config_path", + "--config-path", + "--config-paths", + "--config_paths", + nargs="+", + action="extend", + required=False, + help="One or more locations of JSON config files.", + ) + path_args.append("config_path") + path_args.append("config_paths") parser.add_argument( - '--service', '--services', - nargs='+', action='extend', required=False, - help='Path to JSON file with the configuration of the service(s) for environment(s) to use.') - path_args.append('service') - path_args.append('services') + "--service", + "--services", + nargs="+", + action="extend", + required=False, + help="Path to JSON file with the configuration of the service(s) for environment(s) to use.", + ) + path_args.append("service") + path_args.append("services") parser.add_argument( - '--environment', required=False, - help='Path to JSON file with the configuration of the benchmarking environment(s).') - path_args.append('environment') + "--environment", + required=False, + help="Path to JSON file with the configuration of the benchmarking environment(s).", + ) + path_args.append("environment") parser.add_argument( - '--optimizer', required=False, - help='Path to the optimizer configuration file. If omitted, run' + - ' a single trial with default (or specified in --tunable_values).') - path_args.append('optimizer') + "--optimizer", + required=False, + help="Path to the optimizer configuration file. If omitted, run" + + " a single trial with default (or specified in --tunable_values).", + ) + path_args.append("optimizer") parser.add_argument( - '--trial_config_repeat_count', '--trial-config-repeat-count', required=False, type=int, - help='Number of times to repeat each config. Default is 1 trial per config, though more may be advised.') + "--trial_config_repeat_count", + "--trial-config-repeat-count", + required=False, + type=int, + help="Number of times to repeat each config. Default is 1 trial per config, though more may be advised.", + ) parser.add_argument( - '--scheduler', required=False, - help='Path to the scheduler configuration file. By default, use' + - ' a single worker synchronous scheduler.') - path_args.append('scheduler') + "--scheduler", + required=False, + help="Path to the scheduler configuration file. By default, use" + + " a single worker synchronous scheduler.", + ) + path_args.append("scheduler") parser.add_argument( - '--storage', required=False, - help='Path to the storage configuration file.' + - ' If omitted, use the ephemeral in-memory SQL storage.') - path_args.append('storage') + "--storage", + required=False, + help="Path to the storage configuration file." + + " If omitted, use the ephemeral in-memory SQL storage.", + ) + path_args.append("storage") parser.add_argument( - '--random_init', '--random-init', required=False, default=False, - dest='random_init', action='store_true', - help='Initialize tunables with random values. (Before applying --tunable_values).') + "--random_init", + "--random-init", + required=False, + default=False, + dest="random_init", + action="store_true", + help="Initialize tunables with random values. 
(Before applying --tunable_values).", + ) parser.add_argument( - '--random_seed', '--random-seed', required=False, type=int, - help='Seed to use with --random_init') + "--random_seed", + "--random-seed", + required=False, + type=int, + help="Seed to use with --random_init", + ) parser.add_argument( - '--tunable_values', '--tunable-values', nargs="+", action='extend', required=False, - help='Path to one or more JSON files that contain values of the tunable' + - ' parameters. This can be used for a single trial (when no --optimizer' + - ' is specified) or as default values for the first run in optimization.') - path_args.append('tunable_values') + "--tunable_values", + "--tunable-values", + nargs="+", + action="extend", + required=False, + help="Path to one or more JSON files that contain values of the tunable" + + " parameters. This can be used for a single trial (when no --optimizer" + + " is specified) or as default values for the first run in optimization.", + ) + path_args.append("tunable_values") parser.add_argument( - '--globals', nargs="+", action='extend', required=False, - help='Path to one or more JSON files that contain additional' + - ' [private] parameters of the benchmarking environment.') - path_args.append('globals') + "--globals", + nargs="+", + action="extend", + required=False, + help="Path to one or more JSON files that contain additional" + + " [private] parameters of the benchmarking environment.", + ) + path_args.append("globals") parser.add_argument( - '--no_teardown', '--no-teardown', required=False, default=None, - dest='teardown', action='store_false', - help='Disable teardown of the environment after the benchmark.') + "--no_teardown", + "--no-teardown", + required=False, + default=None, + dest="teardown", + action="store_false", + help="Disable teardown of the environment after the benchmark.", + ) parser.add_argument( - '--experiment_id', '--experiment-id', required=False, default=None, + "--experiment_id", + "--experiment-id", + required=False, + default=None, help=""" Experiment ID to use for the benchmark. If omitted, the value from the --cli config or --globals is used. @@ -279,7 +332,7 @@ def _parse_args(parser: argparse.ArgumentParser, changes are made to config files, scripts, versions, etc. This is left as a manual operation as detection of what is "incompatible" is not easily automatable across systems. - """ + """, ) # By default we use the command line arguments, but allow the caller to @@ -292,9 +345,7 @@ def _parse_args(parser: argparse.ArgumentParser, @staticmethod def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]: - """ - Helper function to parse global key/value pairs from the command line. - """ + """Helper function to parse global key/value pairs from the command line.""" _LOG.debug("Extra args: %s", cmdline) config: Dict[str, TunableValue] = {} @@ -321,16 +372,18 @@ def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]: _LOG.debug("Parsed config: %s", config) return config - def _load_config(self, *, - args_globals: Iterable[str], - config_path: Iterable[str], - args_rest: Iterable[str], - global_config: Dict[str, Any]) -> Dict[str, Any]: - """ - Get key/value pairs of the global configuration parameters - from the specified config files (if any) and command line arguments. 
+ def _load_config( + self, + *, + args_globals: Iterable[str], + config_path: Iterable[str], + args_rest: Iterable[str], + global_config: Dict[str, Any], + ) -> Dict[str, Any]: + """Get key/value pairs of the global configuration parameters from the specified + config files (if any) and command line arguments. """ - for config_file in (args_globals or []): + for config_file in args_globals or []: conf = self._config_loader.load_config(config_file, ConfigSchema.GLOBALS) assert isinstance(conf, dict) global_config.update(conf) @@ -339,19 +392,21 @@ def _load_config(self, *, global_config["config_path"] = config_path return global_config - def _init_tunable_values(self, random_init: bool, seed: Optional[int], - args_tunables: Optional[str]) -> TunableGroups: - """ - Initialize the tunables and load key/value pairs of the tunable values - from given JSON files, if specified. + def _init_tunable_values( + self, random_init: bool, seed: Optional[int], args_tunables: Optional[str] + ) -> TunableGroups: + """Initialize the tunables and load key/value pairs of the tunable values from + given JSON files, if specified. """ tunables = self.environment.tunable_params _LOG.debug("Init tunables: default = %s", tunables) if random_init: tunables = MockOptimizer( - tunables=tunables, service=None, - config={"start_with_defaults": False, "seed": seed}).suggest() + tunables=tunables, + service=None, + config={"start_with_defaults": False, "seed": seed}, + ).suggest() _LOG.debug("Init tunables: random = %s", tunables) if args_tunables is not None: @@ -365,50 +420,62 @@ def _init_tunable_values(self, random_init: bool, seed: Optional[int], def _load_optimizer(self, args_optimizer: Optional[str]) -> Optimizer: """ - Instantiate the Optimizer object from JSON config file, if specified - in the --optimizer command line option. If config file not specified, - create a one-shot optimizer to run a single benchmark trial. + Instantiate the Optimizer object from JSON config file, if specified in the + --optimizer command line option. + + If config file not specified, create a one-shot optimizer to run a single + benchmark trial. """ if args_optimizer is None: # global_config may contain additional properties, so we need to # strip those out before instantiating the basic oneshot optimizer. - config = {key: val for key, val in self.global_config.items() if key in OneShotOptimizer.BASE_SUPPORTED_CONFIG_PROPS} - return OneShotOptimizer( - self.tunables, config=config, service=self._parent_service) + config = { + key: val + for key, val in self.global_config.items() + if key in OneShotOptimizer.BASE_SUPPORTED_CONFIG_PROPS + } + return OneShotOptimizer(self.tunables, config=config, service=self._parent_service) class_config = self._config_loader.load_config(args_optimizer, ConfigSchema.OPTIMIZER) assert isinstance(class_config, Dict) - optimizer = self._config_loader.build_optimizer(tunables=self.tunables, - service=self._parent_service, - config=class_config, - global_config=self.global_config) + optimizer = self._config_loader.build_optimizer( + tunables=self.tunables, + service=self._parent_service, + config=class_config, + global_config=self.global_config, + ) return optimizer def _load_storage(self, args_storage: Optional[str]) -> Storage: """ - Instantiate the Storage object from JSON file provided in the --storage - command line parameter. If omitted, create an ephemeral in-memory SQL - storage instead. + Instantiate the Storage object from JSON file provided in the --storage command + line parameter. 
+ + If omitted, create an ephemeral in-memory SQL storage instead. """ if args_storage is None: # pylint: disable=import-outside-toplevel from mlos_bench.storage.sql.storage import SqlStorage - return SqlStorage(service=self._parent_service, - config={ - "drivername": "sqlite", - "database": ":memory:", - "lazy_schema_create": True, - }) + + return SqlStorage( + service=self._parent_service, + config={ + "drivername": "sqlite", + "database": ":memory:", + "lazy_schema_create": True, + }, + ) class_config = self._config_loader.load_config(args_storage, ConfigSchema.STORAGE) assert isinstance(class_config, Dict) - storage = self._config_loader.build_storage(service=self._parent_service, - config=class_config, - global_config=self.global_config) + storage = self._config_loader.build_storage( + service=self._parent_service, config=class_config, global_config=self.global_config + ) return storage def _load_scheduler(self, args_scheduler: Optional[str]) -> Scheduler: """ Instantiate the Scheduler object from JSON file provided in the --scheduler command line parameter. + Create a simple synchronous single-threaded scheduler if omitted. """ # Set `teardown` for scheduler only to prevent conflicts with other configs. @@ -417,6 +484,7 @@ def _load_scheduler(self, args_scheduler: Optional[str]) -> Scheduler: if args_scheduler is None: # pylint: disable=import-outside-toplevel from mlos_bench.schedulers.sync_scheduler import SyncScheduler + return SyncScheduler( # All config values can be overridden from global config config={ diff --git a/mlos_bench/mlos_bench/optimizers/base_optimizer.py b/mlos_bench/mlos_bench/optimizers/base_optimizer.py index 89ee6c9fd1..7d14cb68bb 100644 --- a/mlos_bench/mlos_bench/optimizers/base_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/base_optimizer.py @@ -2,34 +2,32 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -""" -Base class for an interface between the benchmarking framework -and mlos_core optimizers. +"""Base class for an interface between the benchmarking framework and mlos_core +optimizers. """ import logging from abc import ABCMeta, abstractmethod -from distutils.util import strtobool # pylint: disable=deprecated-module - +from distutils.util import strtobool # pylint: disable=deprecated-module from types import TracebackType from typing import Dict, Optional, Sequence, Tuple, Type, Union -from typing_extensions import Literal from ConfigSpace import ConfigurationSpace +from typing_extensions import Literal from mlos_bench.config.schemas import ConfigSchema -from mlos_bench.services.base_service import Service from mlos_bench.environments.status import Status +from mlos_bench.optimizers.convert_configspace import tunable_groups_to_configspace +from mlos_bench.services.base_service import Service from mlos_bench.tunables.tunable import TunableValue from mlos_bench.tunables.tunable_groups import TunableGroups -from mlos_bench.optimizers.convert_configspace import tunable_groups_to_configspace _LOG = logging.getLogger(__name__) -class Optimizer(metaclass=ABCMeta): # pylint: disable=too-many-instance-attributes - """ - An abstract interface between the benchmarking framework and mlos_core optimizers. +class Optimizer(metaclass=ABCMeta): # pylint: disable=too-many-instance-attributes + """An abstract interface between the benchmarking framework and mlos_core + optimizers. 
""" # See Also: mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json @@ -40,13 +38,16 @@ class Optimizer(metaclass=ABCMeta): # pylint: disable=too-many-instance-attr "start_with_defaults", } - def __init__(self, - tunables: TunableGroups, - config: dict, - global_config: Optional[dict] = None, - service: Optional[Service] = None): + def __init__( + self, + tunables: TunableGroups, + config: dict, + global_config: Optional[dict] = None, + service: Optional[Service] = None, + ): """ - Create a new optimizer for the given configuration space defined by the tunables. + Create a new optimizer for the given configuration space defined by the + tunables. Parameters ---------- @@ -68,19 +69,20 @@ def __init__(self, self._seed = int(config.get("seed", 42)) self._in_context = False - experiment_id = self._global_config.get('experiment_id') + experiment_id = self._global_config.get("experiment_id") self.experiment_id = str(experiment_id).strip() if experiment_id else None self._iter = 0 # If False, use the optimizer to suggest the initial configuration; # if True (default), use the already initialized values for the first iteration. self._start_with_defaults: bool = bool( - strtobool(str(self._config.pop('start_with_defaults', True)))) - self._max_iter = int(self._config.pop('max_suggestions', 100)) + strtobool(str(self._config.pop("start_with_defaults", True))) + ) + self._max_iter = int(self._config.pop("max_suggestions", 100)) - opt_targets: Dict[str, str] = self._config.pop('optimization_targets', {'score': 'min'}) + opt_targets: Dict[str, str] = self._config.pop("optimization_targets", {"score": "min"}) self._opt_targets: Dict[str, Literal[1, -1]] = {} - for (opt_target, opt_dir) in opt_targets.items(): + for opt_target, opt_dir in opt_targets.items(): if opt_dir == "min": self._opt_targets[opt_target] = 1 elif opt_dir == "max": @@ -89,10 +91,9 @@ def __init__(self, raise ValueError(f"Invalid optimization direction: {opt_dir} for {opt_target}") def _validate_json_config(self, config: dict) -> None: - """ - Reconstructs a basic json config that this class might have been - instantiated from in order to validate configs provided outside the - file loading mechanism. + """Reconstructs a basic json config that this class might have been instantiated + from in order to validate configs provided outside the file loading + mechanism. """ json_config: dict = { "class": self.__class__.__module__ + "." + self.__class__.__name__, @@ -108,21 +109,20 @@ def __repr__(self) -> str: ) return f"{self.name}({opt_targets},config={self._config})" - def __enter__(self) -> 'Optimizer': - """ - Enter the optimizer's context. - """ + def __enter__(self) -> "Optimizer": + """Enter the optimizer's context.""" _LOG.debug("Optimizer START :: %s", self) assert not self._in_context self._in_context = True return self - def __exit__(self, ex_type: Optional[Type[BaseException]], - ex_val: Optional[BaseException], - ex_tb: Optional[TracebackType]) -> Literal[False]: - """ - Exit the context of the optimizer. - """ + def __exit__( + self, + ex_type: Optional[Type[BaseException]], + ex_val: Optional[BaseException], + ex_tb: Optional[TracebackType], + ) -> Literal[False]: + """Exit the context of the optimizer.""" if ex_val is None: _LOG.debug("Optimizer END :: %s", self) else: @@ -157,15 +157,14 @@ def max_iterations(self) -> int: @property def seed(self) -> int: - """ - The random seed for the optimizer. 
- """ + """The random seed for the optimizer.""" return self._seed @property def start_with_defaults(self) -> bool: """ Return True if the optimizer should start with the default values. + Note: This parameter is mutable and will be reset to False after the defaults are first suggested. """ @@ -201,16 +200,16 @@ def config_space(self) -> ConfigurationSpace: @property def name(self) -> str: """ - The name of the optimizer. We save this information in - mlos_bench storage to track the source of each configuration. + The name of the optimizer. + + We save this information in mlos_bench storage to track the source of each + configuration. """ return self.__class__.__name__ @property - def targets(self) -> Dict[str, Literal['min', 'max']]: - """ - A dictionary of {target: direction} of optimization targets. - """ + def targets(self) -> Dict[str, Literal["min", "max"]]: + """A dictionary of {target: direction} of optimization targets.""" return { opt_target: "min" if opt_dir == 1 else "max" for (opt_target, opt_dir) in self._opt_targets.items() @@ -218,16 +217,18 @@ def targets(self) -> Dict[str, Literal['min', 'max']]: @property def supports_preload(self) -> bool: - """ - Return True if the optimizer supports pre-loading the data from previous experiments. + """Return True if the optimizer supports pre-loading the data from previous + experiments. """ return True @abstractmethod - def bulk_register(self, - configs: Sequence[dict], - scores: Sequence[Optional[Dict[str, TunableValue]]], - status: Optional[Sequence[Status]] = None) -> bool: + def bulk_register( + self, + configs: Sequence[dict], + scores: Sequence[Optional[Dict[str, TunableValue]]], + status: Optional[Sequence[Status]] = None, + ) -> bool: """ Pre-load the optimizer with the bulk data from previous experiments. @@ -245,8 +246,12 @@ def bulk_register(self, is_not_empty : bool True if there is data to register, false otherwise. """ - _LOG.info("Update the optimizer with: %d configs, %d scores, %d status values", - len(configs or []), len(scores or []), len(status or [])) + _LOG.info( + "Update the optimizer with: %d configs, %d scores, %d status values", + len(configs or []), + len(scores or []), + len(status or []), + ) if len(configs or []) != len(scores or []): raise ValueError("Numbers of configs and scores do not match.") if status is not None and len(configs or []) != len(status or []): @@ -259,9 +264,8 @@ def bulk_register(self, def suggest(self) -> TunableGroups: """ - Generate the next suggestion. - Base class' implementation increments the iteration count - and returns the current values of the tunables. + Generate the next suggestion. Base class' implementation increments the + iteration count and returns the current values of the tunables. Returns ------- @@ -275,8 +279,12 @@ def suggest(self) -> TunableGroups: return self._tunables.copy() @abstractmethod - def register(self, tunables: TunableGroups, status: Status, - score: Optional[Dict[str, TunableValue]] = None) -> Optional[Dict[str, float]]: + def register( + self, + tunables: TunableGroups, + status: Status, + score: Optional[Dict[str, TunableValue]] = None, + ) -> Optional[Dict[str, float]]: """ Register the observation for the given configuration. @@ -297,18 +305,19 @@ def register(self, tunables: TunableGroups, status: Status, Benchmark scores extracted (and possibly transformed) from the dataframe that's being MINIMIZED. 
""" - _LOG.info("Iteration %d :: Register: %s = %s score: %s", - self._iter, tunables, status, score) + _LOG.info( + "Iteration %d :: Register: %s = %s score: %s", self._iter, tunables, status, score + ) if status.is_succeeded() == (score is None): # XOR raise ValueError("Status and score must be consistent.") return self._get_scores(status, score) - def _get_scores(self, status: Status, - scores: Optional[Union[Dict[str, TunableValue], Dict[str, float]]] - ) -> Optional[Dict[str, float]]: + def _get_scores( + self, status: Status, scores: Optional[Union[Dict[str, TunableValue], Dict[str, float]]] + ) -> Optional[Dict[str, float]]: """ - Extract a scalar benchmark score from the dataframe. - Change the sign if we are maximizing. + Extract a scalar benchmark score from the dataframe. Change the sign if we are + maximizing. Parameters ---------- @@ -332,7 +341,7 @@ def _get_scores(self, status: Status, assert scores is not None target_metrics: Dict[str, float] = {} - for (opt_target, opt_dir) in self._opt_targets.items(): + for opt_target, opt_dir in self._opt_targets.items(): val = scores[opt_target] assert val is not None target_metrics[opt_target] = float(val) * opt_dir @@ -342,12 +351,15 @@ def _get_scores(self, status: Status, def not_converged(self) -> bool: """ Return True if not converged, False otherwise. + Base implementation just checks the iteration count. """ return self._iter < self._max_iter @abstractmethod - def get_best_observation(self) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]: + def get_best_observation( + self, + ) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]: """ Get the best observation so far. diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 1c974da957..cadd61fc9f 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -2,20 +2,17 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -""" -Base class for the optimization loop scheduling policies. -""" +"""Base class for the optimization loop scheduling policies.""" import json import logging -from datetime import datetime - from abc import ABCMeta, abstractmethod +from datetime import datetime from types import TracebackType from typing import Any, Dict, Optional, Tuple, Type -from typing_extensions import Literal from pytz import UTC +from typing_extensions import Literal from mlos_bench.config.schemas import ConfigSchema from mlos_bench.environments.base_environment import Environment @@ -29,22 +26,23 @@ class Scheduler(metaclass=ABCMeta): # pylint: disable=too-many-instance-attributes - """ - Base class for the optimization loop scheduling policies. - """ - - def __init__(self, *, - config: Dict[str, Any], - global_config: Dict[str, Any], - environment: Environment, - optimizer: Optimizer, - storage: Storage, - root_env_config: str): + """Base class for the optimization loop scheduling policies.""" + + def __init__( + self, + *, + config: Dict[str, Any], + global_config: Dict[str, Any], + environment: Environment, + optimizer: Optimizer, + storage: Storage, + root_env_config: str, + ): """ - Create a new instance of the scheduler. The constructor of this - and the derived classes is called by the persistence service - after reading the class JSON configuration. Other objects like - the Environment and Optimizer are provided by the Launcher. + Create a new instance of the scheduler. 
The constructor of this and the derived + classes is called by the persistence service after reading the class JSON + configuration. Other objects like the Environment and Optimizer are provided by + the Launcher. Parameters ---------- @@ -62,8 +60,9 @@ def __init__(self, *, Path to the root environment configuration. """ self.global_config = global_config - config = merge_parameters(dest=config.copy(), source=global_config, - required_keys=["experiment_id", "trial_id"]) + config = merge_parameters( + dest=config.copy(), source=global_config, required_keys=["experiment_id", "trial_id"] + ) self._validate_json_config(config) self._experiment_id = config["experiment_id"].strip() @@ -74,7 +73,9 @@ def __init__(self, *, self._trial_config_repeat_count = int(config.get("trial_config_repeat_count", 1)) if self._trial_config_repeat_count <= 0: - raise ValueError(f"Invalid trial_config_repeat_count: {self._trial_config_repeat_count}") + raise ValueError( + f"Invalid trial_config_repeat_count: {self._trial_config_repeat_count}" + ) self._do_teardown = bool(config.get("teardown", True)) @@ -88,10 +89,9 @@ def __init__(self, *, _LOG.debug("Scheduler instantiated: %s :: %s", self, config) def _validate_json_config(self, config: dict) -> None: - """ - Reconstructs a basic json config that this class might have been - instantiated from in order to validate configs provided outside the - file loading mechanism. + """Reconstructs a basic json config that this class might have been instantiated + from in order to validate configs provided outside the file loading + mechanism. """ json_config: dict = { "class": self.__class__.__module__ + "." + self.__class__.__name__, @@ -107,7 +107,9 @@ def trial_config_repeat_count(self) -> int: @property def max_trials(self) -> int: - """Gets the maximum number of trials to run for a given experiment, or -1 for no limit.""" + """Gets the maximum number of trials to run for a given experiment, or -1 for no + limit. + """ return self._max_trials def __repr__(self) -> str: @@ -121,10 +123,8 @@ def __repr__(self) -> str: """ return self.__class__.__name__ - def __enter__(self) -> 'Scheduler': - """ - Enter the scheduler's context. - """ + def __enter__(self) -> "Scheduler": + """Enter the scheduler's context.""" _LOG.debug("Scheduler START :: %s", self) assert self.experiment is None self.environment.__enter__() @@ -143,13 +143,13 @@ def __enter__(self) -> 'Scheduler': ).__enter__() return self - def __exit__(self, - ex_type: Optional[Type[BaseException]], - ex_val: Optional[BaseException], - ex_tb: Optional[TracebackType]) -> Literal[False]: - """ - Exit the context of the scheduler. - """ + def __exit__( + self, + ex_type: Optional[Type[BaseException]], + ex_val: Optional[BaseException], + ex_tb: Optional[TracebackType], + ) -> Literal[False]: + """Exit the context of the scheduler.""" if ex_val is None: _LOG.debug("Scheduler END :: %s", self) else: @@ -164,12 +164,14 @@ def __exit__(self, @abstractmethod def start(self) -> None: - """ - Start the optimization loop. - """ + """Start the optimization loop.""" assert self.experiment is not None - _LOG.info("START: Experiment: %s Env: %s Optimizer: %s", - self.experiment, self.environment, self.optimizer) + _LOG.info( + "START: Experiment: %s Env: %s Optimizer: %s", + self.experiment, + self.environment, + self.optimizer, + ) if _LOG.isEnabledFor(logging.INFO): _LOG.info("Root Environment:\n%s", self.environment.pprint()) @@ -180,6 +182,7 @@ def start(self) -> None: def teardown(self) -> None: """ Tear down the environment. 
+ Call it after the completion of the `.start()` in the scheduler context. """ assert self.experiment is not None @@ -187,17 +190,13 @@ def teardown(self) -> None: self.environment.teardown() def get_best_observation(self) -> Tuple[Optional[Dict[str, float]], Optional[TunableGroups]]: - """ - Get the best observation from the optimizer. - """ + """Get the best observation from the optimizer.""" (best_score, best_config) = self.optimizer.get_best_observation() _LOG.info("Env: %s best score: %s", self.environment, best_score) return (best_score, best_config) def load_config(self, config_id: int) -> TunableGroups: - """ - Load the existing tunable configuration from the storage. - """ + """Load the existing tunable configuration from the storage.""" assert self.experiment is not None tunable_values = self.experiment.load_tunable_config(config_id) tunables = self.environment.tunable_params.assign(tunable_values) @@ -208,9 +207,11 @@ def load_config(self, config_id: int) -> TunableGroups: def _schedule_new_optimizer_suggestions(self) -> bool: """ - Optimizer part of the loop. Load the results of the executed trials - into the optimizer, suggest new configurations, and add them to the queue. - Return True if optimization is not over, False otherwise. + Optimizer part of the loop. + + Load the results of the executed trials into the optimizer, suggest new + configurations, and add them to the queue. Return True if optimization is not + over, False otherwise. """ assert self.experiment is not None (trial_ids, configs, scores, status) = self.experiment.load(self._last_trial_id) @@ -226,33 +227,38 @@ def _schedule_new_optimizer_suggestions(self) -> bool: return not_done def schedule_trial(self, tunables: TunableGroups) -> None: - """ - Add a configuration to the queue of trials. - """ + """Add a configuration to the queue of trials.""" for repeat_i in range(1, self._trial_config_repeat_count + 1): - self._add_trial_to_queue(tunables, config={ - # Add some additional metadata to track for the trial such as the - # optimizer config used. - # Note: these values are unfortunately mutable at the moment. - # Consider them as hints of what the config was the trial *started*. - # It is possible that the experiment configs were changed - # between resuming the experiment (since that is not currently - # prevented). - "optimizer": self.optimizer.name, - "repeat_i": repeat_i, - "is_defaults": tunables.is_defaults, - **{ - f"opt_{key}_{i}": val - for (i, opt_target) in enumerate(self.optimizer.targets.items()) - for (key, val) in zip(["target", "direction"], opt_target) - } - }) - - def _add_trial_to_queue(self, tunables: TunableGroups, - ts_start: Optional[datetime] = None, - config: Optional[Dict[str, Any]] = None) -> None: + self._add_trial_to_queue( + tunables, + config={ + # Add some additional metadata to track for the trial such as the + # optimizer config used. + # Note: these values are unfortunately mutable at the moment. + # Consider them as hints of what the config was the trial *started*. + # It is possible that the experiment configs were changed + # between resuming the experiment (since that is not currently + # prevented). 
+ "optimizer": self.optimizer.name, + "repeat_i": repeat_i, + "is_defaults": tunables.is_defaults, + **{ + f"opt_{key}_{i}": val + for (i, opt_target) in enumerate(self.optimizer.targets.items()) + for (key, val) in zip(["target", "direction"], opt_target) + }, + }, + ) + + def _add_trial_to_queue( + self, + tunables: TunableGroups, + ts_start: Optional[datetime] = None, + config: Optional[Dict[str, Any]] = None, + ) -> None: """ Add a configuration to the queue of trials. + A wrapper for the `Experiment.new_trial` method. """ assert self.experiment is not None @@ -261,7 +267,9 @@ def _add_trial_to_queue(self, tunables: TunableGroups, def _run_schedule(self, running: bool = False) -> None: """ - Scheduler part of the loop. Check for pending trials in the queue and run them. + Scheduler part of the loop. + + Check for pending trials in the queue and run them. """ assert self.experiment is not None for trial in self.experiment.pending_trials(datetime.now(UTC), running=running): @@ -270,6 +278,7 @@ def _run_schedule(self, running: bool = False) -> None: def not_done(self) -> bool: """ Check the stopping conditions. + By default, stop when the optimizer converges or max limit of trials reached. """ return self.optimizer.not_converged() and ( @@ -279,7 +288,9 @@ def not_done(self) -> bool: @abstractmethod def run_trial(self, trial: Storage.Trial) -> None: """ - Set up and run a single trial. Save the results in the storage. + Set up and run a single trial. + + Save the results in the storage. """ assert self.experiment is not None self._trial_count += 1 diff --git a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py index d5a92fc30f..2b9c31c014 100644 --- a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py @@ -14,11 +14,10 @@ import pytest +from mlos_bench.config.schemas import ConfigSchema from mlos_bench.launcher import Launcher -from mlos_bench.optimizers import OneShotOptimizer, MlosCoreOptimizer +from mlos_bench.optimizers import MlosCoreOptimizer, OneShotOptimizer from mlos_bench.os_environ import environ -from mlos_bench.config.schemas import ConfigSchema -from mlos_bench.util import path_join from mlos_bench.schedulers import SyncScheduler from mlos_bench.services.types import ( SupportsAuth, @@ -28,6 +27,7 @@ SupportsRemoteExec, ) from mlos_bench.tests import check_class_name +from mlos_bench.util import path_join if sys.version_info < (3, 10): from importlib_resources import files @@ -48,13 +48,13 @@ def config_paths() -> List[str]: """ return [ path_join(os.getcwd(), abs_path=True), - str(files('mlos_bench.config')), - str(files('mlos_bench.tests.config')), + str(files("mlos_bench.config")), + str(files("mlos_bench.tests.config")), ] # This is part of the minimal required args by the Launcher. -ENV_CONF_PATH = 'environments/mock/mock_env.jsonc' +ENV_CONF_PATH = "environments/mock/mock_env.jsonc" def _get_launcher(desc: str, cli_args: str) -> Launcher: @@ -63,81 +63,90 @@ def _get_launcher(desc: str, cli_args: str) -> Launcher: # variable so we use a separate variable. # See global_test_config.jsonc for more details. environ["CUSTOM_PATH_FROM_ENV"] = os.getcwd() - if sys.platform == 'win32': + if sys.platform == "win32": # Some env tweaks for platform compatibility. 
- environ['USER'] = environ['USERNAME'] + environ["USER"] = environ["USERNAME"] launcher = Launcher(description=desc, argv=cli_args.split()) # Check the basic parent service assert isinstance(launcher.service, SupportsConfigLoading) # built-in - assert isinstance(launcher.service, SupportsLocalExec) # built-in + assert isinstance(launcher.service, SupportsLocalExec) # built-in return launcher def test_launcher_args_parse_defaults(config_paths: List[str]) -> None: + """Test that we get the defaults we expect when using minimal config arg + examples. """ - Test that we get the defaults we expect when using minimal config arg examples. - """ - cli_args = '--config-paths ' + ' '.join(config_paths) + \ - f' --environment {ENV_CONF_PATH}' + \ - ' --globals globals/global_test_config.jsonc' + cli_args = ( + "--config-paths " + + " ".join(config_paths) + + f" --environment {ENV_CONF_PATH}" + + " --globals globals/global_test_config.jsonc" + ) launcher = _get_launcher(__name__, cli_args) # Check that the first --globals file is loaded and $var expansion is handled. - assert launcher.global_config['experiment_id'] == 'MockExperiment' - assert launcher.global_config['testVmName'] == 'MockExperiment-vm' + assert launcher.global_config["experiment_id"] == "MockExperiment" + assert launcher.global_config["testVmName"] == "MockExperiment-vm" # Check that secondary expansion also works. - assert launcher.global_config['testVnetName'] == 'MockExperiment-vm-vnet' + assert launcher.global_config["testVnetName"] == "MockExperiment-vm-vnet" # Check that we can expand a $var in a config file that references an environment variable. - assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) \ - == path_join(os.getcwd(), "foo", abs_path=True) - assert launcher.global_config["varWithEnvVarRef"] == f'user:{getuser()}' - assert launcher.teardown # defaults + assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) == path_join( + os.getcwd(), "foo", abs_path=True + ) + assert launcher.global_config["varWithEnvVarRef"] == f"user:{getuser()}" + assert launcher.teardown # defaults # Check that the environment that got loaded looks to be of the right type. env_config = launcher.config_loader.load_config(ENV_CONF_PATH, ConfigSchema.ENVIRONMENT) assert env_config["class"] == "mlos_bench.environments.mock_env.MockEnv" - assert check_class_name(launcher.environment, env_config['class']) + assert check_class_name(launcher.environment, env_config["class"]) # Check that the optimizer looks right. assert isinstance(launcher.optimizer, OneShotOptimizer) # Check that the optimizer got initialized with defaults. assert launcher.optimizer.tunable_params.is_defaults() - assert launcher.optimizer.max_iterations == 1 # value for OneShotOptimizer + assert launcher.optimizer.max_iterations == 1 # value for OneShotOptimizer # Check that we pick up the right scheduler config: assert isinstance(launcher.scheduler, SyncScheduler) - assert launcher.scheduler.trial_config_repeat_count == 1 # default - assert launcher.scheduler.max_trials == -1 # default + assert launcher.scheduler.trial_config_repeat_count == 1 # default + assert launcher.scheduler.max_trials == -1 # default def test_launcher_args_parse_1(config_paths: List[str]) -> None: """ - Test that using multiple --globals arguments works and that multiple space - separated options to --config-paths works. + Test that using multiple --globals arguments works and that multiple space separated + options to --config-paths works. 
+ Check $var expansion and Environment loading. """ # Here we have multiple paths following --config-paths and --service. - cli_args = '--config-paths ' + ' '.join(config_paths) + \ - ' --service services/remote/mock/mock_auth_service.jsonc' + \ - ' services/remote/mock/mock_remote_exec_service.jsonc' + \ - ' --scheduler schedulers/sync_scheduler.jsonc' + \ - f' --environment {ENV_CONF_PATH}' + \ - ' --globals globals/global_test_config.jsonc' + \ - ' --globals globals/global_test_extra_config.jsonc' \ - ' --test_global_value_2 from-args' + cli_args = ( + "--config-paths " + + " ".join(config_paths) + + " --service services/remote/mock/mock_auth_service.jsonc" + + " services/remote/mock/mock_remote_exec_service.jsonc" + + " --scheduler schedulers/sync_scheduler.jsonc" + + f" --environment {ENV_CONF_PATH}" + + " --globals globals/global_test_config.jsonc" + + " --globals globals/global_test_extra_config.jsonc" + " --test_global_value_2 from-args" + ) launcher = _get_launcher(__name__, cli_args) # Check some additional features of the the parent service - assert isinstance(launcher.service, SupportsAuth) # from --service - assert isinstance(launcher.service, SupportsRemoteExec) # from --service + assert isinstance(launcher.service, SupportsAuth) # from --service + assert isinstance(launcher.service, SupportsRemoteExec) # from --service # Check that the first --globals file is loaded and $var expansion is handled. - assert launcher.global_config['experiment_id'] == 'MockExperiment' - assert launcher.global_config['testVmName'] == 'MockExperiment-vm' + assert launcher.global_config["experiment_id"] == "MockExperiment" + assert launcher.global_config["testVmName"] == "MockExperiment-vm" # Check that secondary expansion also works. - assert launcher.global_config['testVnetName'] == 'MockExperiment-vm-vnet' + assert launcher.global_config["testVnetName"] == "MockExperiment-vm-vnet" # Check that the second --globals file is loaded. - assert launcher.global_config['test_global_value'] == 'from-file' + assert launcher.global_config["test_global_value"] == "from-file" # Check overriding values in a file from the command line. - assert launcher.global_config['test_global_value_2'] == 'from-args' + assert launcher.global_config["test_global_value_2"] == "from-args" # Check that we can expand a $var in a config file that references an environment variable. - assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) \ - == path_join(os.getcwd(), "foo", abs_path=True) - assert launcher.global_config["varWithEnvVarRef"] == f'user:{getuser()}' + assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) == path_join( + os.getcwd(), "foo", abs_path=True + ) + assert launcher.global_config["varWithEnvVarRef"] == f"user:{getuser()}" assert launcher.teardown # Check that the environment that got loaded looks to be of the right type. env_config = launcher.config_loader.load_config(ENV_CONF_PATH, ConfigSchema.ENVIRONMENT) @@ -146,68 +155,78 @@ def test_launcher_args_parse_1(config_paths: List[str]) -> None: assert isinstance(launcher.optimizer, OneShotOptimizer) # Check that the optimizer got initialized with defaults. 
assert launcher.optimizer.tunable_params.is_defaults() - assert launcher.optimizer.max_iterations == 1 # value for OneShotOptimizer + assert launcher.optimizer.max_iterations == 1 # value for OneShotOptimizer # Check that we pick up the right scheduler config: assert isinstance(launcher.scheduler, SyncScheduler) - assert launcher.scheduler.trial_config_repeat_count == 3 # from the custom sync_scheduler.jsonc config + assert ( + launcher.scheduler.trial_config_repeat_count == 3 + ) # from the custom sync_scheduler.jsonc config assert launcher.scheduler.max_trials == -1 def test_launcher_args_parse_2(config_paths: List[str]) -> None: - """ - Test multiple --config-path instances, --config file vs --arg, --var=val + """Test multiple --config-path instances, --config file vs --arg, --var=val overrides, $var templates, option args, --random-init, etc. """ - config_file = 'cli/test-cli-config.jsonc' - globals_file = 'globals/global_test_config.jsonc' + config_file = "cli/test-cli-config.jsonc" + globals_file = "globals/global_test_config.jsonc" # Here we have multiple --config-path and --service args, each with their own path. - cli_args = ' '.join([f"--config-path {config_path}" for config_path in config_paths]) + \ - f' --config {config_file}' + \ - ' --service services/remote/mock/mock_auth_service.jsonc' + \ - ' --service services/remote/mock/mock_remote_exec_service.jsonc' + \ - f' --globals {globals_file}' + \ - ' --experiment_id MockeryExperiment' + \ - ' --no-teardown' + \ - ' --random-init' + \ - ' --random-seed 1234' + \ - ' --trial-config-repeat-count 5' + \ - ' --max_trials 200' + cli_args = ( + " ".join([f"--config-path {config_path}" for config_path in config_paths]) + + f" --config {config_file}" + + " --service services/remote/mock/mock_auth_service.jsonc" + + " --service services/remote/mock/mock_remote_exec_service.jsonc" + + f" --globals {globals_file}" + + " --experiment_id MockeryExperiment" + + " --no-teardown" + + " --random-init" + + " --random-seed 1234" + + " --trial-config-repeat-count 5" + + " --max_trials 200" + ) launcher = _get_launcher(__name__, cli_args) # Check some additional features of the the parent service - assert isinstance(launcher.service, SupportsAuth) # from --service - assert isinstance(launcher.service, SupportsFileShareOps) # from --config - assert isinstance(launcher.service, SupportsRemoteExec) # from --service + assert isinstance(launcher.service, SupportsAuth) # from --service + assert isinstance(launcher.service, SupportsFileShareOps) # from --config + assert isinstance(launcher.service, SupportsRemoteExec) # from --service # Check that the --globals file is loaded and $var expansion is handled # using the value provided on the CLI. - assert launcher.global_config['experiment_id'] == 'MockeryExperiment' - assert launcher.global_config['testVmName'] == 'MockeryExperiment-vm' + assert launcher.global_config["experiment_id"] == "MockeryExperiment" + assert launcher.global_config["testVmName"] == "MockeryExperiment-vm" # Check that secondary expansion also works. - assert launcher.global_config['testVnetName'] == 'MockeryExperiment-vm-vnet' + assert launcher.global_config["testVnetName"] == "MockeryExperiment-vm-vnet" # Check that we can expand a $var in a config file that references an environment variable. 
- assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) \ - == path_join(os.getcwd(), "foo", abs_path=True) - assert launcher.global_config["varWithEnvVarRef"] == f'user:{getuser()}' + assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) == path_join( + os.getcwd(), "foo", abs_path=True + ) + assert launcher.global_config["varWithEnvVarRef"] == f"user:{getuser()}" assert not launcher.teardown config = launcher.config_loader.load_config(config_file, ConfigSchema.CLI) - assert launcher.config_loader.config_paths == [path_join(path, abs_path=True) for path in config_paths + config['config_path']] + assert launcher.config_loader.config_paths == [ + path_join(path, abs_path=True) for path in config_paths + config["config_path"] + ] # Check that the environment that got loaded looks to be of the right type. - env_config_file = config['environment'] + env_config_file = config["environment"] env_config = launcher.config_loader.load_config(env_config_file, ConfigSchema.ENVIRONMENT) - assert check_class_name(launcher.environment, env_config['class']) + assert check_class_name(launcher.environment, env_config["class"]) # Check that the optimizer looks right. assert isinstance(launcher.optimizer, MlosCoreOptimizer) - opt_config_file = config['optimizer'] + opt_config_file = config["optimizer"] opt_config = launcher.config_loader.load_config(opt_config_file, ConfigSchema.OPTIMIZER) globals_file_config = launcher.config_loader.load_config(globals_file, ConfigSchema.GLOBALS) # The actual global_config gets overwritten as a part of processing, so to test # this we read the original value out of the source files. - orig_max_iters = globals_file_config.get('max_suggestions', opt_config.get('config', {}).get('max_suggestions', 100)) - assert launcher.optimizer.max_iterations \ - == orig_max_iters \ - == launcher.global_config['max_suggestions'] + orig_max_iters = globals_file_config.get( + "max_suggestions", opt_config.get("config", {}).get("max_suggestions", 100) + ) + assert ( + launcher.optimizer.max_iterations + == orig_max_iters + == launcher.global_config["max_suggestions"] + ) # Check that the optimizer got initialized with random values instead of the defaults. # Note: the environment doesn't get updated until suggest() is called to @@ -220,11 +239,11 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: # Check that CLI parameter overrides JSON config: assert isinstance(launcher.scheduler, SyncScheduler) - assert launcher.scheduler.trial_config_repeat_count == 5 # from cli args + assert launcher.scheduler.trial_config_repeat_count == 5 # from cli args assert launcher.scheduler.max_trials == 200 # Check that the value from the file is overridden by the CLI arg. - assert config['random_seed'] == 42 + assert config["random_seed"] == 42 # TODO: This isn't actually respected yet because the `--random-init` only # applies to a temporary Optimizer used to populate the initial values via # random sampling. @@ -232,16 +251,16 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: def test_launcher_args_parse_3(config_paths: List[str]) -> None: - """ - Check that cli file values take precedence over other values. 
- """ - config_file = 'cli/test-cli-config.jsonc' - globals_file = 'globals/global_test_config.jsonc' + """Check that cli file values take precedence over other values.""" + config_file = "cli/test-cli-config.jsonc" + globals_file = "globals/global_test_config.jsonc" # Here we don't override values in test-cli-config with cli args but ensure that # those take precedence over other config files. - cli_args = ' '.join([f"--config-path {config_path}" for config_path in config_paths]) + \ - f' --config {config_file}' + \ - f' --globals {globals_file}' + cli_args = ( + " ".join([f"--config-path {config_path}" for config_path in config_paths]) + + f" --config {config_file}" + + f" --globals {globals_file}" + ) launcher = _get_launcher(__name__, cli_args) # Check that CLI file parameter overrides JSON config: @@ -250,5 +269,5 @@ def test_launcher_args_parse_3(config_paths: List[str]) -> None: assert launcher.scheduler.trial_config_repeat_count == 2 -if __name__ == '__main__': +if __name__ == "__main__": pytest.main([__file__, "-n0"]) From 2dee79ff396b6feda3fba0b1619f995f97efb1cc Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 22 Jul 2024 20:30:05 +0000 Subject: [PATCH 095/121] fixups --- mlos_bench/mlos_bench/launcher.py | 2 +- mlos_bench/mlos_bench/schedulers/base_scheduler.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index f9fd1ebf26..05f7d1b544 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -98,7 +98,7 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st cli_config_args = { key: val for (key, val) in config.items() - if (key not in args_dict or args_dict[key] is None) and key not in excluded_cli_args + if (args_dict.get(key) is None) and key not in excluded_cli_args } self.global_config = self._load_config( diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index c0119d7d25..dac362101b 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -91,9 +91,9 @@ def __init__( # pylint: disable=too-many-arguments _LOG.debug("Scheduler instantiated: %s :: %s", self, config) def _validate_json_config(self, config: dict) -> None: - """Reconstructs a basic json config that this class might have been - instantiated from in order to validate configs provided outside the - file loading mechanism. + """Reconstructs a basic json config that this class might have been instantiated + from in order to validate configs provided outside the file loading + mechanism. """ json_config: dict = { "class": self.__class__.__module__ + "." 
+ self.__class__.__name__, From 990745547a817a5278ad40db6db0983b1ab219a0 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 23 Jul 2024 17:44:29 +0000 Subject: [PATCH 096/121] apply comments --- mlos_bench/mlos_bench/launcher.py | 48 +++++++++++++++---------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 05f7d1b544..ae29735596 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -44,6 +44,7 @@ class Launcher: def __init__(self, description: str, long_text: str = "", argv: Optional[List[str]] = None): # pylint: disable=too-many-statements + # pylint: disable=too-many-locals _LOG.info("Launch: %s", description) epilog = """ Additional --key=value pairs can be specified to augment or override @@ -185,9 +186,20 @@ def _parse_args( argv: Optional[List[str]], ) -> Tuple[argparse.Namespace, List[str], List[str]]: """Parse the command line arguments.""" - path_args = [] + class PathArgsTracker: + """Simple class to help track which arguments are paths.""" - parser.add_argument( + def __init__(self, parser: argparse.ArgumentParser): + self._parser = parser + self.path_args: List[str] = [] + + def add_argument(self, *args: Any, **kwargs: Any) -> None: + """Add an argument to the parser and track its destination.""" + self.path_args.append(self._parser.add_argument(*args, **kwargs).dest) + + path_args_tracker = PathArgsTracker(parser) + + path_args_tracker.add_argument( "--config", required=False, help=( @@ -198,15 +210,13 @@ def _parse_args( "for additional config examples for this and other arguments." ), ) - path_args.append("config") - parser.add_argument( + path_args_tracker.add_argument( "--log_file", "--log-file", required=False, help="Path to the log file. Use stdout if omitted.", ) - path_args.append("log_file") parser.add_argument( "--log_level", @@ -219,7 +229,7 @@ def _parse_args( ), ) - parser.add_argument( + path_args_tracker.add_argument( "--config_path", "--config-path", "--config-paths", @@ -229,10 +239,8 @@ def _parse_args( required=False, help="One or more locations of JSON config files.", ) - path_args.append("config_path") - path_args.append("config_paths") - parser.add_argument( + path_args_tracker.add_argument( "--service", "--services", nargs="+", @@ -243,17 +251,14 @@ def _parse_args( "of the service(s) for environment(s) to use." ), ) - path_args.append("service") - path_args.append("services") - parser.add_argument( + path_args_tracker.add_argument( "--environment", required=False, help="Path to JSON file with the configuration of the benchmarking environment(s).", ) - path_args.append("environment") - parser.add_argument( + path_args_tracker.add_argument( "--optimizer", required=False, help=( @@ -261,7 +266,6 @@ def _parse_args( "a single trial with default (or specified in --tunable_values)." ), ) - path_args.append("optimizer") parser.add_argument( "--trial_config_repeat_count", @@ -274,7 +278,7 @@ def _parse_args( ), ) - parser.add_argument( + path_args_tracker.add_argument( "--scheduler", required=False, help=( @@ -282,9 +286,8 @@ def _parse_args( "a single worker synchronous scheduler." ), ) - path_args.append("scheduler") - parser.add_argument( + path_args_tracker.add_argument( "--storage", required=False, help=( @@ -292,7 +295,6 @@ def _parse_args( "If omitted, use the ephemeral in-memory SQL storage." 
), ) - path_args.append("storage") parser.add_argument( "--random_init", @@ -312,7 +314,7 @@ def _parse_args( help="Seed to use with --random_init", ) - parser.add_argument( + path_args_tracker.add_argument( "--tunable_values", "--tunable-values", nargs="+", @@ -324,9 +326,8 @@ def _parse_args( "is specified) or as default values for the first run in optimization." ), ) - path_args.append("tunable_values") - parser.add_argument( + path_args_tracker.add_argument( "--globals", nargs="+", action="extend", @@ -336,7 +337,6 @@ def _parse_args( "[private] parameters of the benchmarking environment." ), ) - path_args.append("globals") parser.add_argument( "--no_teardown", @@ -371,7 +371,7 @@ def _parse_args( argv = sys.argv[1:].copy() (args, args_rest) = parser.parse_known_args(argv) - return (args, path_args, args_rest) + return (args, path_args_tracker.path_args, args_rest) @staticmethod def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]: From f4e9c3f50147aa9438a3f1412039f6b257e4f6e0 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 23 Jul 2024 17:52:58 +0000 Subject: [PATCH 097/121] Ignore negative config_id from the scheduler schema validation --- mlos_bench/mlos_bench/launcher.py | 1 + mlos_bench/mlos_bench/schedulers/base_scheduler.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index ae29735596..1d51a1abf4 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -524,6 +524,7 @@ def _load_scheduler(self, args_scheduler: Optional[str]) -> Scheduler: # All config values can be overridden from global config config={ "experiment_id": "UNDEFINED - override from global config", + "config_id": -1, "trial_id": 0, "trial_config_repeat_count": 1, "teardown": self.teardown, diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index dac362101b..30805c189e 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -99,7 +99,13 @@ def _validate_json_config(self, config: dict) -> None: "class": self.__class__.__module__ + "." + self.__class__.__name__, } if config: - json_config["config"] = config + json_config["config"] = config.copy() + # The json schema does not allow for -1 as a valid value for config_id. + # As it is just a default placeholder value, and not required, we can + # remove it from the config copy prior to validation safely. 
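The same guard can be reproduced with a plain `jsonschema` call. The schema below is a made-up stand-in (not the real mlos_bench scheduler schema), only to show why a negative placeholder `config_id` has to be dropped before validation:

    from jsonschema import ValidationError, validate

    # Toy schema: ids must be non-negative integers; config_id is optional.
    SCHEDULER_SCHEMA = {
        "type": "object",
        "properties": {
            "trial_id": {"type": "integer", "minimum": 0},
            "config_id": {"type": "integer", "minimum": 0},
        },
    }

    config = {"trial_id": 0, "config_id": -1}   # -1 is only a "not assigned yet" placeholder

    try:
        validate(instance=config, schema=SCHEDULER_SCHEMA)
    except ValidationError as ex:
        print("rejected:", ex.message)          # "-1 is less than the minimum of 0"

    sanitized = config.copy()
    config_id = sanitized.get("config_id")
    if isinstance(config_id, int) and config_id < 0:
        sanitized.pop("config_id")              # drop the optional placeholder first
    validate(instance=sanitized, schema=SCHEDULER_SCHEMA)   # now passes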
+ config_id = json_config["config"].get("config_id") + if config_id is not None and isinstance(config_id, int) and config_id < 0: + json_config["config"].pop("config_id") ConfigSchema.SCHEDULER.validate(json_config) @property From 70647dc885b97ef75100564b9d5d763f02f31fb3 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 23 Jul 2024 19:36:04 +0000 Subject: [PATCH 098/121] whitespace --- mlos_bench/mlos_bench/launcher.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 1d51a1abf4..10e1731ba4 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -186,6 +186,7 @@ def _parse_args( argv: Optional[List[str]], ) -> Tuple[argparse.Namespace, List[str], List[str]]: """Parse the command line arguments.""" + class PathArgsTracker: """Simple class to help track which arguments are paths.""" From 4220aacdc872699ba72f2752fc005f94aa916661 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 23 Jul 2024 19:37:37 +0000 Subject: [PATCH 099/121] revert unnecessary lineswap --- mlos_bench/mlos_bench/launcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 10e1731ba4..274b35d91e 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -525,8 +525,8 @@ def _load_scheduler(self, args_scheduler: Optional[str]) -> Scheduler: # All config values can be overridden from global config config={ "experiment_id": "UNDEFINED - override from global config", - "config_id": -1, "trial_id": 0, + "config_id": -1, "trial_config_repeat_count": 1, "teardown": self.teardown, }, From 5a327dda48fc08fd3e383eba2b526dfb7c969b36 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 23 Jul 2024 22:05:57 +0000 Subject: [PATCH 100/121] apply suggestion --- mlos_bench/mlos_bench/launcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 02d002cea1..64f5035f3e 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -153,7 +153,7 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self.root_env_config = self._config_loader.resolve_path(env_path) self.trial_runners: List[TrialRunner] = [] - for trial_runner_id in range(0, self.global_config["num_trial_runners"]): + for trial_runner_id in range(self.global_config["num_trial_runners"]): # Create a new global config for each Environment with a unique trial_runner_id for it. 
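What this loop builds up, shown as a standalone sketch (the dictionary contents here are illustrative, not the real global config):

    from typing import Any, Dict, List

    global_config: Dict[str, Any] = {"experiment_id": "MockExperiment", "num_trial_runners": 3}

    # One copy of the global config per TrialRunner, each tagged with its own
    # trial_runner_id, so every Environment instance knows which runner owns it.
    runner_configs: List[Dict[str, Any]] = []
    for trial_runner_id in range(global_config["num_trial_runners"]):
        cfg = global_config.copy()
        cfg["trial_runner_id"] = trial_runner_id
        runner_configs.append(cfg)

    print([cfg["trial_runner_id"] for cfg in runner_configs])   # [0, 1, 2]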
env_global_config = self.global_config.copy() env_global_config["trial_runner_id"] = trial_runner_id From 49087c4143c0dd83fa078424b1e810e5a03da374 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 23 Jul 2024 22:12:23 +0000 Subject: [PATCH 101/121] formatting --- mlos_bench/mlos_bench/storage/base_storage.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 03b9b48a20..d0963569cd 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -356,7 +356,10 @@ def __init__( # pylint: disable=too-many-arguments self._config = config or {} def __repr__(self) -> str: - return f"{self._experiment_id}:{self._trial_id}:{self._tunable_config_id}:{self.trial_runner_id}" + return ( + f"{self._experiment_id}:{self._trial_id}:" + f"{self._tunable_config_id}:{self.trial_runner_id}" + ) @property def trial_id(self) -> int: From 0ac79aa3a168397b2541245096b8119468d3e594 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 23 Jul 2024 22:34:38 +0000 Subject: [PATCH 102/121] wip: assigning new trial runner ids to old trials --- .../mlos_bench/schedulers/base_scheduler.py | 24 +++++++---- mlos_bench/mlos_bench/storage/base_storage.py | 41 ++++++++++++++++++- mlos_bench/mlos_bench/storage/sql/trial.py | 12 +++++- 3 files changed, 68 insertions(+), 9 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 735de78905..ffe9b7f610 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -173,7 +173,14 @@ def get_trial_runner(self, trial: Storage.Trial) -> TrialRunner: TrialRunner """ if trial.trial_runner_id is None: - raise ValueError(f"Trial {trial} has no trial_runner_id") + new_trial_runner_id = self._atomic_get_and_increment_current_trial_runner_index() + _LOG.warning( + "Trial %s missing trial_runner_id. Assigning %d", + trial, + new_trial_runner_id, + ) + trial.add_new_config_data({"trial_runner_id": new_trial_runner_id}) + assert trial.trial_runner_id is not None return self._trial_runners[trial.trial_runner_id] def __repr__(self) -> str: @@ -293,6 +300,14 @@ def _schedule_new_optimizer_suggestions(self) -> bool: return not_done + def _atomic_get_and_increment_current_trial_runner_index(self) -> int: + current_trial_runner_index = self._current_trial_runner_idx + # Rotate which TrialRunner the Trial is assigned to. + # TODO: This could be a more sophisticated policy. + self._current_trial_runner_idx += 1 + self._current_trial_runner_idx %= len(self._trial_runners) + return current_trial_runner_index + def schedule_trial(self, tunables: TunableGroups) -> None: """Add a configuration to the queue of trials.""" # TODO: Alternative scheduling policies may prefer to expand repeats over @@ -312,7 +327,7 @@ def schedule_trial(self, tunables: TunableGroups) -> None: "optimizer": self.optimizer.name, "repeat_i": repeat_i, "trial_runner_id": self._trial_runners[ - self._current_trial_runner_idx + self._atomic_get_and_increment_current_trial_runner_index() ].trial_runner_id, "is_defaults": tunables.is_defaults(), **{ @@ -322,11 +337,6 @@ def schedule_trial(self, tunables: TunableGroups) -> None: }, }, ) - # Rotate which TrialRunner the Trial is assigned to. - # TODO: This could be a more sophisticated policy. 
- self._current_trial_runner_idx = (self._current_trial_runner_idx + 1) % len( - self._trial_runners - ) def _add_trial_to_queue( self, diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index d0963569cd..012893f5c7 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -8,7 +8,7 @@ from abc import ABCMeta, abstractmethod from datetime import datetime from types import TracebackType -from typing import Any, Dict, Iterator, List, Optional, Tuple, Type +from typing import Any, Dict, Iterator, List, Optional, Tuple, Type, Union from typing_extensions import Literal @@ -409,6 +409,45 @@ def config(self, global_config: Optional[Dict[str, Any]] = None) -> Dict[str, An config.setdefault("trial_runner_id", trial_runner_id) return config + def add_new_config_data( + self, + new_config_data: Dict[str, Union[int, float, str]], + ) -> None: + """Add new config data to the trial. + + Parameters + ---------- + new_config_data : Dict[str, Union[int, float, str]] + New data to add (must not already exist for the trial). + + Raises + ------ + ValueError + If any of the data already exists. + """ + + for key, value in new_config_data.items(): + if key in self._config: + raise ValueError( + f"New config data {key}={value} already exists for trial {self}: " + f"{self._config[key]}" + ) + self._config[key] = value + self._save_new_config_data(new_config_data) + + @abstractmethod + def _save_new_config_data( + self, + new_config_data: Dict[str, Union[int, float, str]], + ) -> None: + """Save the new config data to the storage. + + Parameters + ---------- + new_config_data : Dict[str, Union[int, float, str]] + New data to add. + """ + @abstractmethod def update( self, diff --git a/mlos_bench/mlos_bench/storage/sql/trial.py b/mlos_bench/mlos_bench/storage/sql/trial.py index 5942912efd..d5fa80d593 100644 --- a/mlos_bench/mlos_bench/storage/sql/trial.py +++ b/mlos_bench/mlos_bench/storage/sql/trial.py @@ -6,7 +6,7 @@ import logging from datetime import datetime -from typing import Any, Dict, List, Literal, Optional, Tuple +from typing import Any, Dict, List, Literal, Optional, Tuple, Union from sqlalchemy import Connection, Engine from sqlalchemy.exc import IntegrityError @@ -46,6 +46,16 @@ def __init__( # pylint: disable=too-many-arguments self._engine = engine self._schema = schema + def _save_new_config_data(self, new_config_data: Dict[str, Union[int, float, str]]) -> None: + with self._engine.begin() as conn: + self._experiment._save_params( + conn, + self._schema.trial_param, + new_config_data, + exp_id=self._experiment_id, + trial_id=self._trial_id, + ) + def update( self, status: Status, From 094cbf63bc2b7a9107a5f026917586572b376687 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 23 Sep 2024 19:16:17 +0000 Subject: [PATCH 103/121] format --- mlos_bench/mlos_bench/storage/base_storage.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 29e9eef8a1..11a4fc8135 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -451,7 +451,8 @@ def add_new_config_data( self, new_config_data: Dict[str, Union[int, float, str]], ) -> None: - """Add new config data to the trial. + """ + Add new config data to the trial. 
Parameters ---------- @@ -478,7 +479,8 @@ def _save_new_config_data( self, new_config_data: Dict[str, Union[int, float, str]], ) -> None: - """Save the new config data to the storage. + """ + Save the new config data to the storage. Parameters ---------- From 6f54b21df8e6b46eb4f073ed5454f8ddc2086657 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 23 Sep 2024 19:55:05 +0000 Subject: [PATCH 104/121] refactor to allow easier scheduling overrides --- .../mlos_bench/schedulers/base_scheduler.py | 89 ++++++++++++++----- mlos_bench/mlos_bench/storage/sql/schema.py | 3 +- 2 files changed, 71 insertions(+), 21 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 7c44103ab8..86e3f86be2 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -139,7 +139,7 @@ def root_environment(self) -> Environment: """ Gets the root (prototypical) Environment from the first TrialRunner. - Note: This all TrialRunners have the same Environment config and are made + Note: All TrialRunners have the same Environment config and are made unique by their use of the unique trial_runner_id assigned to each TrialRunner's Environment's global_config. """ @@ -165,6 +165,70 @@ def storage(self) -> Storage: """Gets the Storage.""" return self._storage + def assign_trial_runner( + self, + trial: Storage.Trial, + trial_runner: Optional[TrialRunner] = None, + ) -> TrialRunner: + """ + Assigns a TrialRunner to the given Trial. + + The base class implements a simple round-robin scheduling algorithm. + + Subclasses can override this method to implement a more sophisticated policy. + For instance: + + ```python + def assign_trial_runner( + self, + trial: Storage.Trial, + trial_runner: Optional[TrialRunner] = None, + ) -> TrialRunner: + if trial_runner is None: + # Implement a more sophisticated policy here. + # For example, to assign the Trial to the TrialRunner with the least + # number of running Trials. + # Or assign the Trial to the TrialRunner that hasn't executed this + # TunableValues Config yet. + trial_runner = ... + # Call the base class method to assign the TrialRunner in the Trial's metadata. + return super().assign_trial_runner(trial, trial_runner) + ... + ``` + + Parameters + ---------- + trial : Storage.Trial + The trial to assign a TrialRunner to. + trial_runner : Optional[TrialRunner] + The ID of the TrialRunner to assign to the given Trial. + + Returns + ------- + TrialRunner + The assigned TrialRunner. + """ + assert ( + trial.trial_runner_id is None + ), f"Trial {trial} already has a TrialRunner assigned: {trial.trial_runner_id}" + if trial_runner is None: + # Basic round-robin trial runner assignment policy: + # fetch and increment the current TrialRunner index. + # Override in the subclass for a more sophisticated policy. + trial_runner_id = self._current_trial_runner_idx + self._current_trial_runner_idx += 1 + self._current_trial_runner_idx %= len(self._trial_runners) + + trial_runner_id = self._atomic_get_and_increment_current_trial_runner_index() + _LOG.info( + "Trial %s missing trial_runner_id. Assigning %d via basic round-robin policy.", + trial, + trial_runner_id, + ) + trial_runner = self._trial_runners[trial_runner_id] + trial.add_new_config_data({"trial_runner_id": trial_runner.trial_runner_id}) + return trial_runner + def get_trial_runner(self, trial: Storage.Trial) -> TrialRunner: """ Gets the TrialRunner associated with the given Trial. 
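As a standalone illustration of the policy above (and of the kind of override the docstring suggests), here is a minimal sketch; the classes are toys, not part of mlos_bench, and only the round-robin vs. least-loaded idea carries over:

    from typing import Dict, List


    class RoundRobinAssigner:
        """Toy stand-in for the default round-robin TrialRunner assignment."""

        def __init__(self, runner_ids: List[int]):
            self._runner_ids = runner_ids
            self._idx = 0

        def assign(self) -> int:
            # Hand out runner ids in a fixed cycle, regardless of the trial.
            runner_id = self._runner_ids[self._idx]
            self._idx = (self._idx + 1) % len(self._runner_ids)
            return runner_id


    class LeastLoadedAssigner(RoundRobinAssigner):
        """The kind of smarter policy a Scheduler subclass might substitute."""

        def __init__(self, runner_ids: List[int]):
            super().__init__(runner_ids)
            self._assigned: Dict[int, int] = {runner_id: 0 for runner_id in runner_ids}

        def assign(self) -> int:
            # Prefer the runner with the fewest trials assigned so far.
            runner_id = min(self._assigned, key=lambda rid: self._assigned[rid])
            self._assigned[runner_id] += 1
            return runner_id


    round_robin = RoundRobinAssigner([1, 2, 3])
    print([round_robin.assign() for _ in range(5)])   # [1, 2, 3, 1, 2]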
@@ -179,13 +243,7 @@ def get_trial_runner(self, trial: Storage.Trial) -> TrialRunner: TrialRunner """ if trial.trial_runner_id is None: - new_trial_runner_id = self._atomic_get_and_increment_current_trial_runner_index() - _LOG.warning( - "Trial %s missing trial_runner_id. Assigning %d", - trial, - new_trial_runner_id, - ) - trial.add_new_config_data({"trial_runner_id": new_trial_runner_id}) + self.assign_trial_runner(trial, trial_runner=None) assert trial.trial_runner_id is not None return self._trial_runners[trial.trial_runner_id] @@ -306,14 +364,6 @@ def _schedule_new_optimizer_suggestions(self) -> bool: return not_done - def _atomic_get_and_increment_current_trial_runner_index(self) -> int: - current_trial_runner_index = self._current_trial_runner_idx - # Rotate which TrialRunner the Trial is assigned to. - # TODO: This could be a more sophisticated policy. - self._current_trial_runner_idx += 1 - self._current_trial_runner_idx %= len(self._trial_runners) - return current_trial_runner_index - def schedule_trial(self, tunables: TunableGroups) -> None: """Add a configuration to the queue of trials.""" # TODO: Alternative scheduling policies may prefer to expand repeats over @@ -332,9 +382,6 @@ def schedule_trial(self, tunables: TunableGroups) -> None: # prevented). "optimizer": self.optimizer.name, "repeat_i": repeat_i, - "trial_runner_id": self._trial_runners[ - self._atomic_get_and_increment_current_trial_runner_index() - ].trial_runner_id, "is_defaults": tunables.is_defaults(), **{ f"opt_{key}_{i}": val @@ -357,7 +404,9 @@ def _add_trial_to_queue( """ assert self.experiment is not None trial = self.experiment.new_trial(tunables, ts_start, config) - _LOG.info("QUEUE: Add new trial: %s", trial) + # Select a TrialRunner based on the trial's metadata. + trial_runner = self.assign_trial_runner(trial, trial_runner=None) + _LOG.info("QUEUE: Added new trial: %s (assigned to %s)", trial, trial_runner) def _run_schedule(self, running: bool = False) -> None: """ diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 15459242c7..431cfe1bb1 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -148,7 +148,8 @@ def __init__(self, engine: Engine): # Values of additional non-tunable parameters of the trial, # e.g., scheduled execution time, VM name / location, number of repeats, etc. - # In particular, the trial_runner_id is stored here (in part to avoid updating the trial table schema). + # In particular, the trial_runner_id is stored here (in part to avoid + # updating the trial table schema). self.trial_param = Table( "trial_param", self._meta, From 0ac19aec09bbd3b97f31eed9e53b938713d56971 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 23 Sep 2024 19:58:55 +0000 Subject: [PATCH 105/121] reformat --- mlos_bench/mlos_bench/schedulers/trial_runner.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/trial_runner.py b/mlos_bench/mlos_bench/schedulers/trial_runner.py index 006282e214..a8da73dc26 100644 --- a/mlos_bench/mlos_bench/schedulers/trial_runner.py +++ b/mlos_bench/mlos_bench/schedulers/trial_runner.py @@ -114,9 +114,8 @@ def run_trial( # TODO: start background status polling of the environments in the event loop. - (status, timestamp, results) = ( - self.environment.run() - ) # Block and wait for the final result. + # Block and wait for the final result. 
+ (status, timestamp, results) = self.environment.run() _LOG.info("TrialRunner Results: %s :: %s\n%s", trial.tunables, status, results) # In async mode (TODO), poll the environment for status and telemetry From eab3c6e811808dcb127f0bfa3c335dccaae5f643 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 23 Sep 2024 20:15:55 +0000 Subject: [PATCH 106/121] comments --- mlos_bench/mlos_bench/launcher.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index f65e8febca..e0e9275731 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -484,8 +484,6 @@ def _init_tunable_values( ).suggest() _LOG.debug("Init tunables: random = %s", tunables) - # TODO: should we assign the same or different tunables for all TrialRunner Environments? - if args_tunables is not None: for data_file in args_tunables: values = self._config_loader.load_config(data_file, ConfigSchema.TUNABLE_VALUES) From b1fa8c2b54000e0258a57be90611a07295d3b07c Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 23 Sep 2024 20:24:06 +0000 Subject: [PATCH 107/121] tweaks --- .../mlos_bench/schedulers/base_scheduler.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 86e3f86be2..b312f07480 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -165,7 +165,7 @@ def storage(self) -> Storage: """Gets the Storage.""" return self._storage - def assign_trial_runner( + def _assign_trial_runner( self, trial: Storage.Trial, trial_runner: Optional[TrialRunner] = None, @@ -218,14 +218,12 @@ def assign_trial_runner( trial_runner_id = self._current_trial_runner_idx self._current_trial_runner_idx += 1 self._current_trial_runner_idx %= len(self._trial_runners) - - trial_runner_id = self._atomic_get_and_increment_current_trial_runner_index() + trial_runner = self._trial_runners[trial_runner_id] _LOG.info( - "Trial %s missing trial_runner_id. Assigning %d via basic round-robin policy.", + "Trial %s missing trial_runner_id. Assigning %s via basic round-robin policy.", trial, - trial_runner_id, + trial_runner, ) - trial_runner = self._trial_runners[trial_runner_id] trial.add_new_config_data({"trial_runner_id": trial_runner.trial_runner_id}) return trial_runner @@ -243,7 +241,7 @@ def get_trial_runner(self, trial: Storage.Trial) -> TrialRunner: TrialRunner """ if trial.trial_runner_id is None: - self.assign_trial_runner(trial, trial_runner=None) + self._assign_trial_runner(trial, trial_runner=None) assert trial.trial_runner_id is not None return self._trial_runners[trial.trial_runner_id] @@ -350,8 +348,6 @@ def _schedule_new_optimizer_suggestions(self) -> bool: over, False otherwise. """ assert self.experiment is not None - # FIXME: In async mode, trial_ids may be returned out of order, so we may - # need to adjust this fetching logic. (trial_ids, configs, scores, status) = self.experiment.load(self._last_trial_id) _LOG.info("QUEUE: Update the optimizer with trial results: %s", trial_ids) self.optimizer.bulk_register(configs, scores, status) @@ -405,7 +401,9 @@ def _add_trial_to_queue( assert self.experiment is not None trial = self.experiment.new_trial(tunables, ts_start, config) # Select a TrialRunner based on the trial's metadata. 
- trial_runner = self.assign_trial_runner(trial, trial_runner=None) + # TODO: May want to further split this in the future to support scheduling a + # batch of new trials. + trial_runner = self._assign_trial_runner(trial, trial_runner=None) _LOG.info("QUEUE: Added new trial: %s (assigned to %s)", trial, trial_runner) def _run_schedule(self, running: bool = False) -> None: From a48e83184f3b8030d89852d8111338fec3645f88 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 23 Sep 2024 21:52:37 +0000 Subject: [PATCH 108/121] wip --- .../mlos_bench/tests/storage/sql/fixtures.py | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index 8a9065e436..1b987f1b78 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -14,6 +14,7 @@ from mlos_bench.environments.status import Status from mlos_bench.optimizers.mock_optimizer import MockOptimizer +from mlos_bench.schedulers.sync_scheduler import SyncScheduler from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.storage.sql.storage import SqlStorage from mlos_bench.tests import SEED @@ -110,9 +111,14 @@ def _dummy_run_exp( exp: SqlStorage.Experiment, tunable_name: Optional[str], ) -> SqlStorage.Experiment: + # pylint: disable=too-many-locals """Generates data by doing a simulated run of the given experiment.""" # Add some trials to that experiment. - # Note: we're just fabricating some made up function for the ML libraries to try and learn. + + # TODO: Add MockEnv (from conftest fixtures) to simulate the Environment to "run". + # TODO: Add MockOptimizer (local fixture?) to suggest tunables. + # TODO: Add SyncScheduler (new fixture) to run them? 
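For a sense of what that scheduler-driven setup boils down to, here is a toy version of the suggest/run/repeat loop; these are plain stand-in functions, not the mlos_bench API, using the same 10-configs-by-3-repeats shape as the constants used here:

    import random
    from typing import Dict, List, Tuple


    def suggest(rng: random.Random) -> Dict[str, float]:
        # Stand-in for Optimizer.suggest(): one new tunable configuration.
        return {"x": rng.uniform(0.0, 1.0)}


    def run_trial(config: Dict[str, float]) -> Tuple[str, float]:
        # Stand-in for running one trial: a simple convex objective, like MockEnv.
        return ("SUCCEEDED", (config["x"] - 0.5) ** 2)


    def sync_loop(config_count: int, repeat_count: int, seed: int = 42) -> List[float]:
        rng = random.Random(seed)
        scores: List[float] = []
        for _config_i in range(config_count):
            config = suggest(rng)                   # one suggestion per config...
            for _repeat_i in range(repeat_count):   # ...repeated several times
                _status, score = run_trial(config)
                scores.append(score)                # a real scheduler would register this result
        return scores


    print(len(sync_loop(config_count=10, repeat_count=3)))   # 30 trials = 10 configs x 3 repeats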
+ base_score = 10.0 if tunable_name: tunable = exp.tunables.get_tunable(tunable_name)[0] @@ -130,6 +136,21 @@ def _dummy_run_exp( # "start_with_defaults": True, }, ) + scheduler = SyncScheduler( + # All config values can be overridden from global config + config={ + "experiment_id": "UNDEFINED - override from global config", + "trial_id": 0, + "config_id": -1, + "trial_config_repeat_count": CONFIG_TRIAL_REPEAT_COUNT, + }, + global_config=self.global_config, + trial_runners=self.trial_runners, + optimizer=self.optimizer, + storage=self.storage, + root_env_config=self.root_env_config, + ) + assert opt.start_with_defaults for config_i in range(CONFIG_COUNT): tunables = opt.suggest() @@ -145,6 +166,7 @@ def _dummy_run_exp( }, }, ) + # TODO: scheduler trial on a runner if exp.tunables: assert trial.tunable_config_id == config_i + 1 else: From 6557291b102d4776f5e6ce4a3d9790bd4e74a05a Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Sep 2024 19:58:23 +0000 Subject: [PATCH 109/121] Adding status() output to MockEnv --- .../mlos_bench/environments/mock_env.py | 47 ++++++++++++++----- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/mlos_bench/mlos_bench/environments/mock_env.py b/mlos_bench/mlos_bench/environments/mock_env.py index 765deb05b3..f9b33e6970 100644 --- a/mlos_bench/mlos_bench/environments/mock_env.py +++ b/mlos_bench/mlos_bench/environments/mock_env.py @@ -7,7 +7,7 @@ import logging import random from datetime import datetime -from typing import Dict, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple import numpy @@ -66,6 +66,20 @@ def __init__( # pylint: disable=too-many-arguments self._metrics = self.config.get("mock_env_metrics", ["score"]) self._is_ready = True + def _produce_metrics(self) -> Dict[str, TunableValue]: + # Simple convex function of all tunable parameters. + score = numpy.mean( + numpy.square([self._normalized(tunable) for (tunable, _group) in self._tunable_params]) + ) + + # Add noise and shift the benchmark value from [0, 1] to a given range. + noise = self._random.gauss(0, self._NOISE_VAR) if self._random else 0 + score = numpy.clip(score + noise, 0, 1) + if self._range: + score = self._range[0] + score * (self._range[1] - self._range[0]) + + return {metric: score for metric in self._metrics} + def run(self) -> Tuple[Status, datetime, Optional[Dict[str, TunableValue]]]: """ Produce mock benchmark data for one experiment. @@ -83,18 +97,29 @@ def run(self) -> Tuple[Status, datetime, Optional[Dict[str, TunableValue]]]: if not status.is_ready(): return result - # Simple convex function of all tunable parameters. - score = numpy.mean( - numpy.square([self._normalized(tunable) for (tunable, _group) in self._tunable_params]) - ) + return (Status.SUCCEEDED, timestamp, self._produce_metrics()) - # Add noise and shift the benchmark value from [0, 1] to a given range. - noise = self._random.gauss(0, self._NOISE_VAR) if self._random else 0 - score = numpy.clip(score + noise, 0, 1) - if self._range: - score = self._range[0] + score * (self._range[1] - self._range[0]) + def status(self) -> Tuple[Status, datetime, List[Tuple[datetime, str, Any]]]: + """ + Produce mock benchmark status telemetry for one experiment. - return (Status.SUCCEEDED, timestamp, {metric: score for metric in self._metrics}) + Returns + ------- + Tuple[Status, datetime, List[Tuple[datetime, str, Any]]] + 3-tuple of (Status, timestamp, output) values, where `output` is a dict + with the results or None if the status is not COMPLETED. 
+ The keys of the `output` dict are the names of the metrics + specified in the config; by default it's just one metric + named "score". All output metrics have the same value. + """ + (status, timestamp, _) = result = super().status() + if not status.is_ready(): + return result + return ( + Status.RUNNING, + timestamp, + [(timestamp, metric, score) for (metric, score) in self._produce_metrics().items()], + ) @staticmethod def _normalized(tunable: Tunable) -> float: From 2877df18b9a686037e7e6e8997ec8f68ac60d16b Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 25 Sep 2024 19:43:22 +0000 Subject: [PATCH 110/121] expose root_env_config property --- mlos_bench/mlos_bench/storage/base_storage.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index d3e9b6583d..867c4e0bc0 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -215,6 +215,11 @@ def description(self) -> str: """Get the Experiment's description.""" return self._description + @property + def root_env_config(self) -> str: + """Get the Experiment's root Environment config file path.""" + return self._root_env_config + @property def tunables(self) -> TunableGroups: """Get the Experiment's tunables.""" From e3005a00f083ed077654e12733a7701319028abf Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 25 Sep 2024 19:43:46 +0000 Subject: [PATCH 111/121] refactor to use scheduler --- .../mlos_bench/tests/storage/conftest.py | 5 - .../mlos_bench/tests/storage/sql/fixtures.py | 148 ++++++++---------- 2 files changed, 63 insertions(+), 90 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/storage/conftest.py b/mlos_bench/mlos_bench/tests/storage/conftest.py index 52b0fdcd53..a143705282 100644 --- a/mlos_bench/mlos_bench/tests/storage/conftest.py +++ b/mlos_bench/mlos_bench/tests/storage/conftest.py @@ -15,11 +15,6 @@ exp_storage = sql_storage_fixtures.exp_storage exp_no_tunables_storage = sql_storage_fixtures.exp_no_tunables_storage mixed_numerics_exp_storage = sql_storage_fixtures.mixed_numerics_exp_storage -exp_storage_with_trials = sql_storage_fixtures.exp_storage_with_trials -exp_no_tunables_storage_with_trials = sql_storage_fixtures.exp_no_tunables_storage_with_trials -mixed_numerics_exp_storage_with_trials = ( - sql_storage_fixtures.mixed_numerics_exp_storage_with_trials -) exp_data = sql_storage_fixtures.exp_data exp_no_tunables_data = sql_storage_fixtures.exp_no_tunables_data mixed_numerics_exp_data = sql_storage_fixtures.mixed_numerics_exp_data diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index 8a9065e436..37b0b44525 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -5,15 +5,14 @@ """Test fixtures for mlos_bench storage.""" from datetime import datetime -from random import random from random import seed as rand_seed -from typing import Generator, Optional +from typing import Generator import pytest -from pytz import UTC -from mlos_bench.environments.status import Status +from mlos_bench.environments.mock_env import MockEnv from mlos_bench.optimizers.mock_optimizer import MockOptimizer +from mlos_bench.schedulers.sync_scheduler import SyncScheduler from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.storage.sql.storage import SqlStorage from mlos_bench.tests import SEED @@ -107,22 +106,45 @@ 
def mixed_numerics_exp_storage( def _dummy_run_exp( + storage: SqlStorage, exp: SqlStorage.Experiment, - tunable_name: Optional[str], -) -> SqlStorage.Experiment: - """Generates data by doing a simulated run of the given experiment.""" - # Add some trials to that experiment. - # Note: we're just fabricating some made up function for the ML libraries to try and learn. - base_score = 10.0 - if tunable_name: - tunable = exp.tunables.get_tunable(tunable_name)[0] - assert isinstance(tunable.default, int) - (tunable_min, tunable_max) = tunable.range - tunable_range = tunable_max - tunable_min +) -> ExperimentData: + """ + Generates data by doing a simulated run of the given experiment. + + Parameters + ---------- + storage : SqlStorage + The storage object to use. + exp : SqlStorage.Experiment + The experiment to "run". + Note: this particular object won't be updated, but a new one will be created + from its metadata. + + Returns + ------- + ExperimentData + The data generated by the simulated run. + """ + # pylint: disable=too-many-locals + rand_seed(SEED) + + env = MockEnv( + name="Test Env", + config={ + "tunable_params": list(exp.tunables.get_covariant_group_names()), + "mock_env_seed": SEED, + "mock_env_range": [60, 120], + "mock_env_metrics": ["score"], + }, + tunables=exp.tunables, + ) + opt = MockOptimizer( tunables=exp.tunables, config={ + "optimization_targets": exp.opt_targets, "seed": SEED, # This should be the default, so we leave it omitted for now to test the default. # But the test logic relies on this (e.g., trial 1 is config 1 is the @@ -130,97 +152,53 @@ def _dummy_run_exp( # "start_with_defaults": True, }, ) - assert opt.start_with_defaults - for config_i in range(CONFIG_COUNT): - tunables = opt.suggest() - for repeat_j in range(CONFIG_TRIAL_REPEAT_COUNT): - trial = exp.new_trial( - tunables=tunables.copy(), - config={ - "trial_number": config_i * CONFIG_TRIAL_REPEAT_COUNT + repeat_j + 1, - **{ - f"opt_{key}_{i}": val - for (i, opt_target) in enumerate(exp.opt_targets.items()) - for (key, val) in zip(["target", "direction"], opt_target) - }, - }, - ) - if exp.tunables: - assert trial.tunable_config_id == config_i + 1 - else: - assert trial.tunable_config_id == 1 - if tunable_name: - tunable_value = float(tunables.get_tunable(tunable_name)[0].numerical_value) - tunable_value_norm = base_score * (tunable_value - tunable_min) / tunable_range - else: - tunable_value_norm = 0 - timestamp = datetime.now(UTC) - trial.update_telemetry( - status=Status.RUNNING, - timestamp=timestamp, - metrics=[ - (timestamp, "some-metric", tunable_value_norm + random() / 100), - ], - ) - trial.update( - Status.SUCCEEDED, - timestamp, - metrics={ - # Give some variance on the score. - # And some influence from the tunable value. 
- "score": tunable_value_norm - + random() / 100 - }, - ) - return exp - -@pytest.fixture -def exp_storage_with_trials(exp_storage: SqlStorage.Experiment) -> SqlStorage.Experiment: - """Test fixture for Experiment using in-memory SQLite3 storage.""" - return _dummy_run_exp(exp_storage, tunable_name="kernel_sched_latency_ns") - - -@pytest.fixture -def exp_no_tunables_storage_with_trials( - exp_no_tunables_storage: SqlStorage.Experiment, -) -> SqlStorage.Experiment: - """Test fixture for Experiment using in-memory SQLite3 storage.""" - assert not exp_no_tunables_storage.tunables - return _dummy_run_exp(exp_no_tunables_storage, tunable_name=None) + scheduler = SyncScheduler( + # All config values can be overridden from global config + config={ + "experiment_id": exp.experiment_id, + "trial_id": exp.trial_id, + "config_id": -1, + "trial_config_repeat_count": CONFIG_TRIAL_REPEAT_COUNT, + "max_trials": CONFIG_COUNT * CONFIG_TRIAL_REPEAT_COUNT, + }, + global_config={}, + environment=env, + optimizer=opt, + storage=storage, + root_env_config=exp.root_env_config, + ) + # Add some trial data to that experiment by "running" it. + with scheduler: + scheduler.start() + scheduler.teardown() -@pytest.fixture -def mixed_numerics_exp_storage_with_trials( - mixed_numerics_exp_storage: SqlStorage.Experiment, -) -> SqlStorage.Experiment: - """Test fixture for Experiment using in-memory SQLite3 storage.""" - tunable = next(iter(mixed_numerics_exp_storage.tunables))[0] - return _dummy_run_exp(mixed_numerics_exp_storage, tunable_name=tunable.name) + return storage.experiments[exp.experiment_id] @pytest.fixture def exp_data( storage: SqlStorage, - exp_storage_with_trials: SqlStorage.Experiment, + exp_storage: SqlStorage.Experiment, ) -> ExperimentData: """Test fixture for ExperimentData.""" - return storage.experiments[exp_storage_with_trials.experiment_id] + return _dummy_run_exp(storage, exp_storage) @pytest.fixture def exp_no_tunables_data( storage: SqlStorage, - exp_no_tunables_storage_with_trials: SqlStorage.Experiment, + exp_no_tunables_storage: SqlStorage.Experiment, ) -> ExperimentData: """Test fixture for ExperimentData with no tunable configs.""" - return storage.experiments[exp_no_tunables_storage_with_trials.experiment_id] + return _dummy_run_exp(storage, exp_no_tunables_storage) @pytest.fixture def mixed_numerics_exp_data( storage: SqlStorage, - mixed_numerics_exp_storage_with_trials: SqlStorage.Experiment, + mixed_numerics_exp_storage: SqlStorage.Experiment, ) -> ExperimentData: """Test fixture for ExperimentData with mixed numerical tunable types.""" - return storage.experiments[mixed_numerics_exp_storage_with_trials.experiment_id] + return _dummy_run_exp(storage, mixed_numerics_exp_storage) From 376461a85fb67de88812e19fa7e9d5d1878ab386 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 27 Sep 2024 13:37:35 -0500 Subject: [PATCH 112/121] wip --- .../mlos_bench/environments/mock_env.py | 19 +++++++++---------- .../mlos_bench/tests/storage/sql/fixtures.py | 1 - .../tests/storage/tunable_config_data_test.py | 9 ++++++++- mlos_viz/mlos_viz/tests/conftest.py | 1 - 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/mlos_bench/mlos_bench/environments/mock_env.py b/mlos_bench/mlos_bench/environments/mock_env.py index f9b33e6970..a003c5b450 100644 --- a/mlos_bench/mlos_bench/environments/mock_env.py +++ b/mlos_bench/mlos_bench/environments/mock_env.py @@ -96,8 +96,8 @@ def run(self) -> Tuple[Status, datetime, Optional[Dict[str, TunableValue]]]: (status, timestamp, _) = result = super().run() 
if not status.is_ready(): return result - - return (Status.SUCCEEDED, timestamp, self._produce_metrics()) + metrics = self._produce_metrics() + return (Status.SUCCEEDED, timestamp, metrics) def status(self) -> Tuple[Status, datetime, List[Tuple[datetime, str, Any]]]: """ @@ -105,20 +105,19 @@ def status(self) -> Tuple[Status, datetime, List[Tuple[datetime, str, Any]]]: Returns ------- - Tuple[Status, datetime, List[Tuple[datetime, str, Any]]] - 3-tuple of (Status, timestamp, output) values, where `output` is a dict - with the results or None if the status is not COMPLETED. - The keys of the `output` dict are the names of the metrics - specified in the config; by default it's just one metric - named "score". All output metrics have the same value. + (benchmark_status, timestamp, telemetry) : (Status, datetime, list) + 3-tuple of (benchmark status, timestamp, telemetry) values. + `timestamp` is UTC time stamp of the status; it's current time by default. + `telemetry` is a list (maybe empty) of (timestamp, metric, value) triplets. """ (status, timestamp, _) = result = super().status() if not status.is_ready(): return result + metrics = self._produce_metrics() return ( - Status.RUNNING, + Status.READY, timestamp, - [(timestamp, metric, score) for (metric, score) in self._produce_metrics().items()], + [(timestamp, metric, score) for (metric, score) in metrics.items()], ) @staticmethod diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index 37b0b44525..4e92d9ab9d 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -4,7 +4,6 @@ # """Test fixtures for mlos_bench storage.""" -from datetime import datetime from random import seed as rand_seed from typing import Generator diff --git a/mlos_bench/mlos_bench/tests/storage/tunable_config_data_test.py b/mlos_bench/mlos_bench/tests/storage/tunable_config_data_test.py index 755fc0205a..d40801c972 100644 --- a/mlos_bench/mlos_bench/tests/storage/tunable_config_data_test.py +++ b/mlos_bench/mlos_bench/tests/storage/tunable_config_data_test.py @@ -7,6 +7,8 @@ from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.tunables.tunable_groups import TunableGroups +from mlos_bench.tests.storage import CONFIG_COUNT, CONFIG_TRIAL_REPEAT_COUNT + def test_trial_data_tunable_config_data( exp_data: ExperimentData, @@ -27,10 +29,15 @@ def test_trial_metadata(exp_data: ExperimentData) -> None: """Check expected return values for TunableConfigData metadata.""" assert exp_data.objectives == {"score": "min"} for trial_id, trial in exp_data.trials.items(): + assert trial.tunable_config_id == int(trial_id / CONFIG_TRIAL_REPEAT_COUNT) + 1 assert trial.metadata_dict == { + # Only the first CONFIG_TRIAL_REPEAT_COUNT set should be the defaults. 
+ "is_defaults": str(trial_id <= CONFIG_TRIAL_REPEAT_COUNT), "opt_target_0": "score", "opt_direction_0": "min", - "trial_number": trial_id, + "optimizer": "MockOptimizer", + # FIXME: + "repeat_i": (trial_id % CONFIG_TRIAL_REPEAT_COUNT) + 1, } diff --git a/mlos_viz/mlos_viz/tests/conftest.py b/mlos_viz/mlos_viz/tests/conftest.py index 228609ba09..9299ebb377 100644 --- a/mlos_viz/mlos_viz/tests/conftest.py +++ b/mlos_viz/mlos_viz/tests/conftest.py @@ -11,7 +11,6 @@ storage = sql_storage_fixtures.storage exp_storage = sql_storage_fixtures.exp_storage -exp_storage_with_trials = sql_storage_fixtures.exp_storage_with_trials exp_data = sql_storage_fixtures.exp_data tunable_groups_config = tunable_groups_fixtures.tunable_groups_config From a61678ce5ef016c3bdddc90323b247330af7dfd4 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 27 Sep 2024 19:27:02 +0000 Subject: [PATCH 113/121] tweaks to metadata checks --- mlos_bench/mlos_bench/tests/storage/trial_data_test.py | 2 +- .../mlos_bench/tests/storage/tunable_config_data_test.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/storage/trial_data_test.py b/mlos_bench/mlos_bench/tests/storage/trial_data_test.py index 9fe59b426b..ddd77b3acb 100644 --- a/mlos_bench/mlos_bench/tests/storage/trial_data_test.py +++ b/mlos_bench/mlos_bench/tests/storage/trial_data_test.py @@ -20,7 +20,7 @@ def test_exp_trial_data(exp_data: ExperimentData) -> None: assert trial.trial_id == trial_id assert trial.tunable_config_id == expected_config_id assert trial.status == Status.SUCCEEDED - assert trial.metadata_dict["trial_number"] == trial_id + assert trial.metadata_dict["repeat_i"] == 1 assert list(trial.results_dict.keys()) == ["score"] assert trial.results_dict["score"] == pytest.approx(0.0, abs=0.1) assert isinstance(trial.ts_start, datetime) diff --git a/mlos_bench/mlos_bench/tests/storage/tunable_config_data_test.py b/mlos_bench/mlos_bench/tests/storage/tunable_config_data_test.py index d40801c972..8b18e7c085 100644 --- a/mlos_bench/mlos_bench/tests/storage/tunable_config_data_test.py +++ b/mlos_bench/mlos_bench/tests/storage/tunable_config_data_test.py @@ -4,10 +4,12 @@ # """Unit tests for loading the TunableConfigData.""" +from math import ceil + from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.tunables.tunable_groups import TunableGroups -from mlos_bench.tests.storage import CONFIG_COUNT, CONFIG_TRIAL_REPEAT_COUNT +from mlos_bench.tests.storage import CONFIG_TRIAL_REPEAT_COUNT def test_trial_data_tunable_config_data( @@ -29,7 +31,7 @@ def test_trial_metadata(exp_data: ExperimentData) -> None: """Check expected return values for TunableConfigData metadata.""" assert exp_data.objectives == {"score": "min"} for trial_id, trial in exp_data.trials.items(): - assert trial.tunable_config_id == int(trial_id / CONFIG_TRIAL_REPEAT_COUNT) + 1 + assert trial.tunable_config_id == ceil(trial_id / CONFIG_TRIAL_REPEAT_COUNT) assert trial.metadata_dict == { # Only the first CONFIG_TRIAL_REPEAT_COUNT set should be the defaults. 
"is_defaults": str(trial_id <= CONFIG_TRIAL_REPEAT_COUNT), @@ -37,7 +39,7 @@ def test_trial_metadata(exp_data: ExperimentData) -> None: "opt_direction_0": "min", "optimizer": "MockOptimizer", # FIXME: - "repeat_i": (trial_id % CONFIG_TRIAL_REPEAT_COUNT) + 1, + "repeat_i": ((trial_id - 1) % CONFIG_TRIAL_REPEAT_COUNT) + 1, } From fbdf3a1b6a8fd57d9eb424f17832c1e76edb855c Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 27 Sep 2024 14:54:30 -0500 Subject: [PATCH 114/121] comments --- mlos_bench/mlos_bench/environments/mock_env.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mlos_bench/mlos_bench/environments/mock_env.py b/mlos_bench/mlos_bench/environments/mock_env.py index a003c5b450..41b5a70aaf 100644 --- a/mlos_bench/mlos_bench/environments/mock_env.py +++ b/mlos_bench/mlos_bench/environments/mock_env.py @@ -115,6 +115,7 @@ def status(self) -> Tuple[Status, datetime, List[Tuple[datetime, str, Any]]]: return result metrics = self._produce_metrics() return ( + # FIXME: this causes issues if we report RUNNING instead of READY Status.READY, timestamp, [(timestamp, metric, score) for (metric, score) in metrics.items()], From cfbe44010620bc3b40692bdb7339ccb2d22005a3 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 27 Sep 2024 14:58:03 -0500 Subject: [PATCH 115/121] comments --- mlos_bench/mlos_bench/tests/storage/tunable_config_data_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/storage/tunable_config_data_test.py b/mlos_bench/mlos_bench/tests/storage/tunable_config_data_test.py index 8b18e7c085..2b9b7aa781 100644 --- a/mlos_bench/mlos_bench/tests/storage/tunable_config_data_test.py +++ b/mlos_bench/mlos_bench/tests/storage/tunable_config_data_test.py @@ -38,7 +38,6 @@ def test_trial_metadata(exp_data: ExperimentData) -> None: "opt_target_0": "score", "opt_direction_0": "min", "optimizer": "MockOptimizer", - # FIXME: "repeat_i": ((trial_id - 1) % CONFIG_TRIAL_REPEAT_COUNT) + 1, } From 9e433b715936894c9643f43cbc2344dd3b95c06d Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 30 Sep 2024 20:38:29 +0000 Subject: [PATCH 116/121] separate run vs status random --- mlos_bench/mlos_bench/environments/mock_env.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/mlos_bench/mlos_bench/environments/mock_env.py b/mlos_bench/mlos_bench/environments/mock_env.py index 41b5a70aaf..6d3309f35b 100644 --- a/mlos_bench/mlos_bench/environments/mock_env.py +++ b/mlos_bench/mlos_bench/environments/mock_env.py @@ -61,19 +61,20 @@ def __init__( # pylint: disable=too-many-arguments service=service, ) seed = int(self.config.get("mock_env_seed", -1)) - self._random = random.Random(seed or None) if seed >= 0 else None + self._run_random = random.Random(seed or None) if seed >= 0 else None + self._status_random = random.Random(seed or None) if seed >= 0 else None self._range = self.config.get("mock_env_range") self._metrics = self.config.get("mock_env_metrics", ["score"]) self._is_ready = True - def _produce_metrics(self) -> Dict[str, TunableValue]: + def _produce_metrics(self, rand: Optional[random.Random]) -> Dict[str, TunableValue]: # Simple convex function of all tunable parameters. score = numpy.mean( numpy.square([self._normalized(tunable) for (tunable, _group) in self._tunable_params]) ) # Add noise and shift the benchmark value from [0, 1] to a given range. 
- noise = self._random.gauss(0, self._NOISE_VAR) if self._random else 0 + noise = rand.gauss(0, self._NOISE_VAR) if rand else 0 score = numpy.clip(score + noise, 0, 1) if self._range: score = self._range[0] + score * (self._range[1] - self._range[0]) @@ -96,7 +97,7 @@ def run(self) -> Tuple[Status, datetime, Optional[Dict[str, TunableValue]]]: (status, timestamp, _) = result = super().run() if not status.is_ready(): return result - metrics = self._produce_metrics() + metrics = self._produce_metrics(self._run_random) return (Status.SUCCEEDED, timestamp, metrics) def status(self) -> Tuple[Status, datetime, List[Tuple[datetime, str, Any]]]: @@ -113,7 +114,7 @@ def status(self) -> Tuple[Status, datetime, List[Tuple[datetime, str, Any]]]: (status, timestamp, _) = result = super().status() if not status.is_ready(): return result - metrics = self._produce_metrics() + metrics = self._produce_metrics(self._status_random) return ( # FIXME: this causes issues if we report RUNNING instead of READY Status.READY, From 1b1ad690964660e3e10f02b5ce1928adfb51d144 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 1 Oct 2024 16:17:15 -0500 Subject: [PATCH 117/121] adjustments --- .../mlos_bench/tests/storage/__init__.py | 1 + .../mlos_bench/tests/storage/sql/fixtures.py | 36 ++++++++++++------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/storage/__init__.py b/mlos_bench/mlos_bench/tests/storage/__init__.py index c3b294cae1..9f3819c35f 100644 --- a/mlos_bench/mlos_bench/tests/storage/__init__.py +++ b/mlos_bench/mlos_bench/tests/storage/__init__.py @@ -6,3 +6,4 @@ CONFIG_COUNT = 10 CONFIG_TRIAL_REPEAT_COUNT = 3 +TRIAL_RUNNER_COUNT = 5 diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index 4e92d9ab9d..f8a18695a9 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -5,13 +5,14 @@ """Test fixtures for mlos_bench storage.""" from random import seed as rand_seed -from typing import Generator +from typing import Generator, List import pytest from mlos_bench.environments.mock_env import MockEnv from mlos_bench.optimizers.mock_optimizer import MockOptimizer from mlos_bench.schedulers.sync_scheduler import SyncScheduler +from mlos_bench.schedulers.trial_runner import TrialRunner from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.storage.sql.storage import SqlStorage from mlos_bench.tests import SEED @@ -129,16 +130,24 @@ def _dummy_run_exp( rand_seed(SEED) - env = MockEnv( - name="Test Env", - config={ - "tunable_params": list(exp.tunables.get_covariant_group_names()), - "mock_env_seed": SEED, - "mock_env_range": [60, 120], - "mock_env_metrics": ["score"], - }, - tunables=exp.tunables, - ) + trial_runners: List[TrialRunner] = [] + global_config: dict = {} + for i in range(1, TRIAL_RUNNER_COUNT): + # Create a new global config for each Environment with a unique trial_runner_id for it. 
+        global_config_copy = global_config.copy()
+        global_config_copy["trial_runner_id"] = i
+        env = MockEnv(
+            name="Test Env",
+            config={
+                "tunable_params": list(exp.tunables.get_covariant_group_names()),
+                "mock_env_seed": SEED,
+                "mock_env_range": [60, 120],
+                "mock_env_metrics": ["score"],
+            },
+            global_config=global_config_copy,
+            tunables=exp.tunables,
+        )
+        trial_runners.append(TrialRunner(trial_runner_id=i, env=env))
 
     opt = MockOptimizer(
         tunables=exp.tunables,
@@ -150,6 +159,7 @@ def _dummy_run_exp(
             # default values for the tunable params)
             # "start_with_defaults": True,
         },
+        global_config=global_config,
     )
 
     scheduler = SyncScheduler(
@@ -161,8 +171,8 @@ def _dummy_run_exp(
             "trial_config_repeat_count": CONFIG_TRIAL_REPEAT_COUNT,
             "max_trials": CONFIG_COUNT * CONFIG_TRIAL_REPEAT_COUNT,
         },
-        global_config={},
-        environment=env,
+        global_config=global_config,
+        trial_runners=trial_runners,
         optimizer=opt,
         storage=storage,
         root_env_config=exp.root_env_config,

From 65de07855d665c5003c52c83e010f8258b101788 Mon Sep 17 00:00:00 2001
From: Brian Kroth
Date: Thu, 3 Oct 2024 23:05:07 +0000
Subject: [PATCH 118/121] move save_params to common for reuse by trial

---
 mlos_bench/mlos_bench/storage/sql/common.py   | 23 +++++++++++++++---
 .../mlos_bench/storage/sql/experiment.py      | 24 ++++---------------
 mlos_bench/mlos_bench/storage/sql/trial.py    |  3 ++-
 3 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/mlos_bench/mlos_bench/storage/sql/common.py b/mlos_bench/mlos_bench/storage/sql/common.py
index da68669d9c..4f4ffc0910 100644
--- a/mlos_bench/mlos_bench/storage/sql/common.py
+++ b/mlos_bench/mlos_bench/storage/sql/common.py
@@ -3,16 +3,33 @@
 # Licensed under the MIT License.
 #
 """Common SQL methods for accessing the stored benchmark data."""
-from typing import Dict, Optional
+from typing import Any, Dict, Optional
 
 import pandas
-from sqlalchemy import Engine, Integer, and_, func, select
+from sqlalchemy import Connection, Engine, Integer, Table, and_, func, select
 
 from mlos_bench.environments.status import Status
 from mlos_bench.storage.base_experiment_data import ExperimentData
 from mlos_bench.storage.base_trial_data import TrialData
 from mlos_bench.storage.sql.schema import DbSchema
-from mlos_bench.util import utcify_nullable_timestamp, utcify_timestamp
+from mlos_bench.util import nullable, utcify_nullable_timestamp, utcify_timestamp
+
+
+def save_params(
+    conn: Connection,
+    table: Table,
+    params: Dict[str, Any],
+    **kwargs: Any,
+) -> None:
+    if not params:
+        return
+    conn.execute(
+        table.insert(),
+        [
+            {**kwargs, "param_id": key, "param_value": nullable(str, val)}
+            for (key, val) in params.items()
+        ],
+    )
 
 
 def get_trials(
diff --git a/mlos_bench/mlos_bench/storage/sql/experiment.py b/mlos_bench/mlos_bench/storage/sql/experiment.py
index 56a3f26049..abd9fe80e9 100644
--- a/mlos_bench/mlos_bench/storage/sql/experiment.py
+++ b/mlos_bench/mlos_bench/storage/sql/experiment.py
@@ -14,10 +14,11 @@
 
 from mlos_bench.environments.status import Status
 from mlos_bench.storage.base_storage import Storage
+from mlos_bench.storage.sql.common import save_params
 from mlos_bench.storage.sql.schema import DbSchema
 from mlos_bench.storage.sql.trial import Trial
 from mlos_bench.tunables.tunable_groups import TunableGroups
-from mlos_bench.util import nullable, utcify_timestamp
+from mlos_bench.util import utcify_timestamp
 
 _LOG = logging.getLogger(__name__)
 
@@ -224,23 +225,6 @@ def _get_key_val(conn: Connection, table: Table, field: str, **kwargs: Any) -> D
             row._tuple() for row in cur_result.fetchall()  # pylint: disable=protected-access
         )
 
-    @staticmethod
-    def _save_params(
-        conn: Connection,
-        table: Table,
-        params: Dict[str, Any],
-        **kwargs: Any,
-    ) -> None:
-        if not params:
-            return
-        conn.execute(
-            table.insert(),
-            [
-                {**kwargs, "param_id": key, "param_value": nullable(str, val)}
-                for (key, val) in params.items()
-            ],
-        )
-
     def pending_trials(self, timestamp: datetime, *, running: bool) -> Iterator[Storage.Trial]:
         timestamp = utcify_timestamp(timestamp, origin="local")
         _LOG.info("Retrieve pending trials for: %s @ %s", self._experiment_id, timestamp)
@@ -302,7 +286,7 @@ def _get_config_id(self, conn: Connection, tunables: TunableGroups) -> int:
             config_id: int = conn.execute(
                 self._schema.config.insert().values(config_hash=config_hash)
             ).inserted_primary_key[0]
-            self._save_params(
+            save_params(
                 conn,
                 self._schema.config_param,
                 {tunable.name: tunable.value for (tunable, _group) in tunables},
@@ -338,7 +322,7 @@ def _new_trial(
         # Note: config here is the framework config, not the target
         # environment config (i.e., tunables).
         if config is not None:
-            self._save_params(
+            save_params(
                 conn,
                 self._schema.trial_param,
                 config,
diff --git a/mlos_bench/mlos_bench/storage/sql/trial.py b/mlos_bench/mlos_bench/storage/sql/trial.py
index d5fa80d593..75cb65d0cc 100644
--- a/mlos_bench/mlos_bench/storage/sql/trial.py
+++ b/mlos_bench/mlos_bench/storage/sql/trial.py
@@ -13,6 +13,7 @@
 
 from mlos_bench.environments.status import Status
 from mlos_bench.storage.base_storage import Storage
+from mlos_bench.storage.sql.common import save_params
 from mlos_bench.storage.sql.schema import DbSchema
 from mlos_bench.tunables.tunable_groups import TunableGroups
 from mlos_bench.util import nullable, utcify_timestamp
@@ -48,7 +49,7 @@ def __init__(  # pylint: disable=too-many-arguments
 
     def _save_new_config_data(self, new_config_data: Dict[str, Union[int, float, str]]) -> None:
         with self._engine.begin() as conn:
-            self._experiment._save_params(
+            save_params(
                 conn,
                 self._schema.trial_param,
                 new_config_data,

From 67dedf222d175cc25c818bb9b3fe907ee54ba513 Mon Sep 17 00:00:00 2001
From: Brian Kroth
Date: Thu, 3 Oct 2024 23:05:20 +0000
Subject: [PATCH 119/121] add more tests

---
 mlos_bench/mlos_bench/tests/storage/sql/fixtures.py    | 2 +-
 mlos_bench/mlos_bench/tests/storage/trial_data_test.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py
index f8a18695a9..54e337f7ae 100644
--- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py
+++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py
@@ -16,7 +16,7 @@
 from mlos_bench.storage.base_experiment_data import ExperimentData
 from mlos_bench.storage.sql.storage import SqlStorage
 from mlos_bench.tests import SEED
-from mlos_bench.tests.storage import CONFIG_COUNT, CONFIG_TRIAL_REPEAT_COUNT
+from mlos_bench.tests.storage import CONFIG_COUNT, CONFIG_TRIAL_REPEAT_COUNT, TRIAL_RUNNER_COUNT
 from mlos_bench.tunables.tunable_groups import TunableGroups
 
 # pylint: disable=redefined-outer-name
diff --git a/mlos_bench/mlos_bench/tests/storage/trial_data_test.py b/mlos_bench/mlos_bench/tests/storage/trial_data_test.py
index ea513eace2..77bc1eb243 100644
--- a/mlos_bench/mlos_bench/tests/storage/trial_data_test.py
+++ b/mlos_bench/mlos_bench/tests/storage/trial_data_test.py
@@ -21,6 +21,7 @@ def test_exp_trial_data(exp_data: ExperimentData) -> None:
     assert trial.tunable_config_id == expected_config_id
     assert trial.status == Status.SUCCEEDED
     assert trial.metadata_dict["repeat_i"] == 1
+    assert trial.metadata_dict["trial_runner_id"] == "1"
     assert list(trial.results_dict.keys()) == ["score"]
     assert trial.results_dict["score"] == pytest.approx(73.27, 0.01)
     assert isinstance(trial.ts_start, datetime)

From 273ed9e202949da3729a496f221abc36ff4ea040 Mon Sep 17 00:00:00 2001
From: Brian Kroth
Date: Thu, 3 Oct 2024 23:10:30 +0000
Subject: [PATCH 120/121] docstrings

---
 mlos_bench/mlos_bench/storage/sql/common.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/mlos_bench/mlos_bench/storage/sql/common.py b/mlos_bench/mlos_bench/storage/sql/common.py
index 4f4ffc0910..6f3d594ac4 100644
--- a/mlos_bench/mlos_bench/storage/sql/common.py
+++ b/mlos_bench/mlos_bench/storage/sql/common.py
@@ -21,6 +21,19 @@ def save_params(
     params: Dict[str, Any],
     **kwargs: Any,
 ) -> None:
+    """Updates a set of (param_id, param_value) tuples in the given Table.
+
+    Parameters
+    ----------
+    conn : Connection
+        A connection to the backend database.
+    table : Table
+        The table to update.
+    params : Dict[str, Any]
+        The new (param_id, param_value) tuples to upsert to the Table.
+    **kwargs : Dict[str, Any]
+        Primary key info for the given table.
+    """
     if not params:
         return
     conn.execute(

From 110cb798a099ae2fd7a86dda22b49bbe28df50ad Mon Sep 17 00:00:00 2001
From: Brian Kroth
Date: Mon, 14 Oct 2024 14:41:08 -0500
Subject: [PATCH 121/121] comments

---
 mlos_bench/mlos_bench/tests/storage/sql/fixtures.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py
index 54e337f7ae..f2eb92e6da 100644
--- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py
+++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py
@@ -132,6 +132,7 @@ def _dummy_run_exp(
 
     trial_runners: List[TrialRunner] = []
     global_config: dict = {}
+    # TODO: Make a utility function for this?
     for i in range(1, TRIAL_RUNNER_COUNT):
         # Create a new global config for each Environment with a unique trial_runner_id for it.
         global_config_copy = global_config.copy()
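
The TODO note in PATCH 121 suggests factoring the per-runner setup loop in _dummy_run_exp() into a utility function. The following sketch is illustrative only and not part of the patch series: it reuses the MockEnv and TrialRunner keyword arguments shown in PATCH 117, while the helper name make_mock_trial_runners and the inclusive range(1, count + 1) bound are assumptions.

from typing import Any, Dict, List

from mlos_bench.environments.mock_env import MockEnv
from mlos_bench.schedulers.trial_runner import TrialRunner
from mlos_bench.tunables.tunable_groups import TunableGroups


def make_mock_trial_runners(
    tunables: TunableGroups,
    global_config: Dict[str, Any],
    count: int,
    seed: int,
) -> List[TrialRunner]:
    # Hypothetical helper, not part of these patches: build one MockEnv-backed
    # TrialRunner per runner id, each with its own copy of the global config
    # carrying a unique trial_runner_id, mirroring the loop in _dummy_run_exp().
    trial_runners: List[TrialRunner] = []
    for trial_runner_id in range(1, count + 1):
        global_config_copy = global_config.copy()
        global_config_copy["trial_runner_id"] = trial_runner_id
        env = MockEnv(
            name="Test Env",
            config={
                "tunable_params": list(tunables.get_covariant_group_names()),
                "mock_env_seed": seed,
                "mock_env_range": [60, 120],
                "mock_env_metrics": ["score"],
            },
            global_config=global_config_copy,
            tunables=tunables,
        )
        trial_runners.append(TrialRunner(trial_runner_id=trial_runner_id, env=env))
    return trial_runners

Under those assumptions, the fixture body would reduce to trial_runners = make_mock_trial_runners(exp.tunables, global_config, TRIAL_RUNNER_COUNT, SEED).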