From c7151cf508bcfbc783a7d1868d73eb269a4f47cb Mon Sep 17 00:00:00 2001 From: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com> Date: Mon, 27 Mar 2023 08:24:46 -0700 Subject: [PATCH 1/4] Revert "Revert "[tune-telemetry] Tag searcher and scheduler types. (#33561)" (#33731)" This reverts commit c0545c267282744daaa6f2fa4f227feacfed647f. --- python/ray/air/_internal/usage.py | 99 +++++++++++++++++-- python/ray/train/base_trainer.py | 3 + python/ray/tune/schedulers/async_hyperband.py | 2 +- python/ray/tune/schedulers/hyperband.py | 2 +- .../tune/schedulers/median_stopping_rule.py | 2 +- python/ray/tune/schedulers/pbt.py | 4 +- python/ray/tune/schedulers/trial_scheduler.py | 13 ++- python/ray/tune/search/basic_variant.py | 2 + python/ray/tune/search/searcher.py | 5 + src/ray/protobuf/usage.proto | 6 ++ 10 files changed, 124 insertions(+), 14 deletions(-) diff --git a/python/ray/air/_internal/usage.py b/python/ray/air/_internal/usage.py index 9bd171756660a..8d6068260eb9f 100644 --- a/python/ray/air/_internal/usage.py +++ b/python/ray/air/_internal/usage.py @@ -1,9 +1,11 @@ -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Set, Union from ray._private.usage.usage_lib import TagKey, record_extra_usage_tag if TYPE_CHECKING: from ray.train.trainer import BaseTrainer + from ray.tune.schedulers import TrialScheduler + from ray.tune.search import BasicVariantGenerator, Searcher AIR_TRAINERS = { "HorovodTrainer", @@ -18,16 +20,97 @@ "XGBoostTrainer", } +# searchers implemented by Ray Tune. +TUNE_SEARCHERS = { + "AxSearch", + "BayesOptSearch", + "TuneBOHB", + "DragonflySearch", + "HEBOSearch", + "HyperOptSearch", + "NevergradSearch", + "OptunaSearch", + "SkOptSearch", + "ZOOptSearch", +} + +# These are just wrappers around real searchers. +# We don't want to double tag in this case, otherwise, the real tag +# will be overwritten. +TUNE_SEARCHER_WRAPPERS = { + "ConcurrencyLimiter", + "Repeater", +} + +TUNE_SCHEDULERS = { + "FIFOScheduler", + "AsyncHyperBandScheduler", + "AsyncHyperBandScheduler", + "MedianStoppingRule", + "HyperBandScheduler", + "HyperBandForBOHB", + "PopulationBasedTraining", + "PopulationBasedTrainingReplay", + "PB2", + "ResourceChangingScheduler", +} + + +def _find_class_name(obj, allowed_module_path_prefix: str, whitelist: Set[str]): + """Find the class name of the object. If the object is not + under `allowed_module_path_prefix` or if its class is not in the whitelist, + return "Custom". + + Args: + obj: The object under inspection. + allowed_module_path_prefix: If the `obj`'s class is not under + the `allowed_module_path_prefix`, its class name will be anonymized. + whitelist: If the `obj`'s class is not in the `whitelist`, + it will be anonymized. + Returns: + The class name to be tagged with telemetry. + """ + module_path = obj.__module__ + cls_name = obj.__class__.__name__ + if module_path.startswith(allowed_module_path_prefix) and cls_name in whitelist: + return cls_name + else: + return "Custom" + def tag_air_trainer(trainer: "BaseTrainer"): from ray.train.trainer import BaseTrainer assert isinstance(trainer, BaseTrainer) - module_path = trainer.__module__ - if module_path.startswith("ray.train"): - trainer_name = trainer.__class__.__name__ - if trainer_name not in AIR_TRAINERS: - trainer_name = "Custom" - else: - trainer_name = "Custom" + trainer_name = _find_class_name(trainer, "ray.train", AIR_TRAINERS) record_extra_usage_tag(TagKey.AIR_TRAINER, trainer_name) + + +def tag_searcher(searcher: Union["BasicVariantGenerator", "Searcher"]): + from ray.tune.search import BasicVariantGenerator, Searcher + + if isinstance(searcher, BasicVariantGenerator): + # Note this could be highly inflated as all train flows are treated + # as using BasicVariantGenerator. + record_extra_usage_tag(TagKey.TUNE_SEARCHER, "BasicVariantGenerator") + elif isinstance(searcher, Searcher): + searcher_name = _find_class_name( + searcher, "ray.tune.search", TUNE_SEARCHERS.union(TUNE_SEARCHER_WRAPPERS) + ) + if searcher_name in TUNE_SEARCHER_WRAPPERS: + # ignore to avoid double tagging with wrapper name. + return + record_extra_usage_tag(TagKey.TUNE_SEARCHER, searcher_name) + else: + assert False, ( + "Not expecting a non-BasicVariantGenerator, " + "non-Searcher type passed in for `tag_searcher`." + ) + + +def tag_scheduler(scheduler: "TrialScheduler"): + from ray.tune.schedulers import TrialScheduler + + assert isinstance(scheduler, TrialScheduler) + scheduler_name = _find_class_name(scheduler, "ray.tune.schedulers", TUNE_SCHEDULERS) + record_extra_usage_tag(TagKey.TUNE_SCHEDULER, scheduler_name) diff --git a/python/ray/train/base_trainer.py b/python/ray/train/base_trainer.py index 93a85fe38669f..ecf433ab4ea09 100644 --- a/python/ray/train/base_trainer.py +++ b/python/ray/train/base_trainer.py @@ -56,6 +56,9 @@ class BaseTrainer(abc.ABC): Note: The base ``BaseTrainer`` class cannot be instantiated directly. Only one of its subclasses can be used. + Note to AIR developers: If a new AIR trainer is added, please update + `air/_internal/usage.py`. + **How does a trainer work?** - First, initialize the Trainer. The initialization runs locally, diff --git a/python/ray/tune/schedulers/async_hyperband.py b/python/ray/tune/schedulers/async_hyperband.py index 42d8c90cceeab..3a3524b24e952 100644 --- a/python/ray/tune/schedulers/async_hyperband.py +++ b/python/ray/tune/schedulers/async_hyperband.py @@ -65,7 +65,7 @@ def __init__( if mode: assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" - FIFOScheduler.__init__(self) + super().__init__() self._reduction_factor = reduction_factor self._max_t = max_t diff --git a/python/ray/tune/schedulers/hyperband.py b/python/ray/tune/schedulers/hyperband.py index 5569491bef8fd..a9cb803fbcb87 100644 --- a/python/ray/tune/schedulers/hyperband.py +++ b/python/ray/tune/schedulers/hyperband.py @@ -97,7 +97,7 @@ def __init__( if mode: assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" - FIFOScheduler.__init__(self) + super().__init__() self._eta = reduction_factor self._s_max_1 = int(np.round(np.log(max_t) / np.log(reduction_factor))) + 1 self._max_t_attr = max_t diff --git a/python/ray/tune/schedulers/median_stopping_rule.py b/python/ray/tune/schedulers/median_stopping_rule.py index 2a4381557c05f..98c610dc96364 100644 --- a/python/ray/tune/schedulers/median_stopping_rule.py +++ b/python/ray/tune/schedulers/median_stopping_rule.py @@ -54,7 +54,7 @@ def __init__( min_time_slice: int = 0, hard_stop: bool = True, ): - FIFOScheduler.__init__(self) + super().__init__() self._stopped_trials = set() self._grace_period = grace_period self._min_samples_required = min_samples_required diff --git a/python/ray/tune/schedulers/pbt.py b/python/ray/tune/schedulers/pbt.py index 11b5013ad0890..69634ef277adf 100644 --- a/python/ray/tune/schedulers/pbt.py +++ b/python/ray/tune/schedulers/pbt.py @@ -391,7 +391,7 @@ def __init__( if mode: assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." - FIFOScheduler.__init__(self) + super().__init__() self._metric = metric self._mode = mode self._metric_op = None @@ -1024,7 +1024,7 @@ def _load_policy(self, policy_file: str) -> Tuple[Dict, List[Tuple[int, Dict]]]: policy = [] last_new_tag = None last_old_conf = None - for (old_tag, new_tag, old_step, new_step, old_conf, new_conf) in reversed( + for old_tag, new_tag, old_step, new_step, old_conf, new_conf in reversed( raw_policy ): if last_new_tag and old_tag != last_new_tag: diff --git a/python/ray/tune/schedulers/trial_scheduler.py b/python/ray/tune/schedulers/trial_scheduler.py index a7bfbf92ba8f6..d12ad43f04d5a 100644 --- a/python/ray/tune/schedulers/trial_scheduler.py +++ b/python/ray/tune/schedulers/trial_scheduler.py @@ -1,5 +1,6 @@ from typing import Dict, Optional +from ray.air._internal.usage import tag_scheduler from ray.tune.execution import trial_runner from ray.tune.result import DEFAULT_METRIC from ray.tune.experiment import Trial @@ -8,7 +9,11 @@ @DeveloperAPI class TrialScheduler: - """Interface for implementing a Trial Scheduler class.""" + """Interface for implementing a Trial Scheduler class. + + Note to Tune developers: If a new scheduler is added, please update + `air/_internal/usage.py`. + """ CONTINUE = "CONTINUE" #: Status for continuing trial execution PAUSE = "PAUSE" #: Status for pausing trial execution @@ -23,6 +28,9 @@ class TrialScheduler: _supports_buffered_results = True + def __init__(self): + tag_scheduler(self) + @property def metric(self): return self._metric @@ -127,6 +135,9 @@ def restore(self, checkpoint_path: str): class FIFOScheduler(TrialScheduler): """Simple scheduler that just runs trials in submission order.""" + def __init__(self): + super().__init__() + def on_trial_add(self, trial_runner: "trial_runner.TrialRunner", trial: Trial): pass diff --git a/python/ray/tune/search/basic_variant.py b/python/ray/tune/search/basic_variant.py index 70bdaaf0c873f..6c8acafa2f117 100644 --- a/python/ray/tune/search/basic_variant.py +++ b/python/ray/tune/search/basic_variant.py @@ -7,6 +7,7 @@ import warnings import numpy as np +from ray.air._internal.usage import tag_searcher from ray.tune.error import TuneError from ray.tune.experiment.config_parser import _make_parser, _create_trial_from_spec from ray.tune.search.sample import np_random_generator, _BackwardsCompatibleNumpyRng @@ -294,6 +295,7 @@ def __init__( Union[int, "np_random_generator", np.random.RandomState] ] = None, ): + tag_searcher(self) self._trial_generator = [] self._iterators = [] self._trial_iter = None diff --git a/python/ray/tune/search/searcher.py b/python/ray/tune/search/searcher.py index 7cec8bb5d23ca..5f3d0dbbff3eb 100644 --- a/python/ray/tune/search/searcher.py +++ b/python/ray/tune/search/searcher.py @@ -5,6 +5,7 @@ import warnings from typing import Dict, Optional, List, Union, Any, TYPE_CHECKING +from ray.air._internal.usage import tag_searcher from ray.tune.search.util import _set_search_properties_backwards_compatible from ray.util.annotations import DeveloperAPI, PublicAPI from ray.util.debug import log_once @@ -32,6 +33,9 @@ class Searcher: Not all implementations support multi objectives. + Note to Tune developers: If a new searcher is added, please update + `air/_internal/usage.py`. + Args: metric: The training result objective value attribute. If list then list of training result objective value attributes @@ -76,6 +80,7 @@ def __init__( metric: Optional[str] = None, mode: Optional[str] = None, ): + tag_searcher(self) self._metric = metric self._mode = mode diff --git a/src/ray/protobuf/usage.proto b/src/ray/protobuf/usage.proto index d59c237d6108b..14a188b4d80d9 100644 --- a/src/ray/protobuf/usage.proto +++ b/src/ray/protobuf/usage.proto @@ -118,4 +118,10 @@ enum TagKey { // Name of AIR trainer, or "Custom" if user-defined. // Example: "TorchTrainer" AIR_TRAINER = 500; + // Name of Tune search algorithm or "Custom" if user-defined. + // Example: "TuneBOHB", "BasicVariantGenerator" + TUNE_SEARCHER = 501; + // Name of Tune scheduler algorithm or "Custom" if user-defined. + // Example: "FIFOScheduler" + TUNE_SCHEDULER = 502; } From b6e7366dd825af4ce66cfcb577de53e97d3329e8 Mon Sep 17 00:00:00 2001 From: xwjiang2010 Date: Sun, 26 Mar 2023 17:28:53 -0700 Subject: [PATCH 2/4] fix usage test. Signed-off-by: xwjiang2010 --- python/ray/tests/test_usage_stats.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/ray/tests/test_usage_stats.py b/python/ray/tests/test_usage_stats.py index ca5b0cb3e4a93..94fa921f468ea 100644 --- a/python/ray/tests/test_usage_stats.py +++ b/python/ray/tests/test_usage_stats.py @@ -1214,6 +1214,8 @@ def ready(self): "num_drivers": "0", "gcs_storage": gcs_storage_type, "dashboard_used": "False", + "tune_scheduler": "FIFOScheduler", + "tune_searcher": "BasicVariantGenerator", } assert payload["total_num_nodes"] == 1 assert payload["total_num_running_jobs"] == 1 From 5f8d6f913cc11a7d5343537ef5116599c20f843d Mon Sep 17 00:00:00 2001 From: xwjiang2010 Date: Mon, 27 Mar 2023 08:30:00 -0700 Subject: [PATCH 3/4] [no_early_kickoff] From b039eac8a4407ad96e0becf63920adc433007ab9 Mon Sep 17 00:00:00 2001 From: xwjiang2010 Date: Mon, 27 Mar 2023 09:18:05 -0700 Subject: [PATCH 4/4] for minimal test, remove tune tags. Signed-off-by: xwjiang2010 --- python/ray/tests/test_usage_stats.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/ray/tests/test_usage_stats.py b/python/ray/tests/test_usage_stats.py index 94fa921f468ea..4f53728e7aac9 100644 --- a/python/ray/tests/test_usage_stats.py +++ b/python/ray/tests/test_usage_stats.py @@ -1197,7 +1197,7 @@ def ready(self): payload["extra_usage_tags"]["num_actor_tasks"] = "0" payload["extra_usage_tags"]["num_normal_tasks"] = "0" payload["extra_usage_tags"]["num_drivers"] = "0" - assert payload["extra_usage_tags"] == { + expected_payload = { "extra_k1": "extra_v1", "_test1": "extra_v2", "_test2": "extra_v3", @@ -1214,9 +1214,11 @@ def ready(self): "num_drivers": "0", "gcs_storage": gcs_storage_type, "dashboard_used": "False", - "tune_scheduler": "FIFOScheduler", - "tune_searcher": "BasicVariantGenerator", } + if os.environ.get("RAY_MINIMAL") != "1": + expected_payload["tune_scheduler"] = "FIFOScheduler" + expected_payload["tune_searcher"] = "BasicVariantGenerator" + assert payload["extra_usage_tags"] == expected_payload assert payload["total_num_nodes"] == 1 assert payload["total_num_running_jobs"] == 1 if os.environ.get("RAY_MINIMAL") == "1":