From 67e9dc2817358ca2f8440e46759421faea2615e2 Mon Sep 17 00:00:00 2001
From: Younghwan Na <100389977+yhna940@users.noreply.github.com>
Date: Thu, 10 Aug 2023 16:23:48 +0900
Subject: [PATCH 01/41] Init tuner for finding best lr

---
 mmengine/tuner/__init__.py     |   6 ++
 mmengine/tuner/_report_hook.py |  91 ++++++++++++++++++
 mmengine/tuner/searcher,.py    | 167 +++++++++++++++++++++++++++
 mmengine/tuner/tunner.py       | 145 ++++++++++++++++++++++++++++
 4 files changed, 409 insertions(+)
 create mode 100644 mmengine/tuner/__init__.py
 create mode 100644 mmengine/tuner/_report_hook.py
 create mode 100644 mmengine/tuner/searcher,.py
 create mode 100644 mmengine/tuner/tunner.py

diff --git a/mmengine/tuner/__init__.py b/mmengine/tuner/__init__.py
new file mode 100644
index 0000000000..8b6e583e0e
--- /dev/null
+++ b/mmengine/tuner/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .tunner import Tuner, find_optimial_lr
+
+__all__ = [
+    'Tuner', 'find_optimial_lr'
+]
\ No newline at end of file
diff --git a/mmengine/tuner/_report_hook.py b/mmengine/tuner/_report_hook.py
new file mode 100644
index 0000000000..ea79979d21
--- /dev/null
+++ b/mmengine/tuner/_report_hook.py
@@ -0,0 +1,91 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.hooks import Hook
+import math
+
+from typing import Dict, Union, Sequence, Optional, List
+
+DATA_BATCH = Optional[Union[dict, tuple, list]]
+
+class ReportingHook(Hook):
+
+    _max_history = 1024
+
+    def __init__(self,
+                 monitor: str,
+                 rule: str,
+                 tuning_iter: int = 0,
+                 tunning_epoch: int = 0,
+                 report_op: str = 'latest'
+                 ):
+        assert rule in ['greater', 'less'], f'rule {rule} is not supported'
+        self.rule = rule
+        assert (tuning_iter == 0 and tunning_epoch > 0) or (tunning_epoch == 0 and tuning_iter > 0), 'tuning_iter and tuning_epoch should be set only one'
+        assert report_op in ['latest', 'mean'], f'report_op {report_op} is not supported'
+        self.report_op = report_op
+        self.tuning_iter = tuning_iter
+        self.tuning_epoch = tunning_epoch
+        self.enabled_by_epoch = self._tuning_epoch != 0
+
+        self.monitor = monitor
+        self.history = []
+
+    def _append_score(self, score):
+        self.history.append(score)
+        if len(self.history) > self._max_history:
+            self.history.pop(0)
+
+    def after_train_iter(
+            self,
+            runner,
+            batch_idx: int,
+            data_batch: DATA_BATCH = None,
+            outputs: Optional[Union[dict, Sequence]] = None,
+            mode: str = 'train') -> None:
+
+        tag, _ = runner.log_processor.get_log_after_iter(
+            runner, batch_idx, 'train')
+        score = tag.get(self.monitor, None)
+        if score is not None:
+            self._append_score(score)
+        if self.enabled_by_epoch:
+            return
+        if runner.iter + 1 == self.tuning_iter:
+            runner.train_loop.stop_training = True
+
+    def after_train_epoch(self, runner) -> None:
+        if not self.enabled_by_epoch:
+            return
+        if runner.epoch + 1 == self.tuning_epoch:
+            runner.train_loop.stop_training = True
+
+    def after_val_epoch(self,
+                        runner,
+                        metrics: Optional[Dict[str, float]] = None) -> None:
+        if metrics is None:
+            return
+        score = metrics.get(self.monitor, None)
+        if score is not None:
+            self._append_score(score)
+
+    def report_score(self):
+
+        if self.report_op == 'latest':
+            score = self.history[-1]
+            if math.isnan(score) or math.isinf(score):
+                if self.rule == 'greater':
+                    score = float('-inf')
+                else:
+                    score = float('inf')
+
+        elif self.report_op == 'mean':
+            if any(math.isnan(s) or math.isinf(s) for s in self.history):
+                if self.rule == 'greater':
+                    score = float('-inf')
+                else:
+                    score = float('inf')
+            else:
+                score =
sum(self.history) / len(self.history) + return score + + def clear_history(self): + self.history = [] \ No newline at end of file diff --git a/mmengine/tuner/searcher,.py b/mmengine/tuner/searcher,.py new file mode 100644 index 0000000000..5129358d98 --- /dev/null +++ b/mmengine/tuner/searcher,.py @@ -0,0 +1,167 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from tying import Dict, List, Union, Optional + +try: + import nevergard as ng +except ImportError: + ng = None + +try: + import skopt +except ImportError: + skopt = None + +try: + import hyperopt as hp +except ImportError: + hp = None + +class Searcher: + def __init__(self, rule: str, hparam_spec: Dict[str, Dict]): + assert rule in ['less', 'greater'], f"rule must be 'less' or 'greater', but got {rule}" + self._rule = rule + for _, v in hparam_spec.items(): + assert v.get('type', None) in ['discrete', 'continuous'], f'hparam_spec must have a key "type" and its value must be "discrete" or "continuous", but got {v}' + if v['type'] == 'discrete': + assert v.get('values', None) is not None, f'if hparam_spec["type"] is "discrete", hparam_spec must have a key "values", but got {v}' + else: + assert v.get('lower', None) is not None, f'if hparam_spec["type"] is "continuous", hparam_spec must have a key "lower", but got {v}' + assert v.get('upper', None) is not None, f'if hparam_spec["type"] is "continuous", hparam_spec must have a key "upper", but got {v}' + self._hparam_spec = hparam_spec + + @property + def hparam_spec(self) -> Dict[str, Dict]: + return self._hparam_spec + + @property + def rule(self) -> str: + return self._rule + + def record(hparam: Dict, score: float): + """Record hparam and score to solver + + Args: + hparam (Dict): The hparam to be updated + score (float): The score to be updated + """ + + def suggest(self) -> Dict: + """Suggest a new hparam based on solver's strategy + + Returns: + Dict: suggested hparam + """ + +class NevergradSearcher(Searcher): + def __init__(self, rule: str, hparam_spec: Dict[str, Dict], num_trials: int, solver_type: str = 'NGOpt', *args, **kwargs): + super().__init__(rule, hparam_spec) + assert ng is not None, 'nevergrad is not installed' + self._optimizer = self._build_optimizer(solver_type, num_trials) + + if self.rule == 'less': + self._rule_op = 1.0 + else: + self._rule_op = -1.0 + + def _build_optimizer(self, solver_type: str, num_trials: int): + converted_hp_spec = ng.p.Dict(**{ + k: ng.p.Scalar(lower=v['lower'], upper=v['upper']) if v['type'] == 'continuous' else ng.p.Choice(v['values']) + for k, v in self.hp_spec.items() + }) + solver = ng.optimization.optimizerlib.registry[solver_type](parametrization=converted_hp_spec, budget=num_trials) + return solver + + def sugget(self) -> Dict: + return self._optimizer.ask() + + def record(self, hparam: Dict, score: float): + self._optimizer.tell(hparam, score*self._rule_op) + + +class SkoptSearcher(Searcher): + def __init__(self, rule: str, hparam_spec: Dict[str, Dict], base_estimator:str = 'gp', n_initial_points: int = 10, initial_point_generator:str='random', acq_func: str='gp_hedge', acq_optimizer:str = 'auto', *args, **kwargs): + super().__init__(rule, hparam_spec) + + # Ensure that skopt is installed + assert skopt is not None, 'Scikit-Optimize (skopt) is not installed' + + self._optimizer = self._build_optimizer(base_estimator, n_initial_points, initial_point_generator, acq_func, acq_optimizer) + if self.rule == 'less': + self._rule_op = 1.0 + else: + self._rule_op = -1.0 + + def _build_optimizer(self, base_estimator: str, 
n_initial_points: int, initial_point_generator: str, acq_func: str, acq_optimizer: str): + space = [] + for k, v in self.hparam_spec.items(): + if v['type'] == 'continuous': + space.append(skopt.space.Real(v['lower'], v['upper'], name=k)) + elif v['type'] == 'discrete': + space.append(skopt.space.Categorical(v['values'], name=k)) + + return skopt.Optimizer(dimensions=space, base_estimator=base_estimator, n_initial_points=n_initial_points, initial_point_generator=initial_point_generator, acq_func=acq_func, acq_optimizer=acq_optimizer) + + def suggest(self) -> Dict: + x = self._optimizer.ask() + return {dim.name: val for dim, val in zip(self._optimizer.space.dimensions, x)} + + def record(self, hparam: Dict, score: float): + ordered_values = [hparam[dim.name] for dim in self._optimizer.space.dimensions] + self._optimizer.tell(ordered_values, score*self._rule_op) + +class HyperoptSearcher(Searcher): + def __init__(self, + rule: str, + hparam_spec: Dict[str, Dict], + num_trials: int, + n_initial_points: int = 20, + random_state_seed: Optional[int] = None, + gamma: float = 0.25, + *args, **kwargs): + super().__init__(rule, hparam_spec) + + # Ensure that hyperopt is installed + assert hp is not None, 'hyperopt is not installed' + + self._space = self._build_space() + self._trials = hp.Trials() + self._num_trials = num_trials + self._n_initial_points = n_initial_points + self._random_state_seed = random_state_seed + self._gamma = gamma + + if self.rule == 'less': + self._rule_op = 1.0 + else: + self._rule_op = -1.0 + + def _build_space(self): + space = {} + for k, v in self.hparam_spec.items(): + if v['type'] == 'continuous': + space[k] = hp.hp.uniform(k, v['lower'], v['upper']) + elif v['type'] == 'discrete': + space[k] = hp.hp.choice(k, v['values']) + return space + + def suggest(self) -> Dict: + suggested_params = hp.fless(fn=lambda x: 0, # Dummy objective, we'll replace it with `record` later + space=self._space, + algo=hp.partial(hp.tpe.suggest, gamma=self._gamma), + greater_evals=self._n_initial_points + len(self._trials.trials), + trials=self._trials, + rstate=hp.pyll.stochastic.RandomState(self._random_state_seed), # Seeded random state + return_argless=True, + verbose=0) # Not verbose + return suggested_params + + def record(self, hparam: Dict, score: float): + # Hyperopt requires loss (lower is better), so we should adjust our score if in "greater" rule. + self._trials.insert_trial_docs([{ + 'tid': len(self._trials.trials), + 'book_time': hp.utils.coarse_utcnow(), + 'misc': {'tid': len(self._trials.trials), 'cmd': ('domain_attachment', 'FlessIter_Domain'), 'vals': hparam, 'idxs': {k: [len(self._trials.trials)] for k in hparam}}, + 'state': 2, # 2 is the state for "ok" in hyperopt + 'result': {'loss': score * self._rule_op, 'status': 'ok'} + }]) + self._trials.refresh() diff --git a/mmengine/tuner/tunner.py b/mmengine/tuner/tunner.py new file mode 100644 index 0000000000..a6942a148c --- /dev/null +++ b/mmengine/tuner/tunner.py @@ -0,0 +1,145 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import torch + +from mmengine.runner import Runner +from mmengine.config import Config, ConfigDict +from mmengine.dist import init_dist, broadcast_object_list, is_main_process + +from tying import Dict, Union, Sequence, Optional, List + +from mmengine.dist import is_distributed + +from ._report_hook import ReportingHook + +class Tuner: + + def __init__(self, runner_cfg: Union[Dict, Config, ConfigDict], hparam_spec: Dict[str, Dict], monitor: str, rule: str, num_trials: int, tuning_iter: int = 0, tunning_epoch: int = 0, report_op: str = 'latest', searcher: str = 'nevergrad', **searcher_kwargs): + self._runner_cfg = runner_cfg.copy() + self._hparam_spec = hparam_spec + self._monitor = monitor + assert rule in ['greater', 'less'], f'rule {rule} is not supported' + self._rule = rule + self._num_trials = num_trials + self._searcher = self._build_searcher(searcher, **searcher_kwargs) + self._reporting_hook = ReportingHook(monitor, rule, tuning_iter, tunning_epoch, report_op) + self._history = [] + + launcher = self._runner_cfg.get('launcher', 'none') + env_cfg = self._runner_cfg.get('env_cfg', {}) + self._distributed: bool + if launcher == 'none': + self._distributed = False + else: + self._distributed = True + if self.distributed and not is_distributed(): + dist_cfg: dict = env_cfg.get('dist_cfg', {}) + init_dist(self.launcher, **dist_cfg) + + def _build_searcher(self, searcher: str = 'nevergrad', **kwargs): + if searcher == 'nevergrad': + from .searcher import NevergradSearcher + searcher = NevergradSearcher(self._mode, self._hparam_spec, self._num_trials, **kwargs) + elif searcher == 'skopt': + from .searcher import SkoptSearcher + searcher = SkoptSearcher(self._mode, self._hparam_spec, self._num_trials, **kwargs) + elif searcher == 'hyperopt': + from .searcher import HyperoptSearcher + searcher = HyperoptSearcher(self._mode, self._hparam_spec, self._num_trials, **kwargs) + else: + raise NotImplementedError(f'searcher {searcher} is not implemented') + return searcher + + @staticmethod + def inject_config(cfg, key, value): + key = key.split('.') + suffix = '' + for item in key[:-1]: + if isinstance(cfg, Sequence) and not isinstance(cfg, str): + item = cfg[int(item)] + else: + assert item in cfg, f'key {key} is not in cfg' + item = cfg[item] + suffix += f'{item}.' 
+ assert key[-1] in cfg, f'attribute {key[-1]} is not in cfg{suffix}' + cfg[key[-1]] = value + return + + def tune(self): + for _ in range(self._num_trials): + if is_main_process(): + hparam = [self._searcher.suggest()] + else: + hparam = [None] + broadcast_object_list(hparam) + # Sync hparam if distributed + for k, v in hparam[0].items(): + self.inject_config(self._runner_cfg, k, v) + runner = Runner.from_cfg(self._runner_cfg) + runner.register_hook(self._reporting_hook, priority='VERY_LOW') + score: float + try: + runner.train() + score = [self._reporting_hook.get_score()] + except Exception as e: + if self._rule == 'greater': + score = [float('-inf')] + else: + score = [float('inf')] + finally: + broadcast_object_list(score) + self._searcher.record(hparam[0], score[0]) + runner = self.tear_down_trial(runner) + self._history.append((hparam[0], score[0])) + + beset_hparam: dict + if self._rule == 'greater': + beset_hparam = max(self._history, key=lambda x: x[1])[0] + else: + beset_hparam = min(self._history, key=lambda x: x[1])[0] + return beset_hparam + + def tear_down_trial(self, runner): + del runner + torch.cuda.empty_cache() + self._reporting_hook.clear_history() + + +def find_optimial_lr( + runner_cfg: Union[Dict, Config, ConfigDict], + monitor: str = 'loss', + rule: str = 'less', + num_trials: int = 32, + lower_lr: Optional[float] = 1e-6, upper_lr: Optional[float] = 1e-2, lr_choices : Optional[List[float]] = None, tuning_iter: int = 1e4, tunning_epoch: int = 0, report_op: str = 'latest', searcher: str = 'nevergrad', **searcher_kwargs +): + is_discrete = lr_choices is not None + assert (lower_lr is None and upper_lr is None and lr_choices is not None) or (lower_lr is not None and upper_lr is not None and lr_choices is None), 'lower_lr and upper_lr should be set only one' + hparam_spec: dict + if is_discrete: + hparam_spec = { + 'optimizer.lr': { + 'type': 'discrete', + 'values': lr_choices + } + } + else: + hparam_spec = { + 'optimizer.lr': { + 'type': 'continuous', + 'lower': lower_lr, + 'upper': upper_lr + } + } + + tunner = Tuner( + runner_cfg, + hparam_spec=hparam_spec, + monitor=monitor, + rule=rule, + num_trials=num_trials, + tuning_iter=tuning_iter, + tunning_epoch=tunning_epoch, + report_op=report_op, + searcher=searcher, + **searcher_kwargs + ) + return tunner.tune() \ No newline at end of file From b714913ab22f28819921436a96504d0a7c25c879 Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Fri, 11 Aug 2023 09:30:43 +0900 Subject: [PATCH 02/41] Apply lint --- mmengine/tuner/__init__.py | 4 +- mmengine/tuner/_report_hook.py | 46 +++++----- mmengine/tuner/searcher,.py | 161 +++++++++++++++++++++++---------- mmengine/tuner/tunner.py | 142 +++++++++++++++++------------ 4 files changed, 219 insertions(+), 134 deletions(-) diff --git a/mmengine/tuner/__init__.py b/mmengine/tuner/__init__.py index 8b6e583e0e..cb944d0dba 100644 --- a/mmengine/tuner/__init__.py +++ b/mmengine/tuner/__init__.py @@ -1,6 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. from .tunner import Tuner, find_optimial_lr -__all__ = [ - 'Tuner', 'find_optimial_lr' -] \ No newline at end of file +__all__ = ['Tuner', 'find_optimial_lr'] diff --git a/mmengine/tuner/_report_hook.py b/mmengine/tuner/_report_hook.py index ea79979d21..2c96a4ff61 100644 --- a/mmengine/tuner/_report_hook.py +++ b/mmengine/tuner/_report_hook.py @@ -1,31 +1,34 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from mmengine.hooks import Hook import math +from typing import Dict, Optional, Sequence, Union -from typing import Dict, Union, Sequence, Optional, List +from mmengine.hooks import Hook DATA_BATCH = Optional[Union[dict, tuple, list]] + class ReportingHook(Hook): _max_history = 1024 - + def __init__(self, - monitor: str, - rule: str, - tuning_iter: int = 0, - tunning_epoch: int = 0, - report_op: str = 'latest' - ): + monitor: str, + rule: str, + tuning_iter: int = 0, + tunning_epoch: int = 0, + report_op: str = 'latest'): assert rule in ['greater', 'less'], f'rule {rule} is not supported' - self.rule = rule - assert (tuning_iter == 0 and tunning_epoch > 0) or (tunning_epoch == 0 and tuning_iter > 0), 'tuning_iter and tuning_epoch should be set only one' - assert report_op in ['latest', 'mean'], f'report_op {report_op} is not supported' + self.rule = rule + assert (tuning_iter == 0 and tunning_epoch > 0) or ( + tunning_epoch == 0 and tuning_iter > 0 + ), 'tuning_iter and tuning_epoch should be set only one' + assert report_op in ['latest', + 'mean'], f'report_op {report_op} is not supported' self.report_op = report_op self.tuning_iter = tuning_iter self.tuning_epoch = tunning_epoch self.enabled_by_epoch = self._tuning_epoch != 0 - + self.monitor = monitor self.history = [] @@ -34,13 +37,12 @@ def _append_score(self, score): if len(self.history) > self._max_history: self.history.pop(0) - def after_train_iter( - self, - runner, - batch_idx: int, - data_batch: DATA_BATCH = None, - outputs: Optional[Union[dict, Sequence]] = None, - mode: str = 'train') -> None: + def after_train_iter(self, + runner, + batch_idx: int, + data_batch: DATA_BATCH = None, + outputs: Optional[Union[dict, Sequence]] = None, + mode: str = 'train') -> None: tag, _ = runner.log_processor.get_log_after_iter( runner, batch_idx, 'train') @@ -58,7 +60,7 @@ def after_train_epoch(self, runner) -> None: if runner.epoch + 1 == self.tuning_epoch: runner.train_loop.stop_training = True - def after_val_epoch(self, + def after_val_epoch(self, runner, metrics: Optional[Dict[str, float]] = None) -> None: if metrics is None: @@ -88,4 +90,4 @@ def report_score(self): return score def clear_history(self): - self.history = [] \ No newline at end of file + self.history = [] diff --git a/mmengine/tuner/searcher,.py b/mmengine/tuner/searcher,.py index 5129358d98..140c5e5153 100644 --- a/mmengine/tuner/searcher,.py +++ b/mmengine/tuner/searcher,.py @@ -1,5 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from tying import Dict, List, Union, Optional +from tying import Dict, Optional try: import nevergard as ng @@ -16,19 +16,30 @@ except ImportError: hp = None + class Searcher: + def __init__(self, rule: str, hparam_spec: Dict[str, Dict]): - assert rule in ['less', 'greater'], f"rule must be 'less' or 'greater', but got {rule}" + assert rule in ['less', 'greater' + ], f"rule must be 'less' or 'greater', but got {rule}" self._rule = rule for _, v in hparam_spec.items(): - assert v.get('type', None) in ['discrete', 'continuous'], f'hparam_spec must have a key "type" and its value must be "discrete" or "continuous", but got {v}' + assert v.get('type', None) in [ + 'discrete', 'continuous' + ], f'hparam_spec must have a key "type" and its value must be "discrete" or "continuous", but got {v}' if v['type'] == 'discrete': - assert v.get('values', None) is not None, f'if hparam_spec["type"] is "discrete", hparam_spec must have a key "values", but got {v}' + assert v.get( + 'values', None + ) is not None, f'if hparam_spec["type"] is "discrete", hparam_spec must have a key "values", but got {v}' else: - assert v.get('lower', None) is not None, f'if hparam_spec["type"] is "continuous", hparam_spec must have a key "lower", but got {v}' - assert v.get('upper', None) is not None, f'if hparam_spec["type"] is "continuous", hparam_spec must have a key "upper", but got {v}' + assert v.get( + 'lower', None + ) is not None, f'if hparam_spec["type"] is "continuous", hparam_spec must have a key "lower", but got {v}' + assert v.get( + 'upper', None + ) is not None, f'if hparam_spec["type"] is "continuous", hparam_spec must have a key "upper", but got {v}' self._hparam_spec = hparam_spec - + @property def hparam_spec(self) -> Dict[str, Dict]: return self._hparam_spec @@ -36,9 +47,9 @@ def hparam_spec(self) -> Dict[str, Dict]: @property def rule(self) -> str: return self._rule - + def record(hparam: Dict, score: float): - """Record hparam and score to solver + """Record hparam and score to solver. Args: hparam (Dict): The hparam to be updated @@ -46,14 +57,22 @@ def record(hparam: Dict, score: float): """ def suggest(self) -> Dict: - """Suggest a new hparam based on solver's strategy + """Suggest a new hparam based on solver's strategy. 
Returns: Dict: suggested hparam """ + class NevergradSearcher(Searcher): - def __init__(self, rule: str, hparam_spec: Dict[str, Dict], num_trials: int, solver_type: str = 'NGOpt', *args, **kwargs): + + def __init__(self, + rule: str, + hparam_spec: Dict[str, Dict], + num_trials: int, + solver_type: str = 'NGOpt', + *args, + **kwargs): super().__init__(rule, hparam_spec) assert ng is not None, 'nevergrad is not installed' self._optimizer = self._build_optimizer(solver_type, num_trials) @@ -64,72 +83,104 @@ def __init__(self, rule: str, hparam_spec: Dict[str, Dict], num_trials: int, sol self._rule_op = -1.0 def _build_optimizer(self, solver_type: str, num_trials: int): - converted_hp_spec = ng.p.Dict(**{ - k: ng.p.Scalar(lower=v['lower'], upper=v['upper']) if v['type'] == 'continuous' else ng.p.Choice(v['values']) - for k, v in self.hp_spec.items() - }) - solver = ng.optimization.optimizerlib.registry[solver_type](parametrization=converted_hp_spec, budget=num_trials) + converted_hp_spec = ng.p.Dict( + **{ + k: ng.p.Scalar(lower=v['lower'], upper=v['upper']) + if v['type'] == 'continuous' else ng.p.Choice(v['values']) + for k, v in self.hp_spec.items() + }) + solver = ng.optimization.optimizerlib.registry[solver_type]( + parametrization=converted_hp_spec, budget=num_trials) return solver - def sugget(self) -> Dict: + def suggest(self) -> Dict: return self._optimizer.ask() - + def record(self, hparam: Dict, score: float): - self._optimizer.tell(hparam, score*self._rule_op) + self._optimizer.tell(hparam, score * self._rule_op) + - class SkoptSearcher(Searcher): - def __init__(self, rule: str, hparam_spec: Dict[str, Dict], base_estimator:str = 'gp', n_initial_points: int = 10, initial_point_generator:str='random', acq_func: str='gp_hedge', acq_optimizer:str = 'auto', *args, **kwargs): + + def __init__(self, + rule: str, + hparam_spec: Dict[str, Dict], + base_estimator: str = 'gp', + n_initial_points: int = 10, + initial_point_generator: str = 'random', + acq_func: str = 'gp_hedge', + acq_optimizer: str = 'auto', + *args, + **kwargs): super().__init__(rule, hparam_spec) - + # Ensure that skopt is installed assert skopt is not None, 'Scikit-Optimize (skopt) is not installed' - - self._optimizer = self._build_optimizer(base_estimator, n_initial_points, initial_point_generator, acq_func, acq_optimizer) + + self._optimizer = self._build_optimizer(base_estimator, + n_initial_points, + initial_point_generator, + acq_func, acq_optimizer) if self.rule == 'less': self._rule_op = 1.0 else: self._rule_op = -1.0 - def _build_optimizer(self, base_estimator: str, n_initial_points: int, initial_point_generator: str, acq_func: str, acq_optimizer: str): + def _build_optimizer(self, base_estimator: str, n_initial_points: int, + initial_point_generator: str, acq_func: str, + acq_optimizer: str): space = [] for k, v in self.hparam_spec.items(): if v['type'] == 'continuous': space.append(skopt.space.Real(v['lower'], v['upper'], name=k)) elif v['type'] == 'discrete': space.append(skopt.space.Categorical(v['values'], name=k)) - - return skopt.Optimizer(dimensions=space, base_estimator=base_estimator, n_initial_points=n_initial_points, initial_point_generator=initial_point_generator, acq_func=acq_func, acq_optimizer=acq_optimizer) + + return skopt.Optimizer( + dimensions=space, + base_estimator=base_estimator, + n_initial_points=n_initial_points, + initial_point_generator=initial_point_generator, + acq_func=acq_func, + acq_optimizer=acq_optimizer) def suggest(self) -> Dict: x = self._optimizer.ask() - return {dim.name: 
val for dim, val in zip(self._optimizer.space.dimensions, x)} + return { + dim.name: val + for dim, val in zip(self._optimizer.space.dimensions, x) + } def record(self, hparam: Dict, score: float): - ordered_values = [hparam[dim.name] for dim in self._optimizer.space.dimensions] - self._optimizer.tell(ordered_values, score*self._rule_op) + ordered_values = [ + hparam[dim.name] for dim in self._optimizer.space.dimensions + ] + self._optimizer.tell(ordered_values, score * self._rule_op) + class HyperoptSearcher(Searcher): - def __init__(self, - rule: str, - hparam_spec: Dict[str, Dict], - num_trials: int, + + def __init__(self, + rule: str, + hparam_spec: Dict[str, Dict], + num_trials: int, n_initial_points: int = 20, random_state_seed: Optional[int] = None, gamma: float = 0.25, - *args, **kwargs): + *args, + **kwargs): super().__init__(rule, hparam_spec) - + # Ensure that hyperopt is installed assert hp is not None, 'hyperopt is not installed' - + self._space = self._build_space() self._trials = hp.Trials() self._num_trials = num_trials self._n_initial_points = n_initial_points self._random_state_seed = random_state_seed self._gamma = gamma - + if self.rule == 'less': self._rule_op = 1.0 else: @@ -145,23 +196,35 @@ def _build_space(self): return space def suggest(self) -> Dict: - suggested_params = hp.fless(fn=lambda x: 0, # Dummy objective, we'll replace it with `record` later - space=self._space, - algo=hp.partial(hp.tpe.suggest, gamma=self._gamma), - greater_evals=self._n_initial_points + len(self._trials.trials), - trials=self._trials, - rstate=hp.pyll.stochastic.RandomState(self._random_state_seed), # Seeded random state - return_argless=True, - verbose=0) # Not verbose + suggested_params = hp.fless( + fn=lambda x: + 0, # Dummy objective, we'll replace it with `record` later + space=self._space, + algo=hp.partial(hp.tpe.suggest, gamma=self._gamma), + greater_evals=self._n_initial_points + len(self._trials.trials), + trials=self._trials, + rstate=hp.pyll.stochastic.RandomState( + self._random_state_seed), # Seeded random state + return_argless=True, + verbose=0) # Not verbose return suggested_params - + def record(self, hparam: Dict, score: float): # Hyperopt requires loss (lower is better), so we should adjust our score if in "greater" rule. self._trials.insert_trial_docs([{ 'tid': len(self._trials.trials), 'book_time': hp.utils.coarse_utcnow(), - 'misc': {'tid': len(self._trials.trials), 'cmd': ('domain_attachment', 'FlessIter_Domain'), 'vals': hparam, 'idxs': {k: [len(self._trials.trials)] for k in hparam}}, + 'misc': { + 'tid': len(self._trials.trials), + 'cmd': ('domain_attachment', 'FlessIter_Domain'), + 'vals': hparam, + 'idxs': {k: [len(self._trials.trials)] + for k in hparam} + }, 'state': 2, # 2 is the state for "ok" in hyperopt - 'result': {'loss': score * self._rule_op, 'status': 'ok'} + 'result': { + 'loss': score * self._rule_op, + 'status': 'ok' + } }]) self._trials.refresh() diff --git a/mmengine/tuner/tunner.py b/mmengine/tuner/tunner.py index a6942a148c..a63a9d3150 100644 --- a/mmengine/tuner/tunner.py +++ b/mmengine/tuner/tunner.py @@ -1,27 +1,38 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import torch +from tying import Dict, List, Optional, Sequence, Union -from mmengine.runner import Runner from mmengine.config import Config, ConfigDict -from mmengine.dist import init_dist, broadcast_object_list, is_main_process - -from tying import Dict, Union, Sequence, Optional, List - -from mmengine.dist import is_distributed - +from mmengine.dist import (broadcast_object_list, init_dist, is_distributed, + is_main_process) +from mmengine.runner import Runner from ._report_hook import ReportingHook + class Tuner: - def __init__(self, runner_cfg: Union[Dict, Config, ConfigDict], hparam_spec: Dict[str, Dict], monitor: str, rule: str, num_trials: int, tuning_iter: int = 0, tunning_epoch: int = 0, report_op: str = 'latest', searcher: str = 'nevergrad', **searcher_kwargs): + dist_sanity_check_interval = 10 + + def __init__(self, + runner_cfg: Union[Dict, Config, ConfigDict], + hparam_spec: Dict[str, Dict], + monitor: str, + rule: str, + num_trials: int, + tuning_iter: int = 0, + tunning_epoch: int = 0, + report_op: str = 'latest', + searcher: str = 'nevergrad', + **searcher_kwargs): self._runner_cfg = runner_cfg.copy() self._hparam_spec = hparam_spec self._monitor = monitor - assert rule in ['greater', 'less'], f'rule {rule} is not supported' + assert rule in ['greater', 'less'], f'rule {rule} is not supported' self._rule = rule self._num_trials = num_trials self._searcher = self._build_searcher(searcher, **searcher_kwargs) - self._reporting_hook = ReportingHook(monitor, rule, tuning_iter, tunning_epoch, report_op) + self._reporting_hook = ReportingHook(monitor, rule, tuning_iter, + tunning_epoch, report_op) self._history = [] launcher = self._runner_cfg.get('launcher', 'none') @@ -38,17 +49,21 @@ def __init__(self, runner_cfg: Union[Dict, Config, ConfigDict], hparam_spec: Dic def _build_searcher(self, searcher: str = 'nevergrad', **kwargs): if searcher == 'nevergrad': from .searcher import NevergradSearcher - searcher = NevergradSearcher(self._mode, self._hparam_spec, self._num_trials, **kwargs) + searcher = NevergradSearcher(self._mode, self._hparam_spec, + self._num_trials, **kwargs) elif searcher == 'skopt': from .searcher import SkoptSearcher - searcher = SkoptSearcher(self._mode, self._hparam_spec, self._num_trials, **kwargs) + searcher = SkoptSearcher(self._mode, self._hparam_spec, + self._num_trials, **kwargs) elif searcher == 'hyperopt': from .searcher import HyperoptSearcher - searcher = HyperoptSearcher(self._mode, self._hparam_spec, self._num_trials, **kwargs) + searcher = HyperoptSearcher(self._mode, self._hparam_spec, + self._num_trials, **kwargs) else: - raise NotImplementedError(f'searcher {searcher} is not implemented') + raise NotImplementedError( + f'searcher {searcher} is not implemented') return searcher - + @staticmethod def inject_config(cfg, key, value): key = key.split('.') @@ -60,59 +75,67 @@ def inject_config(cfg, key, value): assert item in cfg, f'key {key} is not in cfg' item = cfg[item] suffix += f'{item}.' 
- assert key[-1] in cfg, f'attribute {key[-1]} is not in cfg{suffix}' + assert key[-1] in cfg, f'attribute {key[-1]} is not in cfg{suffix}' cfg[key[-1]] = value return + def _run_trial(self): + if is_main_process(): + hparam = [self._searcher.suggest()] + else: + hparam = [None] + broadcast_object_list(hparam) + for k, v in hparam[0].items(): + self.inject_config(self._runner_cfg, k, v) + runner = Runner.from_cfg(self._runner_cfg) + runner.register_hook(self._reporting_hook, priority='VERY_LOW') + score: float + try: + runner.train() + score = [self._reporting_hook.get_score()] + except Exception: + if self._rule == 'greater': + score = [float('-inf')] + else: + score = [float('inf')] + finally: + broadcast_object_list(score) + self._searcher.record(hparam[0], score[0]) + self._history.append((hparam[0], score[0])) + del runner + torch.cuda.empty_cache() + self._reporting_hook.clear_history() + def tune(self): for _ in range(self._num_trials): - if is_main_process(): - hparam = [self._searcher.suggest()] - else: - hparam = [None] - broadcast_object_list(hparam) - # Sync hparam if distributed - for k, v in hparam[0].items(): - self.inject_config(self._runner_cfg, k, v) - runner = Runner.from_cfg(self._runner_cfg) - runner.register_hook(self._reporting_hook, priority='VERY_LOW') - score: float - try: - runner.train() - score = [self._reporting_hook.get_score()] - except Exception as e: - if self._rule == 'greater': - score = [float('-inf')] - else: - score = [float('inf')] - finally: - broadcast_object_list(score) - self._searcher.record(hparam[0], score[0]) - runner = self.tear_down_trial(runner) - self._history.append((hparam[0], score[0])) + self._run_trial() - beset_hparam: dict + best_hparam: dict + best_score: float if self._rule == 'greater': - beset_hparam = max(self._history, key=lambda x: x[1])[0] + best_hparam, best_score = max(self._history, key=lambda x: x[1])[0] else: - beset_hparam = min(self._history, key=lambda x: x[1])[0] - return beset_hparam - - def tear_down_trial(self, runner): - del runner - torch.cuda.empty_cache() - self._reporting_hook.clear_history() + best_hparam, best_score = min(self._history, key=lambda x: x[1])[0] + return best_hparam, best_score -def find_optimial_lr( - runner_cfg: Union[Dict, Config, ConfigDict], - monitor: str = 'loss', - rule: str = 'less', - num_trials: int = 32, - lower_lr: Optional[float] = 1e-6, upper_lr: Optional[float] = 1e-2, lr_choices : Optional[List[float]] = None, tuning_iter: int = 1e4, tunning_epoch: int = 0, report_op: str = 'latest', searcher: str = 'nevergrad', **searcher_kwargs -): +def find_optimial_lr(runner_cfg: Union[Dict, Config, ConfigDict], + monitor: str = 'loss', + rule: str = 'less', + num_trials: int = 32, + lower_lr: Optional[float] = 1e-6, + upper_lr: Optional[float] = 1e-2, + lr_choices: Optional[List[float]] = None, + tuning_iter: int = 1e4, + tunning_epoch: int = 0, + report_op: str = 'latest', + searcher: str = 'nevergrad', + **searcher_kwargs): is_discrete = lr_choices is not None - assert (lower_lr is None and upper_lr is None and lr_choices is not None) or (lower_lr is not None and upper_lr is not None and lr_choices is None), 'lower_lr and upper_lr should be set only one' + assert (lower_lr is None and upper_lr is None and lr_choices + is not None) or (lower_lr is not None and upper_lr is not None + and lr_choices is None + ), 'lower_lr and upper_lr should be set only one' hparam_spec: dict if is_discrete: hparam_spec = { @@ -140,6 +163,5 @@ def find_optimial_lr( tunning_epoch=tunning_epoch, 
report_op=report_op, searcher=searcher, - **searcher_kwargs - ) - return tunner.tune() \ No newline at end of file + **searcher_kwargs) + return tunner.tune() From a923847a423470f64a64bc54a55f95b3536b4e7e Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Fri, 11 Aug 2023 11:27:09 +0900 Subject: [PATCH 03/41] Add ex for tuning --- examples/tune/find_lr.py | 131 +++++++++++++++++++++++++++++++++++++++ mmengine/tuner/tunner.py | 2 +- 2 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 examples/tune/find_lr.py diff --git a/examples/tune/find_lr.py b/examples/tune/find_lr.py new file mode 100644 index 0000000000..f4e8996b5f --- /dev/null +++ b/examples/tune/find_lr.py @@ -0,0 +1,131 @@ +import torch +import torch.nn as nn + +from mmengine.evaluator import BaseMetric +from mmengine.model import BaseModel + +from mmengine.tuner import find_optimial_lr + +from mmengine.registry import DATASETS, METRICS, MODELS + +import tempfile + +import argparse + +class ToyModel(BaseModel): + + def __init__(self, data_preprocessor=None): + super().__init__(data_preprocessor=data_preprocessor) + self.linear1 = nn.Linear(2, 2) + self.linear2 = nn.Linear(2, 1) + + def forward(self, inputs, data_samples=None, mode='tensor'): + if isinstance(inputs, list): + inputs = torch.stack(inputs) + if isinstance(data_samples, list): + data_samples = torch.stack(data_samples) + outputs = self.linear1(inputs) + outputs = self.linear2(outputs) + + if mode == 'tensor': + return outputs + elif mode == 'loss': + loss = (data_samples - outputs).sum() + outputs = dict(loss=loss) + return outputs + elif mode == 'predict': + return outputs + + +class ToyDataset(Dataset): + METAINFO = dict() # type: ignore + data = torch.randn(12, 2) + label = torch.ones(12) + + @property + def metainfo(self): + return self.METAINFO + + def __len__(self): + return self.data.size(0) + + def __getitem__(self, index): + return dict(inputs=self.data[index], data_samples=self.label[index]) + + +class ToyMetric(BaseMetric): + + def __init__(self, collect_device='cpu', dummy_metrics=None): + super().__init__(collect_device=collect_device) + self.dummy_metrics = dummy_metrics + + def process(self, data_batch, predictions): + result = {'acc': 1} + self.results.append(result) + + def compute_metrics(self, results): + return dict(acc=1) + +def parse_args(): + parser = argparse.ArgumentParser(description='Distributed Training') + parser.add_argument( + '--launcher', + choices=['none', 'pytorch', 'slurm', 'mpi'], + default='none', + help='job launcher') + parser.add_argument('--local_rank', type=int, default=0) + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + MODELS.register_module(module=ToyModel, force=True) + METRICS.register_module(module=ToyMetric, force=True) + DATASETS.register_module(module=ToyDataset, force=True) + + temp_dir = tempfile.TemporaryDirectory() + + runner_cfg = dict( + work_dir=temp_dir.name, + model=dict(type='ToyModel'), + train_dataloader=dict( + dataset=dict(type='ToyDataset'), + sampler=dict(type='DefaultSampler', shuffle=True), + batch_size=3, + num_workers=0), + val_dataloader=dict( + dataset=dict(type='ToyDataset'), + sampler=dict(type='DefaultSampler', shuffle=False), + batch_size=3, + num_workers=0), + val_evaluator=[dict(type='ToyMetric')], + test_dataloader=dict( + dataset=dict(type='ToyDataset'), + sampler=dict(type='DefaultSampler', shuffle=False), + batch_size=3, + num_workers=0), + test_evaluator=[dict(type='ToyMetric')], + 
optim_wrapper=dict(optimizer=dict(type='SGD', lr=0.1)), + train_cfg=dict(by_epoch=True, max_epochs=2, val_interval=1), + val_cfg=dict(), + test_cfg=dict(), + launcher=args.launcher, + default_hooks=dict(logger=dict(type='LoggerHook', interval=1)), + custom_hooks=[], + env_cfg=dict(dist_cfg=dict(backend='nccl')), + experiment_name='test1') + + temp_dir.cleanup() + + best_lr, lowest_loss = find_optimial_lr( + runner_cfg=runner_cfg, + num_trials = 32, + tunning_epoch = 1, + ) + print("best_lr: ", best_lr) + print("lowest_loss: ", lowest_loss) + +if __name__ == '_main__': + main() \ No newline at end of file diff --git a/mmengine/tuner/tunner.py b/mmengine/tuner/tunner.py index a63a9d3150..07509bb569 100644 --- a/mmengine/tuner/tunner.py +++ b/mmengine/tuner/tunner.py @@ -126,7 +126,7 @@ def find_optimial_lr(runner_cfg: Union[Dict, Config, ConfigDict], lower_lr: Optional[float] = 1e-6, upper_lr: Optional[float] = 1e-2, lr_choices: Optional[List[float]] = None, - tuning_iter: int = 1e4, + tuning_iter: int = 0, tunning_epoch: int = 0, report_op: str = 'latest', searcher: str = 'nevergrad', From 4c9ef09c67fa0fca351d35bcd3017f55db63ce55 Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Thu, 17 Aug 2023 15:39:17 +0900 Subject: [PATCH 04/41] Refactor to rpc --- examples/tune/find_lr.py | 28 ++-- mmengine/tuner/_report_hook.py | 24 ++-- mmengine/tuner/{searcher,.py => searcher.py} | 8 +- mmengine/tuner/tunner.py | 131 ++++++++++++------- 4 files changed, 114 insertions(+), 77 deletions(-) rename mmengine/tuner/{searcher,.py => searcher.py} (97%) diff --git a/examples/tune/find_lr.py b/examples/tune/find_lr.py index f4e8996b5f..88f2646a39 100644 --- a/examples/tune/find_lr.py +++ b/examples/tune/find_lr.py @@ -1,16 +1,15 @@ +import argparse +import tempfile + import torch import torch.nn as nn +from torch.utils.data import Dataset from mmengine.evaluator import BaseMetric from mmengine.model import BaseModel - -from mmengine.tuner import find_optimial_lr - from mmengine.registry import DATASETS, METRICS, MODELS +from mmengine.tuner import find_optimial_lr -import tempfile - -import argparse class ToyModel(BaseModel): @@ -66,6 +65,7 @@ def process(self, data_batch, predictions): def compute_metrics(self, results): return dict(acc=1) + def parse_args(): parser = argparse.ArgumentParser(description='Distributed Training') parser.add_argument( @@ -78,6 +78,7 @@ def parse_args(): args = parser.parse_args() return args + def main(): args = parse_args() @@ -116,16 +117,17 @@ def main(): custom_hooks=[], env_cfg=dict(dist_cfg=dict(backend='nccl')), experiment_name='test1') - + temp_dir.cleanup() best_lr, lowest_loss = find_optimial_lr( runner_cfg=runner_cfg, - num_trials = 32, - tunning_epoch = 1, + num_trials=32, + tunning_epoch=1, ) - print("best_lr: ", best_lr) - print("lowest_loss: ", lowest_loss) + print('best_lr: ', best_lr) + print('lowest_loss: ', lowest_loss) + -if __name__ == '_main__': - main() \ No newline at end of file +if __name__ == '__main__': + main() diff --git a/mmengine/tuner/_report_hook.py b/mmengine/tuner/_report_hook.py index 2c96a4ff61..7a2ce37630 100644 --- a/mmengine/tuner/_report_hook.py +++ b/mmengine/tuner/_report_hook.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import math -from typing import Dict, Optional, Sequence, Union +from typing import Dict, List, Optional, Sequence, Union from mmengine.hooks import Hook @@ -9,7 +9,7 @@ class ReportingHook(Hook): - _max_history = 1024 + _max_scoreboard_len = 1024 def __init__(self, monitor: str, @@ -27,15 +27,15 @@ def __init__(self, self.report_op = report_op self.tuning_iter = tuning_iter self.tuning_epoch = tunning_epoch - self.enabled_by_epoch = self._tuning_epoch != 0 + self.enabled_by_epoch = self.tuning_epoch != 0 self.monitor = monitor - self.history = [] + self.scoreboard: List[float] = [] def _append_score(self, score): - self.history.append(score) - if len(self.history) > self._max_history: - self.history.pop(0) + self.scoreboard.append(score) + if len(self.scoreboard) > self._max_scoreboard_len: + self.scoreboard.pop(0) def after_train_iter(self, runner, @@ -72,7 +72,7 @@ def after_val_epoch(self, def report_score(self): if self.report_op == 'latest': - score = self.history[-1] + score = self.scoreboard[-1] if math.isnan(score) or math.isinf(score): if self.rule == 'greater': score = float('-inf') @@ -80,14 +80,14 @@ def report_score(self): score = float('inf') elif self.report_op == 'mean': - if any(math.isnan(s) or math.isinf(s) for s in self.history): + if any(math.isnan(s) or math.isinf(s) for s in self.scoreboard): if self.rule == 'greater': score = float('-inf') else: score = float('inf') else: - score = sum(self.history) / len(self.history) + score = sum(self.scoreboard) / len(self.scoreboard) return score - def clear_history(self): - self.history = [] + def clear_scoreboard(self): + self.scoreboard = [] diff --git a/mmengine/tuner/searcher,.py b/mmengine/tuner/searcher.py similarity index 97% rename from mmengine/tuner/searcher,.py rename to mmengine/tuner/searcher.py index 140c5e5153..7f96ca587d 100644 --- a/mmengine/tuner/searcher,.py +++ b/mmengine/tuner/searcher.py @@ -48,7 +48,7 @@ def hparam_spec(self) -> Dict[str, Dict]: def rule(self) -> str: return self._rule - def record(hparam: Dict, score: float): + def record(self, hparam: Dict, score: float): """Record hparam and score to solver. Args: @@ -83,14 +83,14 @@ def __init__(self, self._rule_op = -1.0 def _build_optimizer(self, solver_type: str, num_trials: int): - converted_hp_spec = ng.p.Dict( + converted_hparam_spec = ng.p.Dict( **{ k: ng.p.Scalar(lower=v['lower'], upper=v['upper']) if v['type'] == 'continuous' else ng.p.Choice(v['values']) - for k, v in self.hp_spec.items() + for k, v in self.hparam_spec.items() }) solver = ng.optimization.optimizerlib.registry[solver_type]( - parametrization=converted_hp_spec, budget=num_trials) + parametrization=converted_hparam_spec, budget=num_trials) return solver def suggest(self) -> Dict: diff --git a/mmengine/tuner/tunner.py b/mmengine/tuner/tunner.py index 07509bb569..88b10c663b 100644 --- a/mmengine/tuner/tunner.py +++ b/mmengine/tuner/tunner.py @@ -1,18 +1,22 @@ # Copyright (c) OpenMMLab. All rights reserved. 
+import os +import tempfile +from typing import Dict, List, Optional, Sequence, Tuple, Union + import torch -from tying import Dict, List, Optional, Sequence, Union +import torch.distributed.rpc as rpc +from torch.distributed.rpc import TensorPipeRpcBackendOptions from mmengine.config import Config, ConfigDict -from mmengine.dist import (broadcast_object_list, init_dist, is_distributed, - is_main_process) +from mmengine.dist import (broadcast_object_list, get_rank, get_world_size, + init_dist, is_distributed, is_main_process) from mmengine.runner import Runner from ._report_hook import ReportingHook +from .searcher import Searcher class Tuner: - dist_sanity_check_interval = 10 - def __init__(self, runner_cfg: Union[Dict, Config, ConfigDict], hparam_spec: Dict[str, Dict], @@ -22,7 +26,8 @@ def __init__(self, tuning_iter: int = 0, tunning_epoch: int = 0, report_op: str = 'latest', - searcher: str = 'nevergrad', + searcher_type: str = 'nevergrad', + rpc_port: int = 29501, **searcher_kwargs): self._runner_cfg = runner_cfg.copy() self._hparam_spec = hparam_spec @@ -30,10 +35,11 @@ def __init__(self, assert rule in ['greater', 'less'], f'rule {rule} is not supported' self._rule = rule self._num_trials = num_trials - self._searcher = self._build_searcher(searcher, **searcher_kwargs) - self._reporting_hook = ReportingHook(monitor, rule, tuning_iter, - tunning_epoch, report_op) - self._history = [] + self._tuning_iter = tuning_iter + self._tuning_epoch = tunning_epoch + self._reporting_op = report_op + self._searcher = self._build_searcher(searcher_type, **searcher_kwargs) + self._history: List[Tuple[Dict, float]] = [] launcher = self._runner_cfg.get('launcher', 'none') env_cfg = self._runner_cfg.get('env_cfg', {}) @@ -42,23 +48,35 @@ def __init__(self, self._distributed = False else: self._distributed = True - if self.distributed and not is_distributed(): + if self._distributed and not is_distributed(): dist_cfg: dict = env_cfg.get('dist_cfg', {}) - init_dist(self.launcher, **dist_cfg) + init_dist(launcher, **dist_cfg) + self._rpc_port = rpc_port - def _build_searcher(self, searcher: str = 'nevergrad', **kwargs): - if searcher == 'nevergrad': + def _init_rpc(self, rpc_port: int): + rpc_backend_options = TensorPipeRpcBackendOptions() + master_addr = os.environ.get('MASTER_ADDR' + 'localhost') + rpc_backend_options.init_method = f'tcp://{master_addr}:{rpc_port}' + rank = get_rank() + world_size = get_world_size() + rpc.init_rpc(f'worker{rank}', rank=rank, world_size=world_size) + + def _build_searcher(self, + searcher_type: str = 'nevergrad', + **kwargs) -> Searcher: + searcher: Searcher + if searcher_type == 'nevergrad': from .searcher import NevergradSearcher - searcher = NevergradSearcher(self._mode, self._hparam_spec, + searcher = NevergradSearcher(self._rule, self._hparam_spec, self._num_trials, **kwargs) - elif searcher == 'skopt': + elif searcher_type == 'skopt': from .searcher import SkoptSearcher - searcher = SkoptSearcher(self._mode, self._hparam_spec, - self._num_trials, **kwargs) - elif searcher == 'hyperopt': + searcher = SkoptSearcher(self._rule, self._hparam_spec, **kwargs) + elif searcher_type == 'hyperopt': from .searcher import HyperoptSearcher - searcher = HyperoptSearcher(self._mode, self._hparam_spec, - self._num_trials, **kwargs) + searcher = HyperoptSearcher(self._rule, self._hparam_spec, + **kwargs) else: raise NotImplementedError( f'searcher {searcher} is not implemented') @@ -79,43 +97,60 @@ def inject_config(cfg, key, value): cfg[key[-1]] = value return - def 
_run_trial(self): + def _run_trial(self, runner_cfg, monitor, rule, tuning_iter, tunning_epoch, + report_op): + runner = Runner.from_cfg(runner_cfg) + report_hook = ReportingHook(monitor, rule, tuning_iter, tunning_epoch, + report_op) + runner.register_hook(report_hook, priority='VERY_LOW') + runner.train() + return report_hook.get_score() + + def _submit(self): + self._init_rpc(self._rpc_port) + if is_main_process(): - hparam = [self._searcher.suggest()] + hparam = self._searcher.suggest() + for k, v in hparam.items(): + self.inject_config(self._runner_cfg, k, v) + temp_dir = tempfile.TemporaryDirectory() + self._runner_cfg['work_dir'] = temp_dir.name + + futs = [] + for rank in range(get_world_size()): + fut = rpc.rpc_async( + f'worker{rank}', + self._run_trial, + args=(self._runner_cfg, self._monitor, self._rule, + self._tuning_iter, self._tuning_epoch, + self._reporting_op)) + futs.append(fut) + score: float + try: + score = [torch.futures.wait_all(futs)[0]] + except Exception: + if self._rule == 'greater': + score = [float('-inf')] + else: + score = [float('inf')] + self._searcher.record(hparam, score[0]) + temp_dir.cleanup() else: - hparam = [None] - broadcast_object_list(hparam) - for k, v in hparam[0].items(): - self.inject_config(self._runner_cfg, k, v) - runner = Runner.from_cfg(self._runner_cfg) - runner.register_hook(self._reporting_hook, priority='VERY_LOW') - score: float - try: - runner.train() - score = [self._reporting_hook.get_score()] - except Exception: - if self._rule == 'greater': - score = [float('-inf')] - else: - score = [float('inf')] - finally: - broadcast_object_list(score) - self._searcher.record(hparam[0], score[0]) - self._history.append((hparam[0], score[0])) - del runner - torch.cuda.empty_cache() - self._reporting_hook.clear_history() + score = [None] + broadcast_object_list(score, src=0) + self._history.append((hparam, score[0])) + rpc.shutdown() def tune(self): for _ in range(self._num_trials): - self._run_trial() + self._submit() best_hparam: dict best_score: float if self._rule == 'greater': - best_hparam, best_score = max(self._history, key=lambda x: x[1])[0] + best_hparam, best_score = max(self._history, key=lambda x: x[1]) else: - best_hparam, best_score = min(self._history, key=lambda x: x[1])[0] + best_hparam, best_score = min(self._history, key=lambda x: x[1]) return best_hparam, best_score From 3580dd80d1d5ef63915a77c83821cc5dc28be0d3 Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Fri, 18 Aug 2023 08:38:27 +0900 Subject: [PATCH 05/41] Apply lint --- examples/tune/find_lr.py | 2 +- mmengine/registry/__init__.py | 17 ++-- mmengine/registry/root.py | 3 + mmengine/tune/__init__.py | 6 ++ mmengine/{tuner => tune}/_report_hook.py | 7 +- mmengine/tune/api.py | 54 ++++++++++ mmengine/{tuner => tune}/searcher.py | 34 ++++--- mmengine/{tuner => tune}/tunner.py | 119 ++++++----------------- mmengine/tuner/__init__.py | 4 - 9 files changed, 125 insertions(+), 121 deletions(-) create mode 100644 mmengine/tune/__init__.py rename mmengine/{tuner => tune}/_report_hook.py (93%) create mode 100644 mmengine/tune/api.py rename mmengine/{tuner => tune}/searcher.py (89%) rename mmengine/{tuner => tune}/tunner.py (54%) delete mode 100644 mmengine/tuner/__init__.py diff --git a/examples/tune/find_lr.py b/examples/tune/find_lr.py index 88f2646a39..080286a591 100644 --- a/examples/tune/find_lr.py +++ b/examples/tune/find_lr.py @@ -8,7 +8,7 @@ from mmengine.evaluator import BaseMetric from mmengine.model import BaseModel 
from mmengine.registry import DATASETS, METRICS, MODELS -from mmengine.tuner import find_optimial_lr +from mmengine.tune import find_optimial_lr class ToyModel(BaseModel): diff --git a/mmengine/registry/__init__.py b/mmengine/registry/__init__.py index cce2737043..ff0ced91fa 100644 --- a/mmengine/registry/__init__.py +++ b/mmengine/registry/__init__.py @@ -4,11 +4,11 @@ from .default_scope import DefaultScope from .registry import Registry from .root import (DATA_SAMPLERS, DATASETS, EVALUATOR, FUNCTIONS, HOOKS, - INFERENCERS, LOG_PROCESSORS, LOOPS, METRICS, MODEL_WRAPPERS, - MODELS, OPTIM_WRAPPER_CONSTRUCTORS, OPTIM_WRAPPERS, - OPTIMIZERS, PARAM_SCHEDULERS, RUNNER_CONSTRUCTORS, RUNNERS, - STRATEGIES, TASK_UTILS, TRANSFORMS, VISBACKENDS, - VISUALIZERS, WEIGHT_INITIALIZERS) + HYPER_SEARCHERS, INFERENCERS, LOG_PROCESSORS, LOOPS, + METRICS, MODEL_WRAPPERS, MODELS, OPTIM_WRAPPER_CONSTRUCTORS, + OPTIM_WRAPPERS, OPTIMIZERS, PARAM_SCHEDULERS, + RUNNER_CONSTRUCTORS, RUNNERS, STRATEGIES, TASK_UTILS, + TRANSFORMS, VISBACKENDS, VISUALIZERS, WEIGHT_INITIALIZERS) from .utils import (count_registered_modules, init_default_scope, traverse_registry_tree) @@ -18,7 +18,8 @@ 'OPTIMIZERS', 'OPTIM_WRAPPER_CONSTRUCTORS', 'TASK_UTILS', 'PARAM_SCHEDULERS', 'METRICS', 'MODEL_WRAPPERS', 'OPTIM_WRAPPERS', 'LOOPS', 'VISBACKENDS', 'VISUALIZERS', 'LOG_PROCESSORS', 'EVALUATOR', 'INFERENCERS', - 'DefaultScope', 'traverse_registry_tree', 'count_registered_modules', - 'build_model_from_cfg', 'build_runner_from_cfg', 'build_from_cfg', - 'build_scheduler_from_cfg', 'init_default_scope', 'FUNCTIONS', 'STRATEGIES' + 'HYPER_SEARCHERS', 'DefaultScope', 'traverse_registry_tree', + 'count_registered_modules', 'build_model_from_cfg', + 'build_runner_from_cfg', 'build_from_cfg', 'build_scheduler_from_cfg', + 'init_default_scope', 'FUNCTIONS', 'STRATEGIES' ] diff --git a/mmengine/registry/root.py b/mmengine/registry/root.py index 2663dffcd9..4d507801e3 100644 --- a/mmengine/registry/root.py +++ b/mmengine/registry/root.py @@ -65,3 +65,6 @@ # manage function FUNCTIONS = Registry('function') + +# hyper parameter searcher +HYPER_SEARCHERS = Registry('hyper parameter searcher') diff --git a/mmengine/tune/__init__.py b/mmengine/tune/__init__.py new file mode 100644 index 0000000000..97f2e3f797 --- /dev/null +++ b/mmengine/tune/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from .api import find_optimial_lr +from .searcher import * # noqa F403 +from .tunner import Tuner + +__all__ = ['Tuner', 'find_optimial_lr'] diff --git a/mmengine/tuner/_report_hook.py b/mmengine/tune/_report_hook.py similarity index 93% rename from mmengine/tuner/_report_hook.py rename to mmengine/tune/_report_hook.py index 7a2ce37630..60a9a0b02f 100644 --- a/mmengine/tuner/_report_hook.py +++ b/mmengine/tune/_report_hook.py @@ -9,7 +9,8 @@ class ReportingHook(Hook): - _max_scoreboard_len = 1024 + max_scoreboard_len = 1024 + rules_supported = ['greater', 'less'] def __init__(self, monitor: str, @@ -17,7 +18,7 @@ def __init__(self, tuning_iter: int = 0, tunning_epoch: int = 0, report_op: str = 'latest'): - assert rule in ['greater', 'less'], f'rule {rule} is not supported' + assert rule in self.rules_supported, f'rule {rule} is not supported' self.rule = rule assert (tuning_iter == 0 and tunning_epoch > 0) or ( tunning_epoch == 0 and tuning_iter > 0 @@ -34,7 +35,7 @@ def __init__(self, def _append_score(self, score): self.scoreboard.append(score) - if len(self.scoreboard) > self._max_scoreboard_len: + if len(self.scoreboard) > self.max_scoreboard_len: self.scoreboard.pop(0) def after_train_iter(self, diff --git a/mmengine/tune/api.py b/mmengine/tune/api.py new file mode 100644 index 0000000000..e7d21dfce4 --- /dev/null +++ b/mmengine/tune/api.py @@ -0,0 +1,54 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .tunner import Tuner + +from typing import Dict, List, Optional, Union, Tuple + +from mmengine.config import Config, ConfigDict + + +def find_optimial_lr(runner_cfg: Union[Dict, Config, ConfigDict], + monitor: str = 'loss', + rule: str = 'less', + num_trials: int = 32, + lower_lr: Optional[float] = 1e-6, + upper_lr: Optional[float] = 1e-2, + lr_choices: Optional[List[float]] = None, + tuning_iter: int = 0, + tunning_epoch: int = 0, + report_op: str = 'latest', + searcher: str = 'nevergrad', + **searcher_kwargs) -> Tuple[dict, float]: + is_discrete = lr_choices is not None + assert (lower_lr is None and upper_lr is None and lr_choices + is not None) or (lower_lr is not None and upper_lr is not None + and lr_choices is None + ), 'lower_lr and upper_lr should be set only one' + hparam_spec: dict + if is_discrete: + hparam_spec = { + 'optimizer.lr': { + 'type': 'discrete', + 'values': lr_choices + } + } + else: + hparam_spec = { + 'optimizer.lr': { + 'type': 'continuous', + 'lower': lower_lr, + 'upper': upper_lr + } + } + + tunner = Tuner( + runner_cfg, + hparam_spec=hparam_spec, + monitor=monitor, + rule=rule, + num_trials=num_trials, + tuning_iter=tuning_iter, + tunning_epoch=tunning_epoch, + report_op=report_op, + searcher=searcher, + **searcher_kwargs) + return tunner.tune() \ No newline at end of file diff --git a/mmengine/tuner/searcher.py b/mmengine/tune/searcher.py similarity index 89% rename from mmengine/tuner/searcher.py rename to mmengine/tune/searcher.py index 7f96ca587d..d84f39cba8 100644 --- a/mmengine/tuner/searcher.py +++ b/mmengine/tune/searcher.py @@ -1,6 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from tying import Dict, Optional +from mmengine.registry import HYPER_SEARCHERS + try: import nevergard as ng except ImportError: @@ -17,28 +19,25 @@ hp = None -class Searcher: +class _Searcher: + + rules_supported = ['greater', 'less'] def __init__(self, rule: str, hparam_spec: Dict[str, Dict]): - assert rule in ['less', 'greater' - ], f"rule must be 'less' or 'greater', but got {rule}" + assert rule in self.rules_supported, f"rule must be 'less' or 'greater', but got {rule}" self._rule = rule + self._validate_hparam_spec(hparam_spec) + self._hparam_spec = hparam_spec + + def _validate_hparam_spec(self, hparam_spec): for _, v in hparam_spec.items(): assert v.get('type', None) in [ 'discrete', 'continuous' ], f'hparam_spec must have a key "type" and its value must be "discrete" or "continuous", but got {v}' if v['type'] == 'discrete': - assert v.get( - 'values', None - ) is not None, f'if hparam_spec["type"] is "discrete", hparam_spec must have a key "values", but got {v}' + assert 'values' in v, f'if hparam_spec["type"] is "discrete", hparam_spec must have a key "values", but got {v}' else: - assert v.get( - 'lower', None - ) is not None, f'if hparam_spec["type"] is "continuous", hparam_spec must have a key "lower", but got {v}' - assert v.get( - 'upper', None - ) is not None, f'if hparam_spec["type"] is "continuous", hparam_spec must have a key "upper", but got {v}' - self._hparam_spec = hparam_spec + assert 'lower' in v and 'upper' in v, f'if hparam_spec["type"] is "continuous", hparam_spec must have keys "lower" and "upper", but got {v}' @property def hparam_spec(self) -> Dict[str, Dict]: @@ -64,7 +63,8 @@ def suggest(self) -> Dict: """ -class NevergradSearcher(Searcher): +@HYPER_SEARCHERS.register_module() +class NevergradSearcher(_Searcher): def __init__(self, rule: str, @@ -100,7 +100,8 @@ def record(self, hparam: Dict, score: float): self._optimizer.tell(hparam, score * self._rule_op) -class SkoptSearcher(Searcher): +@HYPER_SEARCHERS.register_module() +class SkoptSearcher(_Searcher): def __init__(self, rule: str, @@ -158,7 +159,8 @@ def record(self, hparam: Dict, score: float): self._optimizer.tell(ordered_values, score * self._rule_op) -class HyperoptSearcher(Searcher): +@HYPER_SEARCHERS.register_module() +class HyperoptSearcher(_Searcher): def __init__(self, rule: str, diff --git a/mmengine/tuner/tunner.py b/mmengine/tune/tunner.py similarity index 54% rename from mmengine/tuner/tunner.py rename to mmengine/tune/tunner.py index 88b10c663b..a002e65ecb 100644 --- a/mmengine/tuner/tunner.py +++ b/mmengine/tune/tunner.py @@ -1,21 +1,23 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import os import tempfile -from typing import Dict, List, Optional, Sequence, Tuple, Union +from typing import Dict, List, Sequence, Tuple, Union import torch import torch.distributed.rpc as rpc -from torch.distributed.rpc import TensorPipeRpcBackendOptions +from torch.distributed.rpc import TensorPipeRpcBackendOptions, is_available from mmengine.config import Config, ConfigDict from mmengine.dist import (broadcast_object_list, get_rank, get_world_size, init_dist, is_distributed, is_main_process) +from mmengine.registry import HYPER_SEARCHERS from mmengine.runner import Runner from ._report_hook import ReportingHook -from .searcher import Searcher +from .searcher import _Searcher class Tuner: + rules_supported = ['greater', 'less'] def __init__(self, runner_cfg: Union[Dict, Config, ConfigDict], @@ -29,11 +31,16 @@ def __init__(self, searcher_type: str = 'nevergrad', rpc_port: int = 29501, **searcher_kwargs): + assert is_available(), 'torch.distributed.rpc is not available.' + self._runner_cfg = runner_cfg.copy() self._hparam_spec = hparam_spec self._monitor = monitor - assert rule in ['greater', 'less'], f'rule {rule} is not supported' + + if rule not in self.rules_supported: + raise ValueError(f'Rule {rule} is not supported') self._rule = rule + self._num_trials = num_trials self._tuning_iter = tuning_iter self._tuning_epoch = tunning_epoch @@ -42,14 +49,10 @@ def __init__(self, self._history: List[Tuple[Dict, float]] = [] launcher = self._runner_cfg.get('launcher', 'none') - env_cfg = self._runner_cfg.get('env_cfg', {}) - self._distributed: bool - if launcher == 'none': - self._distributed = False - else: - self._distributed = True + self._distributed = launcher != 'none' if self._distributed and not is_distributed(): - dist_cfg: dict = env_cfg.get('dist_cfg', {}) + env_cfg = runner_cfg.get('env_cfg', {}) + dist_cfg = env_cfg.get('dist_cfg', {}) init_dist(launcher, **dist_cfg) self._rpc_port = rpc_port @@ -64,23 +67,10 @@ def _init_rpc(self, rpc_port: int): def _build_searcher(self, searcher_type: str = 'nevergrad', - **kwargs) -> Searcher: - searcher: Searcher - if searcher_type == 'nevergrad': - from .searcher import NevergradSearcher - searcher = NevergradSearcher(self._rule, self._hparam_spec, - self._num_trials, **kwargs) - elif searcher_type == 'skopt': - from .searcher import SkoptSearcher - searcher = SkoptSearcher(self._rule, self._hparam_spec, **kwargs) - elif searcher_type == 'hyperopt': - from .searcher import HyperoptSearcher - searcher = HyperoptSearcher(self._rule, self._hparam_spec, - **kwargs) - else: - raise NotImplementedError( - f'searcher {searcher} is not implemented') - return searcher + **kwargs) -> _Searcher: + build_config = dict(type=searcher_type) + build_config.update(kwargs) + return HYPER_SEARCHERS.build(build_config) @staticmethod def inject_config(cfg, key, value): @@ -106,6 +96,12 @@ def _run_trial(self, runner_cfg, monitor, rule, tuning_iter, tunning_epoch, runner.train() return report_hook.get_score() + def _get_score_from_futures(self, futs) -> float: + try: + return torch.futures.wait_all(futs)[0] + except Exception: + return float('-inf') if self._rule == 'greater' else float('inf') + def _submit(self): self._init_rpc(self._rpc_port) @@ -125,23 +121,16 @@ def _submit(self): self._tuning_iter, self._tuning_epoch, self._reporting_op)) futs.append(fut) - score: float - try: - score = [torch.futures.wait_all(futs)[0]] - except Exception: - if self._rule == 'greater': - score = [float('-inf')] - else: - score = [float('inf')] - 
self._searcher.record(hparam, score[0]) + score = self._get_score_from_futures(futs) + self._searcher.record(hparam, score) temp_dir.cleanup() else: - score = [None] - broadcast_object_list(score, src=0) - self._history.append((hparam, score[0])) + score = None + broadcast_object_list([score], src=0) + self._history.append((hparam, score)) rpc.shutdown() - def tune(self): + def tune(self) -> Tuple[dict, float]: for _ in range(self._num_trials): self._submit() @@ -152,51 +141,3 @@ def tune(self): else: best_hparam, best_score = min(self._history, key=lambda x: x[1]) return best_hparam, best_score - - -def find_optimial_lr(runner_cfg: Union[Dict, Config, ConfigDict], - monitor: str = 'loss', - rule: str = 'less', - num_trials: int = 32, - lower_lr: Optional[float] = 1e-6, - upper_lr: Optional[float] = 1e-2, - lr_choices: Optional[List[float]] = None, - tuning_iter: int = 0, - tunning_epoch: int = 0, - report_op: str = 'latest', - searcher: str = 'nevergrad', - **searcher_kwargs): - is_discrete = lr_choices is not None - assert (lower_lr is None and upper_lr is None and lr_choices - is not None) or (lower_lr is not None and upper_lr is not None - and lr_choices is None - ), 'lower_lr and upper_lr should be set only one' - hparam_spec: dict - if is_discrete: - hparam_spec = { - 'optimizer.lr': { - 'type': 'discrete', - 'values': lr_choices - } - } - else: - hparam_spec = { - 'optimizer.lr': { - 'type': 'continuous', - 'lower': lower_lr, - 'upper': upper_lr - } - } - - tunner = Tuner( - runner_cfg, - hparam_spec=hparam_spec, - monitor=monitor, - rule=rule, - num_trials=num_trials, - tuning_iter=tuning_iter, - tunning_epoch=tunning_epoch, - report_op=report_op, - searcher=searcher, - **searcher_kwargs) - return tunner.tune() diff --git a/mmengine/tuner/__init__.py b/mmengine/tuner/__init__.py deleted file mode 100644 index cb944d0dba..0000000000 --- a/mmengine/tuner/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .tunner import Tuner, find_optimial_lr - -__all__ = ['Tuner', 'find_optimial_lr'] From 55364e018bf680b379648b0e05abf53d38bbf01a Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Fri, 18 Aug 2023 09:03:36 +0900 Subject: [PATCH 06/41] Add logger to tune --- examples/tune/find_lr.py | 6 ++--- mmengine/tune/__init__.py | 2 +- mmengine/tune/api.py | 9 ++++--- mmengine/tune/tunner.py | 50 +++++++++++++++++++++++++++++++++++++-- 4 files changed, 56 insertions(+), 11 deletions(-) diff --git a/examples/tune/find_lr.py b/examples/tune/find_lr.py index 080286a591..4484a129b0 100644 --- a/examples/tune/find_lr.py +++ b/examples/tune/find_lr.py @@ -120,13 +120,13 @@ def main(): temp_dir.cleanup() - best_lr, lowest_loss = find_optimial_lr( + result = find_optimial_lr( runner_cfg=runner_cfg, num_trials=32, tunning_epoch=1, ) - print('best_lr: ', best_lr) - print('lowest_loss: ', lowest_loss) + print('best_lr: ', result.get('hparam')) + print('lowest_loss: ', result.get('score')) if __name__ == '__main__': diff --git a/mmengine/tune/__init__.py b/mmengine/tune/__init__.py index 97f2e3f797..c324fc49b6 100644 --- a/mmengine/tune/__init__.py +++ b/mmengine/tune/__init__.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from .api import find_optimial_lr -from .searcher import * # noqa F403 +from .searcher import * # noqa F403 from .tunner import Tuner __all__ = ['Tuner', 'find_optimial_lr'] diff --git a/mmengine/tune/api.py b/mmengine/tune/api.py index e7d21dfce4..255a2ced75 100644 --- a/mmengine/tune/api.py +++ b/mmengine/tune/api.py @@ -1,9 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved. -from .tunner import Tuner - -from typing import Dict, List, Optional, Union, Tuple +from typing import Dict, List, Optional, Union from mmengine.config import Config, ConfigDict +from .tunner import Tuner def find_optimial_lr(runner_cfg: Union[Dict, Config, ConfigDict], @@ -17,7 +16,7 @@ def find_optimial_lr(runner_cfg: Union[Dict, Config, ConfigDict], tunning_epoch: int = 0, report_op: str = 'latest', searcher: str = 'nevergrad', - **searcher_kwargs) -> Tuple[dict, float]: + **searcher_kwargs) -> Dict[str, Union[dict, float]]: is_discrete = lr_choices is not None assert (lower_lr is None and upper_lr is None and lr_choices is not None) or (lower_lr is not None and upper_lr is not None @@ -51,4 +50,4 @@ def find_optimial_lr(runner_cfg: Union[Dict, Config, ConfigDict], report_op=report_op, searcher=searcher, **searcher_kwargs) - return tunner.tune() \ No newline at end of file + return tunner.tune() diff --git a/mmengine/tune/tunner.py b/mmengine/tune/tunner.py index a002e65ecb..2ae407443f 100644 --- a/mmengine/tune/tunner.py +++ b/mmengine/tune/tunner.py @@ -10,6 +10,7 @@ from mmengine.config import Config, ConfigDict from mmengine.dist import (broadcast_object_list, get_rank, get_world_size, init_dist, is_distributed, is_main_process) +from mmengine.logging import MMLogger from mmengine.registry import HYPER_SEARCHERS from mmengine.runner import Runner from ._report_hook import ReportingHook @@ -55,6 +56,45 @@ def __init__(self, dist_cfg = env_cfg.get('dist_cfg', {}) init_dist(launcher, **dist_cfg) self._rpc_port = rpc_port + self._logger = MMLogger.get_instance('Tuner', log_level='INFO') + self._logger.info( + f'Tuner initialized with rule: {rule} and monitor: {monitor}') + + @property + def hparam_spec(self) -> Dict[str, Dict]: + return self._hparam_spec + + @property + def monitor(self) -> str: + return self._monitor + + @property + def rule(self) -> str: + return self._rule + + @property + def num_trials(self) -> int: + return self._num_trials + + @property + def tuning_iter(self) -> int: + return self._tuning_iter + + @property + def tuning_epoch(self) -> int: + return self._tuning_epoch + + @property + def reporting_op(self) -> str: + return self._reporting_op + + @property + def history(self) -> List[Tuple[Dict, float]]: + return self._history + + @property + def rpc_port(self) -> int: + return self._rpc_port def _init_rpc(self, rpc_port: int): rpc_backend_options = TensorPipeRpcBackendOptions() @@ -68,6 +108,7 @@ def _init_rpc(self, rpc_port: int): def _build_searcher(self, searcher_type: str = 'nevergrad', **kwargs) -> _Searcher: + self._logger.info(f'Building searcher of type: {searcher_type}') build_config = dict(type=searcher_type) build_config.update(kwargs) return HYPER_SEARCHERS.build(build_config) @@ -122,6 +163,7 @@ def _submit(self): self._reporting_op)) futs.append(fut) score = self._get_score_from_futures(futs) + self._logger.info(f'Trial completed with score: {score}') self._searcher.record(hparam, score) temp_dir.cleanup() else: @@ -130,7 +172,8 @@ def _submit(self): self._history.append((hparam, score)) rpc.shutdown() - def tune(self) -> Tuple[dict, float]: + def tune(self) -> Dict[str, 
Union[dict, float]]: + self._logger.info(f'Starting tuning for {self._num_trials} trials...') for _ in range(self._num_trials): self._submit() @@ -140,4 +183,7 @@ def tune(self) -> Tuple[dict, float]: best_hparam, best_score = max(self._history, key=lambda x: x[1]) else: best_hparam, best_score = min(self._history, key=lambda x: x[1]) - return best_hparam, best_score + self._logger.info(f'Best hyperparameters obtained: {best_hparam}') + self._logger.info(f'Best score obtained: {best_score}') + self._logger.info('Tuning completed.') + return dict(hparam=best_hparam, score=best_score) From 882271a87c561c8ca5229587cf9d69683b0eb246 Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Fri, 18 Aug 2023 09:35:08 +0900 Subject: [PATCH 07/41] Fix searcher init args --- mmengine/tune/tunner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mmengine/tune/tunner.py b/mmengine/tune/tunner.py index 2ae407443f..ce24bc4c05 100644 --- a/mmengine/tune/tunner.py +++ b/mmengine/tune/tunner.py @@ -109,7 +109,7 @@ def _build_searcher(self, searcher_type: str = 'nevergrad', **kwargs) -> _Searcher: self._logger.info(f'Building searcher of type: {searcher_type}') - build_config = dict(type=searcher_type) + build_config = dict(type=searcher_type, num_trials=self._num_trials) build_config.update(kwargs) return HYPER_SEARCHERS.build(build_config) From 6285928738d34ede4c5e3348ce90f9bfd2e2e75c Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Fri, 18 Aug 2023 14:46:38 +0900 Subject: [PATCH 08/41] Apply lint --- examples/tune/find_lr.py | 2 -- mmengine/tune/api.py | 10 ++++++---- mmengine/tune/searcher.py | 17 ++++++++++++----- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/examples/tune/find_lr.py b/examples/tune/find_lr.py index 4484a129b0..6d5dc28f45 100644 --- a/examples/tune/find_lr.py +++ b/examples/tune/find_lr.py @@ -118,8 +118,6 @@ def main(): env_cfg=dict(dist_cfg=dict(backend='nccl')), experiment_name='test1') - temp_dir.cleanup() - result = find_optimial_lr( runner_cfg=runner_cfg, num_trials=32, diff --git a/mmengine/tune/api.py b/mmengine/tune/api.py index 255a2ced75..05482b1b33 100644 --- a/mmengine/tune/api.py +++ b/mmengine/tune/api.py @@ -18,10 +18,12 @@ def find_optimial_lr(runner_cfg: Union[Dict, Config, ConfigDict], searcher: str = 'nevergrad', **searcher_kwargs) -> Dict[str, Union[dict, float]]: is_discrete = lr_choices is not None - assert (lower_lr is None and upper_lr is None and lr_choices - is not None) or (lower_lr is not None and upper_lr is not None - and lr_choices is None - ), 'lower_lr and upper_lr should be set only one' + if is_discrete: + assert lower_lr is None and upper_lr is None, \ + 'lower_lr and upper_lr should be None if lr_choices is not None' + else: + assert lower_lr is not None and upper_lr is not None, \ + 'lower_lr and upper_lr should set if lr_choices is None' hparam_spec: dict if is_discrete: hparam_spec = { diff --git a/mmengine/tune/searcher.py b/mmengine/tune/searcher.py index d84f39cba8..24270ef889 100644 --- a/mmengine/tune/searcher.py +++ b/mmengine/tune/searcher.py @@ -24,7 +24,8 @@ class _Searcher: rules_supported = ['greater', 'less'] def __init__(self, rule: str, hparam_spec: Dict[str, Dict]): - assert rule in self.rules_supported, f"rule must be 'less' or 'greater', but got {rule}" + assert rule in self.rules_supported, \ + f"rule must be 'less' or 'greater', but got {rule}" self._rule = rule self._validate_hparam_spec(hparam_spec) 
self._hparam_spec = hparam_spec @@ -33,11 +34,18 @@ def _validate_hparam_spec(self, hparam_spec): for _, v in hparam_spec.items(): assert v.get('type', None) in [ 'discrete', 'continuous' - ], f'hparam_spec must have a key "type" and its value must be "discrete" or "continuous", but got {v}' + ], \ + 'hparam_spec must have a key "type" and ' \ + f'its value must be "discrete" or "continuous", but got {v}' if v['type'] == 'discrete': - assert 'values' in v, f'if hparam_spec["type"] is "discrete", hparam_spec must have a key "values", but got {v}' + assert 'values' in v, \ + 'if hparam_spec["type"] is "discrete", ' +\ + f'hparam_spec must have a key "values", but got {v}' else: - assert 'lower' in v and 'upper' in v, f'if hparam_spec["type"] is "continuous", hparam_spec must have keys "lower" and "upper", but got {v}' + assert 'lower' in v and 'upper' in v, \ + 'if hparam_spec["type"] is "continuous", ' +\ + 'hparam_spec must have keys "lower" and "upper", ' +\ + f'but got {v}' @property def hparam_spec(self) -> Dict[str, Dict]: @@ -212,7 +220,6 @@ def suggest(self) -> Dict: return suggested_params def record(self, hparam: Dict, score: float): - # Hyperopt requires loss (lower is better), so we should adjust our score if in "greater" rule. self._trials.insert_trial_docs([{ 'tid': len(self._trials.trials), 'book_time': hp.utils.coarse_utcnow(), From 0431eb0b555aff4a29b8df5c107cc6d066272319 Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Fri, 18 Aug 2023 15:13:17 +0900 Subject: [PATCH 09/41] Fix typo --- mmengine/tune/__init__.py | 2 +- mmengine/tune/api.py | 10 +++++----- mmengine/tune/searcher.py | 2 +- mmengine/tune/{tunner.py => tuner.py} | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) rename mmengine/tune/{tunner.py => tuner.py} (100%) diff --git a/mmengine/tune/__init__.py b/mmengine/tune/__init__.py index c324fc49b6..fe4d37b268 100644 --- a/mmengine/tune/__init__.py +++ b/mmengine/tune/__init__.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from .api import find_optimial_lr from .searcher import * # noqa F403 -from .tunner import Tuner +from .tuner import Tuner __all__ = ['Tuner', 'find_optimial_lr'] diff --git a/mmengine/tune/api.py b/mmengine/tune/api.py index 05482b1b33..c4f70f9603 100644 --- a/mmengine/tune/api.py +++ b/mmengine/tune/api.py @@ -2,7 +2,7 @@ from typing import Dict, List, Optional, Union from mmengine.config import Config, ConfigDict -from .tunner import Tuner +from .tuner import Tuner def find_optimial_lr(runner_cfg: Union[Dict, Config, ConfigDict], @@ -15,7 +15,7 @@ def find_optimial_lr(runner_cfg: Union[Dict, Config, ConfigDict], tuning_iter: int = 0, tunning_epoch: int = 0, report_op: str = 'latest', - searcher: str = 'nevergrad', + searcher_type: str = 'nevergrad', **searcher_kwargs) -> Dict[str, Union[dict, float]]: is_discrete = lr_choices is not None if is_discrete: @@ -41,7 +41,7 @@ def find_optimial_lr(runner_cfg: Union[Dict, Config, ConfigDict], } } - tunner = Tuner( + tuner = Tuner( runner_cfg, hparam_spec=hparam_spec, monitor=monitor, @@ -50,6 +50,6 @@ def find_optimial_lr(runner_cfg: Union[Dict, Config, ConfigDict], tuning_iter=tuning_iter, tunning_epoch=tunning_epoch, report_op=report_op, - searcher=searcher, + searcher_type=searcher_type, **searcher_kwargs) - return tunner.tune() + return tuner.tune() diff --git a/mmengine/tune/searcher.py b/mmengine/tune/searcher.py index 24270ef889..b5a26ae58c 100644 --- a/mmengine/tune/searcher.py +++ b/mmengine/tune/searcher.py @@ -1,5 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. -from tying import Dict, Optional +from typing import Dict, Optional from mmengine.registry import HYPER_SEARCHERS diff --git a/mmengine/tune/tunner.py b/mmengine/tune/tuner.py similarity index 100% rename from mmengine/tune/tunner.py rename to mmengine/tune/tuner.py index ce24bc4c05..cf12ed97db 100644 --- a/mmengine/tune/tunner.py +++ b/mmengine/tune/tuner.py @@ -46,7 +46,6 @@ def __init__(self, self._tuning_iter = tuning_iter self._tuning_epoch = tunning_epoch self._reporting_op = report_op - self._searcher = self._build_searcher(searcher_type, **searcher_kwargs) self._history: List[Tuple[Dict, float]] = [] launcher = self._runner_cfg.get('launcher', 'none') @@ -59,6 +58,7 @@ def __init__(self, self._logger = MMLogger.get_instance('Tuner', log_level='INFO') self._logger.info( f'Tuner initialized with rule: {rule} and monitor: {monitor}') + self._searcher = self._build_searcher(searcher_type, **searcher_kwargs) @property def hparam_spec(self) -> Dict[str, Dict]: From b5985fb8ed63e20254756554c96782bcf792804b Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Mon, 21 Aug 2023 10:30:09 +0900 Subject: [PATCH 10/41] Fix minor --- examples/tune/find_lr.py | 2 +- mmengine/registry/__init__.py | 17 ++++++++--------- mmengine/registry/root.py | 3 --- mmengine/tune/__init__.py | 2 +- mmengine/tune/{searcher.py => searchers.py} | 4 +++- mmengine/tune/tuner.py | 3 +-- 6 files changed, 14 insertions(+), 17 deletions(-) rename mmengine/tune/{searcher.py => searchers.py} (98%) diff --git a/examples/tune/find_lr.py b/examples/tune/find_lr.py index 6d5dc28f45..56a4bf72a5 100644 --- a/examples/tune/find_lr.py +++ b/examples/tune/find_lr.py @@ -67,7 +67,7 @@ def compute_metrics(self, results): def parse_args(): - parser = argparse.ArgumentParser(description='Distributed Training') + parser = argparse.ArgumentParser(description='Distributed Tuning') parser.add_argument( '--launcher', choices=['none', 'pytorch', 'slurm', 'mpi'], diff --git 
a/mmengine/registry/__init__.py b/mmengine/registry/__init__.py index ff0ced91fa..cce2737043 100644 --- a/mmengine/registry/__init__.py +++ b/mmengine/registry/__init__.py @@ -4,11 +4,11 @@ from .default_scope import DefaultScope from .registry import Registry from .root import (DATA_SAMPLERS, DATASETS, EVALUATOR, FUNCTIONS, HOOKS, - HYPER_SEARCHERS, INFERENCERS, LOG_PROCESSORS, LOOPS, - METRICS, MODEL_WRAPPERS, MODELS, OPTIM_WRAPPER_CONSTRUCTORS, - OPTIM_WRAPPERS, OPTIMIZERS, PARAM_SCHEDULERS, - RUNNER_CONSTRUCTORS, RUNNERS, STRATEGIES, TASK_UTILS, - TRANSFORMS, VISBACKENDS, VISUALIZERS, WEIGHT_INITIALIZERS) + INFERENCERS, LOG_PROCESSORS, LOOPS, METRICS, MODEL_WRAPPERS, + MODELS, OPTIM_WRAPPER_CONSTRUCTORS, OPTIM_WRAPPERS, + OPTIMIZERS, PARAM_SCHEDULERS, RUNNER_CONSTRUCTORS, RUNNERS, + STRATEGIES, TASK_UTILS, TRANSFORMS, VISBACKENDS, + VISUALIZERS, WEIGHT_INITIALIZERS) from .utils import (count_registered_modules, init_default_scope, traverse_registry_tree) @@ -18,8 +18,7 @@ 'OPTIMIZERS', 'OPTIM_WRAPPER_CONSTRUCTORS', 'TASK_UTILS', 'PARAM_SCHEDULERS', 'METRICS', 'MODEL_WRAPPERS', 'OPTIM_WRAPPERS', 'LOOPS', 'VISBACKENDS', 'VISUALIZERS', 'LOG_PROCESSORS', 'EVALUATOR', 'INFERENCERS', - 'HYPER_SEARCHERS', 'DefaultScope', 'traverse_registry_tree', - 'count_registered_modules', 'build_model_from_cfg', - 'build_runner_from_cfg', 'build_from_cfg', 'build_scheduler_from_cfg', - 'init_default_scope', 'FUNCTIONS', 'STRATEGIES' + 'DefaultScope', 'traverse_registry_tree', 'count_registered_modules', + 'build_model_from_cfg', 'build_runner_from_cfg', 'build_from_cfg', + 'build_scheduler_from_cfg', 'init_default_scope', 'FUNCTIONS', 'STRATEGIES' ] diff --git a/mmengine/registry/root.py b/mmengine/registry/root.py index 4d507801e3..2663dffcd9 100644 --- a/mmengine/registry/root.py +++ b/mmengine/registry/root.py @@ -65,6 +65,3 @@ # manage function FUNCTIONS = Registry('function') - -# hyper parameter searcher -HYPER_SEARCHERS = Registry('hyper parameter searcher') diff --git a/mmengine/tune/__init__.py b/mmengine/tune/__init__.py index fe4d37b268..b38c038c0d 100644 --- a/mmengine/tune/__init__.py +++ b/mmengine/tune/__init__.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. from .api import find_optimial_lr -from .searcher import * # noqa F403 +from .searchers import * # noqa F403 from .tuner import Tuner __all__ = ['Tuner', 'find_optimial_lr'] diff --git a/mmengine/tune/searcher.py b/mmengine/tune/searchers.py similarity index 98% rename from mmengine/tune/searcher.py rename to mmengine/tune/searchers.py index b5a26ae58c..45155c1345 100644 --- a/mmengine/tune/searcher.py +++ b/mmengine/tune/searchers.py @@ -1,7 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from typing import Dict, Optional -from mmengine.registry import HYPER_SEARCHERS +from mmengine.registry import Registry try: import nevergard as ng @@ -19,6 +19,8 @@ hp = None +HYPER_SEARCHERS = Registry('hyper parameter searcher') + class _Searcher: rules_supported = ['greater', 'less'] diff --git a/mmengine/tune/tuner.py b/mmengine/tune/tuner.py index cf12ed97db..3d067d1ada 100644 --- a/mmengine/tune/tuner.py +++ b/mmengine/tune/tuner.py @@ -11,10 +11,9 @@ from mmengine.dist import (broadcast_object_list, get_rank, get_world_size, init_dist, is_distributed, is_main_process) from mmengine.logging import MMLogger -from mmengine.registry import HYPER_SEARCHERS from mmengine.runner import Runner from ._report_hook import ReportingHook -from .searcher import _Searcher +from .searchers import HYPER_SEARCHERS, _Searcher class Tuner: From 4b5a249806157168ebe55199df4cf48aeb43a106 Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Mon, 21 Aug 2023 16:32:30 +0900 Subject: [PATCH 11/41] Fix rpc init --- mmengine/tune/_report_hook.py | 2 + mmengine/tune/api.py | 6 +-- mmengine/tune/searchers.py | 12 +++-- mmengine/tune/tuner.py | 87 ++++++++++++++++++++++------------- 4 files changed, 67 insertions(+), 40 deletions(-) diff --git a/mmengine/tune/_report_hook.py b/mmengine/tune/_report_hook.py index 60a9a0b02f..48609fe1ff 100644 --- a/mmengine/tune/_report_hook.py +++ b/mmengine/tune/_report_hook.py @@ -88,6 +88,8 @@ def report_score(self): score = float('inf') else: score = sum(self.scoreboard) / len(self.scoreboard) + else: + raise NotImplementedError return score def clear_scoreboard(self): diff --git a/mmengine/tune/api.py b/mmengine/tune/api.py index c4f70f9603..10abdd1c2b 100644 --- a/mmengine/tune/api.py +++ b/mmengine/tune/api.py @@ -15,7 +15,7 @@ def find_optimial_lr(runner_cfg: Union[Dict, Config, ConfigDict], tuning_iter: int = 0, tunning_epoch: int = 0, report_op: str = 'latest', - searcher_type: str = 'nevergrad', + searcher_type: str = 'NevergradSearcher', **searcher_kwargs) -> Dict[str, Union[dict, float]]: is_discrete = lr_choices is not None if is_discrete: @@ -27,14 +27,14 @@ def find_optimial_lr(runner_cfg: Union[Dict, Config, ConfigDict], hparam_spec: dict if is_discrete: hparam_spec = { - 'optimizer.lr': { + 'optim_wrapper.optimizer.lr': { 'type': 'discrete', 'values': lr_choices } } else: hparam_spec = { - 'optimizer.lr': { + 'optim_wrapper.optimizer.lr': { 'type': 'continuous', 'lower': lower_lr, 'upper': upper_lr diff --git a/mmengine/tune/searchers.py b/mmengine/tune/searchers.py index 45155c1345..c5f3bf2681 100644 --- a/mmengine/tune/searchers.py +++ b/mmengine/tune/searchers.py @@ -4,7 +4,7 @@ from mmengine.registry import Registry try: - import nevergard as ng + import nevergrad as ng except ImportError: ng = None @@ -18,9 +18,9 @@ except ImportError: hp = None - HYPER_SEARCHERS = Registry('hyper parameter searcher') + class _Searcher: rules_supported = ['greater', 'less'] @@ -86,6 +86,7 @@ def __init__(self, super().__init__(rule, hparam_spec) assert ng is not None, 'nevergrad is not installed' self._optimizer = self._build_optimizer(solver_type, num_trials) + self._latest_candidate = None if self.rule == 'less': self._rule_op = 1.0 @@ -104,10 +105,13 @@ def _build_optimizer(self, solver_type: str, num_trials: int): return solver def suggest(self) -> Dict: - return self._optimizer.ask() + self._latest_candidate = self._optimizer.ask() + return self._latest_candidate.value def record(self, hparam: Dict, score: float): - 
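# A standalone sketch (independent of this patch) of the nevergrad ask/tell
# loop that NevergradSearcher wraps; requires `pip install nevergrad`.
# Nevergrad always minimizes, which is why the searcher multiplies the score
# by +1 for 'less' and -1 for 'greater' before calling tell().
import nevergrad as ng

param = ng.p.Dict(lr=ng.p.Scalar(lower=1e-6, upper=1e-2))
opt = ng.optimizers.NGOpt(parametrization=param, budget=8)

for _ in range(8):
    candidate = opt.ask()                        # candidate.value is a plain dict
    loss = (candidate.value['lr'] - 1e-3) ** 2   # stand-in objective
    opt.tell(candidate, loss)                    # the candidate object itself is passed back

print(opt.provide_recommendation().value)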
self._optimizer.tell(hparam, score * self._rule_op) + assert self._latest_candidate is not None, \ + 'suggest must be called before record' + self._optimizer.tell(self._latest_candidate, score * self._rule_op) @HYPER_SEARCHERS.register_module() diff --git a/mmengine/tune/tuner.py b/mmengine/tune/tuner.py index 3d067d1ada..543e1424fc 100644 --- a/mmengine/tune/tuner.py +++ b/mmengine/tune/tuner.py @@ -1,5 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. -import os import tempfile from typing import Dict, List, Sequence, Tuple, Union @@ -18,6 +17,7 @@ class Tuner: rules_supported = ['greater', 'less'] + rpc_worker_name = 'RPC_WORKER_{}' def __init__(self, runner_cfg: Union[Dict, Config, ConfigDict], @@ -28,7 +28,7 @@ def __init__(self, tuning_iter: int = 0, tunning_epoch: int = 0, report_op: str = 'latest', - searcher_type: str = 'nevergrad', + searcher_type: str = 'NevergradSearcher', rpc_port: int = 29501, **searcher_kwargs): assert is_available(), 'torch.distributed.rpc is not available.' @@ -53,6 +53,7 @@ def __init__(self, env_cfg = runner_cfg.get('env_cfg', {}) dist_cfg = env_cfg.get('dist_cfg', {}) init_dist(launcher, **dist_cfg) + self._init_rpc(rpc_port) self._rpc_port = rpc_port self._logger = MMLogger.get_instance('Tuner', log_level='INFO') self._logger.info( @@ -91,26 +92,9 @@ def reporting_op(self) -> str: def history(self) -> List[Tuple[Dict, float]]: return self._history - @property - def rpc_port(self) -> int: - return self._rpc_port - - def _init_rpc(self, rpc_port: int): - rpc_backend_options = TensorPipeRpcBackendOptions() - master_addr = os.environ.get('MASTER_ADDR' - 'localhost') - rpc_backend_options.init_method = f'tcp://{master_addr}:{rpc_port}' - rank = get_rank() - world_size = get_world_size() - rpc.init_rpc(f'worker{rank}', rank=rank, world_size=world_size) - - def _build_searcher(self, - searcher_type: str = 'nevergrad', - **kwargs) -> _Searcher: - self._logger.info(f'Building searcher of type: {searcher_type}') - build_config = dict(type=searcher_type, num_trials=self._num_trials) - build_config.update(kwargs) - return HYPER_SEARCHERS.build(build_config) + @staticmethod + def get_rpc_worker_name(rank) -> str: + return Tuner.rpc_worker_name.format(rank) @staticmethod def inject_config(cfg, key, value): @@ -118,23 +102,61 @@ def inject_config(cfg, key, value): suffix = '' for item in key[:-1]: if isinstance(cfg, Sequence) and not isinstance(cfg, str): - item = cfg[int(item)] + cfg = cfg[int(item)] else: - assert item in cfg, f'key {key} is not in cfg' - item = cfg[item] + assert item in cfg, f'key {item} is not in {cfg}' + cfg = cfg[item] suffix += f'{item}.' 
assert key[-1] in cfg, f'attribute {key[-1]} is not in cfg{suffix}' cfg[key[-1]] = value return - def _run_trial(self, runner_cfg, monitor, rule, tuning_iter, tunning_epoch, - report_op): + @staticmethod + def run_trial(runner_cfg, monitor, rule, tuning_iter, tunning_epoch, + report_op): runner = Runner.from_cfg(runner_cfg) report_hook = ReportingHook(monitor, rule, tuning_iter, tunning_epoch, report_op) runner.register_hook(report_hook, priority='VERY_LOW') runner.train() - return report_hook.get_score() + return report_hook.report_score() + + def _init_rpc(self, rpc_port: int): + rank = get_rank() + world_size = get_world_size() + rpc_init_method: str + if self._distributed: + rpc_init_method = 'env://' + else: + rpc_init_method = f'tcp://localhost:{rpc_port}' + rpc_backend_options = TensorPipeRpcBackendOptions( + init_method=rpc_init_method, + devices=[rank], + ) + + for other in range(world_size): + if other == rank: + continue + rpc_backend_options.set_device_map( + Tuner.get_rpc_worker_name(other), {rank: other}) + + rpc.init_rpc( + Tuner.get_rpc_worker_name(rank), + rank=rank, + world_size=world_size, + rpc_backend_options=rpc_backend_options) + + def _build_searcher(self, + searcher_type: str = 'nevergrad', + **kwargs) -> _Searcher: + self._logger.info(f'Building searcher of type: {searcher_type}') + build_config = dict( + type=searcher_type, + rule=self.rule, + hparam_spec=self.hparam_spec, + num_trials=self._num_trials) + build_config.update(kwargs) + return HYPER_SEARCHERS.build(build_config) def _get_score_from_futures(self, futs) -> float: try: @@ -143,7 +165,6 @@ def _get_score_from_futures(self, futs) -> float: return float('-inf') if self._rule == 'greater' else float('inf') def _submit(self): - self._init_rpc(self._rpc_port) if is_main_process(): hparam = self._searcher.suggest() @@ -155,8 +176,8 @@ def _submit(self): futs = [] for rank in range(get_world_size()): fut = rpc.rpc_async( - f'worker{rank}', - self._run_trial, + Tuner.get_rpc_worker_name(rank), + Tuner.run_trial, args=(self._runner_cfg, self._monitor, self._rule, self._tuning_iter, self._tuning_epoch, self._reporting_op)) @@ -166,10 +187,10 @@ def _submit(self): self._searcher.record(hparam, score) temp_dir.cleanup() else: + hparam = None score = None - broadcast_object_list([score], src=0) + broadcast_object_list([hparam, score], src=0) self._history.append((hparam, score)) - rpc.shutdown() def tune(self) -> Dict[str, Union[dict, float]]: self._logger.info(f'Starting tuning for {self._num_trials} trials...') From 6846aba5c6092ce4e31c2ba5576707c5d1643725 Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Tue, 22 Aug 2023 14:43:54 +0900 Subject: [PATCH 12/41] Fix env for rpc --- mmengine/tune/tuner.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/mmengine/tune/tuner.py b/mmengine/tune/tuner.py index 543e1424fc..5f952f30df 100644 --- a/mmengine/tune/tuner.py +++ b/mmengine/tune/tuner.py @@ -1,4 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. 
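# A minimal sketch of the dotted-key injection performed by
# Tuner.inject_config above: each segment before the last walks one level
# into the nested config (by index for sequences, by key for mappings), and
# the final segment is overwritten in place.
cfg = dict(optim_wrapper=dict(optimizer=dict(type='SGD', lr=0.1)))
Tuner.inject_config(cfg, 'optim_wrapper.optimizer.lr', 1e-3)
assert cfg['optim_wrapper']['optimizer']['lr'] == 1e-3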
+import os import tempfile from typing import Dict, List, Sequence, Tuple, Union @@ -17,7 +18,7 @@ class Tuner: rules_supported = ['greater', 'less'] - rpc_worker_name = 'RPC_WORKER_{}' + rpc_worker_name = 'RPC_WORKER{}' def __init__(self, runner_cfg: Union[Dict, Config, ConfigDict], @@ -114,6 +115,7 @@ def inject_config(cfg, key, value): @staticmethod def run_trial(runner_cfg, monitor, rule, tuning_iter, tunning_epoch, report_op): + os.environ['LOCAL_RANK'] = '0' runner = Runner.from_cfg(runner_cfg) report_hook = ReportingHook(monitor, rule, tuning_iter, tunning_epoch, report_op) @@ -131,15 +133,9 @@ def _init_rpc(self, rpc_port: int): rpc_init_method = f'tcp://localhost:{rpc_port}' rpc_backend_options = TensorPipeRpcBackendOptions( init_method=rpc_init_method, - devices=[rank], + devices=[int(os.environ.get('LOCAL_RANK', rank))], ) - for other in range(world_size): - if other == rank: - continue - rpc_backend_options.set_device_map( - Tuner.get_rpc_worker_name(other), {rank: other}) - rpc.init_rpc( Tuner.get_rpc_worker_name(rank), rank=rank, From a320ee1cf2ab1134b686c0697f588c1a56df99e8 Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Wed, 23 Aug 2023 11:11:36 +0900 Subject: [PATCH 13/41] fix rpc device map --- mmengine/tune/tuner.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/mmengine/tune/tuner.py b/mmengine/tune/tuner.py index 5f952f30df..6903e11f5c 100644 --- a/mmengine/tune/tuner.py +++ b/mmengine/tune/tuner.py @@ -1,5 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. -import os import tempfile from typing import Dict, List, Sequence, Tuple, Union @@ -61,6 +60,9 @@ def __init__(self, f'Tuner initialized with rule: {rule} and monitor: {monitor}') self._searcher = self._build_searcher(searcher_type, **searcher_kwargs) + def __del__(self): + rpc.shutdown() + @property def hparam_spec(self) -> Dict[str, Dict]: return self._hparam_spec @@ -115,7 +117,6 @@ def inject_config(cfg, key, value): @staticmethod def run_trial(runner_cfg, monitor, rule, tuning_iter, tunning_epoch, report_op): - os.environ['LOCAL_RANK'] = '0' runner = Runner.from_cfg(runner_cfg) report_hook = ReportingHook(monitor, rule, tuning_iter, tunning_epoch, report_op) @@ -132,9 +133,12 @@ def _init_rpc(self, rpc_port: int): else: rpc_init_method = f'tcp://localhost:{rpc_port}' rpc_backend_options = TensorPipeRpcBackendOptions( - init_method=rpc_init_method, - devices=[int(os.environ.get('LOCAL_RANK', rank))], - ) + init_method=rpc_init_method, ) + for other in range(world_size): + if other == rank: + continue + rpc_backend_options.set_device_map( + Tuner.get_rpc_worker_name(other), {rank: other}) rpc.init_rpc( Tuner.get_rpc_worker_name(rank), From ccb8f078bb46ac187ec04476e2c590cbe933ed75 Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Wed, 23 Aug 2023 13:44:09 +0900 Subject: [PATCH 14/41] Del rpc --- examples/tune/find_lr.py | 63 ++++++++++++--- mmengine/tune/__init__.py | 3 +- mmengine/tune/api.py | 55 ------------- mmengine/tune/searchers.py | 156 ++----------------------------------- mmengine/tune/tuner.py | 114 ++++++++------------------- 5 files changed, 93 insertions(+), 298 deletions(-) delete mode 100644 mmengine/tune/api.py diff --git a/examples/tune/find_lr.py b/examples/tune/find_lr.py index 56a4bf72a5..5efb1e6e59 100644 --- a/examples/tune/find_lr.py +++ b/examples/tune/find_lr.py @@ -1,14 +1,16 @@ import argparse import tempfile +from typing import Dict, Optional, Union import 
torch import torch.nn as nn from torch.utils.data import Dataset +from mmengine.config import Config, ConfigDict from mmengine.evaluator import BaseMetric from mmengine.model import BaseModel from mmengine.registry import DATASETS, METRICS, MODELS -from mmengine.tune import find_optimial_lr +from mmengine.tune import Tuner class ToyModel(BaseModel): @@ -38,15 +40,17 @@ def forward(self, inputs, data_samples=None, mode='tensor'): class ToyDataset(Dataset): METAINFO = dict() # type: ignore - data = torch.randn(12, 2) - label = torch.ones(12) + num_samples = 1000 + data = torch.rand(num_samples, 2) * 10 # Random numbers between 0 and 10 + # Let's assume y = 3*x1 + 4*x2 + noise + label = 3 * data[:, 0] + 4 * data[:, 1] + torch.randn(num_samples) * 0.1 @property def metainfo(self): return self.METAINFO def __len__(self): - return self.data.size(0) + return len(self.data) def __getitem__(self, index): return dict(inputs=self.data[index], data_samples=self.label[index]) @@ -54,16 +58,18 @@ def __getitem__(self, index): class ToyMetric(BaseMetric): - def __init__(self, collect_device='cpu', dummy_metrics=None): + def __init__(self, collect_device='cpu'): super().__init__(collect_device=collect_device) - self.dummy_metrics = dummy_metrics + self.results = [] def process(self, data_batch, predictions): - result = {'acc': 1} - self.results.append(result) + true_values = data_batch['data_samples'] + squared_error = (true_values - predictions.squeeze())**2 + self.results.extend(squared_error.tolist()) - def compute_metrics(self, results): - return dict(acc=1) + def compute_metrics(self, results=None): + mse = torch.tensor(self.results).mean().item() + return dict(MSE=mse) def parse_args(): @@ -79,6 +85,39 @@ def parse_args(): return args +def find_optimial_lr(runner_cfg: Union[Dict, Config, ConfigDict], + monitor: str = 'loss', + rule: str = 'less', + num_trials: int = 32, + lower_lr: Optional[float] = 1e-6, + upper_lr: Optional[float] = 1e-2, + tuning_iter: int = 0, + tunning_epoch: int = 0, + report_op: str = 'latest', + searcher_type: str = 'NevergradSearcher', + **searcher_kwargs) -> Dict[str, Union[dict, float]]: + hparam_spec = { + 'optim_wrapper.optimizer.lr': { + 'type': 'continuous', + 'lower': lower_lr, + 'upper': upper_lr + } + } + + tuner = Tuner( + runner_cfg, + hparam_spec=hparam_spec, + monitor=monitor, + rule=rule, + num_trials=num_trials, + tuning_iter=tuning_iter, + tunning_epoch=tunning_epoch, + report_op=report_op, + searcher_type=searcher_type, + **searcher_kwargs) + return tuner.tune() + + def main(): args = parse_args() @@ -109,7 +148,7 @@ def main(): num_workers=0), test_evaluator=[dict(type='ToyMetric')], optim_wrapper=dict(optimizer=dict(type='SGD', lr=0.1)), - train_cfg=dict(by_epoch=True, max_epochs=2, val_interval=1), + train_cfg=dict(by_epoch=True, max_epochs=10, val_interval=1), val_cfg=dict(), test_cfg=dict(), launcher=args.launcher, @@ -121,7 +160,7 @@ def main(): result = find_optimial_lr( runner_cfg=runner_cfg, num_trials=32, - tunning_epoch=1, + tunning_epoch=3, ) print('best_lr: ', result.get('hparam')) print('lowest_loss: ', result.get('score')) diff --git a/mmengine/tune/__init__.py b/mmengine/tune/__init__.py index b38c038c0d..3d921e9ebc 100644 --- a/mmengine/tune/__init__.py +++ b/mmengine/tune/__init__.py @@ -1,6 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from .api import find_optimial_lr from .searchers import * # noqa F403 from .tuner import Tuner -__all__ = ['Tuner', 'find_optimial_lr'] +__all__ = ['Tuner'] diff --git a/mmengine/tune/api.py b/mmengine/tune/api.py deleted file mode 100644 index 10abdd1c2b..0000000000 --- a/mmengine/tune/api.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from typing import Dict, List, Optional, Union - -from mmengine.config import Config, ConfigDict -from .tuner import Tuner - - -def find_optimial_lr(runner_cfg: Union[Dict, Config, ConfigDict], - monitor: str = 'loss', - rule: str = 'less', - num_trials: int = 32, - lower_lr: Optional[float] = 1e-6, - upper_lr: Optional[float] = 1e-2, - lr_choices: Optional[List[float]] = None, - tuning_iter: int = 0, - tunning_epoch: int = 0, - report_op: str = 'latest', - searcher_type: str = 'NevergradSearcher', - **searcher_kwargs) -> Dict[str, Union[dict, float]]: - is_discrete = lr_choices is not None - if is_discrete: - assert lower_lr is None and upper_lr is None, \ - 'lower_lr and upper_lr should be None if lr_choices is not None' - else: - assert lower_lr is not None and upper_lr is not None, \ - 'lower_lr and upper_lr should set if lr_choices is None' - hparam_spec: dict - if is_discrete: - hparam_spec = { - 'optim_wrapper.optimizer.lr': { - 'type': 'discrete', - 'values': lr_choices - } - } - else: - hparam_spec = { - 'optim_wrapper.optimizer.lr': { - 'type': 'continuous', - 'lower': lower_lr, - 'upper': upper_lr - } - } - - tuner = Tuner( - runner_cfg, - hparam_spec=hparam_spec, - monitor=monitor, - rule=rule, - num_trials=num_trials, - tuning_iter=tuning_iter, - tunning_epoch=tunning_epoch, - report_op=report_op, - searcher_type=searcher_type, - **searcher_kwargs) - return tuner.tune() diff --git a/mmengine/tune/searchers.py b/mmengine/tune/searchers.py index c5f3bf2681..9e5cf63be9 100644 --- a/mmengine/tune/searchers.py +++ b/mmengine/tune/searchers.py @@ -1,5 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. 
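# With the api module removed, the public entry point is the Tuner class
# itself; the example script above re-creates find_optimial_lr on top of it.
# A minimal sketch, assuming a prepared `runner_cfg`; the hparam_spec key uses
# the dotted path understood by Tuner.inject_config.
from mmengine.tune import Tuner

tuner = Tuner(
    runner_cfg,
    hparam_spec={
        'optim_wrapper.optimizer.lr':
        dict(type='continuous', lower=1e-5, upper=1e-3)
    },
    monitor='loss',
    rule='less',
    num_trials=16,
    tunning_epoch=1)
result = tuner.tune()  # -> dict(hparam=..., score=...)
print(result['hparam'], result['score'])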
-from typing import Dict, Optional +from typing import Dict from mmengine.registry import Registry @@ -8,16 +8,6 @@ except ImportError: ng = None -try: - import skopt -except ImportError: - skopt = None - -try: - import hyperopt as hp -except ImportError: - hp = None - HYPER_SEARCHERS = Registry('hyper parameter searcher') @@ -86,7 +76,7 @@ def __init__(self, super().__init__(rule, hparam_spec) assert ng is not None, 'nevergrad is not installed' self._optimizer = self._build_optimizer(solver_type, num_trials) - self._latest_candidate = None + self._records = dict() # type: ignore if self.rule == 'less': self._rule_op = 1.0 @@ -105,141 +95,11 @@ def _build_optimizer(self, solver_type: str, num_trials: int): return solver def suggest(self) -> Dict: - self._latest_candidate = self._optimizer.ask() - return self._latest_candidate.value - - def record(self, hparam: Dict, score: float): - assert self._latest_candidate is not None, \ - 'suggest must be called before record' - self._optimizer.tell(self._latest_candidate, score * self._rule_op) - - -@HYPER_SEARCHERS.register_module() -class SkoptSearcher(_Searcher): - - def __init__(self, - rule: str, - hparam_spec: Dict[str, Dict], - base_estimator: str = 'gp', - n_initial_points: int = 10, - initial_point_generator: str = 'random', - acq_func: str = 'gp_hedge', - acq_optimizer: str = 'auto', - *args, - **kwargs): - super().__init__(rule, hparam_spec) - - # Ensure that skopt is installed - assert skopt is not None, 'Scikit-Optimize (skopt) is not installed' - - self._optimizer = self._build_optimizer(base_estimator, - n_initial_points, - initial_point_generator, - acq_func, acq_optimizer) - if self.rule == 'less': - self._rule_op = 1.0 - else: - self._rule_op = -1.0 - - def _build_optimizer(self, base_estimator: str, n_initial_points: int, - initial_point_generator: str, acq_func: str, - acq_optimizer: str): - space = [] - for k, v in self.hparam_spec.items(): - if v['type'] == 'continuous': - space.append(skopt.space.Real(v['lower'], v['upper'], name=k)) - elif v['type'] == 'discrete': - space.append(skopt.space.Categorical(v['values'], name=k)) - - return skopt.Optimizer( - dimensions=space, - base_estimator=base_estimator, - n_initial_points=n_initial_points, - initial_point_generator=initial_point_generator, - acq_func=acq_func, - acq_optimizer=acq_optimizer) - - def suggest(self) -> Dict: - x = self._optimizer.ask() - return { - dim.name: val - for dim, val in zip(self._optimizer.space.dimensions, x) - } - - def record(self, hparam: Dict, score: float): - ordered_values = [ - hparam[dim.name] for dim in self._optimizer.space.dimensions - ] - self._optimizer.tell(ordered_values, score * self._rule_op) - - -@HYPER_SEARCHERS.register_module() -class HyperoptSearcher(_Searcher): - - def __init__(self, - rule: str, - hparam_spec: Dict[str, Dict], - num_trials: int, - n_initial_points: int = 20, - random_state_seed: Optional[int] = None, - gamma: float = 0.25, - *args, - **kwargs): - super().__init__(rule, hparam_spec) - - # Ensure that hyperopt is installed - assert hp is not None, 'hyperopt is not installed' - - self._space = self._build_space() - self._trials = hp.Trials() - self._num_trials = num_trials - self._n_initial_points = n_initial_points - self._random_state_seed = random_state_seed - self._gamma = gamma - - if self.rule == 'less': - self._rule_op = 1.0 - else: - self._rule_op = -1.0 - - def _build_space(self): - space = {} - for k, v in self.hparam_spec.items(): - if v['type'] == 'continuous': - space[k] = hp.hp.uniform(k, v['lower'], 
v['upper']) - elif v['type'] == 'discrete': - space[k] = hp.hp.choice(k, v['values']) - return space - - def suggest(self) -> Dict: - suggested_params = hp.fless( - fn=lambda x: - 0, # Dummy objective, we'll replace it with `record` later - space=self._space, - algo=hp.partial(hp.tpe.suggest, gamma=self._gamma), - greater_evals=self._n_initial_points + len(self._trials.trials), - trials=self._trials, - rstate=hp.pyll.stochastic.RandomState( - self._random_state_seed), # Seeded random state - return_argless=True, - verbose=0) # Not verbose - return suggested_params + hparam = self._optimizer.ask() + self._records[hparam.value] = hparam + return hparam.value def record(self, hparam: Dict, score: float): - self._trials.insert_trial_docs([{ - 'tid': len(self._trials.trials), - 'book_time': hp.utils.coarse_utcnow(), - 'misc': { - 'tid': len(self._trials.trials), - 'cmd': ('domain_attachment', 'FlessIter_Domain'), - 'vals': hparam, - 'idxs': {k: [len(self._trials.trials)] - for k in hparam} - }, - 'state': 2, # 2 is the state for "ok" in hyperopt - 'result': { - 'loss': score * self._rule_op, - 'status': 'ok' - } - }]) - self._trials.refresh() + assert hparam in self._records, \ + f'hparam {hparam} is not in the record' + self._optimizer.tell(self._records[hparam], score * self._rule_op) diff --git a/mmengine/tune/tuner.py b/mmengine/tune/tuner.py index 6903e11f5c..52c36b88c2 100644 --- a/mmengine/tune/tuner.py +++ b/mmengine/tune/tuner.py @@ -2,13 +2,9 @@ import tempfile from typing import Dict, List, Sequence, Tuple, Union -import torch -import torch.distributed.rpc as rpc -from torch.distributed.rpc import TensorPipeRpcBackendOptions, is_available - from mmengine.config import Config, ConfigDict -from mmengine.dist import (broadcast_object_list, get_rank, get_world_size, - init_dist, is_distributed, is_main_process) +from mmengine.dist import (broadcast_object_list, init_dist, is_distributed, + is_main_process) from mmengine.logging import MMLogger from mmengine.runner import Runner from ._report_hook import ReportingHook @@ -17,7 +13,6 @@ class Tuner: rules_supported = ['greater', 'less'] - rpc_worker_name = 'RPC_WORKER{}' def __init__(self, runner_cfg: Union[Dict, Config, ConfigDict], @@ -29,9 +24,7 @@ def __init__(self, tunning_epoch: int = 0, report_op: str = 'latest', searcher_type: str = 'NevergradSearcher', - rpc_port: int = 29501, **searcher_kwargs): - assert is_available(), 'torch.distributed.rpc is not available.' 
self._runner_cfg = runner_cfg.copy() self._hparam_spec = hparam_spec @@ -53,16 +46,11 @@ def __init__(self, env_cfg = runner_cfg.get('env_cfg', {}) dist_cfg = env_cfg.get('dist_cfg', {}) init_dist(launcher, **dist_cfg) - self._init_rpc(rpc_port) - self._rpc_port = rpc_port self._logger = MMLogger.get_instance('Tuner', log_level='INFO') self._logger.info( f'Tuner initialized with rule: {rule} and monitor: {monitor}') self._searcher = self._build_searcher(searcher_type, **searcher_kwargs) - def __del__(self): - rpc.shutdown() - @property def hparam_spec(self) -> Dict[str, Dict]: return self._hparam_spec @@ -95,10 +83,6 @@ def reporting_op(self) -> str: def history(self) -> List[Tuple[Dict, float]]: return self._history - @staticmethod - def get_rpc_worker_name(rank) -> str: - return Tuner.rpc_worker_name.format(rank) - @staticmethod def inject_config(cfg, key, value): key = key.split('.') @@ -114,38 +98,6 @@ def inject_config(cfg, key, value): cfg[key[-1]] = value return - @staticmethod - def run_trial(runner_cfg, monitor, rule, tuning_iter, tunning_epoch, - report_op): - runner = Runner.from_cfg(runner_cfg) - report_hook = ReportingHook(monitor, rule, tuning_iter, tunning_epoch, - report_op) - runner.register_hook(report_hook, priority='VERY_LOW') - runner.train() - return report_hook.report_score() - - def _init_rpc(self, rpc_port: int): - rank = get_rank() - world_size = get_world_size() - rpc_init_method: str - if self._distributed: - rpc_init_method = 'env://' - else: - rpc_init_method = f'tcp://localhost:{rpc_port}' - rpc_backend_options = TensorPipeRpcBackendOptions( - init_method=rpc_init_method, ) - for other in range(world_size): - if other == rank: - continue - rpc_backend_options.set_device_map( - Tuner.get_rpc_worker_name(other), {rank: other}) - - rpc.init_rpc( - Tuner.get_rpc_worker_name(rank), - rank=rank, - world_size=world_size, - rpc_backend_options=rpc_backend_options) - def _build_searcher(self, searcher_type: str = 'nevergrad', **kwargs) -> _Searcher: @@ -158,44 +110,41 @@ def _build_searcher(self, build_config.update(kwargs) return HYPER_SEARCHERS.build(build_config) - def _get_score_from_futures(self, futs) -> float: - try: - return torch.futures.wait_all(futs)[0] - except Exception: - return float('-inf') if self._rule == 'greater' else float('inf') - - def _submit(self): - + def _run_trial(self) -> Tuple[Dict, float]: if is_main_process(): hparam = self._searcher.suggest() - for k, v in hparam.items(): - self.inject_config(self._runner_cfg, k, v) - temp_dir = tempfile.TemporaryDirectory() - self._runner_cfg['work_dir'] = temp_dir.name - - futs = [] - for rank in range(get_world_size()): - fut = rpc.rpc_async( - Tuner.get_rpc_worker_name(rank), - Tuner.run_trial, - args=(self._runner_cfg, self._monitor, self._rule, - self._tuning_iter, self._tuning_epoch, - self._reporting_op)) - futs.append(fut) - score = self._get_score_from_futures(futs) - self._logger.info(f'Trial completed with score: {score}') - self._searcher.record(hparam, score) - temp_dir.cleanup() else: hparam = None - score = None - broadcast_object_list([hparam, score], src=0) - self._history.append((hparam, score)) + broadcast_object_list([hparam], src=0) + for k, v in hparam.items(): + self.inject_config(self._runner_cfg, k, v) + temp_dir = tempfile.TemporaryDirectory() + self._runner_cfg['work_dir'] = temp_dir.name + runner = Runner.from_cfg(self._runner_cfg) + report_hook = ReportingHook(self._monitor, self._rule, + self._tuning_iter, self._tuning_epoch, + self._reporting_op) + 
runner.register_hook(report_hook, priority='VERY_LOW') + score: float + try: + runner.train() + score = report_hook.report_score() + except Exception: + score = float('inf') if self._rule == 'less' else -float('inf') + broadcast_object_list([score], src=0) + temp_dir.cleanup() + return hparam, score def tune(self) -> Dict[str, Union[dict, float]]: self._logger.info(f'Starting tuning for {self._num_trials} trials...') - for _ in range(self._num_trials): - self._submit() + for trail_idx in range(self._num_trials): + hparam, score = self._run_trial() + self._searcher.record(hparam, score) + self._history.append((hparam, score)) + self._logger.info( + f'Trial [{trail_idx + 1}/{self._num_trials}] finished.' + + f' Score obtained: {score}' + + f' Hyperparameters used: {hparam}') best_hparam: dict best_score: float @@ -207,3 +156,6 @@ def tune(self) -> Dict[str, Union[dict, float]]: self._logger.info(f'Best score obtained: {best_score}') self._logger.info('Tuning completed.') return dict(hparam=best_hparam, score=best_score) + + def clean_up(self): + self._history = [] From bae8605b9d9f26ccb26685d4e63ae004d81bc9a7 Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Wed, 23 Aug 2023 16:24:25 +0900 Subject: [PATCH 15/41] Fix examples --- examples/tune/find_lr.py | 20 +++++++++++--------- mmengine/tune/searchers.py | 15 ++++++++++++--- mmengine/tune/tuner.py | 26 +++++++++++++------------- 3 files changed, 36 insertions(+), 25 deletions(-) diff --git a/examples/tune/find_lr.py b/examples/tune/find_lr.py index 5efb1e6e59..e742f82cb3 100644 --- a/examples/tune/find_lr.py +++ b/examples/tune/find_lr.py @@ -17,8 +17,9 @@ class ToyModel(BaseModel): def __init__(self, data_preprocessor=None): super().__init__(data_preprocessor=data_preprocessor) - self.linear1 = nn.Linear(2, 2) - self.linear2 = nn.Linear(2, 1) + self.linear1 = nn.Linear(2, 32) + self.linear2 = nn.Linear(32, 64) + self.linear3 = nn.Linear(64, 1) def forward(self, inputs, data_samples=None, mode='tensor'): if isinstance(inputs, list): @@ -27,11 +28,12 @@ def forward(self, inputs, data_samples=None, mode='tensor'): data_samples = torch.stack(data_samples) outputs = self.linear1(inputs) outputs = self.linear2(outputs) + outputs = self.linear3(outputs) if mode == 'tensor': return outputs elif mode == 'loss': - loss = (data_samples - outputs).sum() + loss = ((data_samples - outputs)**2).mean() outputs = dict(loss=loss) return outputs elif mode == 'predict': @@ -64,12 +66,12 @@ def __init__(self, collect_device='cpu'): def process(self, data_batch, predictions): true_values = data_batch['data_samples'] - squared_error = (true_values - predictions.squeeze())**2 - self.results.extend(squared_error.tolist()) + sqe = [(t - p)**2 for t, p in zip(true_values, predictions)] + self.results.extend(sqe) def compute_metrics(self, results=None): mse = torch.tensor(self.results).mean().item() - return dict(MSE=mse) + return dict(mse=mse) def parse_args(): @@ -89,11 +91,11 @@ def find_optimial_lr(runner_cfg: Union[Dict, Config, ConfigDict], monitor: str = 'loss', rule: str = 'less', num_trials: int = 32, - lower_lr: Optional[float] = 1e-6, - upper_lr: Optional[float] = 1e-2, + lower_lr: Optional[float] = 1e-5, + upper_lr: Optional[float] = 1e-3, tuning_iter: int = 0, tunning_epoch: int = 0, - report_op: str = 'latest', + report_op: str = 'mean', searcher_type: str = 'NevergradSearcher', **searcher_kwargs) -> Dict[str, Union[dict, float]]: hparam_spec = { diff --git a/mmengine/tune/searchers.py 
b/mmengine/tune/searchers.py index 9e5cf63be9..ce827a0492 100644 --- a/mmengine/tune/searchers.py +++ b/mmengine/tune/searchers.py @@ -1,4 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. +import hashlib +import json from typing import Dict from mmengine.registry import Registry @@ -94,12 +96,19 @@ def _build_optimizer(self, solver_type: str, num_trials: int): parametrization=converted_hparam_spec, budget=num_trials) return solver + def _hash_dict(self, d: dict) -> str: + serialized_data = json.dumps(d, sort_keys=True).encode() + hashed = hashlib.md5(serialized_data).hexdigest() + return hashed + def suggest(self) -> Dict: hparam = self._optimizer.ask() - self._records[hparam.value] = hparam + hash_key = self._hash_dict(hparam.value) + self._records[hash_key] = hparam return hparam.value def record(self, hparam: Dict, score: float): - assert hparam in self._records, \ + hash_key = self._hash_dict(hparam) + assert hash_key in self._records, \ f'hparam {hparam} is not in the record' - self._optimizer.tell(self._records[hparam], score * self._rule_op) + self._optimizer.tell(self._records[hash_key], score * self._rule_op) diff --git a/mmengine/tune/tuner.py b/mmengine/tune/tuner.py index 52c36b88c2..8ba267e716 100644 --- a/mmengine/tune/tuner.py +++ b/mmengine/tune/tuner.py @@ -1,5 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. -import tempfile from typing import Dict, List, Sequence, Tuple, Union from mmengine.config import Config, ConfigDict @@ -46,7 +45,8 @@ def __init__(self, env_cfg = runner_cfg.get('env_cfg', {}) dist_cfg = env_cfg.get('dist_cfg', {}) init_dist(launcher, **dist_cfg) - self._logger = MMLogger.get_instance('Tuner', log_level='INFO') + self._logger = MMLogger.get_instance( + 'Tuner', log_level='INFO', distributed=self._distributed) self._logger.info( f'Tuner initialized with rule: {rule} and monitor: {monitor}') self._searcher = self._build_searcher(searcher_type, **searcher_kwargs) @@ -112,34 +112,34 @@ def _build_searcher(self, def _run_trial(self) -> Tuple[Dict, float]: if is_main_process(): - hparam = self._searcher.suggest() + hparams_to_broadcast = [self._searcher.suggest()] else: - hparam = None - broadcast_object_list([hparam], src=0) + hparams_to_broadcast = [None] + broadcast_object_list(hparams_to_broadcast, src=0) + hparam = hparams_to_broadcast[0] for k, v in hparam.items(): self.inject_config(self._runner_cfg, k, v) - temp_dir = tempfile.TemporaryDirectory() - self._runner_cfg['work_dir'] = temp_dir.name runner = Runner.from_cfg(self._runner_cfg) report_hook = ReportingHook(self._monitor, self._rule, self._tuning_iter, self._tuning_epoch, self._reporting_op) runner.register_hook(report_hook, priority='VERY_LOW') - score: float + default_score = float('inf') if self._rule == 'less' else -float('inf') try: runner.train() - score = report_hook.report_score() + scores_to_broadcast = [report_hook.report_score()] except Exception: - score = float('inf') if self._rule == 'less' else -float('inf') - broadcast_object_list([score], src=0) - temp_dir.cleanup() + scores_to_broadcast = [default_score] + broadcast_object_list(scores_to_broadcast, src=0) + score = scores_to_broadcast[0] + if is_main_process(): + self._searcher.record(hparam, score) return hparam, score def tune(self) -> Dict[str, Union[dict, float]]: self._logger.info(f'Starting tuning for {self._num_trials} trials...') for trail_idx in range(self._num_trials): hparam, score = self._run_trial() - self._searcher.record(hparam, score) self._history.append((hparam, score)) self._logger.info( 
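# A minimal sketch of the dict-hashing trick introduced above in
# NevergradSearcher._hash_dict: serializing with sorted keys makes the hash
# insensitive to key order, so a suggested hparam dict can later be mapped
# back to the nevergrad candidate it came from when record() is called.
import hashlib
import json


def hash_dict(d: dict) -> str:
    serialized = json.dumps(d, sort_keys=True).encode()
    return hashlib.md5(serialized).hexdigest()


a = {'optim_wrapper.optimizer.lr': 1e-3, 'momentum': 0.9}
b = {'momentum': 0.9, 'optim_wrapper.optimizer.lr': 1e-3}
assert hash_dict(a) == hash_dict(b)  # key order does not matter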
f'Trial [{trail_idx + 1}/{self._num_trials}] finished.' + From 18fd768eb5934a5fe988ccbe65df48ab2f85c266 Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Wed, 23 Aug 2023 16:57:30 +0900 Subject: [PATCH 16/41] Fix minor --- mmengine/tune/_report_hook.py | 67 ++++++++++++++--------------------- mmengine/tune/searchers.py | 2 +- mmengine/tune/tuner.py | 13 ++++--- 3 files changed, 35 insertions(+), 47 deletions(-) diff --git a/mmengine/tune/_report_hook.py b/mmengine/tune/_report_hook.py index 48609fe1ff..f9fd48955c 100644 --- a/mmengine/tune/_report_hook.py +++ b/mmengine/tune/_report_hook.py @@ -1,5 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. -import math + from typing import Dict, List, Optional, Sequence, Union from mmengine.hooks import Hook @@ -9,28 +9,21 @@ class ReportingHook(Hook): - max_scoreboard_len = 1024 - rules_supported = ['greater', 'less'] - def __init__(self, monitor: str, - rule: str, - tuning_iter: int = 0, - tunning_epoch: int = 0, - report_op: str = 'latest'): - assert rule in self.rules_supported, f'rule {rule} is not supported' - self.rule = rule - assert (tuning_iter == 0 and tunning_epoch > 0) or ( - tunning_epoch == 0 and tuning_iter > 0 - ), 'tuning_iter and tuning_epoch should be set only one' + tuning_iter: Optional[int] = None, + tuning_epoch: Optional[int] = None, + report_op: str = 'latest', + max_scoreboard_len: int = 1024): + self.has_limit = tuning_iter is not None or tuning_epoch is not None assert report_op in ['latest', 'mean'], f'report_op {report_op} is not supported' self.report_op = report_op self.tuning_iter = tuning_iter - self.tuning_epoch = tunning_epoch - self.enabled_by_epoch = self.tuning_epoch != 0 + self.tuning_epoch = tuning_epoch self.monitor = monitor + self.max_scoreboard_len = max_scoreboard_len self.scoreboard: List[float] = [] def _append_score(self, score): @@ -38,6 +31,14 @@ def _append_score(self, score): if len(self.scoreboard) > self.max_scoreboard_len: self.scoreboard.pop(0) + def _mark_stop(self, runner): + if self.tuning_iter is not None: + if runner.iter > self.tuning_iter: + runner.train_loop.stop_training = True + if self.tuning_epoch is not None: + if runner.epoch > self.tuning_epoch: + runner.train_loop.stop_training = True + def after_train_iter(self, runner, batch_idx: int, @@ -50,16 +51,13 @@ def after_train_iter(self, score = tag.get(self.monitor, None) if score is not None: self._append_score(score) - if self.enabled_by_epoch: - return - if runner.iter + 1 == self.tuning_iter: - runner.train_loop.stop_training = True + + if self.has_limit: + self._mark_stop(runner) def after_train_epoch(self, runner) -> None: - if not self.enabled_by_epoch: - return - if runner.epoch + 1 == self.tuning_epoch: - runner.train_loop.stop_training = True + if self.has_limit: + self._mark_stop(runner) def after_val_epoch(self, runner, @@ -70,26 +68,13 @@ def after_val_epoch(self, if score is not None: self._append_score(score) - def report_score(self): - - if self.report_op == 'latest': + def report_score(self) -> Optional[float]: + if not self.scoreboard: + score = None + elif self.report_op == 'latest': score = self.scoreboard[-1] - if math.isnan(score) or math.isinf(score): - if self.rule == 'greater': - score = float('-inf') - else: - score = float('inf') - - elif self.report_op == 'mean': - if any(math.isnan(s) or math.isinf(s) for s in self.scoreboard): - if self.rule == 'greater': - score = float('-inf') - else: - score = float('inf') - else: - score = sum(self.scoreboard) / 
len(self.scoreboard) else: - raise NotImplementedError + score = sum(self.scoreboard) / len(self.scoreboard) return score def clear_scoreboard(self): diff --git a/mmengine/tune/searchers.py b/mmengine/tune/searchers.py index ce827a0492..a7e3d2d7c9 100644 --- a/mmengine/tune/searchers.py +++ b/mmengine/tune/searchers.py @@ -8,7 +8,7 @@ try: import nevergrad as ng except ImportError: - ng = None + ng = None # type: ignore HYPER_SEARCHERS = Registry('hyper parameter searcher') diff --git a/mmengine/tune/tuner.py b/mmengine/tune/tuner.py index 8ba267e716..cc9197ee2c 100644 --- a/mmengine/tune/tuner.py +++ b/mmengine/tune/tuner.py @@ -1,4 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. +import math from typing import Dict, List, Sequence, Tuple, Union from mmengine.config import Config, ConfigDict @@ -114,20 +115,22 @@ def _run_trial(self) -> Tuple[Dict, float]: if is_main_process(): hparams_to_broadcast = [self._searcher.suggest()] else: - hparams_to_broadcast = [None] + hparams_to_broadcast = [None] # type: ignore broadcast_object_list(hparams_to_broadcast, src=0) hparam = hparams_to_broadcast[0] for k, v in hparam.items(): self.inject_config(self._runner_cfg, k, v) runner = Runner.from_cfg(self._runner_cfg) - report_hook = ReportingHook(self._monitor, self._rule, - self._tuning_iter, self._tuning_epoch, - self._reporting_op) + report_hook = ReportingHook(self._monitor, self._tuning_iter, + self._tuning_epoch, self._reporting_op) runner.register_hook(report_hook, priority='VERY_LOW') default_score = float('inf') if self._rule == 'less' else -float('inf') try: runner.train() - scores_to_broadcast = [report_hook.report_score()] + score = report_hook.report_score() + if score is None or math.isnan(score) or math.isinf(score): + score = default_score + scores_to_broadcast = [score] except Exception: scores_to_broadcast = [default_score] broadcast_object_list(scores_to_broadcast, src=0) From 71b4b2a97ebffb2e44a06669a3de6eef24f9a373 Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Wed, 23 Aug 2023 17:38:51 +0900 Subject: [PATCH 17/41] Fix typo --- examples/tune/find_lr.py | 12 ++++++------ mmengine/tune/tuner.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/tune/find_lr.py b/examples/tune/find_lr.py index e742f82cb3..83869ee8a5 100644 --- a/examples/tune/find_lr.py +++ b/examples/tune/find_lr.py @@ -43,8 +43,7 @@ def forward(self, inputs, data_samples=None, mode='tensor'): class ToyDataset(Dataset): METAINFO = dict() # type: ignore num_samples = 1000 - data = torch.rand(num_samples, 2) * 10 # Random numbers between 0 and 10 - # Let's assume y = 3*x1 + 4*x2 + noise + data = torch.rand(num_samples, 2) * 10 label = 3 * data[:, 0] + 4 * data[:, 1] + torch.randn(num_samples) * 0.1 @property @@ -94,8 +93,8 @@ def find_optimial_lr(runner_cfg: Union[Dict, Config, ConfigDict], lower_lr: Optional[float] = 1e-5, upper_lr: Optional[float] = 1e-3, tuning_iter: int = 0, - tunning_epoch: int = 0, - report_op: str = 'mean', + tuning_epoch: int = 0, + report_op: str = 'latest', searcher_type: str = 'NevergradSearcher', **searcher_kwargs) -> Dict[str, Union[dict, float]]: hparam_spec = { @@ -113,7 +112,7 @@ def find_optimial_lr(runner_cfg: Union[Dict, Config, ConfigDict], rule=rule, num_trials=num_trials, tuning_iter=tuning_iter, - tunning_epoch=tunning_epoch, + tuning_epoch=tuning_epoch, report_op=report_op, searcher_type=searcher_type, **searcher_kwargs) @@ -162,10 +161,11 @@ def main(): result = find_optimial_lr( 
runner_cfg=runner_cfg, num_trials=32, - tunning_epoch=3, + tuning_epoch=3, ) print('best_lr: ', result.get('hparam')) print('lowest_loss: ', result.get('score')) + temp_dir.cleanup() if __name__ == '__main__': diff --git a/mmengine/tune/tuner.py b/mmengine/tune/tuner.py index cc9197ee2c..53b637a67e 100644 --- a/mmengine/tune/tuner.py +++ b/mmengine/tune/tuner.py @@ -21,7 +21,7 @@ def __init__(self, rule: str, num_trials: int, tuning_iter: int = 0, - tunning_epoch: int = 0, + tuning_epoch: int = 0, report_op: str = 'latest', searcher_type: str = 'NevergradSearcher', **searcher_kwargs): @@ -36,7 +36,7 @@ def __init__(self, self._num_trials = num_trials self._tuning_iter = tuning_iter - self._tuning_epoch = tunning_epoch + self._tuning_epoch = tuning_epoch self._reporting_op = report_op self._history: List[Tuple[Dict, float]] = [] From fecfacb0c0323cab6716e605d5bd01a9f6802197 Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Mon, 28 Aug 2023 11:21:01 +0900 Subject: [PATCH 18/41] Split seachers --- examples/tune/find_lr.py | 26 ++++---- mmengine/tune/searchers/__init__.py | 5 ++ .../{searchers.py => searchers/nevergrad.py} | 61 +---------------- mmengine/tune/searchers/searcher.py | 66 +++++++++++++++++++ mmengine/tune/tuner.py | 27 ++++---- 5 files changed, 99 insertions(+), 86 deletions(-) create mode 100644 mmengine/tune/searchers/__init__.py rename mmengine/tune/{searchers.py => searchers/nevergrad.py} (50%) create mode 100644 mmengine/tune/searchers/searcher.py diff --git a/examples/tune/find_lr.py b/examples/tune/find_lr.py index 83869ee8a5..3386d017a9 100644 --- a/examples/tune/find_lr.py +++ b/examples/tune/find_lr.py @@ -86,17 +86,18 @@ def parse_args(): return args -def find_optimial_lr(runner_cfg: Union[Dict, Config, ConfigDict], - monitor: str = 'loss', - rule: str = 'less', - num_trials: int = 32, - lower_lr: Optional[float] = 1e-5, - upper_lr: Optional[float] = 1e-3, - tuning_iter: int = 0, - tuning_epoch: int = 0, - report_op: str = 'latest', - searcher_type: str = 'NevergradSearcher', - **searcher_kwargs) -> Dict[str, Union[dict, float]]: +def find_optimial_lr( + runner_cfg: Union[Dict, Config, ConfigDict], + monitor: str = 'loss', + rule: str = 'less', + num_trials: int = 32, + lower_lr: Optional[float] = 1e-5, + upper_lr: Optional[float] = 1e-3, + tuning_iter: int = 0, + tuning_epoch: int = 0, + report_op: str = 'latest', + searcher_cfg: Dict = dict(type='NevergradSearcher'), +) -> Dict[str, Union[dict, float]]: hparam_spec = { 'optim_wrapper.optimizer.lr': { 'type': 'continuous', @@ -114,8 +115,7 @@ def find_optimial_lr(runner_cfg: Union[Dict, Config, ConfigDict], tuning_iter=tuning_iter, tuning_epoch=tuning_epoch, report_op=report_op, - searcher_type=searcher_type, - **searcher_kwargs) + searcher_cfg=searcher_cfg) return tuner.tune() diff --git a/mmengine/tune/searchers/__init__.py b/mmengine/tune/searchers/__init__.py new file mode 100644 index 0000000000..0fd0dd024b --- /dev/null +++ b/mmengine/tune/searchers/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
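+# The former single-module ``searchers.py`` is split here: ``searcher.py``
+# holds the base class and registry, while concrete backends such as
+# ``nevergrad.py`` live in their own modules.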
+from .searcher import Searcher, HYPER_SEARCHERS +from .nevergrad import NevergradSearcher + +__all__ = ['Searcher', 'HYPER_SEARCHERS', 'NevergradSearcher'] \ No newline at end of file diff --git a/mmengine/tune/searchers.py b/mmengine/tune/searchers/nevergrad.py similarity index 50% rename from mmengine/tune/searchers.py rename to mmengine/tune/searchers/nevergrad.py index a7e3d2d7c9..1d35761a34 100644 --- a/mmengine/tune/searchers.py +++ b/mmengine/tune/searchers/nevergrad.py @@ -2,71 +2,16 @@ import hashlib import json from typing import Dict - -from mmengine.registry import Registry +from .searcher import Searcher, HYPER_SEARCHERS try: import nevergrad as ng except ImportError: ng = None # type: ignore -HYPER_SEARCHERS = Registry('hyper parameter searcher') - - -class _Searcher: - - rules_supported = ['greater', 'less'] - - def __init__(self, rule: str, hparam_spec: Dict[str, Dict]): - assert rule in self.rules_supported, \ - f"rule must be 'less' or 'greater', but got {rule}" - self._rule = rule - self._validate_hparam_spec(hparam_spec) - self._hparam_spec = hparam_spec - - def _validate_hparam_spec(self, hparam_spec): - for _, v in hparam_spec.items(): - assert v.get('type', None) in [ - 'discrete', 'continuous' - ], \ - 'hparam_spec must have a key "type" and ' \ - f'its value must be "discrete" or "continuous", but got {v}' - if v['type'] == 'discrete': - assert 'values' in v, \ - 'if hparam_spec["type"] is "discrete", ' +\ - f'hparam_spec must have a key "values", but got {v}' - else: - assert 'lower' in v and 'upper' in v, \ - 'if hparam_spec["type"] is "continuous", ' +\ - 'hparam_spec must have keys "lower" and "upper", ' +\ - f'but got {v}' - - @property - def hparam_spec(self) -> Dict[str, Dict]: - return self._hparam_spec - - @property - def rule(self) -> str: - return self._rule - - def record(self, hparam: Dict, score: float): - """Record hparam and score to solver. - - Args: - hparam (Dict): The hparam to be updated - score (float): The score to be updated - """ - - def suggest(self) -> Dict: - """Suggest a new hparam based on solver's strategy. - - Returns: - Dict: suggested hparam - """ - @HYPER_SEARCHERS.register_module() -class NevergradSearcher(_Searcher): +class NevergradSearcher(Searcher): def __init__(self, rule: str, @@ -111,4 +56,4 @@ def record(self, hparam: Dict, score: float): hash_key = self._hash_dict(hparam) assert hash_key in self._records, \ f'hparam {hparam} is not in the record' - self._optimizer.tell(self._records[hash_key], score * self._rule_op) + self._optimizer.tell(self._records[hash_key], score * self._rule_op) \ No newline at end of file diff --git a/mmengine/tune/searchers/searcher.py b/mmengine/tune/searchers/searcher.py new file mode 100644 index 0000000000..9b66a37b44 --- /dev/null +++ b/mmengine/tune/searchers/searcher.py @@ -0,0 +1,66 @@ +# Copyright (c) OpenMMLab. All rights reserved. + +from typing import Dict + +from mmengine.registry import Registry + +HYPER_SEARCHERS = Registry('hyper parameter searcher') + + +class Searcher: + """Base hyper parameter searcher class. + + All hyper parameter searchers inherit from this class. 
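+
+    Subclasses implement ``suggest`` to propose a new set of hyperparameters
+    and ``record`` to feed the resulting score back to the underlying solver.
+    A minimal illustrative subclass (a sketch only, assuming a purely
+    discrete ``hparam_spec`` and no registration in ``HYPER_SEARCHERS``)
+    could look like::
+
+        import random
+
+        class RandomSearcher(Searcher):
+
+            def suggest(self):
+                # Pick one of the allowed values for every hyperparameter.
+                return {k: random.choice(v['values'])
+                        for k, v in self.hparam_spec.items()}
+
+            def record(self, hparam, score):
+                pass  # Random search keeps no state.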
+ """ + + rules_supported = ['greater', 'less'] + + def __init__(self, rule: str, hparam_spec: Dict[str, Dict]): + assert rule in self.rules_supported, \ + f"rule must be 'less' or 'greater', but got {rule}" + self._rule = rule + self._validate_hparam_spec(hparam_spec) + self._hparam_spec = hparam_spec + + def _validate_hparam_spec(self, hparam_spec): + for _, v in hparam_spec.items(): + assert v.get('type', None) in [ + 'discrete', 'continuous' + ], \ + 'hparam_spec must have a key "type" and ' \ + f'its value must be "discrete" or "continuous", but got {v}' + if v['type'] == 'discrete': + assert 'values' in v, \ + 'if hparam_spec["type"] is "discrete", ' +\ + f'hparam_spec must have a key "values", but got {v}' + else: + assert 'lower' in v and 'upper' in v, \ + 'if hparam_spec["type"] is "continuous", ' +\ + 'hparam_spec must have keys "lower" and "upper", ' +\ + f'but got {v}' + + @property + def hparam_spec(self) -> Dict[str, Dict]: + return self._hparam_spec + + @property + def rule(self) -> str: + return self._rule + + def record(self, hparam: Dict, score: float): + """Record hparam and score to solver. + + Args: + hparam (Dict): The hparam to be updated + score (float): The score to be updated + """ + + def suggest(self) -> Dict: + """Suggest a new hparam based on solver's strategy. + + Returns: + Dict: suggested hparam + """ + + + diff --git a/mmengine/tune/tuner.py b/mmengine/tune/tuner.py index 53b637a67e..579da38638 100644 --- a/mmengine/tune/tuner.py +++ b/mmengine/tune/tuner.py @@ -8,7 +8,7 @@ from mmengine.logging import MMLogger from mmengine.runner import Runner from ._report_hook import ReportingHook -from .searchers import HYPER_SEARCHERS, _Searcher +from .searchers import HYPER_SEARCHERS, Searcher class Tuner: @@ -23,8 +23,7 @@ def __init__(self, tuning_iter: int = 0, tuning_epoch: int = 0, report_op: str = 'latest', - searcher_type: str = 'NevergradSearcher', - **searcher_kwargs): + searcher_cfg: Dict = dict(type='NevergradSearcher')): self._runner_cfg = runner_cfg.copy() self._hparam_spec = hparam_spec @@ -50,7 +49,7 @@ def __init__(self, 'Tuner', log_level='INFO', distributed=self._distributed) self._logger.info( f'Tuner initialized with rule: {rule} and monitor: {monitor}') - self._searcher = self._build_searcher(searcher_type, **searcher_kwargs) + self._searcher = self._build_searcher(searcher_cfg) @property def hparam_spec(self) -> Dict[str, Dict]: @@ -99,17 +98,15 @@ def inject_config(cfg, key, value): cfg[key[-1]] = value return - def _build_searcher(self, - searcher_type: str = 'nevergrad', - **kwargs) -> _Searcher: - self._logger.info(f'Building searcher of type: {searcher_type}') - build_config = dict( - type=searcher_type, - rule=self.rule, - hparam_spec=self.hparam_spec, - num_trials=self._num_trials) - build_config.update(kwargs) - return HYPER_SEARCHERS.build(build_config) + def _build_searcher(self, searcher_cfg: Dict) -> Searcher: + searcher_cfg = searcher_cfg.copy() + self._logger.info(f'Building searcher of type: {searcher_cfg["type"]}') + searcher_cfg.update( + dict( + rule=self.rule, + hparam_spec=self.hparam_spec, + num_trials=self._num_trials)) + return HYPER_SEARCHERS.build(searcher_cfg) def _run_trial(self) -> Tuple[Dict, float]: if is_main_process(): From 010a3f159e0efea0173a912799e4524a8d9eea6a Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Mon, 28 Aug 2023 15:16:45 +0900 Subject: [PATCH 19/41] Comment the tuner --- mmengine/tune/searchers/__init__.py | 4 +- mmengine/tune/searchers/nevergrad.py | 
5 +- mmengine/tune/searchers/searcher.py | 5 +- mmengine/tune/tuner.py | 152 ++++++++++++++++++++++++--- 4 files changed, 146 insertions(+), 20 deletions(-) diff --git a/mmengine/tune/searchers/__init__.py b/mmengine/tune/searchers/__init__.py index 0fd0dd024b..654dabd43c 100644 --- a/mmengine/tune/searchers/__init__.py +++ b/mmengine/tune/searchers/__init__.py @@ -1,5 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. -from .searcher import Searcher, HYPER_SEARCHERS from .nevergrad import NevergradSearcher +from .searcher import HYPER_SEARCHERS, Searcher -__all__ = ['Searcher', 'HYPER_SEARCHERS', 'NevergradSearcher'] \ No newline at end of file +__all__ = ['Searcher', 'HYPER_SEARCHERS', 'NevergradSearcher'] diff --git a/mmengine/tune/searchers/nevergrad.py b/mmengine/tune/searchers/nevergrad.py index 1d35761a34..023dc0988e 100644 --- a/mmengine/tune/searchers/nevergrad.py +++ b/mmengine/tune/searchers/nevergrad.py @@ -2,7 +2,8 @@ import hashlib import json from typing import Dict -from .searcher import Searcher, HYPER_SEARCHERS + +from .searcher import HYPER_SEARCHERS, Searcher try: import nevergrad as ng @@ -56,4 +57,4 @@ def record(self, hparam: Dict, score: float): hash_key = self._hash_dict(hparam) assert hash_key in self._records, \ f'hparam {hparam} is not in the record' - self._optimizer.tell(self._records[hash_key], score * self._rule_op) \ No newline at end of file + self._optimizer.tell(self._records[hash_key], score * self._rule_op) diff --git a/mmengine/tune/searchers/searcher.py b/mmengine/tune/searchers/searcher.py index 9b66a37b44..2cd5a3f5b3 100644 --- a/mmengine/tune/searchers/searcher.py +++ b/mmengine/tune/searchers/searcher.py @@ -9,7 +9,7 @@ class Searcher: """Base hyper parameter searcher class. - + All hyper parameter searchers inherit from this class. """ @@ -61,6 +61,3 @@ def suggest(self) -> Dict: Returns: Dict: suggested hparam """ - - - diff --git a/mmengine/tune/tuner.py b/mmengine/tune/tuner.py index 579da38638..007762e967 100644 --- a/mmengine/tune/tuner.py +++ b/mmengine/tune/tuner.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. import math -from typing import Dict, List, Sequence, Tuple, Union +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union from mmengine.config import Config, ConfigDict from mmengine.dist import (broadcast_object_list, init_dist, is_distributed, @@ -12,6 +12,71 @@ class Tuner: + """A helper for hyperparameter tuning. + + By specifying a hyperparameter search space and a monitor metric, + this `Tuner` will automatically identify the optimal hyperparameters + for the runner. + + The `Tuner` employs black-box optimization techniques, ensuring + a systematic search for the best hyperparameters within a defined space. + The tuning process iteratively: + + 1. Searches for hyperparameters considering + the outcomes of previous trials. + 2. Constructs and trains the runner using the given hyperparameters. + 3. Assesses the performance of the trained runner's model + and logs it in the searcher. + + Upon the conclusion of all trials, the hyperparameters that yielded + the peak performance are returned. + + Args: + runner_cfg (Union[Dict, Config, ConfigDict]): + Configuration for the runner. + hparam_spec (Dict[str, Dict]): + The hyperparameter search space definition. + monitor (str): The metric to be monitored during the tuning process. + rule (str): The criterion used to determine the best hyperparameters. + Only 'greater' and 'less' are currently supported. 
+        num_trials (int): Total number of trials to execute.
+        tuning_iter (Optional[int]): The maximum iterations for each trial.
+            If specified, tuning stops after reaching this limit.
+            Default is None, indicating no specific iteration limit.
+        tuning_epoch (Optional[int]): The maximum epochs for each trial.
+            If specified, tuning stops after reaching this number of epochs.
+            Default is None, indicating no epoch limit.
+        report_op (str):
+            Operation mode for metric reporting. Default is 'latest'.
+        searcher_cfg (Dict): Configuration for the searcher.
+            Default is `dict(type='NevergradSearcher')`.
+
+    Note:
+        The black-box optimization depends on external packages,
+        such as `nevergrad`. Ensure the necessary packages are installed
+        before using.
+
+    Example:
+        >>> from mmengine.tune import Tuner
+        >>> runner_cfg = {"...": "..."}
+        >>> hparam_spec = {
+        >>>     'optim_wrapper.optimizer.lr': {
+        >>>         'type': 'continuous',
+        >>>         'lower': 1e-5,
+        >>>         'upper': 1e-3
+        >>>     }
+        >>> }
+        >>> tuner = Tuner(
+        >>>     runner_cfg,
+        >>>     hparam_spec=hparam_spec,
+        >>>     monitor='loss',
+        >>>     rule='less',
+        >>>     num_trials=32,
+        >>> )
+        >>> result = tuner.tune()
+        >>> print(result['hparam'])
+        >>> print(result['score'])
+    """
     rules_supported = ['greater', 'less']
     def __init__(self,
@@ -20,8 +85,8 @@ def __init__(self,
                  monitor: str,
                  rule: str,
                  num_trials: int,
-                 tuning_iter: int = 0,
-                 tuning_epoch: int = 0,
+                 tuning_iter: Optional[int] = None,
+                 tuning_epoch: Optional[int] = None,
                  report_op: str = 'latest',
                  searcher_cfg: Dict = dict(type='NevergradSearcher')):
@@ -39,66 +104,108 @@ def __init__(self,
         self._reporting_op = report_op
         self._history: List[Tuple[Dict, float]] = []
+        # Initialize distributed environment if necessary
+        # This adjustment ensures consistent hyperparameter searching and
+        # performance recording across all processes.
         launcher = self._runner_cfg.get('launcher', 'none')
         self._distributed = launcher != 'none'
         if self._distributed and not is_distributed():
             env_cfg = runner_cfg.get('env_cfg', {})
             dist_cfg = env_cfg.get('dist_cfg', {})
             init_dist(launcher, **dist_cfg)
+
+        # Build logger to record tuning process
         self._logger = MMLogger.get_instance(
             'Tuner', log_level='INFO', distributed=self._distributed)
         self._logger.info(
             f'Tuner initialized with rule: {rule} and monitor: {monitor}')
+
+        # Build searcher to search for optimal hyperparameters
         self._searcher = self._build_searcher(searcher_cfg)
     @property
     def hparam_spec(self) -> Dict[str, Dict]:
+        """Dict[str, Dict]: The hyperparameter search space definition."""
         return self._hparam_spec
     @property
     def monitor(self) -> str:
+        """str: The metric to be monitored during the tuning process."""
         return self._monitor
     @property
     def rule(self) -> str:
+        """str: The criterion used to determine the best hyperparameters."""
         return self._rule
     @property
     def num_trials(self) -> int:
+        """int: Total number of trials to execute."""
         return self._num_trials
     @property
-    def tuning_iter(self) -> int:
+    def tuning_iter(self) -> Optional[int]:
+        """Optional[int]: The maximum iterations for each trial.
+        If specified, tuning stops after reaching this limit.
+        """
         return self._tuning_iter
     @property
-    def tuning_epoch(self) -> int:
+    def tuning_epoch(self) -> Optional[int]:
+        """Optional[int]: The maximum epochs for each trial.
+        If specified, tuning stops after reaching this number of epochs.
+        """
         return self._tuning_epoch
     @property
     def reporting_op(self) -> str:
+        """str: Operation mode for metric reporting.
Default is 'latest'.""" return self._reporting_op @property def history(self) -> List[Tuple[Dict, float]]: + """List[Tuple[Dict, float]]: The history of hyperparameters and + scores.""" return self._history @staticmethod - def inject_config(cfg, key, value): - key = key.split('.') + def inject_config(cfg: Dict, key: str, value: Any): + """Inject a value into a config. + + The name can be multi-level, like 'optimizer.lr'. + + Args: + cfg (Dict): The config to be injected. + key (str): The key of the value to be injected. + value (Any): The value to be injected. + """ + splitted_key = key.split('.') suffix = '' - for item in key[:-1]: + for item in splitted_key[:-1]: if isinstance(cfg, Sequence) and not isinstance(cfg, str): cfg = cfg[int(item)] else: assert item in cfg, f'key {item} is not in {cfg}' cfg = cfg[item] suffix += f'{item}.' - assert key[-1] in cfg, f'attribute {key[-1]} is not in cfg{suffix}' - cfg[key[-1]] = value + assert splitted_key[ + -1] in cfg, f'attribute {splitted_key[-1]} is not in cfg{suffix}' + cfg[splitted_key[-1]] = value return def _build_searcher(self, searcher_cfg: Dict) -> Searcher: + """Build searcher from searcher_cfg. + + An Example of ``searcher_cfg``:: + + searcher_cfg = dict( + type='NevergradSearcher', + solver_type='CMA' + ) + + Args: + searcher_cfg (Dict): The searcher config. + """ searcher_cfg = searcher_cfg.copy() self._logger.info(f'Building searcher of type: {searcher_cfg["type"]}') searcher_cfg.update( @@ -109,12 +216,20 @@ def _build_searcher(self, searcher_cfg: Dict) -> Searcher: return HYPER_SEARCHERS.build(searcher_cfg) def _run_trial(self) -> Tuple[Dict, float]: + """Retrieve hyperparameters from searcher and run a trial.""" + + # Retrieve hyperparameters from searcher. + # The hyperparameters are broadcasted to all processes. + # To avoid integrity, the searching process is only executed + # on the main process. if is_main_process(): hparams_to_broadcast = [self._searcher.suggest()] else: hparams_to_broadcast = [None] # type: ignore broadcast_object_list(hparams_to_broadcast, src=0) hparam = hparams_to_broadcast[0] + + # Inject hyperparameters into runner config. for k, v in hparam.items(): self.inject_config(self._runner_cfg, k, v) runner = Runner.from_cfg(self._runner_cfg) @@ -122,6 +237,10 @@ def _run_trial(self) -> Tuple[Dict, float]: self._tuning_epoch, self._reporting_op) runner.register_hook(report_hook, priority='VERY_LOW') default_score = float('inf') if self._rule == 'less' else -float('inf') + + # Run a trial. + # If an exception occurs during the trial, the score is set + # to default_score. try: runner.train() score = report_hook.report_score() @@ -130,6 +249,8 @@ def _run_trial(self) -> Tuple[Dict, float]: scores_to_broadcast = [score] except Exception: scores_to_broadcast = [default_score] + + # Store the score between processes. broadcast_object_list(scores_to_broadcast, src=0) score = scores_to_broadcast[0] if is_main_process(): @@ -137,6 +258,12 @@ def _run_trial(self) -> Tuple[Dict, float]: return hparam, score def tune(self) -> Dict[str, Union[dict, float]]: + """Launch tuning. + + Returns: + Dict[str, Union[dict, float]]: + The best hyperparameters and the corresponding score. 
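+            Trials that raise an exception contribute the worst possible
+            score for the configured rule, so they never become the
+            reported best.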
+ """ self._logger.info(f'Starting tuning for {self._num_trials} trials...') for trail_idx in range(self._num_trials): hparam, score = self._run_trial() @@ -157,5 +284,6 @@ def tune(self) -> Dict[str, Union[dict, float]]: self._logger.info('Tuning completed.') return dict(hparam=best_hparam, score=best_score) - def clean_up(self): - self._history = [] + def clear(self): + """Clear the history of hyperparameters and scores.""" + self._history.clear() From 69e62a7e83a4e6138fab5651a33414f12fb80d2c Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Mon, 28 Aug 2023 15:18:07 +0900 Subject: [PATCH 20/41] Rename solver of nevergrad --- mmengine/tune/searchers/nevergrad.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mmengine/tune/searchers/nevergrad.py b/mmengine/tune/searchers/nevergrad.py index 023dc0988e..b5ccfa746c 100644 --- a/mmengine/tune/searchers/nevergrad.py +++ b/mmengine/tune/searchers/nevergrad.py @@ -23,7 +23,7 @@ def __init__(self, **kwargs): super().__init__(rule, hparam_spec) assert ng is not None, 'nevergrad is not installed' - self._optimizer = self._build_optimizer(solver_type, num_trials) + self._solver = self._build_solver(solver_type, num_trials) self._records = dict() # type: ignore if self.rule == 'less': @@ -31,14 +31,14 @@ def __init__(self, else: self._rule_op = -1.0 - def _build_optimizer(self, solver_type: str, num_trials: int): + def _build_solver(self, solver_type: str, num_trials: int): converted_hparam_spec = ng.p.Dict( **{ k: ng.p.Scalar(lower=v['lower'], upper=v['upper']) if v['type'] == 'continuous' else ng.p.Choice(v['values']) for k, v in self.hparam_spec.items() }) - solver = ng.optimization.optimizerlib.registry[solver_type]( + solver = ng.optimization.solverlib.registry[solver_type]( parametrization=converted_hparam_spec, budget=num_trials) return solver @@ -48,7 +48,7 @@ def _hash_dict(self, d: dict) -> str: return hashed def suggest(self) -> Dict: - hparam = self._optimizer.ask() + hparam = self._solver.ask() hash_key = self._hash_dict(hparam.value) self._records[hash_key] = hparam return hparam.value @@ -57,4 +57,4 @@ def record(self, hparam: Dict, score: float): hash_key = self._hash_dict(hparam) assert hash_key in self._records, \ f'hparam {hparam} is not in the record' - self._optimizer.tell(self._records[hash_key], score * self._rule_op) + self._solver.tell(self._records[hash_key], score * self._rule_op) From 23d1f9767cb3833b881d1858a4ec7cc160e81297 Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Mon, 28 Aug 2023 16:18:10 +0900 Subject: [PATCH 21/41] Comment the report hook --- mmengine/tune/_report_hook.py | 82 +++++++++++++++++++++++----- mmengine/tune/searchers/nevergrad.py | 2 +- 2 files changed, 69 insertions(+), 15 deletions(-) diff --git a/mmengine/tune/_report_hook.py b/mmengine/tune/_report_hook.py index f9fd48955c..eff4ad4565 100644 --- a/mmengine/tune/_report_hook.py +++ b/mmengine/tune/_report_hook.py @@ -8,6 +8,23 @@ class ReportingHook(Hook): + """Auxiliary hook to report the score to tuner. + + If tuning limit is specified, this hook will mark the loop to stop. + + Args: + monitor (str): The monitored metric key to report. + tuning_iter (int, optional): The iteration limit to stop tuning. + Defaults to None. + tuning_epoch (int, optional): The epoch limit to stop tuning. + Defaults to None. + report_op (str, optional): The operation to report the score. + Options are 'latest', 'mean'. Defaults to 'latest'. 
+ max_scoreboard_len (int, optional): + The maximum length of the scoreboard. + """ + + report_op_supported = ['latest', 'mean'] def __init__(self, monitor: str, @@ -15,9 +32,11 @@ def __init__(self, tuning_epoch: Optional[int] = None, report_op: str = 'latest', max_scoreboard_len: int = 1024): - self.has_limit = tuning_iter is not None or tuning_epoch is not None - assert report_op in ['latest', - 'mean'], f'report_op {report_op} is not supported' + assert report_op in self.report_op_supported, \ + f'report_op {report_op} is not supported' + if tuning_iter is not None and tuning_epoch is not None: + raise ValueError( + 'tuning_iter and tuning_epoch cannot be set at the same time') self.report_op = report_op self.tuning_iter = tuning_iter self.tuning_epoch = tuning_epoch @@ -26,18 +45,26 @@ def __init__(self, self.max_scoreboard_len = max_scoreboard_len self.scoreboard: List[float] = [] - def _append_score(self, score): + def _append_score(self, score: float): + """Append the score to the scoreboard.""" self.scoreboard.append(score) if len(self.scoreboard) > self.max_scoreboard_len: self.scoreboard.pop(0) - def _mark_stop(self, runner): + def _should_stop(self, runner): + """Check if the training should be stopped. + + Args: + runner (Runner): The runner of the training process. + """ if self.tuning_iter is not None: if runner.iter > self.tuning_iter: - runner.train_loop.stop_training = True - if self.tuning_epoch is not None: + return True + elif self.tuning_epoch is not None: if runner.epoch > self.tuning_epoch: - runner.train_loop.stop_training = True + return True + else: + return False def after_train_iter(self, runner, @@ -45,6 +72,14 @@ def after_train_iter(self, data_batch: DATA_BATCH = None, outputs: Optional[Union[dict, Sequence]] = None, mode: str = 'train') -> None: + """Record the score after each iteration. + + Args: + runner (Runner): The runner of the training process. + batch_idx (int): The index of the current batch in the train loop. + data_batch (dict or tuple or list, optional): Data from dataloader. + outputs (dict, optional): Outputs from model. + """ tag, _ = runner.log_processor.get_log_after_iter( runner, batch_idx, 'train') @@ -52,16 +87,29 @@ def after_train_iter(self, if score is not None: self._append_score(score) - if self.has_limit: - self._mark_stop(runner) + if self._should_stop(runner): + runner.train_loop.stop_training = True def after_train_epoch(self, runner) -> None: - if self.has_limit: - self._mark_stop(runner) + """Record the score after each epoch. + + Args: + runner (Runner): The runner of the training process. + """ + if self._should_stop(runner): + runner.train_loop.stop_training = True def after_val_epoch(self, runner, metrics: Optional[Dict[str, float]] = None) -> None: + """Record the score after each validation epoch. + + Args: + runner (Runner): The runner of the validation process. + metrics (Dict[str, float], optional): Evaluation results of all + metrics on validation dataset. The keys are the names of the + metrics, and the values are corresponding results. + """ if metrics is None: return score = metrics.get(self.monitor, None) @@ -69,6 +117,11 @@ def after_val_epoch(self, self._append_score(score) def report_score(self) -> Optional[float]: + """Aggregate the scores in the scoreboard. + + Returns: + Optional[float]: The aggregated score. 
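+                Returns ``None`` if no score has been recorded yet.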
+ """ if not self.scoreboard: score = None elif self.report_op == 'latest': @@ -77,5 +130,6 @@ def report_score(self) -> Optional[float]: score = sum(self.scoreboard) / len(self.scoreboard) return score - def clear_scoreboard(self): - self.scoreboard = [] + def clear(self): + """Clear the scoreboard.""" + self.scoreboard.clear() diff --git a/mmengine/tune/searchers/nevergrad.py b/mmengine/tune/searchers/nevergrad.py index b5ccfa746c..fb6ecf38a8 100644 --- a/mmengine/tune/searchers/nevergrad.py +++ b/mmengine/tune/searchers/nevergrad.py @@ -38,7 +38,7 @@ def _build_solver(self, solver_type: str, num_trials: int): if v['type'] == 'continuous' else ng.p.Choice(v['values']) for k, v in self.hparam_spec.items() }) - solver = ng.optimization.solverlib.registry[solver_type]( + solver = ng.optimization.optimizerlib.registry[solver_type]( parametrization=converted_hparam_spec, budget=num_trials) return solver From 482a9e5f0ec6d24a67380d55491455dc49719b86 Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Mon, 28 Aug 2023 17:04:22 +0900 Subject: [PATCH 22/41] Comment the searchers --- mmengine/tune/searchers/nevergrad.py | 50 ++++++++++++++++++++++++---- mmengine/tune/searchers/searcher.py | 28 +++++++++++++++- 2 files changed, 71 insertions(+), 7 deletions(-) diff --git a/mmengine/tune/searchers/nevergrad.py b/mmengine/tune/searchers/nevergrad.py index fb6ecf38a8..bf13140599 100644 --- a/mmengine/tune/searchers/nevergrad.py +++ b/mmengine/tune/searchers/nevergrad.py @@ -13,6 +13,19 @@ @HYPER_SEARCHERS.register_module() class NevergradSearcher(Searcher): + """Support hyper parameter searchering with nevergrad. + + Note: + The detailed usage of nevergrad can be found at + https://facebookresearch.github.io/nevergrad/. + + Args: + rule (str): The rule to compare the score. + Options are 'greater', 'less'. + hparam_spec (Dict[str, Dict]): The hyper parameter specification. + num_trials (int): The number of trials. + solver_type (str): The type of solver. + """ def __init__(self, rule: str, @@ -32,6 +45,12 @@ def __init__(self, self._rule_op = -1.0 def _build_solver(self, solver_type: str, num_trials: int): + """Build the solver of nevergrad. + + Args: + solver_type (str): The type of solver. + num_trials (int): The number of trials. + """ converted_hparam_spec = ng.p.Dict( **{ k: ng.p.Scalar(lower=v['lower'], upper=v['upper']) @@ -43,18 +62,37 @@ def _build_solver(self, solver_type: str, num_trials: int): return solver def _hash_dict(self, d: dict) -> str: + """Hash the dict. + + Args: + d (dict): The dict to be hashed. + + Returns: + str: The hashed string. + """ serialized_data = json.dumps(d, sort_keys=True).encode() hashed = hashlib.md5(serialized_data).hexdigest() return hashed - def suggest(self) -> Dict: - hparam = self._solver.ask() - hash_key = self._hash_dict(hparam.value) - self._records[hash_key] = hparam - return hparam.value - def record(self, hparam: Dict, score: float): + """Record hparam and score to solver. + + Args: + hparam (Dict): The hparam to be updated + score (float): The score to be updated + """ hash_key = self._hash_dict(hparam) assert hash_key in self._records, \ f'hparam {hparam} is not in the record' self._solver.tell(self._records[hash_key], score * self._rule_op) + + def suggest(self) -> Dict: + """Suggest a new hparam based on solver's strategy. 
+ + Returns: + Dict: suggested hparam + """ + hparam = self._solver.ask() + hash_key = self._hash_dict(hparam.value) + self._records[hash_key] = hparam + return hparam.value diff --git a/mmengine/tune/searchers/searcher.py b/mmengine/tune/searchers/searcher.py index 2cd5a3f5b3..5aa4ef90f4 100644 --- a/mmengine/tune/searchers/searcher.py +++ b/mmengine/tune/searchers/searcher.py @@ -22,7 +22,31 @@ def __init__(self, rule: str, hparam_spec: Dict[str, Dict]): self._validate_hparam_spec(hparam_spec) self._hparam_spec = hparam_spec - def _validate_hparam_spec(self, hparam_spec): + def _validate_hparam_spec(self, hparam_spec: Dict[str, Dict]): + """Validate hparam_spec. + + An example of hparam_spec: + + 1. discrete: + hparam_spec = { + 'lr': { + 'type': 'discrete', + 'values': [0.01, 0.02, 0.03] + } + } + + 2. continuous: + hparam_spec = { + 'lr': { + 'type': 'continuous', + 'lower': 0.01, + 'upper': 0.1 + } + } + + Args: + hparam_spec (Dict[str, Dict]): The hyper parameter specification. + """ for _, v in hparam_spec.items(): assert v.get('type', None) in [ 'discrete', 'continuous' @@ -41,10 +65,12 @@ def _validate_hparam_spec(self, hparam_spec): @property def hparam_spec(self) -> Dict[str, Dict]: + """Dict: The hyper parameter specification.""" return self._hparam_spec @property def rule(self) -> str: + """str: The rule of the searcher, 'less' or 'greater'.""" return self._rule def record(self, hparam: Dict, score: float): From 04b46a3c6ebf0a1ea2331caf815b935dd4724db1 Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Mon, 28 Aug 2023 17:12:11 +0900 Subject: [PATCH 23/41] Add readme for tune --- examples/tune/README.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 examples/tune/README.md diff --git a/examples/tune/README.md b/examples/tune/README.md new file mode 100644 index 0000000000..e4275856e8 --- /dev/null +++ b/examples/tune/README.md @@ -0,0 +1,23 @@ +# Find the Optimal Learning Rate + +## Install external dependencies + +First, you should install ``nevergrad`` for tuning. + +```bash +pip install nevergrad +``` + +## Run the example + +Single device training + +```bash +python examples/tune/find_lr.py +``` + +Distributed data parallel tuning + +```bash +torchrun -nnodes 1 -nproc_per_node 8 examples/tune/find_lr.py --launcher pytorch +``` \ No newline at end of file From 3418ddc5c16c1369a0320c736a028d0da3d0465e Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Tue, 29 Aug 2023 09:04:34 +0900 Subject: [PATCH 24/41] Add error logging --- examples/tune/README.md | 4 ++-- mmengine/tune/tuner.py | 31 +++++++++++++++++++------------ 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/examples/tune/README.md b/examples/tune/README.md index e4275856e8..5df70f725a 100644 --- a/examples/tune/README.md +++ b/examples/tune/README.md @@ -2,7 +2,7 @@ ## Install external dependencies -First, you should install ``nevergrad`` for tuning. +First, you should install `nevergrad` for tuning. 
```bash pip install nevergrad @@ -20,4 +20,4 @@ Distributed data parallel tuning ```bash torchrun -nnodes 1 -nproc_per_node 8 examples/tune/find_lr.py --launcher pytorch -``` \ No newline at end of file +``` diff --git a/mmengine/tune/tuner.py b/mmengine/tune/tuner.py index 007762e967..09501623bb 100644 --- a/mmengine/tune/tuner.py +++ b/mmengine/tune/tuner.py @@ -215,13 +215,15 @@ def _build_searcher(self, searcher_cfg: Dict) -> Searcher: num_trials=self._num_trials)) return HYPER_SEARCHERS.build(searcher_cfg) - def _run_trial(self) -> Tuple[Dict, float]: + def _run_trial(self) -> Tuple[Dict, float, Optional[Exception]]: """Retrieve hyperparameters from searcher and run a trial.""" - # Retrieve hyperparameters from searcher. - # The hyperparameters are broadcasted to all processes. - # To avoid integrity, the searching process is only executed - # on the main process. + # Retrieve hyperparameters for the trial: + # 1. Only the main process executes the searcher to avoid any conflicts + # and ensure integrity. + # 2. Once retrieved, the hyperparameters are broadcasted to all other + # processes ensuring every process has the same set of + # hyperparameters for this trial. if is_main_process(): hparams_to_broadcast = [self._searcher.suggest()] else: @@ -241,21 +243,23 @@ def _run_trial(self) -> Tuple[Dict, float]: # Run a trial. # If an exception occurs during the trial, the score is set # to default_score. + error = None try: runner.train() score = report_hook.report_score() if score is None or math.isnan(score) or math.isinf(score): score = default_score scores_to_broadcast = [score] - except Exception: + except Exception as e: scores_to_broadcast = [default_score] + error = e # Store the score between processes. broadcast_object_list(scores_to_broadcast, src=0) score = scores_to_broadcast[0] if is_main_process(): self._searcher.record(hparam, score) - return hparam, score + return hparam, score, error def tune(self) -> Dict[str, Union[dict, float]]: """Launch tuning. @@ -266,12 +270,15 @@ def tune(self) -> Dict[str, Union[dict, float]]: """ self._logger.info(f'Starting tuning for {self._num_trials} trials...') for trail_idx in range(self._num_trials): - hparam, score = self._run_trial() + hparam, score, error = self._run_trial() + log_msg = f'Trial [{trail_idx + 1}/{self._num_trials}]' + if error is not None: + log_msg += f' failed. Error: {error}' + else: + log_msg += f' finished. Score obtained: {score}' + log_msg += f' Hyperparameters used: {hparam}' + self._logger.info(log_msg) self._history.append((hparam, score)) - self._logger.info( - f'Trial [{trail_idx + 1}/{self._num_trials}] finished.' 
+ - f' Score obtained: {score}' + - f' Hyperparameters used: {hparam}') best_hparam: dict best_score: float From 92ad439d2f8ea4d6b761e3d44b3c889c45c26eae Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Wed, 30 Aug 2023 10:36:43 +0900 Subject: [PATCH 25/41] Add unittest for tune --- mmengine/tune/_report_hook.py | 4 +- mmengine/tune/searchers/searcher.py | 16 +- tests/test_tune/test_report_hook.py | 96 ++++++++ .../test_searchers/test_nevergrad.py | 101 ++++++++ .../test_tune/test_searchers/test_searcher.py | 86 +++++++ tests/test_tune/test_tuner.py | 226 ++++++++++++++++++ 6 files changed, 521 insertions(+), 8 deletions(-) create mode 100644 tests/test_tune/test_report_hook.py create mode 100644 tests/test_tune/test_searchers/test_nevergrad.py create mode 100644 tests/test_tune/test_searchers/test_searcher.py create mode 100644 tests/test_tune/test_tuner.py diff --git a/mmengine/tune/_report_hook.py b/mmengine/tune/_report_hook.py index eff4ad4565..a58d9a2431 100644 --- a/mmengine/tune/_report_hook.py +++ b/mmengine/tune/_report_hook.py @@ -58,10 +58,10 @@ def _should_stop(self, runner): runner (Runner): The runner of the training process. """ if self.tuning_iter is not None: - if runner.iter > self.tuning_iter: + if runner.iter + 1 >= self.tuning_iter: return True elif self.tuning_epoch is not None: - if runner.epoch > self.tuning_epoch: + if runner.epoch + 1 >= self.tuning_epoch: return True else: return False diff --git a/mmengine/tune/searchers/searcher.py b/mmengine/tune/searchers/searcher.py index 5aa4ef90f4..024f8fca0a 100644 --- a/mmengine/tune/searchers/searcher.py +++ b/mmengine/tune/searchers/searcher.py @@ -54,14 +54,18 @@ def _validate_hparam_spec(self, hparam_spec: Dict[str, Dict]): 'hparam_spec must have a key "type" and ' \ f'its value must be "discrete" or "continuous", but got {v}' if v['type'] == 'discrete': - assert 'values' in v, \ - 'if hparam_spec["type"] is "discrete", ' +\ - f'hparam_spec must have a key "values", but got {v}' + assert 'values' in v and isinstance(v['values'], list) and \ + v['values'], 'Expected a non-empty "values" list for ' + \ + 'discrete type, but got {v}' else: assert 'lower' in v and 'upper' in v, \ - 'if hparam_spec["type"] is "continuous", ' +\ - 'hparam_spec must have keys "lower" and "upper", ' +\ - f'but got {v}' + 'Expected keys "lower" and "upper" for continuous ' + \ + f'type, but got {v}' + assert isinstance(v['lower'], (int, float)) and \ + isinstance(v['upper'], (int, float)), \ + f'Expected "lower" and "upper" to be numbers, but got {v}' + assert v['lower'] < v['upper'], \ + f'Expected "lower" to be less than "upper", but got {v}' @property def hparam_spec(self) -> Dict[str, Dict]: diff --git a/tests/test_tune/test_report_hook.py b/tests/test_tune/test_report_hook.py new file mode 100644 index 0000000000..a50fc68d06 --- /dev/null +++ b/tests/test_tune/test_report_hook.py @@ -0,0 +1,96 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
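+# Unit tests for ReportingHook: scoreboard bookkeeping, stop conditions,
+# score aggregation and integration with a real Runner.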
+from mmengine.testing import RunnerTestCase +from mmengine.tune._report_hook import ReportingHook +from unittest.mock import MagicMock + +class TestReportingHook(RunnerTestCase): + def test_append_score(self): + hook = ReportingHook(monitor='acc', max_scoreboard_len=3) + + # Adding scores to the scoreboard + hook._append_score(0.5) + hook._append_score(0.6) + hook._append_score(0.7) + self.assertEqual(hook.scoreboard, [0.5, 0.6, 0.7]) + + # When exceeding max length, it should pop the first item + hook._append_score(0.8) + self.assertEqual(hook.scoreboard, [0.6, 0.7, 0.8]) + + def test_should_stop(self): + runner = MagicMock(iter=3, epoch=1) + + # Test with tuning_iter + hook1 = ReportingHook(monitor='acc', tuning_iter=5) + self.assertFalse(hook1._should_stop(runner)) + runner.iter = 4 + self.assertTrue(hook1._should_stop(runner)) + + # Test with tuning_epoch + hook2 = ReportingHook(monitor='acc', tuning_epoch=3) + self.assertFalse(hook2._should_stop(runner)) + runner.epoch = 2 + self.assertTrue(hook2._should_stop(runner)) + + def test_report_score(self): + hook1 = ReportingHook(monitor='acc', report_op='latest') + hook1.scoreboard = [0.5, 0.6, 0.7] + self.assertEqual(hook1.report_score(), 0.7) + + hook2 = ReportingHook(monitor='acc', report_op='mean') + hook2.scoreboard = [0.5, 0.6, 0.7] + self.assertEqual(hook2.report_score(), 0.6) + + # Test with an empty scoreboard + hook3 = ReportingHook(monitor='acc', report_op='mean') + self.assertIsNone(hook3.report_score()) + + def test_clear(self): + hook = ReportingHook(monitor='acc') + hook.scoreboard = [0.5, 0.6, 0.7] + hook.clear() + self.assertEqual(hook.scoreboard, []) + + def test_after_train_iter(self): + runner = MagicMock(iter=3, epoch=1) + runner.log_processor.get_log_after_iter = MagicMock(return_value=({'acc': 0.9}, 'log_str')) + + # Check if the monitored score gets appended correctly + hook = ReportingHook(monitor='acc') + hook.after_train_iter(runner, 0) + self.assertEqual(hook.scoreboard[-1], 0.9) + + # Check if no score is appended for a non-existent metric + hook2 = ReportingHook(monitor='non_existent') + hook2.after_train_iter(runner, 0) + self.assertEqual(len(hook2.scoreboard), 0) + + # Check that training stops if tuning_iter is reached + runner.iter = 5 + hook3 = ReportingHook(monitor='acc', tuning_iter=5) + hook3.after_train_iter(runner, 0) + self.assertTrue(runner.train_loop.stop_training) + + def test_after_val_epoch(self): + runner = MagicMock(iter=3, epoch=1) + + # Check if the monitored score gets appended correctly from metrics + metrics = {'acc': 0.9} + hook = ReportingHook(monitor='acc') + hook.after_val_epoch(runner, metrics=metrics) + self.assertEqual(hook.scoreboard[-1], 0.9) + + # Check that no score is appended if the metric is missing from metrics + metrics = {'loss': 0.1} + hook2 = ReportingHook(monitor='acc') + hook2.after_val_epoch(runner, metrics=metrics) + self.assertEqual(len(hook2.scoreboard), 0) + + def test_with_runner(self): + runner = self.build_runner(self.epoch_based_cfg) + acc_hook = ReportingHook(monitor='test/acc', tuning_epoch=1) + runner.register_hook(acc_hook, priority='VERY_LOW') + runner.train() + self.assertEqual(runner.epoch, 1) + score = acc_hook.report_score() + self.assertAlmostEqual(score, 1) diff --git a/tests/test_tune/test_searchers/test_nevergrad.py b/tests/test_tune/test_searchers/test_nevergrad.py new file mode 100644 index 0000000000..3251d9c3d5 --- /dev/null +++ b/tests/test_tune/test_searchers/test_nevergrad.py @@ -0,0 +1,101 @@ +# Copyright (c) OpenMMLab. 
All rights reserved. +from unittest import TestCase, skipIf +import random +from typing import List + +from mmengine.tune.searchers import NevergradSearcher + +try: + import nevergrad + NEVERGRAD_AVAILABLE = True +except ImportError: + NEVERGRAD_AVAILABLE = False + +@skipIf(not NEVERGRAD_AVAILABLE, "nevergrad is not installed") +class TestNevergradSearcher(TestCase): + def noisy_sphere_function(self, x: List[float]): + """Sphere function with noise: f(x) = sum(x_i^2) + noise""" + noise = random.gauss(0, 0.1) # Gaussian noise with mean 0 and std 0.1 + return sum([x_i ** 2 for x_i in x.values()]) + noise + + def one_max_function(self, x: List[int]): + """OneMax function: f(x) = sum(x_i) for binary x_i""" + return sum(x) + + @property + def target_solver_types(self): + return [ + 'OnePlusOne', 'CMA', 'BO', 'DE', 'PSO', 'NGO' + ] + + def test_hash_dict(self): + searcher = NevergradSearcher(rule='less', hparam_spec={}, num_trials=100, solver_type='OnePlusOne') + + # Check different dicts yield different hashes + d1 = {"x": 1, "y": 2} + d2 = {"x": 1, "y": 3} + self.assertNotEqual(searcher._hash_dict(d1), searcher._hash_dict(d2)) + + # Check same dict yields same hash + self.assertEqual(searcher._hash_dict(d1), searcher._hash_dict(d1)) + + # Check order doesn't matter + d3 = {"y": 2, "x": 1} + self.assertEqual(searcher._hash_dict(d1), searcher._hash_dict(d3)) + + def test_noisy_sphere_function(self): + hparam_continuous_space = { + 'x1': { + 'type': 'continuous', + 'lower': -5.0, + 'upper': 5.0 + }, + 'x2': { + 'type': 'continuous', + 'lower': -5.0, + 'upper': 5.0 + } + } + for solver_type in self.target_solver_types: + searcher = NevergradSearcher(rule='less', hparam_spec=hparam_continuous_space, num_trials=100, solver_type=solver_type) + for _ in range(100): + hparam = searcher.suggest() + score = self.noisy_sphere_function([v for _,v in hparam.items()]) + searcher.record(hparam, score) + # For the noisy sphere function, the optimal should be close to x1=0 and x2=0 + best_hparam = searcher.suggest() + self.assertAlmostEqual(best_hparam['x1'], 0.0, places=1) + self.assertAlmostEqual(best_hparam['x2'], 0.0, places=1) + + def test_one_max_function(self): + # Define the discrete search space for OneMax + hparam_discrete_space = { + 'x1': { + 'type': 'discrete', + 'values': [0, 1] + }, + 'x2': { + 'type': 'discrete', + 'values': [0, 1] + }, + 'x3': { + 'type': 'discrete', + 'values': [0, 1] + }, + 'x4': { + 'type': 'discrete', + 'values': [0, 1] + } + } + for solver_type in self.target_solver_types: + searcher = NevergradSearcher(rule='greater', hparam_spec=hparam_discrete_space, num_trials=100, solver_type=solver_type) + for _ in range(100): + hparam = searcher.suggest() + score = self.one_max_function([v for _,v in hparam.items()]) + searcher.record(hparam, score) + # For the OneMax function, the optimal solution is x1=x2=x3=x4=1 + best_hparam = searcher.suggest() + self.assertEqual(best_hparam['x1'], 1) + self.assertEqual(best_hparam['x2'], 1) + self.assertEqual(best_hparam['x3'], 1) + self.assertEqual(best_hparam['x4'], 1) \ No newline at end of file diff --git a/tests/test_tune/test_searchers/test_searcher.py b/tests/test_tune/test_searchers/test_searcher.py new file mode 100644 index 0000000000..ca814cb4ed --- /dev/null +++ b/tests/test_tune/test_searchers/test_searcher.py @@ -0,0 +1,86 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from unittest import TestCase + +from mmengine.tune.searchers import Searcher + +class TestSearcher(TestCase): + + def test_rule(self): + valid_hparam_spec_1 = { + 'lr': { + 'type': 'discrete', + 'values': [0.01, 0.02, 0.03] + } + } + # Invalid cases + with self.assertRaises(AssertionError): + Searcher(rule='invalid_rule', hparam_spec=valid_hparam_spec_1) + Searcher(rule='greater', hparam_spec=valid_hparam_spec_1) + Searcher(rule='less', hparam_spec=valid_hparam_spec_1) + + def test_validate_hparam_spec(self): + # Unknown hparam spec type + invalid_hparam_spec_1 = { + 'lr': { + 'type': 'unknown_type', + 'values': [0.01, 0.02, 0.03] + } + } + with self.assertRaises(AssertionError): + Searcher(rule='greater', hparam_spec=invalid_hparam_spec_1) + + # Missing keys in continuous hparam_spec + invalid_hparam_spec_2 = { + 'lr': { + 'type': 'continuous', + 'lower': 0.01 + } + } + with self.assertRaises(AssertionError): + Searcher(rule='less', hparam_spec=invalid_hparam_spec_2) + + # Invalid discrete hparam_spec + invalid_hparam_spec_3 = { + 'lr': { + 'type': 'discrete', + 'values': [] # Empty list + } + } + with self.assertRaises(AssertionError): + Searcher(rule='greater', hparam_spec=invalid_hparam_spec_3) + + # Invalid continuous hparam_spec + invalid_hparam_spec_4 = { + 'lr': { + 'type': 'continuous', + 'lower': 0.1, + 'upper': 0.01 # lower is greater than upper + } + } + with self.assertRaises(AssertionError): + Searcher(rule='less', hparam_spec=invalid_hparam_spec_4) + + # Invalid data type in continuous hparam_spec + invalid_hparam_spec_5 = { + 'lr': { + 'type': 'continuous', + 'lower': '0.01', # String instead of number + 'upper': 0.1 + } + } + with self.assertRaises(AssertionError): + Searcher(rule='less', hparam_spec=invalid_hparam_spec_5) + + def test_hparam_spec_property(self): + hparam_spec = { + 'lr': { + 'type': 'discrete', + 'values': [0.01, 0.02, 0.03] + } + } + searcher = Searcher(rule='greater', hparam_spec=hparam_spec) + self.assertEqual(searcher.hparam_spec, hparam_spec) + + def test_rule_property(self): + searcher = Searcher(rule='greater', hparam_spec={}) + self.assertEqual(searcher.rule, 'greater') \ No newline at end of file diff --git a/tests/test_tune/test_tuner.py b/tests/test_tune/test_tuner.py new file mode 100644 index 0000000000..4b5eb04397 --- /dev/null +++ b/tests/test_tune/test_tuner.py @@ -0,0 +1,226 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
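+# Unit tests for Tuner: config injection, trial execution with mocked
+# training, and the end-to-end tune loop driven by a ToySearcher stub.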
+from typing import Dict +from mmengine.testing import RunnerTestCase +from mmengine.tune import Tuner +from mmengine.tune.searchers import Searcher, HYPER_SEARCHERS +from unittest import mock + +import random + +class ToySearcher(Searcher): + def suggest(self) -> Dict: + hparam = dict() + for k, v in self.hparam_spec.items(): + if v['type'] == 'discrete': + hparam[k] = random.choice(v['values']) + else: + hparam[k] = random.uniform(v['lower'], v['upper']) + return hparam + +class TestTuner(RunnerTestCase): + def setUp(self) -> None: + super().setUp() + HYPER_SEARCHERS.register_module(ToySearcher) + self.hparam_spec = { + 'optim_wrapper.optimizer.lr': { + 'type': 'discrete', + 'values': [0.1, 0.2, 0.3] + } + } + + def tearDown(self): + super().tearDown() + HYPER_SEARCHERS.module_dict.pop('ToySearcher', None) + + def test_init(self): + with self.assertRaises(ValueError): + Tuner( + runner_cfg=dict(), + hparam_spec=dict(), + monitor='loss', + rule='invalid_rule', + searcher_cfg=dict(type='ToySearcher')) + + # Initializing with correct parameters + tuner = Tuner( + runner_cfg=self.epoch_based_cfg, + hparam_spec=self.hparam_spec, + monitor='loss', + rule='less', + num_trials=2, + searcher_cfg=dict(type='ToySearcher') + ) + + # Verify the properties + self.assertEqual(tuner.epoch_based_cfg, self.epoch_based_cfg) + self.assertEqual(tuner.hparam_spec, self.hparam_spec) + self.assertEqual(tuner.monitor, 'loss') + self.assertEqual(tuner.rule, 'less') + self.assertEqual(tuner.num_trials, 2) + + # Ensure a searcher of type ToySearcher is used + self.assertIsInstance(tuner.searcher, ToySearcher) + + + def mock_is_main_process(self, return_value=True): + return mock.patch('mmengine.dist.is_main_process', return_value=return_value) + + def mock_broadcast(self, side_effect=None): + return mock.patch('mmengine.dist.broadcast_object_list', side_effect=side_effect) + + def test_inject_config(self): + # Inject into a single level + cfg = {'a': 1} + Tuner.inject_config(cfg, 'a', 2) + self.assertEqual(cfg['a'], 2) + + # Inject into a nested level + cfg = { + 'level1': { + 'level2': { + 'level3': 3 + } + } + } + Tuner.inject_config(cfg, 'level1.level2.level3', 4) + self.assertEqual(cfg['level1']['level2']['level3'], 4) + + # Inject into a non-existent key + cfg = {} + with self.assertRaises(AssertionError): + Tuner.inject_config(cfg, 'a', 1) + + # Inject into a sequence + cfg = { + 'sequence': [1, 2, 3] + } + Tuner.inject_config(cfg, 'sequence.1', 5) + self.assertEqual(cfg['sequence'][1], 5) + + @mock.patch('mmengine.runner.Runner.train') + @mock.patch('mmengine.tune._report_hook.ReportingHook.report_score') + def test_successful_run(self, mock_report_score, mock_train): + tuner = Tuner( + runner_cfg=self.epoch_based_cfg, + hparam_spec=self.hparam_spec, + monitor='loss', + rule='less', + num_trials=2, + searcher_cfg=dict(type='ToySearcher') + ) + + tuner.searcher.suggest = mock.MagicMock(return_value={'optim_wrapper.optimizer.lr': 0.1}) + tuner.searcher.record = mock.MagicMock() + + mock_report_score.return_value = 0.05 + + with self.mock_is_main_process(), self.mock_broadcast(): + hparam, score, error = tuner._run_trial() + + self.assertEqual(hparam, {'optim_wrapper.optimizer.lr': 0.1}) + self.assertEqual(score, 0.05) + self.assertIsNone(error) + tuner.searcher.record.assert_called_with({'optim_wrapper.optimizer.lr': 0.1}, 0.05) + + @mock.patch('mmengine.runner.Runner.train') + @mock.patch('mmengine.tune._report_hook.ReportingHook.report_score') + def test_successful_run(self, mock_report_score, mock_train): + 
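+        # Simulate a failing trial: Runner.train raises, so _run_trial is
+        # expected to fall back to the default (worst) score and return the
+        # exception alongside it.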
mock_train.side_effect = Exception("Error during training") + + tuner = Tuner( + runner_cfg=self.epoch_based_cfg, + hparam_spec=self.hparam_spec, + monitor='loss', + rule='less', + num_trials=2, + searcher_cfg=dict(type='ToySearcher') + ) + + tuner.searcher.suggest = mock.MagicMock(return_value={'optim_wrapper.optimizer.lr': 0.1}) + tuner.searcher.record = mock.MagicMock() + + with self.mock_is_main_process(), self.mock_broadcast(): + hparam, score, error = tuner._run_trial() + + self.assertEqual(hparam, {'optim_wrapper.optimizer.lr': 0.1}) + self.assertEqual(score, float('inf')) + self.assertTrue(isinstance(error, Exception)) + tuner.searcher.record.assert_called_with({'optim_wrapper.optimizer.lr': 0.1}, float('inf')) + + @mock.patch('mmengine.runner.Runner.train') + @mock.patch('mmengine.tune._report_hook.ReportingHook.report_score') + def test_tune_method(self, mock_report_score, mock_train): + mock_scores = [0.05, 0.03, 0.04, 0.06] + mock_hparams = [ + {'optim_wrapper.optimizer.lr': 0.1}, + {'optim_wrapper.optimizer.lr': 0.05}, + {'optim_wrapper.optimizer.lr': 0.2}, + {'optim_wrapper.optimizer.lr': 0.3} + ] + + mock_report_score.side_effect = mock_scores + + tuner = Tuner( + runner_cfg=self.epoch_based_cfg, + hparam_spec=self.hparam_spec, + monitor='loss', + rule='less', + num_trials=4, + searcher_cfg=dict(type='ToySearcher') + ) + + mock_run_trial_return_values = [ + (mock_hparams[0], mock_scores[0], None), + (mock_hparams[1], mock_scores[1], Exception("Error during training")), + (mock_hparams[2], mock_scores[2], None), + (mock_hparams[3], mock_scores[3], None) + ] + tuner._run_trial = mock.MagicMock(side_effect=mock_run_trial_return_values) + + with self.mock_is_main_process(), self.mock_broadcast(): + result = tuner.tune() + + self.assertEqual(tuner._history, [(mock_hparams[0], mock_scores[0]), (mock_hparams[2], mock_scores[2]), (mock_hparams[3], mock_scores[3])]) + + self.assertEqual(result, { + 'hparam': mock_hparams[2], + 'score': mock_scores[2] + }) + + tuner.rule = 'greater' + with self.mock_is_main_process(), self.mock_broadcast(): + result = tuner.tune() + self.assertEqual(result, { + 'hparam': mock_hparams[3], + 'score': mock_scores[3] + }) + + def test_clear(self): + tuner = Tuner( + runner_cfg=self.epoch_based_cfg, + hparam_spec=self.hparam_spec, + monitor='loss', + rule='less', + num_trials=2, + searcher_cfg=dict(type='ToySearcher') + ) + + tuner.history.append({'optim_wrapper.optimizer.lr': 0.1}, 0.05) + tuner.clear() + self.assertEqual(tuner.history, []) + + def test_with_runner(self): + tuner = Tuner( + runner_cfg=self.epoch_based_cfg, + hparam_spec=self.hparam_spec, + monitor='test/acc', + rule='greater', + num_trials=10, + searcher_cfg=dict(type='ToySearcher') + ) + + with self.mock_is_main_process(), self.mock_broadcast(): + result = tuner.tune() + + self.assertTrue( set([hparam['optim_wrapper.optimizer.lr'] for hparam, _ in tuner.history]) in self.hparam_spec['optim_wrapper.optimizer.lr']['values'] ) + self.assertEqual(result['score'], 1) From 308ece3c6e51b7310fb73c0b3af9a676c6a0991c Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Wed, 30 Aug 2023 10:46:22 +0900 Subject: [PATCH 26/41] Apply lint --- tests/test_tune/test_report_hook.py | 14 +- .../test_searchers/test_nevergrad.py | 58 +++--- .../test_tune/test_searchers/test_searcher.py | 14 +- tests/test_tune/test_tuner.py | 178 +++++++++--------- 4 files changed, 142 insertions(+), 122 deletions(-) diff --git a/tests/test_tune/test_report_hook.py 
b/tests/test_tune/test_report_hook.py index a50fc68d06..f06a010fa9 100644 --- a/tests/test_tune/test_report_hook.py +++ b/tests/test_tune/test_report_hook.py @@ -1,9 +1,12 @@ # Copyright (c) OpenMMLab. All rights reserved. +from unittest.mock import MagicMock + from mmengine.testing import RunnerTestCase from mmengine.tune._report_hook import ReportingHook -from unittest.mock import MagicMock + class TestReportingHook(RunnerTestCase): + def test_append_score(self): hook = ReportingHook(monitor='acc', max_scoreboard_len=3) @@ -53,8 +56,11 @@ def test_clear(self): def test_after_train_iter(self): runner = MagicMock(iter=3, epoch=1) - runner.log_processor.get_log_after_iter = MagicMock(return_value=({'acc': 0.9}, 'log_str')) - + runner.log_processor.get_log_after_iter = MagicMock( + return_value=({ + 'acc': 0.9 + }, 'log_str')) + # Check if the monitored score gets appended correctly hook = ReportingHook(monitor='acc') hook.after_train_iter(runner, 0) @@ -64,7 +70,7 @@ def test_after_train_iter(self): hook2 = ReportingHook(monitor='non_existent') hook2.after_train_iter(runner, 0) self.assertEqual(len(hook2.scoreboard), 0) - + # Check that training stops if tuning_iter is reached runner.iter = 5 hook3 = ReportingHook(monitor='acc', tuning_iter=5) diff --git a/tests/test_tune/test_searchers/test_nevergrad.py b/tests/test_tune/test_searchers/test_nevergrad.py index 3251d9c3d5..dade8eff77 100644 --- a/tests/test_tune/test_searchers/test_nevergrad.py +++ b/tests/test_tune/test_searchers/test_nevergrad.py @@ -1,48 +1,52 @@ # Copyright (c) OpenMMLab. All rights reserved. -from unittest import TestCase, skipIf import random from typing import List +from unittest import TestCase, skipIf from mmengine.tune.searchers import NevergradSearcher try: - import nevergrad + import nevergrad # noqa: F401 NEVERGRAD_AVAILABLE = True except ImportError: NEVERGRAD_AVAILABLE = False -@skipIf(not NEVERGRAD_AVAILABLE, "nevergrad is not installed") + +@skipIf(not NEVERGRAD_AVAILABLE, 'nevergrad is not installed') class TestNevergradSearcher(TestCase): + def noisy_sphere_function(self, x: List[float]): """Sphere function with noise: f(x) = sum(x_i^2) + noise""" noise = random.gauss(0, 0.1) # Gaussian noise with mean 0 and std 0.1 - return sum([x_i ** 2 for x_i in x.values()]) + noise - + return sum([x_i**2 for x_i in x]) + noise + def one_max_function(self, x: List[int]): """OneMax function: f(x) = sum(x_i) for binary x_i""" - return sum(x) + return sum(x) @property def target_solver_types(self): - return [ - 'OnePlusOne', 'CMA', 'BO', 'DE', 'PSO', 'NGO' - ] + return ['OnePlusOne', 'CMA', 'BO', 'DE', 'PSO', 'NGO'] def test_hash_dict(self): - searcher = NevergradSearcher(rule='less', hparam_spec={}, num_trials=100, solver_type='OnePlusOne') - + searcher = NevergradSearcher( + rule='less', + hparam_spec={}, + num_trials=100, + solver_type='OnePlusOne') + # Check different dicts yield different hashes - d1 = {"x": 1, "y": 2} - d2 = {"x": 1, "y": 3} + d1 = {'x': 1, 'y': 2} + d2 = {'x': 1, 'y': 3} self.assertNotEqual(searcher._hash_dict(d1), searcher._hash_dict(d2)) - + # Check same dict yields same hash self.assertEqual(searcher._hash_dict(d1), searcher._hash_dict(d1)) # Check order doesn't matter - d3 = {"y": 2, "x": 1} + d3 = {'y': 2, 'x': 1} self.assertEqual(searcher._hash_dict(d1), searcher._hash_dict(d3)) - + def test_noisy_sphere_function(self): hparam_continuous_space = { 'x1': { @@ -57,12 +61,18 @@ def test_noisy_sphere_function(self): } } for solver_type in self.target_solver_types: - searcher = 
NevergradSearcher(rule='less', hparam_spec=hparam_continuous_space, num_trials=100, solver_type=solver_type) + searcher = NevergradSearcher( + rule='less', + hparam_spec=hparam_continuous_space, + num_trials=100, + solver_type=solver_type) for _ in range(100): hparam = searcher.suggest() - score = self.noisy_sphere_function([v for _,v in hparam.items()]) + score = self.noisy_sphere_function( + [v for _, v in hparam.items()]) searcher.record(hparam, score) - # For the noisy sphere function, the optimal should be close to x1=0 and x2=0 + # For the noisy sphere function, + # the optimal should be close to x1=0 and x2=0 best_hparam = searcher.suggest() self.assertAlmostEqual(best_hparam['x1'], 0.0, places=1) self.assertAlmostEqual(best_hparam['x2'], 0.0, places=1) @@ -88,14 +98,18 @@ def test_one_max_function(self): } } for solver_type in self.target_solver_types: - searcher = NevergradSearcher(rule='greater', hparam_spec=hparam_discrete_space, num_trials=100, solver_type=solver_type) + searcher = NevergradSearcher( + rule='greater', + hparam_spec=hparam_discrete_space, + num_trials=100, + solver_type=solver_type) for _ in range(100): hparam = searcher.suggest() - score = self.one_max_function([v for _,v in hparam.items()]) + score = self.one_max_function([v for _, v in hparam.items()]) searcher.record(hparam, score) # For the OneMax function, the optimal solution is x1=x2=x3=x4=1 best_hparam = searcher.suggest() self.assertEqual(best_hparam['x1'], 1) self.assertEqual(best_hparam['x2'], 1) self.assertEqual(best_hparam['x3'], 1) - self.assertEqual(best_hparam['x4'], 1) \ No newline at end of file + self.assertEqual(best_hparam['x4'], 1) diff --git a/tests/test_tune/test_searchers/test_searcher.py b/tests/test_tune/test_searchers/test_searcher.py index ca814cb4ed..a010fa0397 100644 --- a/tests/test_tune/test_searchers/test_searcher.py +++ b/tests/test_tune/test_searchers/test_searcher.py @@ -3,6 +3,7 @@ from mmengine.tune.searchers import Searcher + class TestSearcher(TestCase): def test_rule(self): @@ -29,13 +30,8 @@ def test_validate_hparam_spec(self): with self.assertRaises(AssertionError): Searcher(rule='greater', hparam_spec=invalid_hparam_spec_1) - # Missing keys in continuous hparam_spec - invalid_hparam_spec_2 = { - 'lr': { - 'type': 'continuous', - 'lower': 0.01 - } - } + # Missing keys in continuous hparam_spec + invalid_hparam_spec_2 = {'lr': {'type': 'continuous', 'lower': 0.01}} with self.assertRaises(AssertionError): Searcher(rule='less', hparam_spec=invalid_hparam_spec_2) @@ -70,7 +66,7 @@ def test_validate_hparam_spec(self): } with self.assertRaises(AssertionError): Searcher(rule='less', hparam_spec=invalid_hparam_spec_5) - + def test_hparam_spec_property(self): hparam_spec = { 'lr': { @@ -83,4 +79,4 @@ def test_hparam_spec_property(self): def test_rule_property(self): searcher = Searcher(rule='greater', hparam_spec={}) - self.assertEqual(searcher.rule, 'greater') \ No newline at end of file + self.assertEqual(searcher.rule, 'greater') diff --git a/tests/test_tune/test_tuner.py b/tests/test_tune/test_tuner.py index 4b5eb04397..063672212d 100644 --- a/tests/test_tune/test_tuner.py +++ b/tests/test_tune/test_tuner.py @@ -1,13 +1,15 @@ # Copyright (c) OpenMMLab. All rights reserved. 
+import random from typing import Dict +from unittest import mock + from mmengine.testing import RunnerTestCase from mmengine.tune import Tuner -from mmengine.tune.searchers import Searcher, HYPER_SEARCHERS -from unittest import mock +from mmengine.tune.searchers import HYPER_SEARCHERS, Searcher -import random class ToySearcher(Searcher): + def suggest(self) -> Dict: hparam = dict() for k, v in self.hparam_spec.items(): @@ -17,15 +19,17 @@ def suggest(self) -> Dict: hparam[k] = random.uniform(v['lower'], v['upper']) return hparam + class TestTuner(RunnerTestCase): + def setUp(self) -> None: super().setUp() - HYPER_SEARCHERS.register_module(ToySearcher) + HYPER_SEARCHERS.register_module(module=ToySearcher) self.hparam_spec = { 'optim_wrapper.optimizer.lr': { 'type': 'discrete', - 'values': [0.1, 0.2, 0.3] - } + 'values': [0.1, 0.2, 0.3] + } } def tearDown(self): @@ -35,38 +39,37 @@ def tearDown(self): def test_init(self): with self.assertRaises(ValueError): Tuner( - runner_cfg=dict(), - hparam_spec=dict(), - monitor='loss', + runner_cfg=dict(), + hparam_spec=dict(), + monitor='loss', rule='invalid_rule', searcher_cfg=dict(type='ToySearcher')) - + # Initializing with correct parameters tuner = Tuner( - runner_cfg=self.epoch_based_cfg, - hparam_spec=self.hparam_spec, - monitor='loss', - rule='less', - num_trials=2, - searcher_cfg=dict(type='ToySearcher') - ) + runner_cfg=self.epoch_based_cfg, + hparam_spec=self.hparam_spec, + monitor='loss', + rule='less', + num_trials=2, + searcher_cfg=dict(type='ToySearcher')) # Verify the properties - self.assertEqual(tuner.epoch_based_cfg, self.epoch_based_cfg) self.assertEqual(tuner.hparam_spec, self.hparam_spec) self.assertEqual(tuner.monitor, 'loss') self.assertEqual(tuner.rule, 'less') self.assertEqual(tuner.num_trials, 2) - + # Ensure a searcher of type ToySearcher is used self.assertIsInstance(tuner.searcher, ToySearcher) - def mock_is_main_process(self, return_value=True): - return mock.patch('mmengine.dist.is_main_process', return_value=return_value) + return mock.patch( + 'mmengine.dist.is_main_process', return_value=return_value) def mock_broadcast(self, side_effect=None): - return mock.patch('mmengine.dist.broadcast_object_list', side_effect=side_effect) + return mock.patch( + 'mmengine.dist.broadcast_object_list', side_effect=side_effect) def test_inject_config(self): # Inject into a single level @@ -75,13 +78,7 @@ def test_inject_config(self): self.assertEqual(cfg['a'], 2) # Inject into a nested level - cfg = { - 'level1': { - 'level2': { - 'level3': 3 - } - } - } + cfg = {'level1': {'level2': {'level3': 3}}} Tuner.inject_config(cfg, 'level1.level2.level3', 4) self.assertEqual(cfg['level1']['level2']['level3'], 4) @@ -91,9 +88,7 @@ def test_inject_config(self): Tuner.inject_config(cfg, 'a', 1) # Inject into a sequence - cfg = { - 'sequence': [1, 2, 3] - } + cfg = {'sequence': [1, 2, 3]} Tuner.inject_config(cfg, 'sequence.1', 5) self.assertEqual(cfg['sequence'][1], 5) @@ -101,15 +96,15 @@ def test_inject_config(self): @mock.patch('mmengine.tune._report_hook.ReportingHook.report_score') def test_successful_run(self, mock_report_score, mock_train): tuner = Tuner( - runner_cfg=self.epoch_based_cfg, - hparam_spec=self.hparam_spec, - monitor='loss', - rule='less', - num_trials=2, - searcher_cfg=dict(type='ToySearcher') - ) - - tuner.searcher.suggest = mock.MagicMock(return_value={'optim_wrapper.optimizer.lr': 0.1}) + runner_cfg=self.epoch_based_cfg, + hparam_spec=self.hparam_spec, + monitor='loss', + rule='less', + num_trials=2, + 
searcher_cfg=dict(type='ToySearcher')) + + tuner.searcher.suggest = mock.MagicMock( + return_value={'optim_wrapper.optimizer.lr': 0.1}) tuner.searcher.record = mock.MagicMock() mock_report_score.return_value = 0.05 @@ -120,23 +115,24 @@ def test_successful_run(self, mock_report_score, mock_train): self.assertEqual(hparam, {'optim_wrapper.optimizer.lr': 0.1}) self.assertEqual(score, 0.05) self.assertIsNone(error) - tuner.searcher.record.assert_called_with({'optim_wrapper.optimizer.lr': 0.1}, 0.05) + tuner.searcher.record.assert_called_with( + {'optim_wrapper.optimizer.lr': 0.1}, 0.05) @mock.patch('mmengine.runner.Runner.train') @mock.patch('mmengine.tune._report_hook.ReportingHook.report_score') - def test_successful_run(self, mock_report_score, mock_train): - mock_train.side_effect = Exception("Error during training") + def test_run_with_exception(self, mock_report_score, mock_train): + mock_train.side_effect = Exception('Error during training') tuner = Tuner( - runner_cfg=self.epoch_based_cfg, - hparam_spec=self.hparam_spec, - monitor='loss', - rule='less', - num_trials=2, - searcher_cfg=dict(type='ToySearcher') - ) - - tuner.searcher.suggest = mock.MagicMock(return_value={'optim_wrapper.optimizer.lr': 0.1}) + runner_cfg=self.epoch_based_cfg, + hparam_spec=self.hparam_spec, + monitor='loss', + rule='less', + num_trials=2, + searcher_cfg=dict(type='ToySearcher')) + + tuner.searcher.suggest = mock.MagicMock( + return_value={'optim_wrapper.optimizer.lr': 0.1}) tuner.searcher.record = mock.MagicMock() with self.mock_is_main_process(), self.mock_broadcast(): @@ -145,48 +141,55 @@ def test_successful_run(self, mock_report_score, mock_train): self.assertEqual(hparam, {'optim_wrapper.optimizer.lr': 0.1}) self.assertEqual(score, float('inf')) self.assertTrue(isinstance(error, Exception)) - tuner.searcher.record.assert_called_with({'optim_wrapper.optimizer.lr': 0.1}, float('inf')) + tuner.searcher.record.assert_called_with( + {'optim_wrapper.optimizer.lr': 0.1}, float('inf')) @mock.patch('mmengine.runner.Runner.train') @mock.patch('mmengine.tune._report_hook.ReportingHook.report_score') def test_tune_method(self, mock_report_score, mock_train): mock_scores = [0.05, 0.03, 0.04, 0.06] - mock_hparams = [ - {'optim_wrapper.optimizer.lr': 0.1}, - {'optim_wrapper.optimizer.lr': 0.05}, - {'optim_wrapper.optimizer.lr': 0.2}, - {'optim_wrapper.optimizer.lr': 0.3} - ] + mock_hparams = [{ + 'optim_wrapper.optimizer.lr': 0.1 + }, { + 'optim_wrapper.optimizer.lr': 0.05 + }, { + 'optim_wrapper.optimizer.lr': 0.2 + }, { + 'optim_wrapper.optimizer.lr': 0.3 + }] mock_report_score.side_effect = mock_scores tuner = Tuner( - runner_cfg=self.epoch_based_cfg, - hparam_spec=self.hparam_spec, - monitor='loss', - rule='less', - num_trials=4, - searcher_cfg=dict(type='ToySearcher') - ) + runner_cfg=self.epoch_based_cfg, + hparam_spec=self.hparam_spec, + monitor='loss', + rule='less', + num_trials=4, + searcher_cfg=dict(type='ToySearcher')) mock_run_trial_return_values = [ (mock_hparams[0], mock_scores[0], None), - (mock_hparams[1], mock_scores[1], Exception("Error during training")), + (mock_hparams[1], mock_scores[1], + Exception('Error during training')), (mock_hparams[2], mock_scores[2], None), (mock_hparams[3], mock_scores[3], None) ] - tuner._run_trial = mock.MagicMock(side_effect=mock_run_trial_return_values) + tuner._run_trial = mock.MagicMock( + side_effect=mock_run_trial_return_values) with self.mock_is_main_process(), self.mock_broadcast(): result = tuner.tune() - self.assertEqual(tuner._history, 
[(mock_hparams[0], mock_scores[0]), (mock_hparams[2], mock_scores[2]), (mock_hparams[3], mock_scores[3])]) - + self.assertEqual(tuner._history, [(mock_hparams[0], mock_scores[0]), + (mock_hparams[2], mock_scores[2]), + (mock_hparams[3], mock_scores[3])]) + self.assertEqual(result, { 'hparam': mock_hparams[2], 'score': mock_scores[2] }) - + tuner.rule = 'greater' with self.mock_is_main_process(), self.mock_broadcast(): result = tuner.tune() @@ -197,13 +200,12 @@ def test_tune_method(self, mock_report_score, mock_train): def test_clear(self): tuner = Tuner( - runner_cfg=self.epoch_based_cfg, - hparam_spec=self.hparam_spec, - monitor='loss', - rule='less', - num_trials=2, - searcher_cfg=dict(type='ToySearcher') - ) + runner_cfg=self.epoch_based_cfg, + hparam_spec=self.hparam_spec, + monitor='loss', + rule='less', + num_trials=2, + searcher_cfg=dict(type='ToySearcher')) tuner.history.append({'optim_wrapper.optimizer.lr': 0.1}, 0.05) tuner.clear() @@ -211,16 +213,18 @@ def test_clear(self): def test_with_runner(self): tuner = Tuner( - runner_cfg=self.epoch_based_cfg, - hparam_spec=self.hparam_spec, - monitor='test/acc', - rule='greater', - num_trials=10, - searcher_cfg=dict(type='ToySearcher') - ) + runner_cfg=self.epoch_based_cfg, + hparam_spec=self.hparam_spec, + monitor='test/acc', + rule='greater', + num_trials=10, + searcher_cfg=dict(type='ToySearcher')) with self.mock_is_main_process(), self.mock_broadcast(): result = tuner.tune() - self.assertTrue( set([hparam['optim_wrapper.optimizer.lr'] for hparam, _ in tuner.history]) in self.hparam_spec['optim_wrapper.optimizer.lr']['values'] ) + self.assertTrue({ + hparam['optim_wrapper.optimizer.lr'] + for hparam, _ in tuner.history + } in self.hparam_spec['optim_wrapper.optimizer.lr']['values']) self.assertEqual(result['score'], 1) From 0767f5222306ed6f3501dd1a09561f43e032e8b8 Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Wed, 30 Aug 2023 11:07:30 +0900 Subject: [PATCH 27/41] Add random searcher --- mmengine/tune/searchers/__init__.py | 5 +++- mmengine/tune/searchers/random.py | 29 +++++++++++++++++++ mmengine/tune/tuner.py | 2 +- tests/test_tune/test_searchers/test_random.py | 26 +++++++++++++++++ tests/test_tune/test_tuner.py | 5 ++-- 5 files changed, 62 insertions(+), 5 deletions(-) create mode 100644 mmengine/tune/searchers/random.py create mode 100644 tests/test_tune/test_searchers/test_random.py diff --git a/mmengine/tune/searchers/__init__.py b/mmengine/tune/searchers/__init__.py index 654dabd43c..a7077efb4b 100644 --- a/mmengine/tune/searchers/__init__.py +++ b/mmengine/tune/searchers/__init__.py @@ -1,5 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved. from .nevergrad import NevergradSearcher +from .random import RandomSearcher from .searcher import HYPER_SEARCHERS, Searcher -__all__ = ['Searcher', 'HYPER_SEARCHERS', 'NevergradSearcher'] +__all__ = [ + 'Searcher', 'HYPER_SEARCHERS', 'NevergradSearcher', 'RandomSearcher' +] diff --git a/mmengine/tune/searchers/random.py b/mmengine/tune/searchers/random.py new file mode 100644 index 0000000000..a4105f51dd --- /dev/null +++ b/mmengine/tune/searchers/random.py @@ -0,0 +1,29 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import random +from typing import Dict + +from .searcher import HYPER_SEARCHERS, Searcher + +@HYPER_SEARCHERS.register_module() +class RandomSearcher(Searcher): + def __init__(self, + rule: str, + hparam_spec: Dict[str, Dict], + *args, + **kwargs): + super().__init__(rule, hparam_spec) + + def suggest(self) -> Dict: + """Suggest a new hparam based on random selection. + + Returns: + Dict: suggested hparam + """ + suggestion = {} + for key, spec in self._hparam_spec.items(): + if spec['type'] == 'discrete': + suggestion[key] = random.choice(spec['values']) + elif spec['type'] == 'continuous': + suggestion[key] = random.uniform(spec['lower'], spec['upper']) + + return suggestion \ No newline at end of file diff --git a/mmengine/tune/tuner.py b/mmengine/tune/tuner.py index 09501623bb..b149e7b0cb 100644 --- a/mmengine/tune/tuner.py +++ b/mmengine/tune/tuner.py @@ -88,7 +88,7 @@ def __init__(self, tuning_iter: Optional[int] = None, tuning_epoch: Optional[int] = None, report_op: str = 'latest', - searcher_cfg: Dict = dict(type='NevergradSearcher')): + searcher_cfg: Dict = dict(type='RandomSearcher')): self._runner_cfg = runner_cfg.copy() self._hparam_spec = hparam_spec diff --git a/tests/test_tune/test_searchers/test_random.py b/tests/test_tune/test_searchers/test_random.py new file mode 100644 index 0000000000..2e90bafbf6 --- /dev/null +++ b/tests/test_tune/test_searchers/test_random.py @@ -0,0 +1,26 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from unittest import TestCase + +from mmengine.tune.searchers import RandomSearcher + + +class TestRandomSearcher(TestCase): + def test_suggest(self): + searcher = RandomSearcher( + rule='greater', + hparam_spec={ + 'x1': { + 'type': 'discrete', + 'values': [0.01, 0.02, 0.03] + }, + 'x2': { + 'type': 'continuous', + 'lower': 0.01, + 'upper': 0.1 + } + }) + + for _ in range(100): + hparam = searcher.suggest() + self.assertTrue(hparam['x1'] in [0.01, 0.02, 0.03]) + self.assertTrue(hparam['x2'] >= 0.01 and hparam['x2'] <= 0.1) \ No newline at end of file diff --git a/tests/test_tune/test_tuner.py b/tests/test_tune/test_tuner.py index 063672212d..e40eb5e24f 100644 --- a/tests/test_tune/test_tuner.py +++ b/tests/test_tune/test_tuner.py @@ -1,5 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-import random from typing import Dict from unittest import mock @@ -14,9 +13,9 @@ def suggest(self) -> Dict: hparam = dict() for k, v in self.hparam_spec.items(): if v['type'] == 'discrete': - hparam[k] = random.choice(v['values']) + hparam[k] = v['values'][0] else: - hparam[k] = random.uniform(v['lower'], v['upper']) + hparam[k] = (v['lower'] + v['upper']) / 2 return hparam From 70d91e44e5cb5f9cab0bdec5e9cb1a1789ed416b Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Wed, 30 Aug 2023 16:48:30 +0900 Subject: [PATCH 28/41] Fix unittest bug --- mmengine/tune/searchers/random.py | 9 +-- mmengine/tune/searchers/searcher.py | 3 +- mmengine/tune/tuner.py | 37 ++++++--- tests/test_tune/test_report_hook.py | 2 +- .../test_searchers/test_nevergrad.py | 81 +++++++------------ tests/test_tune/test_searchers/test_random.py | 5 +- tests/test_tune/test_tuner.py | 3 +- 7 files changed, 67 insertions(+), 73 deletions(-) diff --git a/mmengine/tune/searchers/random.py b/mmengine/tune/searchers/random.py index a4105f51dd..421940c756 100644 --- a/mmengine/tune/searchers/random.py +++ b/mmengine/tune/searchers/random.py @@ -4,12 +4,11 @@ from .searcher import HYPER_SEARCHERS, Searcher + @HYPER_SEARCHERS.register_module() class RandomSearcher(Searcher): - def __init__(self, - rule: str, - hparam_spec: Dict[str, Dict], - *args, + + def __init__(self, rule: str, hparam_spec: Dict[str, Dict], *args, **kwargs): super().__init__(rule, hparam_spec) @@ -26,4 +25,4 @@ def suggest(self) -> Dict: elif spec['type'] == 'continuous': suggestion[key] = random.uniform(spec['lower'], spec['upper']) - return suggestion \ No newline at end of file + return suggestion diff --git a/mmengine/tune/searchers/searcher.py b/mmengine/tune/searchers/searcher.py index 024f8fca0a..ea81bed267 100644 --- a/mmengine/tune/searchers/searcher.py +++ b/mmengine/tune/searchers/searcher.py @@ -15,7 +15,8 @@ class Searcher: rules_supported = ['greater', 'less'] - def __init__(self, rule: str, hparam_spec: Dict[str, Dict]): + def __init__(self, rule: str, hparam_spec: Dict[str, Dict], *args, + **kwargs): assert rule in self.rules_supported, \ f"rule must be 'less' or 'greater', but got {rule}" self._rule = rule diff --git a/mmengine/tune/tuner.py b/mmengine/tune/tuner.py index b149e7b0cb..b5d7c84f6c 100644 --- a/mmengine/tune/tuner.py +++ b/mmengine/tune/tuner.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. import math -from typing import Any, Dict, List, Optional, Sequence, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union from mmengine.config import Config, ConfigDict from mmengine.dist import (broadcast_object_list, init_dist, is_distributed, @@ -168,6 +168,11 @@ def history(self) -> List[Tuple[Dict, float]]: scores.""" return self._history + @property + def searcher(self) -> Searcher: + """Searcher: The searcher used for hyperparameter tuning.""" + return self._searcher + @staticmethod def inject_config(cfg: Dict, key: str, value: Any): """Inject a value into a config. @@ -179,18 +184,24 @@ def inject_config(cfg: Dict, key: str, value: Any): key (str): The key of the value to be injected. value (Any): The value to be injected. """ - splitted_key = key.split('.') - suffix = '' - for item in splitted_key[:-1]: - if isinstance(cfg, Sequence) and not isinstance(cfg, str): - cfg = cfg[int(item)] - else: - assert item in cfg, f'key {item} is not in {cfg}' - cfg = cfg[item] - suffix += f'{item}.' 
- assert splitted_key[ - -1] in cfg, f'attribute {splitted_key[-1]} is not in cfg{suffix}' - cfg[splitted_key[-1]] = value + keys = key.split('.') + for k in keys[:-1]: + if isinstance(cfg, list): + k = int(k) + if k >= len(cfg) or k < 0: + raise KeyError(f'Index {k} is out of range in {cfg}') + elif k not in cfg: + raise KeyError(f"Key '{k}' not found in {cfg}") + cfg = cfg[k] + if isinstance(cfg, list): + k = int(keys[-1]) + if k >= len(cfg) or k < 0: + raise KeyError(f'Index {k} is out of range in {cfg}') + cfg[k] = value + elif keys[-1] not in cfg: + raise KeyError(f"Key '{keys[-1]}' not found in {cfg}") + else: + cfg[keys[-1]] = value return def _build_searcher(self, searcher_cfg: Dict) -> Searcher: diff --git a/tests/test_tune/test_report_hook.py b/tests/test_tune/test_report_hook.py index f06a010fa9..4e271bdfd4 100644 --- a/tests/test_tune/test_report_hook.py +++ b/tests/test_tune/test_report_hook.py @@ -94,7 +94,7 @@ def test_after_val_epoch(self): def test_with_runner(self): runner = self.build_runner(self.epoch_based_cfg) - acc_hook = ReportingHook(monitor='test/acc', tuning_epoch=1) + acc_hook = ReportingHook(monitor='acc', tuning_epoch=1) runner.register_hook(acc_hook, priority='VERY_LOW') runner.train() self.assertEqual(runner.epoch, 1) diff --git a/tests/test_tune/test_searchers/test_nevergrad.py b/tests/test_tune/test_searchers/test_nevergrad.py index dade8eff77..08c92ec7c0 100644 --- a/tests/test_tune/test_searchers/test_nevergrad.py +++ b/tests/test_tune/test_searchers/test_nevergrad.py @@ -24,14 +24,13 @@ def one_max_function(self, x: List[int]): """OneMax function: f(x) = sum(x_i) for binary x_i""" return sum(x) - @property - def target_solver_types(self): - return ['OnePlusOne', 'CMA', 'BO', 'DE', 'PSO', 'NGO'] - def test_hash_dict(self): searcher = NevergradSearcher( rule='less', - hparam_spec={}, + hparam_spec={'test': { + 'type': 'discrete', + 'values': [0, 1] + }}, num_trials=100, solver_type='OnePlusOne') @@ -60,56 +59,38 @@ def test_noisy_sphere_function(self): 'upper': 5.0 } } - for solver_type in self.target_solver_types: - searcher = NevergradSearcher( - rule='less', - hparam_spec=hparam_continuous_space, - num_trials=100, - solver_type=solver_type) - for _ in range(100): - hparam = searcher.suggest() - score = self.noisy_sphere_function( - [v for _, v in hparam.items()]) - searcher.record(hparam, score) - # For the noisy sphere function, - # the optimal should be close to x1=0 and x2=0 - best_hparam = searcher.suggest() - self.assertAlmostEqual(best_hparam['x1'], 0.0, places=1) - self.assertAlmostEqual(best_hparam['x2'], 0.0, places=1) + searcher = NevergradSearcher( + rule='less', + hparam_spec=hparam_continuous_space, + num_trials=100, + solver_type='CMA') + for _ in range(100): + hparam = searcher.suggest() + score = self.noisy_sphere_function([v for _, v in hparam.items()]) + searcher.record(hparam, score) + # For the noisy sphere function, + # the optimal should be close to x1=0 and x2=0 + hparam = searcher.suggest() + self.assertAlmostEqual(hparam['x1'], 0.0, delta=1) + self.assertAlmostEqual(hparam['x2'], 0.0, delta=1) def test_one_max_function(self): # Define the discrete search space for OneMax hparam_discrete_space = { - 'x1': { - 'type': 'discrete', - 'values': [0, 1] - }, - 'x2': { - 'type': 'discrete', - 'values': [0, 1] - }, - 'x3': { - 'type': 'discrete', - 'values': [0, 1] - }, - 'x4': { + f'x{i}': { 'type': 'discrete', 'values': [0, 1] } + for i in range(1, 8) } - for solver_type in self.target_solver_types: - searcher = NevergradSearcher( - 
rule='greater', - hparam_spec=hparam_discrete_space, - num_trials=100, - solver_type=solver_type) - for _ in range(100): - hparam = searcher.suggest() - score = self.one_max_function([v for _, v in hparam.items()]) - searcher.record(hparam, score) - # For the OneMax function, the optimal solution is x1=x2=x3=x4=1 - best_hparam = searcher.suggest() - self.assertEqual(best_hparam['x1'], 1) - self.assertEqual(best_hparam['x2'], 1) - self.assertEqual(best_hparam['x3'], 1) - self.assertEqual(best_hparam['x4'], 1) + searcher = NevergradSearcher( + rule='greater', + hparam_spec=hparam_discrete_space, + num_trials=300, + solver_type='NGO') + for _ in range(300): + hparam = searcher.suggest() + score = self.one_max_function([v for _, v in hparam.items()]) + searcher.record(hparam, score) + hparam = searcher.suggest() + self.assertGreaterEqual(score, 6) diff --git a/tests/test_tune/test_searchers/test_random.py b/tests/test_tune/test_searchers/test_random.py index 2e90bafbf6..6b57843aaa 100644 --- a/tests/test_tune/test_searchers/test_random.py +++ b/tests/test_tune/test_searchers/test_random.py @@ -5,6 +5,7 @@ class TestRandomSearcher(TestCase): + def test_suggest(self): searcher = RandomSearcher( rule='greater', @@ -19,8 +20,8 @@ def test_suggest(self): 'upper': 0.1 } }) - + for _ in range(100): hparam = searcher.suggest() self.assertTrue(hparam['x1'] in [0.01, 0.02, 0.03]) - self.assertTrue(hparam['x2'] >= 0.01 and hparam['x2'] <= 0.1) \ No newline at end of file + self.assertTrue(hparam['x2'] >= 0.01 and hparam['x2'] <= 0.1) diff --git a/tests/test_tune/test_tuner.py b/tests/test_tune/test_tuner.py index e40eb5e24f..f718ed9360 100644 --- a/tests/test_tune/test_tuner.py +++ b/tests/test_tune/test_tuner.py @@ -42,6 +42,7 @@ def test_init(self): hparam_spec=dict(), monitor='loss', rule='invalid_rule', + num_trials=2, searcher_cfg=dict(type='ToySearcher')) # Initializing with correct parameters @@ -206,7 +207,7 @@ def test_clear(self): num_trials=2, searcher_cfg=dict(type='ToySearcher')) - tuner.history.append({'optim_wrapper.optimizer.lr': 0.1}, 0.05) + tuner.history.append(({'optim_wrapper.optimizer.lr': 0.1}, 0.05)) tuner.clear() self.assertEqual(tuner.history, []) From 1e122115a1a0b00e00c85ff1234d00cc380c30b2 Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Thu, 31 Aug 2023 09:50:31 +0900 Subject: [PATCH 29/41] Fix tuner unittest --- tests/test_tune/test_tuner.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/tests/test_tune/test_tuner.py b/tests/test_tune/test_tuner.py index f718ed9360..96cc155f40 100644 --- a/tests/test_tune/test_tuner.py +++ b/tests/test_tune/test_tuner.py @@ -84,7 +84,7 @@ def test_inject_config(self): # Inject into a non-existent key cfg = {} - with self.assertRaises(AssertionError): + with self.assertRaises(KeyError): Tuner.inject_config(cfg, 'a', 1) # Inject into a sequence @@ -146,7 +146,7 @@ def test_run_with_exception(self, mock_report_score, mock_train): @mock.patch('mmengine.runner.Runner.train') @mock.patch('mmengine.tune._report_hook.ReportingHook.report_score') - def test_tune_method(self, mock_report_score, mock_train): + def test_tune(self, mock_report_score, mock_train): mock_scores = [0.05, 0.03, 0.04, 0.06] mock_hparams = [{ 'optim_wrapper.optimizer.lr': 0.1 @@ -182,15 +182,24 @@ def test_tune_method(self, mock_report_score, mock_train): result = tuner.tune() self.assertEqual(tuner._history, [(mock_hparams[0], mock_scores[0]), + (mock_hparams[1], mock_scores[1]), 
(mock_hparams[2], mock_scores[2]), (mock_hparams[3], mock_scores[3])]) self.assertEqual(result, { - 'hparam': mock_hparams[2], - 'score': mock_scores[2] + 'hparam': mock_hparams[1], + 'score': mock_scores[1] }) - tuner.rule = 'greater' + tuner = Tuner( + runner_cfg=self.epoch_based_cfg, + hparam_spec=self.hparam_spec, + monitor='loss', + rule='greater', + num_trials=4, + searcher_cfg=dict(type='ToySearcher')) + tuner._run_trial = mock.MagicMock( + side_effect=mock_run_trial_return_values) with self.mock_is_main_process(), self.mock_broadcast(): result = tuner.tune() self.assertEqual(result, { @@ -215,7 +224,7 @@ def test_with_runner(self): tuner = Tuner( runner_cfg=self.epoch_based_cfg, hparam_spec=self.hparam_spec, - monitor='test/acc', + monitor='acc', rule='greater', num_trials=10, searcher_cfg=dict(type='ToySearcher')) @@ -226,5 +235,6 @@ def test_with_runner(self): self.assertTrue({ hparam['optim_wrapper.optimizer.lr'] for hparam, _ in tuner.history - } in self.hparam_spec['optim_wrapper.optimizer.lr']['values']) + }.issubset( + set(self.hparam_spec['optim_wrapper.optimizer.lr']['values']))) self.assertEqual(result['score'], 1) From c4a7e04664e2cc314d8ffeb4b45d04d1b0bf4d3f Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Thu, 31 Aug 2023 11:33:10 +0900 Subject: [PATCH 30/41] Add tuning interface for runner --- examples/tune/find_lr.py | 55 +++++++++++-------------------------- mmengine/runner/runner.py | 55 +++++++++++++++++++++++++++++++++++++ mmengine/tune/tuner.py | 57 ++++++++++++++++++++++----------------- 3 files changed, 102 insertions(+), 65 deletions(-) diff --git a/examples/tune/find_lr.py b/examples/tune/find_lr.py index 3386d017a9..520a672348 100644 --- a/examples/tune/find_lr.py +++ b/examples/tune/find_lr.py @@ -1,16 +1,14 @@ import argparse import tempfile -from typing import Dict, Optional, Union import torch import torch.nn as nn from torch.utils.data import Dataset -from mmengine.config import Config, ConfigDict from mmengine.evaluator import BaseMetric from mmengine.model import BaseModel from mmengine.registry import DATASETS, METRICS, MODELS -from mmengine.tune import Tuner +from mmengine.runner import Runner class ToyModel(BaseModel): @@ -86,39 +84,6 @@ def parse_args(): return args -def find_optimial_lr( - runner_cfg: Union[Dict, Config, ConfigDict], - monitor: str = 'loss', - rule: str = 'less', - num_trials: int = 32, - lower_lr: Optional[float] = 1e-5, - upper_lr: Optional[float] = 1e-3, - tuning_iter: int = 0, - tuning_epoch: int = 0, - report_op: str = 'latest', - searcher_cfg: Dict = dict(type='NevergradSearcher'), -) -> Dict[str, Union[dict, float]]: - hparam_spec = { - 'optim_wrapper.optimizer.lr': { - 'type': 'continuous', - 'lower': lower_lr, - 'upper': upper_lr - } - } - - tuner = Tuner( - runner_cfg, - hparam_spec=hparam_spec, - monitor=monitor, - rule=rule, - num_trials=num_trials, - tuning_iter=tuning_iter, - tuning_epoch=tuning_epoch, - report_op=report_op, - searcher_cfg=searcher_cfg) - return tuner.tune() - - def main(): args = parse_args() @@ -158,13 +123,23 @@ def main(): env_cfg=dict(dist_cfg=dict(backend='nccl')), experiment_name='test1') - result = find_optimial_lr( + runner = Runner.from_tuning( runner_cfg=runner_cfg, + hparam_spec={ + 'optim_wrapper.optimizer.lr': { + 'type': 'continuous', + 'lower': 1e-5, + 'upper': 1e-1 + } + }, + monitor='loss', + rule='less', num_trials=32, - tuning_epoch=3, + tuning_epoch=2, + searcher_cfg=dict(type='NevergradSearcher'), ) - print('best_lr: ', 
result.get('hparam')) - print('lowest_loss: ', result.get('score')) + runner.train() + temp_dir.cleanup() diff --git a/mmengine/runner/runner.py b/mmengine/runner/runner.py index 12830cf4ad..2b0b1a5249 100644 --- a/mmengine/runner/runner.py +++ b/mmengine/runner/runner.py @@ -37,6 +37,7 @@ HOOKS, LOG_PROCESSORS, LOOPS, MODEL_WRAPPERS, MODELS, OPTIM_WRAPPERS, PARAM_SCHEDULERS, RUNNERS, VISUALIZERS, DefaultScope) +from mmengine.tune import Tuner from mmengine.utils import apply_to, digit_version, get_git_hash, is_seq_of from mmengine.utils.dl_utils import (TORCH_VERSION, collect_env, set_multi_processing) @@ -474,6 +475,60 @@ def from_cfg(cls, cfg: ConfigType) -> 'Runner': return runner + @classmethod + def from_tuning( + cls, + runner_cfg: ConfigType, + hparam_spec: Dict, + monitor: str, + rule: str, + num_trials: int, + tuning_iter: Optional[int] = None, + tuning_epoch: Optional[int] = None, + report_op: str = 'latest', + searcher_cfg: Dict = dict(type='RandomSearcher') + ) -> 'Runner': + """Build a runner from tuning. + + Args: + runner_cfg (ConfigType): A config used for building runner. Keys of + ``runner_cfg`` can see :meth:`__init__`. + hparam_spec (Dict): A dict of hyper parameters to be tuned. + monitor (str): The metric name to be monitored. + rule (Dict): The rule to measure the best metric. + num_trials (int): The maximum number of trials for tuning. + tuning_iter (Optional[int]): The maximum iterations for each trial. + If specified, tuning stops after reaching this limit. + Default is None, indicating no specific iteration limit. + tuning_epoch (Optional[int]): The maximum epochs for each trial. + If specified, tuning stops after reaching this number + of epochs. Default is None, indicating no epoch limit. + report_op (str): + Operation mode for metric reporting. Default is 'latest'. + searcher_cfg (Dict): Configuration for the searcher. + Default is `dict(type='RandomSearcher')`. + + Returns: + Runner: A runner build from ``runner_cfg`` tuned by trials. + """ + + runner_cfg = copy.deepcopy(runner_cfg) + tuner = Tuner( + runner_cfg=runner_cfg, + hparam_spec=hparam_spec, + monitor=monitor, + rule=rule, + num_trials=num_trials, + tuning_iter=tuning_iter, + tuning_epoch=tuning_epoch, + report_op=report_op, + searcher_cfg=searcher_cfg) + hparam = tuner.tune()['hparam'] + assert isinstance(hparam, dict), 'hparam should be a dict' + for k, v in hparam.items(): + Tuner.inject_config(runner_cfg, k, v) + return cls.from_cfg(runner_cfg) + @property def experiment_name(self): """str: Name of experiment.""" diff --git a/mmengine/tune/tuner.py b/mmengine/tune/tuner.py index b5d7c84f6c..9b2fa558b5 100644 --- a/mmengine/tune/tuner.py +++ b/mmengine/tune/tuner.py @@ -10,6 +10,8 @@ from ._report_hook import ReportingHook from .searchers import HYPER_SEARCHERS, Searcher +ConfigType = Union[Dict, Config, ConfigDict] + class Tuner: """A helper for hyperparameter tuning. @@ -32,7 +34,7 @@ class Tuner: the peak performance are returned. Args: - runner_cfg (Union[Dict, Config, ConfigDict]): + runner_cfg (ConfigType): Configuration for the runner. hparam_spec (Dict[str, Dict]): The hyperparameter search space definition. @@ -48,8 +50,8 @@ class Tuner: Default is None, indicating no epoch limit. report_op (str): Operation mode for metric reporting. Default is 'latest'. - searcher_cfg (Dict): Configuration for the searcher. - Default is `dict(type='NevergradSearcher')`. + searcher_cfg (ConfigType): Configuration for the searcher. + Default is `dict(type='RandomSearcher')`. 
Note: The black-box optimization depends on external packages, @@ -80,7 +82,7 @@ class Tuner: rules_supported = ['greater', 'less'] def __init__(self, - runner_cfg: Union[Dict, Config, ConfigDict], + runner_cfg: ConfigType, hparam_spec: Dict[str, Dict], monitor: str, rule: str, @@ -88,7 +90,7 @@ def __init__(self, tuning_iter: Optional[int] = None, tuning_epoch: Optional[int] = None, report_op: str = 'latest', - searcher_cfg: Dict = dict(type='RandomSearcher')): + searcher_cfg: ConfigType = dict(type='RandomSearcher')): self._runner_cfg = runner_cfg.copy() self._hparam_spec = hparam_spec @@ -174,37 +176,41 @@ def searcher(self) -> Searcher: return self._searcher @staticmethod - def inject_config(cfg: Dict, key: str, value: Any): + def inject_config(cfg: ConfigType, key: str, value: Any): """Inject a value into a config. The name can be multi-level, like 'optimizer.lr'. Args: - cfg (Dict): The config to be injected. + cfg (ConfigType): The config to be injected. key (str): The key of the value to be injected. value (Any): The value to be injected. """ keys = key.split('.') for k in keys[:-1]: if isinstance(cfg, list): - k = int(k) - if k >= len(cfg) or k < 0: - raise KeyError(f'Index {k} is out of range in {cfg}') - elif k not in cfg: - raise KeyError(f"Key '{k}' not found in {cfg}") - cfg = cfg[k] + idx = int(k) + if idx >= len(cfg) or idx < 0: + raise KeyError(f'Index {idx} is out of range in {cfg}') + cfg = cfg[idx] + else: + if k not in cfg: + raise KeyError(f"Key '{k}' not found in {cfg}") + cfg = cfg[k] + if isinstance(cfg, list): - k = int(keys[-1]) - if k >= len(cfg) or k < 0: - raise KeyError(f'Index {k} is out of range in {cfg}') - cfg[k] = value - elif keys[-1] not in cfg: - raise KeyError(f"Key '{keys[-1]}' not found in {cfg}") + idx = int(keys[-1]) + if idx >= len(cfg) or idx < 0: + raise KeyError(f'Index {idx} is out of range in {cfg}') + cfg[idx] = value else: - cfg[keys[-1]] = value + if keys[-1] not in cfg: + raise KeyError(f"Key '{keys[-1]}' not found in {cfg}") + else: + cfg[keys[-1]] = value return - def _build_searcher(self, searcher_cfg: Dict) -> Searcher: + def _build_searcher(self, searcher_cfg: ConfigType) -> Searcher: """Build searcher from searcher_cfg. An Example of ``searcher_cfg``:: @@ -215,7 +221,7 @@ def _build_searcher(self, searcher_cfg: Dict) -> Searcher: ) Args: - searcher_cfg (Dict): The searcher config. + searcher_cfg (ConfigType): The searcher config. """ searcher_cfg = searcher_cfg.copy() self._logger.info(f'Building searcher of type: {searcher_cfg["type"]}') @@ -272,12 +278,13 @@ def _run_trial(self) -> Tuple[Dict, float, Optional[Exception]]: self._searcher.record(hparam, score) return hparam, score, error - def tune(self) -> Dict[str, Union[dict, float]]: + def tune(self) -> Dict[str, Union[Dict[str, Any], float]]: """Launch tuning. Returns: - Dict[str, Union[dict, float]]: - The best hyperparameters and the corresponding score. + Dict[str, Union[Dict[str, Any], float]]: + A dictionary containing the best hyperparameters under the key + 'hparam' and the corresponding score under the key 'score'. 
""" self._logger.info(f'Starting tuning for {self._num_trials} trials...') for trail_idx in range(self._num_trials): From 4d71002539c2f4d04a6a79ca7dce40a7198d6889 Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Thu, 31 Aug 2023 13:22:32 +0900 Subject: [PATCH 31/41] Fix minor --- examples/tune/README.md | 2 +- examples/tune/find_lr.py | 6 +++--- mmengine/tune/tuner.py | 3 ++- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/tune/README.md b/examples/tune/README.md index 5df70f725a..d94aa479b3 100644 --- a/examples/tune/README.md +++ b/examples/tune/README.md @@ -19,5 +19,5 @@ python examples/tune/find_lr.py Distributed data parallel tuning ```bash -torchrun -nnodes 1 -nproc_per_node 8 examples/tune/find_lr.py --launcher pytorch +torchrun --nnodes 1 --nproc_per_node 8 examples/tune/find_lr.py --launcher pytorch ``` diff --git a/examples/tune/find_lr.py b/examples/tune/find_lr.py index 520a672348..3dce3b61e8 100644 --- a/examples/tune/find_lr.py +++ b/examples/tune/find_lr.py @@ -40,7 +40,7 @@ def forward(self, inputs, data_samples=None, mode='tensor'): class ToyDataset(Dataset): METAINFO = dict() # type: ignore - num_samples = 1000 + num_samples = 100 data = torch.rand(num_samples, 2) * 10 label = 3 * data[:, 0] + 4 * data[:, 1] + torch.randn(num_samples) * 0.1 @@ -129,12 +129,12 @@ def main(): 'optim_wrapper.optimizer.lr': { 'type': 'continuous', 'lower': 1e-5, - 'upper': 1e-1 + 'upper': 1e-3 } }, monitor='loss', rule='less', - num_trials=32, + num_trials=16, tuning_epoch=2, searcher_cfg=dict(type='NevergradSearcher'), ) diff --git a/mmengine/tune/tuner.py b/mmengine/tune/tuner.py index 9b2fa558b5..ad565d2784 100644 --- a/mmengine/tune/tuner.py +++ b/mmengine/tune/tuner.py @@ -6,7 +6,6 @@ from mmengine.dist import (broadcast_object_list, init_dist, is_distributed, is_main_process) from mmengine.logging import MMLogger -from mmengine.runner import Runner from ._report_hook import ReportingHook from .searchers import HYPER_SEARCHERS, Searcher @@ -241,6 +240,8 @@ def _run_trial(self) -> Tuple[Dict, float, Optional[Exception]]: # 2. Once retrieved, the hyperparameters are broadcasted to all other # processes ensuring every process has the same set of # hyperparameters for this trial. + from mmengine.runner import Runner + if is_main_process(): hparams_to_broadcast = [self._searcher.suggest()] else: From cfc3f6adea849971fed739cf6caaac888d69f4b8 Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Fri, 1 Sep 2023 11:37:20 +0900 Subject: [PATCH 32/41] Refactor report op --- mmengine/tune/_report_hook.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/mmengine/tune/_report_hook.py b/mmengine/tune/_report_hook.py index a58d9a2431..be3d74e0b3 100644 --- a/mmengine/tune/_report_hook.py +++ b/mmengine/tune/_report_hook.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import Dict, List, Optional, Sequence, Union +from typing import Callable, Dict, List, Optional, Sequence, Union from mmengine.hooks import Hook @@ -19,12 +19,17 @@ class ReportingHook(Hook): tuning_epoch (int, optional): The epoch limit to stop tuning. Defaults to None. report_op (str, optional): The operation to report the score. - Options are 'latest', 'mean'. Defaults to 'latest'. + Options are 'latest', 'mean', 'min', 'max'. Defaults to 'latest'. max_scoreboard_len (int, optional): The maximum length of the scoreboard. 
""" - report_op_supported = ['latest', 'mean'] + report_op_supported: Dict[str, Callable[[List[float]], float]] = { + 'latest': lambda x: x[-1], + 'mean': lambda x: sum(x) / len(x), + 'max': max, + 'min': min + } def __init__(self, monitor: str, @@ -124,12 +129,22 @@ def report_score(self) -> Optional[float]: """ if not self.scoreboard: score = None - elif self.report_op == 'latest': - score = self.scoreboard[-1] - else: - score = sum(self.scoreboard) / len(self.scoreboard) + operation = self.report_op_supported[self.report_op] + score = operation(self.scoreboard) return score + @classmethod + def register_report_op(cls, name: str, func: Callable[[List[float]], + float]): + """Register a new report operation. + + Args: + name (str): The name of the report operation. + func (Callable[[List[float]], float]): The function to aggregate + the scores. + """ + cls.report_op_supported[name] = func + def clear(self): """Clear the scoreboard.""" self.scoreboard.clear() From 3488ae1606fe5f6b07ce1a918f8b8bd7470040a0 Mon Sep 17 00:00:00 2001 From: Younghwan Na <100389977+yhna940@users.noreply.github.com> Date: Fri, 1 Sep 2023 11:44:09 +0900 Subject: [PATCH 33/41] Fix report bug --- mmengine/tune/_report_hook.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mmengine/tune/_report_hook.py b/mmengine/tune/_report_hook.py index be3d74e0b3..1ec52a3d05 100644 --- a/mmengine/tune/_report_hook.py +++ b/mmengine/tune/_report_hook.py @@ -129,8 +129,9 @@ def report_score(self) -> Optional[float]: """ if not self.scoreboard: score = None - operation = self.report_op_supported[self.report_op] - score = operation(self.scoreboard) + else: + operation = self.report_op_supported[self.report_op] + score = operation(self.scoreboard) return score @classmethod From c0d8e453b8bca3e858aa26f921423399992de0c4 Mon Sep 17 00:00:00 2001 From: YH Date: Sat, 9 Sep 2023 11:12:08 +0900 Subject: [PATCH 34/41] Update mmengine/tune/_report_hook.py Co-authored-by: Mashiro <57566630+HAOCHENYE@users.noreply.github.com> --- mmengine/tune/_report_hook.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mmengine/tune/_report_hook.py b/mmengine/tune/_report_hook.py index 1ec52a3d05..21d7805343 100644 --- a/mmengine/tune/_report_hook.py +++ b/mmengine/tune/_report_hook.py @@ -37,8 +37,8 @@ def __init__(self, tuning_epoch: Optional[int] = None, report_op: str = 'latest', max_scoreboard_len: int = 1024): - assert report_op in self.report_op_supported, \ - f'report_op {report_op} is not supported' + if report_op not in self.report_op_supported: + raise ValueError(f'report_op {report_op} is not supported') if tuning_iter is not None and tuning_epoch is not None: raise ValueError( 'tuning_iter and tuning_epoch cannot be set at the same time') From 27bb08bd9bdcb304ba68118a6efb452516f72a8e Mon Sep 17 00:00:00 2001 From: yhna940 Date: Sat, 9 Sep 2023 12:10:20 +0900 Subject: [PATCH 35/41] Fix comment for report hook --- mmengine/tune/_report_hook.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/mmengine/tune/_report_hook.py b/mmengine/tune/_report_hook.py index 21d7805343..0ce898abc7 100644 --- a/mmengine/tune/_report_hook.py +++ b/mmengine/tune/_report_hook.py @@ -8,9 +8,11 @@ class ReportingHook(Hook): - """Auxiliary hook to report the score to tuner. - - If tuning limit is specified, this hook will mark the loop to stop. + """Auxiliary hook to report the score to tuner. 
The ReportingHook maintains + a "scoreboard" which keeps track of the monitored metrics' scores during + the training process. The scores are aggregated based on the method + specified by the 'report_op' parameter. If tuning limit is specified, this + hook will mark the loop to stop. Args: monitor (str): The monitored metric key to report. @@ -18,10 +20,17 @@ class ReportingHook(Hook): Defaults to None. tuning_epoch (int, optional): The epoch limit to stop tuning. Defaults to None. - report_op (str, optional): The operation to report the score. - Options are 'latest', 'mean', 'min', 'max'. Defaults to 'latest'. + report_op (str, optional): The method for aggregating scores + in the scoreboard. Accepts the following options: + - 'latest': Returns the most recent score in the scoreboard. + - 'mean': Returns the mean of all scores in the scoreboard. + - 'max': Returns the highest score in the scoreboard. + - 'min': Returns the lowest score in the scoreboard. + Defaults to 'latest'. max_scoreboard_len (int, optional): - The maximum length of the scoreboard. + Specifies the maximum number of scores that can be retained + on the scoreboard, helping to manage memory and computational + overhead. Defaults to 1024. """ report_op_supported: Dict[str, Callable[[List[float]], float]] = { @@ -38,7 +47,7 @@ def __init__(self, report_op: str = 'latest', max_scoreboard_len: int = 1024): if report_op not in self.report_op_supported: - raise ValueError(f'report_op {report_op} is not supported') + raise ValueError(f'report_op {report_op} is not supported') if tuning_iter is not None and tuning_epoch is not None: raise ValueError( 'tuning_iter and tuning_epoch cannot be set at the same time') From afb5af20c1b529199cf36625ea4d5e24164621a2 Mon Sep 17 00:00:00 2001 From: yhna940 Date: Sat, 9 Sep 2023 17:14:59 +0900 Subject: [PATCH 36/41] Specify phase in monitor --- examples/tune/find_lr.py | 2 +- mmengine/tune/_report_hook.py | 35 ++++++++++++++++++++--------- tests/test_tune/test_report_hook.py | 26 ++++++++++----------- 3 files changed, 39 insertions(+), 24 deletions(-) diff --git a/examples/tune/find_lr.py b/examples/tune/find_lr.py index 3dce3b61e8..3c4f3fa92d 100644 --- a/examples/tune/find_lr.py +++ b/examples/tune/find_lr.py @@ -132,7 +132,7 @@ def main(): 'upper': 1e-3 } }, - monitor='loss', + monitor='train/loss', rule='less', num_trials=16, tuning_epoch=2, diff --git a/mmengine/tune/_report_hook.py b/mmengine/tune/_report_hook.py index 0ce898abc7..e0fdde168c 100644 --- a/mmengine/tune/_report_hook.py +++ b/mmengine/tune/_report_hook.py @@ -15,7 +15,14 @@ class ReportingHook(Hook): hook will mark the loop to stop. Args: - monitor (str): The monitored metric key to report. + monitor (str): The monitored metric key prefixed with either 'train/' + or 'val/' to indicate the specific phase where the metric should + be monitored. For instance, 'train/loss' will monitor the 'loss' + metric during the training phase, and 'val/accuracy' will monitor + the 'accuracy' metric during the validation phase. + The actual metric key (i.e., the part following the prefix) + should correspond to a key in the logs produced during + training or validation. tuning_iter (int, optional): The iteration limit to stop tuning. Defaults to None. tuning_epoch (int, optional): The epoch limit to stop tuning. 
@@ -46,16 +53,19 @@ def __init__(self, tuning_epoch: Optional[int] = None, report_op: str = 'latest', max_scoreboard_len: int = 1024): + if not monitor.startswith('train/') and not monitor.startswith('val/'): + raise ValueError("The 'monitor' parameter should start " + "with 'train/' or 'val/' to specify the phase.") if report_op not in self.report_op_supported: raise ValueError(f'report_op {report_op} is not supported') if tuning_iter is not None and tuning_epoch is not None: raise ValueError( 'tuning_iter and tuning_epoch cannot be set at the same time') + self.monitor_prefix, self.monitor_metric = monitor.split('/', 1) self.report_op = report_op self.tuning_iter = tuning_iter self.tuning_epoch = tuning_epoch - self.monitor = monitor self.max_scoreboard_len = max_scoreboard_len self.scoreboard: List[float] = [] @@ -94,12 +104,15 @@ def after_train_iter(self, data_batch (dict or tuple or list, optional): Data from dataloader. outputs (dict, optional): Outputs from model. """ - + if self.monitor_prefix != 'train': + return tag, _ = runner.log_processor.get_log_after_iter( runner, batch_idx, 'train') - score = tag.get(self.monitor, None) - if score is not None: - self._append_score(score) + score = tag.get(self.monitor_metric) + if not isinstance(score, (int, float)): + raise ValueError(f"The monitored value '{self.monitor_metric}' " + 'should be a number.') + self._append_score(score) if self._should_stop(runner): runner.train_loop.stop_training = True @@ -124,11 +137,13 @@ def after_val_epoch(self, metrics on validation dataset. The keys are the names of the metrics, and the values are corresponding results. """ - if metrics is None: + if self.monitor_prefix != 'val' or metrics is None: return - score = metrics.get(self.monitor, None) - if score is not None: - self._append_score(score) + score = metrics.get(self.monitor_metric) + if not isinstance(score, (int, float)): + raise ValueError(f"The monitored value '{self.monitor_metric}' " + 'should be a number.') + self._append_score(score) def report_score(self) -> Optional[float]: """Aggregate the scores in the scoreboard. 
diff --git a/tests/test_tune/test_report_hook.py b/tests/test_tune/test_report_hook.py index 4e271bdfd4..e210b5be8d 100644 --- a/tests/test_tune/test_report_hook.py +++ b/tests/test_tune/test_report_hook.py @@ -8,7 +8,7 @@ class TestReportingHook(RunnerTestCase): def test_append_score(self): - hook = ReportingHook(monitor='acc', max_scoreboard_len=3) + hook = ReportingHook(monitor='train/acc', max_scoreboard_len=3) # Adding scores to the scoreboard hook._append_score(0.5) @@ -24,32 +24,32 @@ def test_should_stop(self): runner = MagicMock(iter=3, epoch=1) # Test with tuning_iter - hook1 = ReportingHook(monitor='acc', tuning_iter=5) + hook1 = ReportingHook(monitor='train/cc', tuning_iter=5) self.assertFalse(hook1._should_stop(runner)) runner.iter = 4 self.assertTrue(hook1._should_stop(runner)) # Test with tuning_epoch - hook2 = ReportingHook(monitor='acc', tuning_epoch=3) + hook2 = ReportingHook(monitor='train/acc', tuning_epoch=3) self.assertFalse(hook2._should_stop(runner)) runner.epoch = 2 self.assertTrue(hook2._should_stop(runner)) def test_report_score(self): - hook1 = ReportingHook(monitor='acc', report_op='latest') + hook1 = ReportingHook(monitor='train/acc', report_op='latest') hook1.scoreboard = [0.5, 0.6, 0.7] self.assertEqual(hook1.report_score(), 0.7) - hook2 = ReportingHook(monitor='acc', report_op='mean') + hook2 = ReportingHook(monitor='train/acc', report_op='mean') hook2.scoreboard = [0.5, 0.6, 0.7] self.assertEqual(hook2.report_score(), 0.6) # Test with an empty scoreboard - hook3 = ReportingHook(monitor='acc', report_op='mean') + hook3 = ReportingHook(monitor='train/acc', report_op='mean') self.assertIsNone(hook3.report_score()) def test_clear(self): - hook = ReportingHook(monitor='acc') + hook = ReportingHook(monitor='train/acc') hook.scoreboard = [0.5, 0.6, 0.7] hook.clear() self.assertEqual(hook.scoreboard, []) @@ -62,18 +62,18 @@ def test_after_train_iter(self): }, 'log_str')) # Check if the monitored score gets appended correctly - hook = ReportingHook(monitor='acc') + hook = ReportingHook(monitor='train/acc') hook.after_train_iter(runner, 0) self.assertEqual(hook.scoreboard[-1], 0.9) # Check if no score is appended for a non-existent metric - hook2 = ReportingHook(monitor='non_existent') + hook2 = ReportingHook(monitor='train/non_existent') hook2.after_train_iter(runner, 0) self.assertEqual(len(hook2.scoreboard), 0) # Check that training stops if tuning_iter is reached runner.iter = 5 - hook3 = ReportingHook(monitor='acc', tuning_iter=5) + hook3 = ReportingHook(monitor='train/acc', tuning_iter=5) hook3.after_train_iter(runner, 0) self.assertTrue(runner.train_loop.stop_training) @@ -82,19 +82,19 @@ def test_after_val_epoch(self): # Check if the monitored score gets appended correctly from metrics metrics = {'acc': 0.9} - hook = ReportingHook(monitor='acc') + hook = ReportingHook(monitor='val/acc') hook.after_val_epoch(runner, metrics=metrics) self.assertEqual(hook.scoreboard[-1], 0.9) # Check that no score is appended if the metric is missing from metrics metrics = {'loss': 0.1} - hook2 = ReportingHook(monitor='acc') + hook2 = ReportingHook(monitor='val/acc') hook2.after_val_epoch(runner, metrics=metrics) self.assertEqual(len(hook2.scoreboard), 0) def test_with_runner(self): runner = self.build_runner(self.epoch_based_cfg) - acc_hook = ReportingHook(monitor='acc', tuning_epoch=1) + acc_hook = ReportingHook(monitor='val/acc', tuning_epoch=1) runner.register_hook(acc_hook, priority='VERY_LOW') runner.train() self.assertEqual(runner.epoch, 1) From 
0cdf020538c9cac34c78cd345e9727f4dcf04dbb Mon Sep 17 00:00:00 2001 From: yhna940 Date: Sat, 9 Sep 2023 17:24:01 +0900 Subject: [PATCH 37/41] Fix comment on tuner for monitor --- mmengine/tune/tuner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mmengine/tune/tuner.py b/mmengine/tune/tuner.py index ad565d2784..778f2c5d97 100644 --- a/mmengine/tune/tuner.py +++ b/mmengine/tune/tuner.py @@ -70,7 +70,7 @@ class Tuner: >>> tuner = Tuner( >>> runner_cfg, >>> hparam_spec=hparam_spec, - >>> monitor='loss', + >>> monitor='train/loss', >>> rule='less', >>> num_trials=32, >>> ) From 48e2abc66d95140c71fbd2f211f21d541f678112 Mon Sep 17 00:00:00 2001 From: yhna940 Date: Sat, 9 Sep 2023 21:26:22 +0900 Subject: [PATCH 38/41] Apply reduce operation to score in trial --- mmengine/tune/tuner.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/mmengine/tune/tuner.py b/mmengine/tune/tuner.py index 778f2c5d97..9bd06c8658 100644 --- a/mmengine/tune/tuner.py +++ b/mmengine/tune/tuner.py @@ -2,9 +2,11 @@ import math from typing import Any, Dict, List, Optional, Tuple, Union +import torch + from mmengine.config import Config, ConfigDict -from mmengine.dist import (broadcast_object_list, init_dist, is_distributed, - is_main_process) +from mmengine.dist import (all_reduce, broadcast_object_list, init_dist, + is_distributed, is_main_process) from mmengine.logging import MMLogger from ._report_hook import ReportingHook from .searchers import HYPER_SEARCHERS, Searcher @@ -261,20 +263,22 @@ def _run_trial(self) -> Tuple[Dict, float, Optional[Exception]]: # Run a trial. # If an exception occurs during the trial, the score is set # to default_score. - error = None + score: float + error: Optional[Exception] = None try: runner.train() score = report_hook.report_score() if score is None or math.isnan(score) or math.isinf(score): score = default_score - scores_to_broadcast = [score] except Exception as e: - scores_to_broadcast = [default_score] + score = default_score error = e - # Store the score between processes. 
- broadcast_object_list(scores_to_broadcast, src=0) - score = scores_to_broadcast[0] + # Synchronize and average scores across all processes + score_tensor = torch.tensor(score) + all_reduce(score_tensor, op='mean') + score = score_tensor.item() + if is_main_process(): self._searcher.record(hparam, score) return hparam, score, error From eb6b38726961fca9c20ca840bd0d55a5465016bc Mon Sep 17 00:00:00 2001 From: yhna940 Date: Sat, 9 Sep 2023 21:37:57 +0900 Subject: [PATCH 39/41] Fix comment on tuner --- mmengine/tune/tuner.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/mmengine/tune/tuner.py b/mmengine/tune/tuner.py index 9bd06c8658..fa1581363f 100644 --- a/mmengine/tune/tuner.py +++ b/mmengine/tune/tuner.py @@ -191,7 +191,7 @@ def inject_config(cfg: ConfigType, key: str, value: Any): for k in keys[:-1]: if isinstance(cfg, list): idx = int(k) - if idx >= len(cfg) or idx < 0: + if idx >= len(cfg): raise KeyError(f'Index {idx} is out of range in {cfg}') cfg = cfg[idx] else: @@ -201,7 +201,7 @@ def inject_config(cfg: ConfigType, key: str, value: Any): if isinstance(cfg, list): idx = int(keys[-1]) - if idx >= len(cfg) or idx < 0: + if idx >= len(cfg): raise KeyError(f'Index {idx} is out of range in {cfg}') cfg[idx] = value else: @@ -235,15 +235,17 @@ def _build_searcher(self, searcher_cfg: ConfigType) -> Searcher: def _run_trial(self) -> Tuple[Dict, float, Optional[Exception]]: """Retrieve hyperparameters from searcher and run a trial.""" - - # Retrieve hyperparameters for the trial: - # 1. Only the main process executes the searcher to avoid any conflicts - # and ensure integrity. - # 2. Once retrieved, the hyperparameters are broadcasted to all other - # processes ensuring every process has the same set of - # hyperparameters for this trial. from mmengine.runner import Runner + # Retrieve hyperparameters for the trial: + # Only the main process invokes the searcher's suggest method + # to mitigate the potential randomness that might occur in methods + # like Bayesian optimization or evolutionary algorithms. + # These methods might introduce randomness in the selection of + # hyperparameters, potentially leading to inconsistent suggestions + # across different processes. By centralizing the suggestion + # to the main process, we ensure a consistent set of hyperparameters + # is used for each trial. if is_main_process(): hparams_to_broadcast = [self._searcher.suggest()] else: From 16d5186784303d4aab14c6ece4783b7451ff50d1 Mon Sep 17 00:00:00 2001 From: yhna940 Date: Sat, 9 Sep 2023 22:30:08 +0900 Subject: [PATCH 40/41] Enhance safe trial during tune --- mmengine/tune/tuner.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/mmengine/tune/tuner.py b/mmengine/tune/tuner.py index fa1581363f..bbababd387 100644 --- a/mmengine/tune/tuner.py +++ b/mmengine/tune/tuner.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import math +import tempfile from typing import Any, Dict, List, Optional, Tuple, Union import torch @@ -7,7 +8,9 @@ from mmengine.config import Config, ConfigDict from mmengine.dist import (all_reduce, broadcast_object_list, init_dist, is_distributed, is_main_process) -from mmengine.logging import MMLogger +from mmengine.logging import MessageHub, MMLogger +from mmengine.registry import DefaultScope +from mmengine.visualization import Visualizer from ._report_hook import ReportingHook from .searchers import HYPER_SEARCHERS, Searcher @@ -233,6 +236,21 @@ def _build_searcher(self, searcher_cfg: ConfigType) -> Searcher: num_trials=self._num_trials)) return HYPER_SEARCHERS.build(searcher_cfg) + def _tear_down_runner(self, runner): + """Clear the global states of a runner.""" + + # Set the runner's cls attributes to None + runner.cfg = None + runner._train_loop = None + runner._val_loop = None + runner._test_loop = None + + # Remove the instance managed by the ManagerMixin + MMLogger._instance_dict.pop(runner.logger.instance_name) + MessageHub._instance_dict.pop(runner.message_hub.instance_name) + Visualizer._instance_dict.pop(runner.visualizer.instance_name) + DefaultScope._instance_dict.pop(runner.default_scope.instance_name) + def _run_trial(self) -> Tuple[Dict, float, Optional[Exception]]: """Retrieve hyperparameters from searcher and run a trial.""" from mmengine.runner import Runner @@ -269,12 +287,14 @@ def _run_trial(self) -> Tuple[Dict, float, Optional[Exception]]: error: Optional[Exception] = None try: runner.train() - score = report_hook.report_score() + score = report_hook.report_score() # type: ignore if score is None or math.isnan(score) or math.isinf(score): score = default_score except Exception as e: score = default_score error = e + finally: + self._tear_down_runner(runner) # Synchronize and average scores across all processes score_tensor = torch.tensor(score) @@ -293,6 +313,8 @@ def tune(self) -> Dict[str, Union[Dict[str, Any], float]]: A dictionary containing the best hyperparameters under the key 'hparam' and the corresponding score under the key 'score'. 
""" + temp_dir = tempfile.TemporaryDirectory() + self._runner_cfg['work_dir'] = temp_dir.name self._logger.info(f'Starting tuning for {self._num_trials} trials...') for trail_idx in range(self._num_trials): hparam, score, error = self._run_trial() @@ -314,6 +336,7 @@ def tune(self) -> Dict[str, Union[Dict[str, Any], float]]: self._logger.info(f'Best hyperparameters obtained: {best_hparam}') self._logger.info(f'Best score obtained: {best_score}') self._logger.info('Tuning completed.') + temp_dir.cleanup() return dict(hparam=best_hparam, score=best_score) def clear(self): From 8f5ee32fc7f07b2cf031b65121d52f2dfc231745 Mon Sep 17 00:00:00 2001 From: yhna940 Date: Sat, 9 Sep 2023 23:02:47 +0900 Subject: [PATCH 41/41] Fix unittest bug --- mmengine/tune/tuner.py | 2 +- tests/test_tune/test_report_hook.py | 12 ++++++------ tests/test_tune/test_tuner.py | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/mmengine/tune/tuner.py b/mmengine/tune/tuner.py index bbababd387..177aab265c 100644 --- a/mmengine/tune/tuner.py +++ b/mmengine/tune/tuner.py @@ -297,7 +297,7 @@ def _run_trial(self) -> Tuple[Dict, float, Optional[Exception]]: self._tear_down_runner(runner) # Synchronize and average scores across all processes - score_tensor = torch.tensor(score) + score_tensor = torch.tensor(score, dtype=torch.float64) all_reduce(score_tensor, op='mean') score = score_tensor.item() diff --git a/tests/test_tune/test_report_hook.py b/tests/test_tune/test_report_hook.py index e210b5be8d..c9fefdef0e 100644 --- a/tests/test_tune/test_report_hook.py +++ b/tests/test_tune/test_report_hook.py @@ -66,10 +66,10 @@ def test_after_train_iter(self): hook.after_train_iter(runner, 0) self.assertEqual(hook.scoreboard[-1], 0.9) - # Check if no score is appended for a non-existent metric + # Check the error raised when the monitored score is missing from logs hook2 = ReportingHook(monitor='train/non_existent') - hook2.after_train_iter(runner, 0) - self.assertEqual(len(hook2.scoreboard), 0) + with self.assertRaises(ValueError): + hook2.after_train_iter(runner, 0) # Check that training stops if tuning_iter is reached runner.iter = 5 @@ -86,11 +86,11 @@ def test_after_val_epoch(self): hook.after_val_epoch(runner, metrics=metrics) self.assertEqual(hook.scoreboard[-1], 0.9) - # Check that no score is appended if the metric is missing from metrics + # Check the error raised when the monitored score is missing from logs metrics = {'loss': 0.1} hook2 = ReportingHook(monitor='val/acc') - hook2.after_val_epoch(runner, metrics=metrics) - self.assertEqual(len(hook2.scoreboard), 0) + with self.assertRaises(ValueError): + hook2.after_val_epoch(runner, metrics=metrics) def test_with_runner(self): runner = self.build_runner(self.epoch_based_cfg) diff --git a/tests/test_tune/test_tuner.py b/tests/test_tune/test_tuner.py index 96cc155f40..1af905976e 100644 --- a/tests/test_tune/test_tuner.py +++ b/tests/test_tune/test_tuner.py @@ -98,7 +98,7 @@ def test_successful_run(self, mock_report_score, mock_train): tuner = Tuner( runner_cfg=self.epoch_based_cfg, hparam_spec=self.hparam_spec, - monitor='loss', + monitor='train/loss', rule='less', num_trials=2, searcher_cfg=dict(type='ToySearcher')) @@ -126,7 +126,7 @@ def test_run_with_exception(self, mock_report_score, mock_train): tuner = Tuner( runner_cfg=self.epoch_based_cfg, hparam_spec=self.hparam_spec, - monitor='loss', + monitor='train/loss', rule='less', num_trials=2, searcher_cfg=dict(type='ToySearcher')) @@ -224,7 +224,7 @@ def test_with_runner(self): tuner = Tuner( 
runner_cfg=self.epoch_based_cfg, hparam_spec=self.hparam_spec, - monitor='acc', + monitor='val/acc', rule='greater', num_trials=10, searcher_cfg=dict(type='ToySearcher'))
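Taken together, the last patches leave the tuner with a phase-prefixed monitor, per-trial runner teardown in a finally block, trials running in a temporary work directory, and trial scores averaged across processes with all_reduce(op='mean') instead of being broadcast from rank 0. The sketch below is a hedged usage example pieced together from the docstring and tests in this series; `runner_cfg` and the dotted hyperparameter key are placeholders, not values taken from the repository.

    from mmengine.tune import Tuner  # module path as of these patches

    # `runner_cfg` is assumed to be an ordinary MMEngine Runner config dict.
    hparam_spec = {
        'optim_wrapper.optimizer.lr': {  # dotted key resolved by inject_config
            'type': 'continuous',
            'lower': 1e-5,
            'upper': 1e-3,
        }
    }

    tuner = Tuner(
        runner_cfg,
        hparam_spec=hparam_spec,
        monitor='train/loss',  # phase prefix is now required
        rule='less',
        num_trials=32,
    )
    result = tuner.tune()  # -> {'hparam': {...}, 'score': ...}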