From 2e5eed958defe280de6b21c6f7310a375f848386 Mon Sep 17 00:00:00 2001
From: Somshubra Majumdar
Date: Fri, 11 Jun 2021 19:28:32 -0700
Subject: [PATCH] Update ranges for omegaconf and hydra (#2336)

* Update ranges

Signed-off-by: smajumdar

* Updates for Hydra and OmegaConf updates

Signed-off-by: smajumdar

* Style fixes

Signed-off-by: smajumdar

* Correct tests and revert patch for model utils

Signed-off-by: smajumdar

* Correct docstring

Signed-off-by: smajumdar

* Revert unnecessary change

Signed-off-by: smajumdar

* Revert unnecessary change

Signed-off-by: smajumdar

* Guard scheduler for None

Signed-off-by: smajumdar

* default to 0.0 if bpe_dropout is None

Signed-off-by: ericharper

* Correctly log class that was restored

Signed-off-by: smajumdar

* Root patch *bpe_dropout

Signed-off-by: smajumdar

Co-authored-by: ericharper
Signed-off-by: Micha Livne
---
 .../nlp/data/machine_translation/preproc_mt_data.py |  9 +++++++++
 .../models/machine_translation/mt_enc_dec_model.py  |  8 ++++++--
 nemo/core/classes/modelPT.py                        |  2 +-
 nemo/core/config/hydra_runner.py                    |  6 ++++--
 nemo/core/config/schedulers.py                      |  1 +
 nemo/core/optim/lr_scheduler.py                     |  8 ++++++--
 nemo/core/optim/optimizers.py                       |  6 +++---
 nemo/utils/model_utils.py                           | 13 +++++++++++--
 requirements/requirements.txt                       |  4 ++--
 tests/core/test_optimizers_schedulers.py            |  4 ++--
 10 files changed, 45 insertions(+), 16 deletions(-)

diff --git a/nemo/collections/nlp/data/machine_translation/preproc_mt_data.py b/nemo/collections/nlp/data/machine_translation/preproc_mt_data.py
index 1cabbfd28085..2ce1b0bc51ce 100644
--- a/nemo/collections/nlp/data/machine_translation/preproc_mt_data.py
+++ b/nemo/collections/nlp/data/machine_translation/preproc_mt_data.py
@@ -299,6 +299,12 @@ def get_enc_dec_tokenizers(
         # if encoder_tokenizer_name != 'yttm' or decoder_tokenizer_name != 'yttm':
         #     raise NotImplementedError(f"Currently we only support yttm tokenizer.")
 
+        if encoder_bpe_dropout is None:
+            encoder_bpe_dropout = 0.0
+
+        if decoder_bpe_dropout is None:
+            decoder_bpe_dropout = 0.0
+
         encoder_tokenizer = get_nmt_tokenizer(
             library=encoder_tokenizer_name,
             model_name=encoder_model_name,
@@ -321,6 +327,9 @@ def get_monolingual_tokenizer(
         if tokenizer_name != 'yttm':
             raise NotImplementedError(f"Currently we only support yttm tokenizer.")
 
+        if bpe_dropout is None:
+            bpe_dropout = 0.0
+
         tokenizer = get_tokenizer(
             tokenizer_name=tokenizer_name, tokenizer_model=tokenizer_model, bpe_dropout=bpe_dropout,
         )
diff --git a/nemo/collections/nlp/models/machine_translation/mt_enc_dec_model.py b/nemo/collections/nlp/models/machine_translation/mt_enc_dec_model.py
index 08f48905666e..11552f9fe8b3 100644
--- a/nemo/collections/nlp/models/machine_translation/mt_enc_dec_model.py
+++ b/nemo/collections/nlp/models/machine_translation/mt_enc_dec_model.py
@@ -76,11 +76,15 @@ def __init__(self, cfg: MTEncDecModelConfig, trainer: Trainer = None):
         self.setup_enc_dec_tokenizers(
             encoder_tokenizer_library=cfg.encoder_tokenizer.get('library', 'yttm'),
             encoder_tokenizer_model=cfg.encoder_tokenizer.get('tokenizer_model'),
-            encoder_bpe_dropout=cfg.encoder_tokenizer.get('bpe_dropout', 0.0),
+            encoder_bpe_dropout=cfg.encoder_tokenizer.get('bpe_dropout', 0.0)
+            if cfg.encoder_tokenizer.get('bpe_dropout', 0.0) is not None
+            else 0.0,
             encoder_model_name=cfg.encoder.get('model_name') if hasattr(cfg.encoder, 'model_name') else None,
             decoder_tokenizer_library=cfg.decoder_tokenizer.get('library', 'yttm'),
             decoder_tokenizer_model=cfg.decoder_tokenizer.tokenizer_model,
-            decoder_bpe_dropout=cfg.decoder_tokenizer.get('bpe_dropout', 0.0),
+            decoder_bpe_dropout=cfg.decoder_tokenizer.get('bpe_dropout', 0.0)
+            if cfg.decoder_tokenizer.get('bpe_dropout', 0.0) is not None
+            else 0.0,
             decoder_model_name=cfg.decoder.get('model_name') if hasattr(cfg.decoder, 'model_name') else None,
         )
 
diff --git a/nemo/core/classes/modelPT.py b/nemo/core/classes/modelPT.py
index 2c0ddf3df847..7b8162a03535 100644
--- a/nemo/core/classes/modelPT.py
+++ b/nemo/core/classes/modelPT.py
@@ -436,7 +436,7 @@ def _default_restore_from(
             instance = instance.to(map_location)
             instance.load_state_dict(torch.load(model_weights, map_location=map_location), strict=strict)
 
-            logging.info(f'Model {cls.__name__} was successfully restored from {restore_path}.')
+            logging.info(f'Model {instance.__class__.__name__} was successfully restored from {restore_path}.')
         finally:
             cls._set_model_restore_state(is_being_restored=False)
             os.chdir(cwd)
diff --git a/nemo/core/config/hydra_runner.py b/nemo/core/config/hydra_runner.py
index 1d2bb81a0679..dfeff788a2c7 100644
--- a/nemo/core/config/hydra_runner.py
+++ b/nemo/core/config/hydra_runner.py
@@ -24,7 +24,7 @@
 
 
 def hydra_runner(
-    config_path: Optional[str] = None, config_name: Optional[str] = None, schema: Optional[Any] = None
+    config_path: Optional[str] = ".", config_name: Optional[str] = None, schema: Optional[Any] = None
 ) -> Callable[[TaskFunction], Any]:
     """
     Decorator used for passing the Config paths to main function.
@@ -32,6 +32,9 @@ def hydra_runner(
 
     Args:
         config_path: Optional path that will be added to config search directory.
+            NOTE: The default value of `config_path` has changed between Hydra 1.0 and Hydra 1.1+.
+            Please refer to https://hydra.cc/docs/next/upgrades/1.0_to_1.1/changes_to_hydra_main_config_path/
+            for details.
         config_name: Pathname of the config file.
         schema: Structured config type representing the schema used for validation/providing default values.
     """
@@ -100,7 +103,6 @@ def parse_args(self, args=None, namespace=None):
                     task_function=task_function,
                     config_path=config_path,
                     config_name=config_name,
-                    strict=None,
                 )
 
         return wrapper
diff --git a/nemo/core/config/schedulers.py b/nemo/core/config/schedulers.py
index 629a1fb9fcef..846b785d6499 100644
--- a/nemo/core/config/schedulers.py
+++ b/nemo/core/config/schedulers.py
@@ -34,6 +34,7 @@ class WarmupSchedulerParams(SchedulerParams):
     It is not derived from Config as it is not a NeMo object (and in particular it doesn't need a name).
     """
 
+    max_steps: int = 0
     warmup_steps: Optional[float] = None
     warmup_ratio: Optional[float] = None
 
diff --git a/nemo/core/optim/lr_scheduler.py b/nemo/core/optim/lr_scheduler.py
index 5c3b94ac99b1..f775ff56bb2a 100644
--- a/nemo/core/optim/lr_scheduler.py
+++ b/nemo/core/optim/lr_scheduler.py
@@ -28,6 +28,7 @@
 
 from nemo.core.config import SchedulerParams, get_scheduler_config, register_scheduler_params
 from nemo.utils import logging
+from nemo.utils.model_utils import maybe_update_config_version
 
 
 class WarmupPolicy(_LRScheduler):
@@ -453,6 +454,9 @@ def prepare_lr_scheduler(
         A dictionary containing the LR Scheduler implementation if the config was successfully parsed
         along with other parameters required by Pytorch Lightning, otherwise None.
""" + if scheduler_config is not None: + scheduler_config = maybe_update_config_version(scheduler_config) + # Build nested dictionary for convenience out of structured objects if isinstance(scheduler_config, DictConfig): scheduler_config = OmegaConf.to_container(scheduler_config, resolve=True) @@ -493,7 +497,7 @@ def prepare_lr_scheduler( return None # Try instantiation of scheduler params from config class path - try: + if '_target_' in scheduler_args: scheduler_args_cfg = OmegaConf.create(scheduler_args) scheduler_conf = hydra.utils.instantiate(scheduler_args_cfg) scheduler_args = vars(scheduler_conf) @@ -504,7 +508,7 @@ def prepare_lr_scheduler( if 'Params' in scheduler_name: scheduler_name = scheduler_name.replace('Params', '') - except Exception: + else: # Class path instantiation failed; try resolving "name" component # Get name of the scheduler diff --git a/nemo/core/optim/optimizers.py b/nemo/core/optim/optimizers.py index 7e8030366377..127e44beed9d 100644 --- a/nemo/core/optim/optimizers.py +++ b/nemo/core/optim/optimizers.py @@ -25,6 +25,7 @@ from nemo.core.config import OptimizerParams, get_optimizer_config, register_optimizer_params from nemo.core.optim.novograd import Novograd from nemo.utils import logging +from nemo.utils.model_utils import maybe_update_config_version AVAILABLE_OPTIMIZERS = { 'sgd': optim.SGD, @@ -74,6 +75,7 @@ def parse_optimizer_args( return kwargs optimizer_kwargs = copy.deepcopy(optimizer_kwargs) + optimizer_kwargs = maybe_update_config_version(optimizer_kwargs) if isinstance(optimizer_kwargs, DictConfig): optimizer_kwargs = OmegaConf.to_container(optimizer_kwargs, resolve=True) @@ -81,13 +83,11 @@ def parse_optimizer_args( # If it is a dictionary, perform stepwise resolution if hasattr(optimizer_kwargs, 'keys'): # Attempt class path resolution - try: + if '_target_' in optimizer_kwargs: # captures (target, _target_) optimizer_kwargs_config = OmegaConf.create(optimizer_kwargs) optimizer_instance = hydra.utils.instantiate(optimizer_kwargs_config) # type: DictConfig optimizer_instance = vars(optimizer_instance) return optimizer_instance - except Exception: - pass # If class path was not provided, perhaps `name` is provided for resolution if 'name' in optimizer_kwargs: diff --git a/nemo/utils/model_utils.py b/nemo/utils/model_utils.py index 0639a3a699ac..c226cbcdb8b4 100644 --- a/nemo/utils/model_utils.py +++ b/nemo/utils/model_utils.py @@ -373,8 +373,8 @@ def _convert_config(cfg: OmegaConf): """ Recursive function convertint the configuration from old hydra format to the new one. """ # Get rid of cls -> _target_. - if 'cls' in cfg and "_target_" not in cfg: - cfg._target_ = cfg.pop("cls") + if 'cls' in cfg and '_target_' not in cfg: + cfg._target_ = cfg.pop('cls') # Get rid of params. if 'params' in cfg: @@ -398,6 +398,7 @@ def maybe_update_config_version(cfg: DictConfig): Changes include: - `cls` -> `_target_`. - `params` -> drop params and shift all arguments to parent. + - `target` -> `_target_` cannot be performed due to ModelPT injecting `target` inside class. Args: cfg: Any Hydra compatible DictConfig @@ -405,6 +406,14 @@ def maybe_update_config_version(cfg: DictConfig): Returns: An updated DictConfig that conforms to Hydra 1.x format. """ + if cfg is not None and not isinstance(cfg, DictConfig): + try: + temp_cfg = OmegaConf.create(cfg) + cfg = temp_cfg + except omegaconf_errors.OmegaConfBaseException: + # Cannot be cast to DictConfig, skip updating. + return cfg + # Make a copy of model config. 
     cfg = copy.deepcopy(cfg)
     OmegaConf.set_struct(cfg, False)
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index 462817a79a80..ac7d1a3dd472 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -7,8 +7,8 @@ wget
 wrapt
 ruamel.yaml
 scikit-learn
-omegaconf>=2.0.5,<2.1.0
-hydra-core>=1.0.4,<1.1.0
+omegaconf>=2.1.0
+hydra-core>=1.1.0
 transformers>=4.0.1
 sentencepiece<1.0.0
 webdataset>=0.1.48,<=0.1.62
diff --git a/tests/core/test_optimizers_schedulers.py b/tests/core/test_optimizers_schedulers.py
index 389de0ca40fb..84f7d14a7c61 100644
--- a/tests/core/test_optimizers_schedulers.py
+++ b/tests/core/test_optimizers_schedulers.py
@@ -171,7 +171,7 @@ def test_optim_config_parse_arg_by_name(self):
     @pytest.mark.unit
     def test_optim_config_parse_arg_by_target(self):
         basic_optim_config = {
-            'target': 'nemo.core.config.NovogradParams',
+            '_target_': 'nemo.core.config.NovogradParams',
             'params': {'weight_decay': 0.001, 'betas': [0.8, 0.5]},
         }
         basic_optim_config = omegaconf.OmegaConf.create(basic_optim_config)
@@ -256,7 +256,7 @@ def test_sched_config_parse_from_cls(self):
         opt = opt_cls(model.parameters(), lr=self.INITIAL_LR)
 
         basic_sched_config = {
-            'target': 'nemo.core.config.CosineAnnealingParams',
+            '_target_': 'nemo.core.config.CosineAnnealingParams',
             'params': {'min_lr': 0.1},
             'max_steps': self.MAX_STEPS,
         }
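
The core of this migration is the old-format rewrite performed by _convert_config / maybe_update_config_version above: `cls` is renamed to `_target_` and the nested `params` block is flattened into its parent, which is the layout Hydra 1.1 expects for instantiation. The sketch below is a rough, minimal approximation of that rewrite, not the NeMo implementation; the helper name upgrade_config is invented for this example, and dicts nested inside lists are not handled.

# Minimal sketch of the cls/params -> _target_ rewrite (illustrative only).
from omegaconf import DictConfig, OmegaConf


def upgrade_config(cfg):
    """Rewrite Hydra 1.0-era keys ('cls', nested 'params') into the flat '_target_' form."""
    if not isinstance(cfg, DictConfig):
        cfg = OmegaConf.create(cfg)
    container = OmegaConf.to_container(cfg, resolve=True)

    def _convert(node):
        if not isinstance(node, dict):
            return node
        node = {key: _convert(value) for key, value in node.items()}
        # cls -> _target_ (only if _target_ is not already present).
        if 'cls' in node and '_target_' not in node:
            node['_target_'] = node.pop('cls')
        # Drop 'params' and shift its arguments up to the parent.
        params = node.pop('params', None)
        if isinstance(params, dict):
            for key, value in params.items():
                node.setdefault(key, value)
        return node

    return OmegaConf.create(_convert(container))


old_style = {'cls': 'nemo.core.config.NovogradParams', 'params': {'weight_decay': 0.001, 'betas': [0.8, 0.5]}}
print(OmegaConf.to_yaml(upgrade_config(old_style)))
# _target_: nemo.core.config.NovogradParams
# weight_decay: 0.001
# betas:
# - 0.8
# - 0.5

Plain `target` keys are deliberately not rewritten, because ModelPT injects a `target` field of its own (see the docstring bullet added in model_utils.py). That is also why the updated tests spell out `_target_` explicitly, and why parse_optimizer_args and prepare_lr_scheduler now branch on the presence of `_target_` instead of relying on a bare try/except around instantiation.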