The max_update assertion is not backward compatible. Fix in this diff.
Summary:
The max_update assertion in tri_stage_lr was introduced in D25040041 (6d2cf0d).
It requires max-update to be defined, which breaks backward compatibility with existing recipes. Since max-update is ONLY used when phase-ratio is defined, we recommend this change, which moves the assertion inside the phase-ratio branch so existing model recipes keep working.
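
For context, here is a minimal sketch of the two configuration paths in the tri-stage scheduler; the stage_lengths helper is hypothetical (not fairseq code), with field names mirroring TriStageLRScheduleConfig. Only the phase-ratio path divides max_update, so only that path needs the assertion.

from typing import Optional, Tuple

def stage_lengths(
    max_update: int,
    phase_ratio: Optional[Tuple[float, float, float]] = None,
    warmup_steps: int = 0,
    hold_steps: int = 0,
    decay_steps: int = 0,
) -> Tuple[int, int, int]:
    """Return (warmup, hold, decay) step counts for a tri-stage schedule."""
    if phase_ratio is not None:
        # Only this branch consumes max_update, so only here must it be > 0.
        assert max_update > 0
        assert sum(phase_ratio) == 1, "phase ratios must add up to 1"
        return (
            int(max_update * phase_ratio[0]),
            int(max_update * phase_ratio[1]),
            int(max_update * phase_ratio[2]),
        )
    # Legacy recipes pass explicit step counts and never touch max_update.
    return warmup_steps, hold_steps, decay_steps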

Reviewed By: myleott

Differential Revision: D25204247

fbshipit-source-id: 01f6f2f0935dfaff9f23501158af608e5d507145
Yuan Shangguan (June) authored and facebook-github-bot committed Nov 30, 2020
1 parent 9cf0bd9 commit f732b40
Showing 1 changed file with 2 additions and 2 deletions.
fairseq/optim/lr_scheduler/tri_stage_lr_scheduler.py (2 additions, 2 deletions)

@@ -48,7 +48,7 @@ class TriStageLRScheduleConfig(FairseqDataclass):
 
 
 @register_lr_scheduler("tri_stage", dataclass=TriStageLRScheduleConfig)
-class TriStageLRScheduleConfig(FairseqLRScheduler):
+class TriStageLRSchedule(FairseqLRScheduler):
     """Tristage learning rate schedulr
 
     Implement the learning rate scheduler in https://arxiv.org/pdf/1904.08779.pdf
@@ -93,14 +93,14 @@ def __init__(self, cfg: TriStageLRScheduleConfig, optimizer):
                 "Cannot use a fixed learning rate schedule with tri-stage lr."
                 " Consider --lr-scheduler=fixed instead."
             )
-        assert cfg.max_update > 0
 
         # calculate LR at each point
         self.peak_lr = cfg.lr[0]
         self.init_lr = cfg.init_lr_scale * cfg.lr[0]
         self.final_lr = cfg.final_lr_scale * cfg.lr[0]
 
         if cfg.phase_ratio is not None:
+            assert cfg.max_update > 0
             assert sum(cfg.phase_ratio) == 1, "phase ratios must add up to 1"
             self.warmup_steps = int(cfg.max_update * cfg.phase_ratio[0])
             self.hold_steps = int(cfg.max_update * cfg.phase_ratio[1])
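After the fix, both recipe styles behave as intended. Using the hypothetical stage_lengths sketch above (the step counts and ratios here are illustrative, not from any real recipe):

# Legacy recipe: explicit step counts; max_update is never consulted, so the
# old top-level assert was the only thing rejecting it.
stage_lengths(max_update=0, warmup_steps=10000, hold_steps=20000, decay_steps=50000)
# -> (10000, 20000, 50000)

# Ratio-based recipe: max_update must be positive, as the relocated assert enforces.
stage_lengths(max_update=80000, phase_ratio=(0.1, 0.4, 0.5))
# -> (8000, 32000, 40000)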
