feat: Added warmup each cycle feature in CyclicalScheduler #3064

Merged · 18 commits · Sep 24, 2023 · Changes from 8 commits
51 changes: 46 additions & 5 deletions ignite/handlers/param_scheduler.py
@@ -288,6 +288,9 @@
usually be the number of batches in an epoch.

.. versionadded:: 0.4.5

.. versionchanged:: 0.4.13
Added warmup to the scheduler using ``warmup_each_cycle`` and ``warmup_duration``.
"""

def __init__(
@@ -300,6 +303,8 @@
cycle_mult: float = 1.0,
start_value_mult: float = 1.0,
end_value_mult: float = 1.0,
warmup_each_cycle: bool = False,
warmup_duration: Optional[int] = None,
save_history: bool = False,
param_group_index: Optional[int] = None,
):
@@ -313,6 +318,20 @@
self.cycle = 0
self.start_value_mult = start_value_mult
self.end_value_mult = end_value_mult
self.warmup_each_cycle = warmup_each_cycle
if not self.warmup_each_cycle:
    if warmup_duration is not None:
        warnings.warn(
            f"warmup_each_cycle=False but warmup_duration is {warmup_duration}, "
            "so warmup_duration will be set to 0. "
            "If you want warmup in each cycle, please set warmup_each_cycle=True"
        )
    self.warmup_duration = 0
else:
    if warmup_duration is None:
        raise ValueError("Argument warmup_duration should be an integer, but given None")
    self.warmup_duration = warmup_duration
self.total_cycle_size = self.warmup_duration + self.cycle_size

if self.cycle_size < 2:
raise ValueError(f"Argument cycle_size should be positive and larger than 1, but given {cycle_size}")
@@ -325,18 +344,39 @@
"cycle",
"start_value_mult",
"end_value_mult",
"total_cycle_size",
"warmup_duration",
]

def __call__(self, engine: Optional[Engine], name: Optional[str] = None) -> None:
if self.event_index != 0 and self.event_index % self.cycle_size == 0:
if self.event_index != 0 and self.event_index % self.total_cycle_size == 0:
self.event_index = 0
self.cycle_size = int(self.cycle_size * self.cycle_mult)
self.warmup_duration = int(self.warmup_duration * self.cycle_mult)
self.total_cycle_size = int(self.warmup_duration + self.cycle_size)
self.cycle += 1
self.start_value *= self.start_value_mult
if self.event_index != 0 and self.event_index == self.warmup_duration:
self.end_value *= self.end_value_mult

return super(CyclicalScheduler, self).__call__(engine, name)
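A small worked example of the reset arithmetic above, assuming the illustrative values cycle_size=10, warmup_duration=5 and cycle_mult=2.0:

# Sketch of how __call__ rescales the cycle on each wrap-around
# (cycle_size=10, warmup_duration=5, cycle_mult=2.0 are illustrative values).
cycle_size, warmup_duration, cycle_mult = 10, 5, 2.0
total_cycle_size = warmup_duration + cycle_size        # 15 events in the first cycle
# Once event_index reaches total_cycle_size, both spans grow by cycle_mult:
cycle_size = int(cycle_size * cycle_mult)              # 20
warmup_duration = int(warmup_duration * cycle_mult)    # 10
total_cycle_size = int(warmup_duration + cycle_size)   # 30 events in the second cycle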

def get_param(self) -> float:
"""Method to get current optimizer's parameter value"""
if self.warmup_each_cycle and self.event_index < self.warmup_duration:
return self.end_value + (self.start_value - self.end_value) * self.event_index / self.warmup_duration

return self._get_cycle_param()

@abstractmethod
def _get_cycle_param(self) -> float:
"""Method to get the cycle's current parameter value

Returns:
list of params, or scalar param
"""
pass
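The warmup branch of get_param interpolates linearly from end_value back to start_value over warmup_duration events; a quick check with the values used in the new test below (start_value=0, end_value=1, warmup_duration=5):

# Linear warmup mirroring get_param above; the values come from the new test.
start_value, end_value, warmup_duration = 0.0, 1.0, 5
warmup = [
    end_value + (start_value - end_value) * event_index / warmup_duration
    for event_index in range(warmup_duration)
]
print([round(v, 3) for v in warmup])  # [1.0, 0.8, 0.6, 0.4, 0.2] -- the first five lrs asserted in the test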



class LinearCyclicalScheduler(CyclicalScheduler):
"""Linearly adjusts param value to 'end_value' for a half-cycle, then linearly
@@ -432,7 +472,8 @@
.. versionadded:: 0.4.5
"""

def get_param(self) -> float:
def _get_cycle_param(self) -> float:
"""Method to get the cycle's current parameter value"""
cycle_progress = self.event_index / self.cycle_size
return self.end_value + (self.start_value - self.end_value) * abs(cycle_progress - 0.5) * 2
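For reference, the triangular wave traced by this formula over one cycle, using the illustrative values start_value=1.0, end_value=0.0, cycle_size=10:

# Triangular wave produced by the formula above (illustrative values only).
start_value, end_value, cycle_size = 1.0, 0.0, 10
values = [
    end_value + (start_value - end_value) * abs(event_index / cycle_size - 0.5) * 2
    for event_index in range(cycle_size)
]
print([round(v, 3) for v in values])  # [1.0, 0.8, 0.6, 0.4, 0.2, 0.0, 0.2, 0.4, 0.6, 0.8]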

@@ -536,9 +577,9 @@
.. versionadded:: 0.4.5
"""

def get_param(self) -> float:
"""Method to get current optimizer's parameter value"""
cycle_progress = self.event_index / self.cycle_size
def _get_cycle_param(self) -> float:
"""Method to get the cycle's current parameter value"""
cycle_progress = (self.event_index - self.warmup_duration) / self.cycle_size
return self.start_value + ((self.end_value - self.start_value) / 2) * (1 - math.cos(math.pi * cycle_progress))
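A quick sanity check of the cosine phase with the warmup offset, using the same values as the new test (start_value=0, end_value=1, cycle_size=10, warmup_duration=5):

import math

# Cosine phase after warmup, mirroring _get_cycle_param above.
start_value, end_value, cycle_size, warmup_duration = 0.0, 1.0, 10, 5
for event_index in range(warmup_duration, warmup_duration + 3):
    cycle_progress = (event_index - warmup_duration) / cycle_size
    lr = start_value + ((end_value - start_value) / 2) * (1 - math.cos(math.pi * cycle_progress))
    print(event_index, round(lr, 6))  # 5 0.0, 6 0.024472, 7 0.095492 -- matching the expected lrs in the test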


69 changes: 69 additions & 0 deletions tests/ignite/handlers/test_param_scheduler.py
@@ -10,6 +10,7 @@
ConcatScheduler,
CosineAnnealingScheduler,
create_lr_scheduler_with_warmup,
CyclicalScheduler,
LinearCyclicalScheduler,
LRScheduler,
ParamGroupScheduler,
@@ -55,6 +56,14 @@ def test_param_scheduler_asserts():
FakeParamScheduler({}, "lr")


def test_cyclical_scheduler_asserts():
tensor = torch.zeros([1], requires_grad=True)
optimizer = torch.optim.SGD([tensor], lr=0)

with pytest.raises(TypeError, match="Can't instantiate abstract class CyclicalScheduler"):
CyclicalScheduler({}, "lr", 0.0, 1.0, 10)


def test_linear_scheduler():
with pytest.raises(TypeError, match=r"Argument optimizer should be torch.optim.Optimizer"):
LinearCyclicalScheduler({}, "lr", 1, 0, cycle_size=0)
@@ -293,6 +302,66 @@ def save_lr(engine):
assert lrs == pytest.approx([v for i, v in simulated_values])


def test_cosine_annealing_scheduler_warmup():
tensor = torch.zeros([1], requires_grad=True)
optimizer = torch.optim.SGD([tensor], lr=0)

scheduler = CosineAnnealingScheduler(optimizer, "lr", 0, 1, 10, warmup_each_cycle=True, warmup_duration=5)
state_dict = scheduler.state_dict()

data = [0] * 9
max_epochs = 2
simulated_values = CosineAnnealingScheduler.simulate_values(
num_events=len(data) * max_epochs,
param_name="lr",
start_value=0,
end_value=1,
cycle_size=10,
warmup_each_cycle=True,
warmup_duration=5,
)

def save_lr(engine):
lrs.append(optimizer.param_groups[0]["lr"])

trainer = Engine(lambda engine, batch: None)
trainer.add_event_handler(Events.ITERATION_STARTED, scheduler)
trainer.add_event_handler(Events.ITERATION_COMPLETED, save_lr)

for _ in range(2):
lrs = []
trainer.run(data, max_epochs=max_epochs)

assert lrs == list(
map(
pytest.approx,
[
1.0,
0.8,
0.6,
0.4,
0.2,
0.0,
0.024471741852423234,
0.09549150281252627,
0.20610737385376343,
0.3454915028125263,
0.49999999999999994,
0.6545084971874737,
0.7938926261462365,
0.9045084971874737,
0.9755282581475768,
1.0,
0.8,
0.6,
],
)
)
scheduler.load_state_dict(state_dict)

assert lrs == pytest.approx([v for i, v in simulated_values])


def test_concat_scheduler_asserts():
tensor = torch.zeros([1], requires_grad=True)
optimizer = torch.optim.SGD([tensor], lr=0)