
Commit

Merge pull request #714 from mv1388/amp_scaler_update_grad_scaling
AMP scaler update when grad accumulating
mv1388 committed Jul 31, 2022
2 parents a48333e + 634610c commit a875979
Showing 3 changed files with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion aitoolbox/torchtrain/train_loop/train_loop.py
@@ -246,7 +246,10 @@ def _train(self, num_epochs, num_iterations, callbacks=None, grad_accumulation=1
                 # Optimizer zero grad
                 self._optimizer_zero_grad(optimizer_idx)
 
-                self.amp_scaler.update()
+                # Execute AMP scaler update only when optimizer is stepped and grads are zeroed out
+                # https://pytorch.org/docs/stable/notes/amp_examples.html#gradient-accumulation
+                if (self.iteration + 1) % self.grad_accumulation == 0:
+                    self.amp_scaler.update()
 
                 self.callbacks_handler.execute_batch_end()

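For context, the PyTorch AMP docs linked in the diff recommend calling scaler.update() only at the end of iterations where scaler.step(optimizer) actually ran, so the loss scale stays constant while gradients are being accumulated across micro-batches. Below is a minimal standalone sketch of that pattern; it is not AIToolbox code, and the model, optimizer, loader, loss_fn, and accum_steps names are hypothetical placeholders for illustration:

import torch

# Hypothetical minimal setup; any model, optimizer, and data would do (requires CUDA for AMP)
model = torch.nn.Linear(10, 1).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
loss_fn = torch.nn.MSELoss()
loader = [(torch.randn(8, 10), torch.randn(8, 1)) for _ in range(8)]

scaler = torch.cuda.amp.GradScaler()
accum_steps = 4  # assumed gradient accumulation factor

for i, (inputs, targets) in enumerate(loader):
    inputs, targets = inputs.cuda(), targets.cuda()
    with torch.cuda.amp.autocast():
        # Normalize the loss so the accumulated gradient matches a full-batch step
        loss = loss_fn(model(inputs), targets) / accum_steps

    # Scaled gradients are accumulated on every iteration
    scaler.scale(loss).backward()

    # Step and update only once per effective batch, mirroring this commit's fix
    if (i + 1) % accum_steps == 0:
        scaler.step(optimizer)  # unscales gradients, then steps the optimizer
        scaler.update()         # adjust the loss scale only after a real step
        optimizer.zero_grad()

Calling update() on every iteration, as the code did before this fix, would let the scale change mid-accumulation, so later micro-batches could be scaled by a different factor than the gradients already accumulated.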
Binary file modified dist/aitoolbox-1.6.0-py3-none-any.whl
Binary file modified dist/aitoolbox-1.6.0.tar.gz
