From 268519d036ad38b3505bcc49c141110f0f4dae4e Mon Sep 17 00:00:00 2001
From: Jin Young Sohn
Date: Thu, 21 Nov 2019 19:12:34 +0000
Subject: [PATCH] No need to call `xm.mark_step()` explicitly

For gradient accumulation we accumulate on batches coming from a
`ParallelLoader` instance, which already marks the step itself on each
`next()`.
---
 examples/run_glue_tpu.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/run_glue_tpu.py b/examples/run_glue_tpu.py
index 1f203d2d950634..aa7e56fb8b5e97 100644
--- a/examples/run_glue_tpu.py
+++ b/examples/run_glue_tpu.py
@@ -150,7 +150,6 @@ def train(args, train_dataset, model, tokenizer, disable_logging=False):
             loss = outputs[0]  # model outputs are always tuple in transformers (see doc)
 
             if args.gradient_accumulation_steps > 1:
-                xm.mark_step()  # Mark step to evaluate graph so far or else graph will grow too big and OOM.
                 loss = loss / args.gradient_accumulation_steps
 
             loss.backward()
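
For context, a minimal sketch (not code from this repository) of the training-loop shape the commit message describes: batches come from torch_xla's `ParallelLoader`, whose per-device iterator evaluates the pending graph on each `next()`, so no explicit `xm.mark_step()` is needed inside the gradient-accumulation branch. Names such as `train_sketch`, `args`, `model`, and `train_dataloader` are illustrative placeholders, not identifiers from `run_glue_tpu.py`.

```python
# Hypothetical sketch of a TPU training loop with gradient accumulation.
import torch_xla.core.xla_model as xm
import torch_xla.distributed.parallel_loader as pl


def train_sketch(args, model, optimizer, train_dataloader):
    device = xm.xla_device()
    model.to(device)
    model.train()

    # per_device_loader wraps the host dataloader, moves batches to the TPU,
    # and marks the XLA step as part of producing each batch.
    loader = pl.ParallelLoader(train_dataloader, [device]).per_device_loader(device)

    for step, batch in enumerate(loader):
        outputs = model(**batch)
        loss = outputs[0]

        if args.gradient_accumulation_steps > 1:
            # No xm.mark_step() here: the loader already marked the step when
            # it yielded this batch, so the graph stays bounded.
            loss = loss / args.gradient_accumulation_steps

        loss.backward()

        if (step + 1) % args.gradient_accumulation_steps == 0:
            xm.optimizer_step(optimizer)  # all-reduce gradients, then optimizer.step()
            optimizer.zero_grad()
```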