From 3129ad3378df2d678d3ef0cb94e6458ff5e56031 Mon Sep 17 00:00:00 2001
From: Jin Young Sohn
Date: Thu, 21 Nov 2019 13:16:21 -0800
Subject: [PATCH] No need to call `xm.mark_step()` explicitly (#4)

For gradient accumulation we are accumulating on batches from the
`ParallelLoader` instance, which already marks the step itself on next().
---
 examples/run_glue_tpu.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/run_glue_tpu.py b/examples/run_glue_tpu.py
index 1f203d2d9506..aa7e56fb8b5e 100644
--- a/examples/run_glue_tpu.py
+++ b/examples/run_glue_tpu.py
@@ -150,7 +150,6 @@ def train(args, train_dataset, model, tokenizer, disable_logging=False):
             loss = outputs[0]  # model outputs are always tuple in transformers (see doc)
 
             if args.gradient_accumulation_steps > 1:
-                xm.mark_step()  # Mark step to evaluate graph so far or else graph will grow too big and OOM.
                 loss = loss / args.gradient_accumulation_steps
 
             loss.backward()
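
For context, a minimal sketch of the pattern this patch relies on; it is not the
patched run_glue_tpu.py itself, and names such as train_loop and
gradient_accumulation_steps are illustrative. Each next() on the per-device
loader returned by ParallelLoader calls xm.mark_step(), which evaluates the XLA
graph accumulated so far, so the graph is already cut at every batch boundary
and the explicit call removed by this patch is redundant.

# Minimal sketch, assuming torch_xla is installed and batches are dicts of model inputs.
import torch_xla.core.xla_model as xm
import torch_xla.distributed.parallel_loader as pl


def train_loop(model, train_dataloader, optimizer, gradient_accumulation_steps=1):
    device = xm.xla_device()
    # Each next() on the per-device loader calls xm.mark_step(), evaluating the
    # pending graph, so no explicit mark_step is needed inside the loop body.
    loader = pl.ParallelLoader(train_dataloader, [device]).per_device_loader(device)
    for step, batch in enumerate(loader):
        loss = model(**batch)[0]
        if gradient_accumulation_steps > 1:
            # The explicit xm.mark_step() that used to live here is redundant:
            # the loader already marked the step when it yielded this batch.
            loss = loss / gradient_accumulation_steps
        loss.backward()
        if (step + 1) % gradient_accumulation_steps == 0:
            xm.optimizer_step(optimizer)  # all-reduce gradients and apply the update
            optimizer.zero_grad()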