
Commit

added a fix for every time trainer.evaluate or trainer.predict is called
benja-matic committed May 24, 2021
1 parent bf51acf commit d7a605a
Showing 1 changed file with 31 additions and 11 deletions.
42 changes: 31 additions & 11 deletions projects/transformers/run_utils.py
@@ -118,23 +118,35 @@ def train(trainer, output_dir, last_checkpoint=None):
*count_nonzero_params(trainer.model)
))

def toggle_drop_last(trainer, should_drop_last):
    """
    Turn trainer.args.dataloader_drop_last on or off depending on the use case.
    If drop_last is left on, you can get skewed results any time
    trainer.evaluate or trainer.predict is called, since drop_last causes the
    last, incomplete batch of samples to be labeled -100.
    Use this if you want drop_last on for training but off for evaluation
    and testing.
    Example usage at evaluation time:
        drop_last = toggle_drop_last(trainer, False)
        trainer.evaluate(...)
        _ = toggle_drop_last(trainer, drop_last)
    """

    # Remember the current setting, apply the requested one, and return the
    # previous value so a second call can restore it (see docstring example).
    was_drop_last = trainer.args.dataloader_drop_last
    trainer.args.dataloader_drop_last = should_drop_last
    return was_drop_last

def evaluate_tasks(trainer, output_dir, tasks, eval_datasets):
"""
Evaluate tasks after finetuning.
Returns evaluation dict with results.
"""
drop_last = toggle_drop_last(trainer, False) # should_drop_last=False
eval_results = {}

# trainer references args.dataloader_drop_last for both train and eval dataloaders.
# We want to be able to drop last for training, but not for eval, because
# dropping at eval leaves off examples, leading to inaccurate performance measures.
# This is a hack to toggle args.dataloader_drop_last
drop_last = False
if trainer.args.dataloader_drop_last:
drop_last = True
trainer.args.dataloader_drop_last = False

for eval_dataset, task in zip(eval_datasets, tasks):
eval_result = trainer.evaluate(eval_dataset=eval_dataset)
if task == "mnli-mm":
@@ -152,8 +164,7 @@ def evaluate_tasks(trainer, output_dir, tasks, eval_datasets):
writer.write(f"{key} = {value}\n")

# If drop_last was on for training, this toggles it back on
if drop_last:
trainer.args.dataloader_drop_last = True
_ = toggle_drop_last(trainer, drop_last)

return eval_results

@@ -162,6 +173,9 @@ def test_tasks(trainer, output_dir, tasks, test_datasets, is_regression, label_l
"""
Test tasks after finetuning.
"""

drop_last = toggle_drop_last(trainer, False)

for test_dataset, task in zip(test_datasets, tasks):
# Removing the `label` column because it contains -1
# and Trainer won't like that.
@@ -184,9 +198,13 @@ def test_tasks(trainer, output_dir, tasks, test_datasets, is_regression, label_l
item = label_list[item]
writer.write(f"{index}\t{item}\n")

_ = toggle_drop_last(trainer, drop_last)


def evaluate_language_model(trainer, eval_dataset, output_dir):
"""Evaluate language model. Returns dict with results on perplexity metric. """
drop_last = toggle_drop_last(trainer, False)

results = {}
eval_output = trainer.evaluate(eval_dataset)

@@ -201,6 +219,8 @@ def evaluate_language_model(trainer, eval_dataset, output_dir):
logging.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n")

_ = toggle_drop_last(trainer, drop_last)

return results
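To see why the toggle matters, here is a small, self-contained sketch (not from the repository) of how dropping the incomplete final batch shrinks the evaluation set and can shift a metric such as accuracy:

# 10 eval examples with batch size 4: drop_last=True silently discards the
# last two examples, so the metric is computed over 8 examples instead of 10.
labels      = [1, 1, 0, 1, 0, 0, 1, 1, 0, 1]
predictions = [1, 1, 0, 1, 0, 0, 1, 1, 1, 0]
batch_size = 4

def accuracy(y_true, y_pred):
    return sum(t == p for t, p in zip(y_true, y_pred)) / len(y_true)

kept = (len(labels) // batch_size) * batch_size  # 8 examples survive drop_last
print(accuracy(labels, predictions))                 # full set: 0.8
print(accuracy(labels[:kept], predictions[:kept]))   # with drop_last: 1.0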


