Skip to content

Commit

Permalink
add comment to atomic checkpoint copying process
Browse files Browse the repository at this point in the history
  • Loading branch information
ANarayan committed Mar 25, 2021
1 parent 7c244df commit c924b9f
Showing 1 changed file with 3 additions and 0 deletions.
3 changes: 3 additions & 0 deletions ludwig/hyperopt/execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,6 +723,9 @@ def on_epoch_end(self, trainer, progress_tracker, save_path):
if trainer.is_coordinator():
with tune.checkpoint_dir(step=progress_tracker.epoch) as checkpoint_dir:
checkpoint_model = os.path.join(checkpoint_dir, 'model')
# shutil.copytree(save_path, checkpoint_model)
# Note: A previous implementation used shutil.copytree();
# however, that copying method is not atomic.
if not os.path.isdir(checkpoint_model):
copy_id = uuid.uuid4()
tmp_dst = "%s.%s.tmp" % (checkpoint_model, copy_id)
Expand Down

0 comments on commit c924b9f

Please sign in to comment.