fix checkpoint dir issue under distributed training
ppwwyyxx committed Aug 26, 2019
1 parent 82a8953 commit 47de91d
Showing 1 changed file with 4 additions and 1 deletion.
tensorpack/callbacks/saver.py: 4 additions & 1 deletion
@@ -42,7 +42,10 @@ def __init__(self, max_to_keep=10,
         if checkpoint_dir is not None:
             if not tf.gfile.IsDirectory(checkpoint_dir):  # v2: tf.io.gfile.isdir
                 tf.gfile.MakeDirs(checkpoint_dir)   # v2: tf.io.gfile.makedirs
-        self.checkpoint_dir = os.path.normpath(checkpoint_dir)
+        # If None, allow it to be init, but fail later if used
+        # For example, if chief_only=True, it can still be safely initialized
+        # in non-chief workers which don't have logger dir
+        self.checkpoint_dir = os.path.normpath(checkpoint_dir) if checkpoint_dir is not None else checkpoint_dir

     def _setup_graph(self):
         assert self.checkpoint_dir is not None, \
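As a rough illustration of the pattern this commit adopts, here is a minimal, hypothetical sketch (the class and names below are illustrative, not tensorpack's actual ModelSaver code): the directory is normalized only when one is given, None is tolerated at construction time, and the assertion in _setup_graph() fires only if the callback is actually set up, e.g. on the chief worker.

import os

class SaverCallback:
    # Minimal sketch (hypothetical class name): checkpoint_dir may be None at
    # construction time, e.g. on non-chief workers that have no logger dir;
    # it only has to exist if the callback is actually set up.
    def __init__(self, checkpoint_dir=None):
        # Normalize only when a directory was given; otherwise keep None and
        # let _setup_graph() fail later if the callback is really used.
        self.checkpoint_dir = (
            os.path.normpath(checkpoint_dir) if checkpoint_dir is not None else None
        )

    def _setup_graph(self):
        assert self.checkpoint_dir is not None, \
            "checkpoint_dir is required when this callback is used"

# On a non-chief worker, constructing the callback with None is fine; the
# assertion only fires if _setup_graph() is ever reached there.
saver = SaverCallback(checkpoint_dir=None)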
