Skip to content

Commit

Permalink
[Enhance] Make sure the FileHandler still alive after torch.compile (#1021)
Browse files Browse the repository at this point in the history

* [Enhance] Make sure the FileHandler still alive after

* Resume filter

* avoid bc

* Fix unit test

* clean the code

* revert changes and set mode from 'm' to 'a'

* mode to file_mode

* add comments

* refine comments

* Fix duplicated the
  • Loading branch information
HAOCHENYE committed Mar 30, 2023
1 parent b3b1e11 commit 83c4f3e
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 1 deletion.
3 changes: 2 additions & 1 deletion mmengine/hooks/logger_hook.py
Expand Up @@ -304,4 +304,5 @@ def after_run(self, runner) -> None:
if not self.keep_local:
os.remove(local_filepath)
runner.logger.info(f'{local_filepath} was removed due to the '
'`self.keep_local=False`')
'`self.keep_local=False`. You can check '
f'the running logs in {out_filepath}')
5 changes: 5 additions & 0 deletions mmengine/runner/runner.py
Expand Up @@ -716,6 +716,11 @@ def build_logger(self,

log_cfg = dict(log_level=log_level, log_file=log_file, **kwargs)
log_cfg.setdefault('name', self._experiment_name)
# `torch.compile` in PyTorch 2.0 could close all user-defined handlers
# unexpectedly. Using file mode 'a' helps prevent abnormal termination
# of the FileHandler and ensures that the log file can be continuously
# updated during the lifespan of the runner.
log_cfg.setdefault('file_mode', 'a')

return MMLogger.get_instance(**log_cfg) # type: ignore

Expand Down
8 changes: 8 additions & 0 deletions tests/test_runner/test_runner.py
Expand Up @@ -1745,6 +1745,14 @@ def test_train_with_compile(self):
runner = Runner.from_cfg(cfg)
runner.train()

runner._maybe_compile('train_step')
# PyTorch 2.0.0 could close the FileHandler after ``torch.compile`` is
# called, so we need to test that our file handler still works.
with open(osp.join(f'{runner.log_dir}',
f'{runner.timestamp}.log')) as f:
last_line = f.readlines()[-1]
self.assertTrue(last_line.endswith('please be patient.\n'))

def test_val(self):
cfg = copy.deepcopy(self.epoch_based_cfg)
cfg.experiment_name = 'test_val1'
Expand Down

0 comments on commit 83c4f3e

Please sign in to comment.