-
Notifications
You must be signed in to change notification settings - Fork 21.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Revert "Fix default timeouts for python entrypoints (e.g. init_process_group) (#112893)"
This reverts commit f9d47e1. Reverted #112893 on behalf of https://github.com/clee2000 due to sorry this seems to have broken inductor https://hud.pytorch.org/pytorch/pytorch/commit/f9d47e13813bbefc9f19a6c0430b7122f9d09b91 https://github.com/pytorch/pytorch/actions/runs/6776367936/job/18418174752 ([comment](#112893 (comment)))
- Loading branch information
1 parent
eefe327
commit 75adb9f
Showing
5 changed files
with
97 additions
and
86 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,7 @@ | ||
from torch._C._distributed_c10d import _DEFAULT_PG_TIMEOUT | ||
from datetime import timedelta | ||
from typing import Optional | ||
|
||
__all__ = ['default_pg_timeout', 'default_pg_nccl_timeout'] | ||
|
||
# Default process group wide timeout, if applicable. | ||
# This only applies to the non-nccl backends | ||
# This only applies to the gloo and nccl backends | ||
# (only if NCCL_BLOCKING_WAIT or NCCL_ASYNC_ERROR_HANDLING is set to 1). | ||
# To make an attempt at backwards compatibility with THD, we use an | ||
# extraordinarily high default timeout, given that THD did not have timeouts. | ||
default_pg_timeout: timedelta = _DEFAULT_PG_TIMEOUT | ||
# Separate timeout for PGNCCL mainly because it's always been that way in the C++ layer, but until recently | ||
# there was one default that applied across all backends in the Python layer. | ||
# Later, we could consider merging them back together at the C++ layer if we can align on the same value. | ||
# (only if NCCL_BLOCKING_WAIT or NCCL_ASYNC_ERROR_HANDLING is set to 1). | ||
|
||
try: | ||
from torch._C._distributed_c10d import _DEFAULT_PG_NCCL_TIMEOUT | ||
default_pg_nccl_timeout: Optional[timedelta] = _DEFAULT_PG_NCCL_TIMEOUT | ||
except ImportError: | ||
# if C++ NCCL support is not compiled, we don't have access to the default nccl value. | ||
# if anyone is actually trying to use nccl in this state, it should error. | ||
default_pg_nccl_timeout = None | ||
default_pg_timeout = _DEFAULT_PG_TIMEOUT |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters