From dd8b028115b27d6a7f6bda342819399608df6ee9 Mon Sep 17 00:00:00 2001 From: Charles Zablit Date: Wed, 8 Oct 2025 11:55:25 +0100 Subject: [PATCH 1/2] [update-checkout] fallback to verbose mode if tempdir is too long --- utils/update_checkout/tests/test_clone.py | 15 +++++++++ .../update_checkout/parallel_runner.py | 17 +++++----- .../update_checkout/update_checkout.py | 31 +++++++++++++++++++ 3 files changed, 55 insertions(+), 8 deletions(-) diff --git a/utils/update_checkout/tests/test_clone.py b/utils/update_checkout/tests/test_clone.py index b6785a4f7f503..4f4b3d8f6b18b 100644 --- a/utils/update_checkout/tests/test_clone.py +++ b/utils/update_checkout/tests/test_clone.py @@ -13,6 +13,7 @@ import os from . import scheme_mock +from unittest import mock class CloneTestCase(scheme_mock.SchemeMockTestCase): @@ -42,6 +43,20 @@ def test_clone_with_additional_scheme(self): # Test that we're actually checking out the 'extra' scheme based on the output self.assertIn(b"git checkout refs/heads/main", output) + def test_manager_not_called_on_long_socket(self): + fake_tmpdir = '/tmp/very/' + '/long' * 20 + '/tmp' + + with mock.patch('tempfile.gettempdir', return_value=fake_tmpdir), \ + mock.patch('multiprocessing.Manager') as mock_manager: + + self.call([self.update_checkout_path, + '--config', self.config_path, + '--source-root', self.source_root, + '--clone']) + # Ensure that we do not try to create a Manager when the tempdir + # is too long. + mock_manager.assert_not_called() + class SchemeWithMissingRepoTestCase(scheme_mock.SchemeMockTestCase): def __init__(self, *args, **kwargs): diff --git a/utils/update_checkout/update_checkout/parallel_runner.py b/utils/update_checkout/update_checkout/parallel_runner.py index fbbe7bd060e7b..8c743ac5838e4 100644 --- a/utils/update_checkout/update_checkout/parallel_runner.py +++ b/utils/update_checkout/update_checkout/parallel_runner.py @@ -52,19 +52,20 @@ def __init__( self._terminal_width = shutil.get_terminal_size().columns self._n_processes = n_processes self._pool_args = pool_args - manager = Manager() - self._lock = manager.Lock() - self._running_tasks = manager.list() - self._updated_repos = manager.Value("i", 0) self._fn = fn self._pool = Pool(processes=self._n_processes) - self._verbose = pool_args[0].verbose self._output_prefix = pool_args[0].output_prefix self._nb_repos = len(pool_args) self._stop_event = Event() - self._monitored_fn = MonitoredFunction( - self._fn, self._running_tasks, self._updated_repos, self._lock - ) + self._verbose = pool_args[0].verbose + if not self._verbose: + manager = Manager() + self._lock = manager.Lock() + self._running_tasks = manager.list() + self._updated_repos = manager.Value("i", 0) + self._monitored_fn = MonitoredFunction( + self._fn, self._running_tasks, self._updated_repos, self._lock + ) def run(self) -> List[Any]: print( diff --git a/utils/update_checkout/update_checkout/update_checkout.py b/utils/update_checkout/update_checkout/update_checkout.py index 38fcbfc04c173..70cf5f38a5e01 100755 --- a/utils/update_checkout/update_checkout/update_checkout.py +++ b/utils/update_checkout/update_checkout/update_checkout.py @@ -13,6 +13,7 @@ import os import platform import re +import tempfile import sys import traceback from multiprocessing import freeze_support @@ -27,6 +28,29 @@ SCRIPT_FILE = os.path.abspath(__file__) SCRIPT_DIR = os.path.dirname(SCRIPT_FILE) +def is_unix_sock_path_too_long() -> bool: + """Check if the unix socket_path exceeds the 104 limit (108 on Linux). + + The multiprocessing module creates a socket in the TEMPDIR folder. The + socket path should not exceed: + - 104 bytes on macOS + - 108 bytes on Linux (https://www.man7.org/linux/man-pages/man7/unix.7.html) + + Returns: + bool: Whether the socket path exceeds the limit. Always False on Windows. + """ + + if os.name != "posix": + return False + + MAX_UNIX_SOCKET_PATH = 104 + # `tempfile.mktemp` is deprecated yet that's what the multiprocessing + # module uses internally: https://github.com/python/cpython/blob/c4e7d245d61ac4547ecf3362b28f64fc00aa88c0/Lib/multiprocessing/connection.py#L72 + # Since we are not using the resulting file, it is safe to use this + # method. + sock_path = tempfile.mktemp(prefix="sock-", dir=tempfile.gettempdir()) + return len(sock_path.encode("utf-8")) > MAX_UNIX_SOCKET_PATH + def confirm_tag_in_repo(tag: str, repo_name: str) -> Optional[str]: """Confirm that a given tag exists in a git repository. This function assumes that the repository is already a current working directory before @@ -771,6 +795,13 @@ def main(): "specify --scheme=foo") sys.exit(1) + if is_unix_sock_path_too_long(): + print( + f"TEMPDIR={tempfile.gettempdir()} is too long and multiprocessing " + "sockets will exceed the size limit. Falling back to verbose mode." + ) + args.verbose = True + clone = args.clone clone_with_ssh = args.clone_with_ssh skip_history = args.skip_history From ebd6b99a51f23ede20c809ff987674aa7a93c03f Mon Sep 17 00:00:00 2001 From: Charles Zablit Date: Wed, 8 Oct 2025 18:38:30 +0100 Subject: [PATCH 2/2] [update-checkout] limit the number of processes on Python < 3.10 --- utils/update_checkout/update_checkout/parallel_runner.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/utils/update_checkout/update_checkout/parallel_runner.py b/utils/update_checkout/update_checkout/parallel_runner.py index 8c743ac5838e4..c216fcfeb46aa 100644 --- a/utils/update_checkout/update_checkout/parallel_runner.py +++ b/utils/update_checkout/update_checkout/parallel_runner.py @@ -48,7 +48,13 @@ def __init__( ): self._monitor_polling_period = 0.1 if n_processes == 0: - n_processes = cpu_count() * 2 + if sys.version_info.minor < 10: + # On Python < 3.10, https://bugs.python.org/issue46391 causes + # Pool.map and its variants to hang. Limiting the number of + # processes fixes the issue. + n_processes = int(cpu_count() * 1.25) + else: + n_processes = cpu_count() * 2 self._terminal_width = shutil.get_terminal_size().columns self._n_processes = n_processes self._pool_args = pool_args