From 1930ea44433d94cfef79fd338baef3e889053c2a Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 9 Sep 2025 15:11:11 -0700 Subject: [PATCH 1/3] [LIT] Work around the 60-process limit on Windows On Windows, a Python multiprocessing pool is limited to 60 workers at most: https://github.com/python/cpython/blob/6bc65c30ff1fd0b581a2c93416496fc720bc442c/Lib/concurrent/futures/process.py#L669-L672 Since the limit applies per process pool, we can work around it by using multiple pools on Windows when we actually want to use more workers. --- llvm/utils/lit/lit/run.py | 53 +++++++++++++++++++++++++++++++------- llvm/utils/lit/lit/util.py | 5 ---- 2 files changed, 43 insertions(+), 15 deletions(-) diff --git a/llvm/utils/lit/lit/run.py b/llvm/utils/lit/lit/run.py index 62070e824e87f..5109aad97eb8f 100644 --- a/llvm/utils/lit/lit/run.py +++ b/llvm/utils/lit/lit/run.py @@ -72,25 +72,58 @@ def _execute(self, deadline): if v is not None } - pool = multiprocessing.Pool( - self.workers, lit.worker.initialize, (self.lit_config, semaphores) + # Windows has a limit of 60 workers per pool, so we need to use multiple pools + # if we have more than 60 workers requested + max_workers_per_pool = 60 if os.name == "nt" else self.workers + num_pools = max( + 1, (self.workers + max_workers_per_pool - 1) // max_workers_per_pool ) + workers_per_pool = min(self.workers, max_workers_per_pool) - async_results = [ - pool.apply_async( - lit.worker.execute, args=[test], callback=self.progress_callback + if num_pools > 1: + self.lit_config.note( + "Using %d pools with %d workers each (Windows worker limit workaround)" + % (num_pools, workers_per_pool) ) - for test in self.tests - ] - pool.close() + + # Create multiple pools + pools = [] + for i in range(num_pools): + pool = multiprocessing.Pool( + workers_per_pool, lit.worker.initialize, (self.lit_config, semaphores) + ) + pools.append(pool) + + # Distribute tests across pools + tests_per_pool = (len(self.tests) + num_pools - 1) // num_pools + async_results = [] + + for 
pool_idx, pool in enumerate(pools): + start_idx = pool_idx * tests_per_pool + end_idx = min(start_idx + tests_per_pool, len(self.tests)) + pool_tests = self.tests[start_idx:end_idx] + + for test in pool_tests: + ar = pool.apply_async( + lit.worker.execute, args=[test], callback=self.progress_callback + ) + async_results.append(ar) + + # Close all pools + for pool in pools: + pool.close() try: self._wait_for(async_results, deadline) except: - pool.terminate() + # Terminate all pools on exception + for pool in pools: + pool.terminate() raise finally: - pool.join() + # Join all pools + for pool in pools: + pool.join() def _wait_for(self, async_results, deadline): timeout = deadline - time.time() diff --git a/llvm/utils/lit/lit/util.py b/llvm/utils/lit/lit/util.py index ce4c3c2df3436..518c1a3029b86 100644 --- a/llvm/utils/lit/lit/util.py +++ b/llvm/utils/lit/lit/util.py @@ -113,11 +113,6 @@ def usable_core_count(): except AttributeError: n = os.cpu_count() or 1 - # On Windows with more than 60 processes, multiprocessing's call to - # _winapi.WaitForMultipleObjects() prints an error and lit hangs. - if platform.system() == "Windows": - return min(n, 60) - return n def abs_path_preserve_drive(path): From 19353445093cb006b1a7e499d1d00b4d764669d3 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 16 Sep 2025 13:20:52 -0700 Subject: [PATCH 2/3] Distribute workers more evenly --- llvm/utils/lit/lit/run.py | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/llvm/utils/lit/lit/run.py b/llvm/utils/lit/lit/run.py index 5109aad97eb8f..9654b0d11578f 100644 --- a/llvm/utils/lit/lit/run.py +++ b/llvm/utils/lit/lit/run.py @@ -7,6 +7,14 @@ import lit.util import lit.worker +# Windows has a limit of 60 workers per pool. +# This is defined in the multiprocessing module implementation. 
+# See: https://github.com/python/cpython/blob/6bc65c30ff1fd0b581a2c93416496fc720bc442c/Lib/concurrent/futures/process.py#L669-L672 +WINDOWS_MAX_WORKERS_PER_POOL = 60 + + +def _ceilDiv(a, b): + return (a + b - 1) // b class MaxFailuresError(Exception): pass @@ -73,37 +81,39 @@ def _execute(self, deadline): } # Windows has a limit of 60 workers per pool, so we need to use multiple pools - # if we have more than 60 workers requested - max_workers_per_pool = 60 if os.name == "nt" else self.workers - num_pools = max( - 1, (self.workers + max_workers_per_pool - 1) // max_workers_per_pool + # if we have more workers requested than the limit. + max_workers_per_pool = ( + WINDOWS_MAX_WORKERS_PER_POOL if os.name == "nt" else self.workers ) - workers_per_pool = min(self.workers, max_workers_per_pool) + num_pools = max(1, _ceilDiv(self.workers, max_workers_per_pool)) + + # Distribute self.workers across num_pools as evenly as possible + workers_per_pool_list = [self.workers // num_pools] * num_pools + for pool_idx in range(self.workers % num_pools): + workers_per_pool_list[pool_idx] += 1 if num_pools > 1: self.lit_config.note( - "Using %d pools with %d workers each (Windows worker limit workaround)" - % (num_pools, workers_per_pool) + "Using %d pools balancing %d workers total distributed as %s (Windows worker limit workaround)" + % (num_pools, self.workers, workers_per_pool_list) ) # Create multiple pools pools = [] - for i in range(num_pools): + for pool_size in workers_per_pool_list: pool = multiprocessing.Pool( - workers_per_pool, lit.worker.initialize, (self.lit_config, semaphores) + pool_size, lit.worker.initialize, (self.lit_config, semaphores) ) pools.append(pool) # Distribute tests across pools - tests_per_pool = (len(self.tests) + num_pools - 1) // num_pools + tests_per_pool = _ceilDiv(len(self.tests), num_pools) async_results = [] for pool_idx, pool in enumerate(pools): start_idx = pool_idx * tests_per_pool end_idx = min(start_idx + tests_per_pool, len(self.tests)) 
- pool_tests = self.tests[start_idx:end_idx] - - for test in pool_tests: + for test in self.tests[start_idx:end_idx]: ar = pool.apply_async( lit.worker.execute, args=[test], callback=self.progress_callback ) From c9e4f407036c213e8c4db25c6c1e0e91e809b398 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Wed, 17 Sep 2025 07:57:42 -0700 Subject: [PATCH 3/3] Add a test --- llvm/utils/lit/lit/run.py | 5 +++++ llvm/utils/lit/tests/windows-pools.py | 27 +++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 llvm/utils/lit/tests/windows-pools.py diff --git a/llvm/utils/lit/lit/run.py b/llvm/utils/lit/lit/run.py index 9654b0d11578f..ea280e3ce3521 100644 --- a/llvm/utils/lit/lit/run.py +++ b/llvm/utils/lit/lit/run.py @@ -82,9 +82,14 @@ def _execute(self, deadline): # Windows has a limit of 60 workers per pool, so we need to use multiple pools # if we have more workers requested than the limit. + # Also, allow to override the limit with the LIT_WINDOWS_MAX_WORKERS_PER_POOL environment variable. max_workers_per_pool = ( WINDOWS_MAX_WORKERS_PER_POOL if os.name == "nt" else self.workers ) + max_workers_per_pool = int( + os.getenv("LIT_WINDOWS_MAX_WORKERS_PER_POOL", max_workers_per_pool) + ) + num_pools = max(1, _ceilDiv(self.workers, max_workers_per_pool)) # Distribute self.workers across num_pools as evenly as possible diff --git a/llvm/utils/lit/tests/windows-pools.py b/llvm/utils/lit/tests/windows-pools.py new file mode 100644 index 0000000000000..67dd852955248 --- /dev/null +++ b/llvm/utils/lit/tests/windows-pools.py @@ -0,0 +1,27 @@ +# Create a directory with 20 files and check the number of pools and workers per pool that lit will use. 
+ +# RUN: rm -Rf %t.dir && mkdir -p %t.dir +# RUN: python -c "for i in range(20): open(f'%t.dir/file{i}.txt', 'w').write('RUN:')" + +# RUN: echo "import lit.formats" > %t.dir/lit.cfg +# RUN: echo "config.name = \"top-level-suite\"" >> %t.dir/lit.cfg +# RUN: echo "config.suffixes = [\".txt\"]" >> %t.dir/lit.cfg +# RUN: echo "config.test_format = lit.formats.ShTest()" >> %t.dir/lit.cfg + + +# 15 workers per pool max, 100 workers total max: we expect lit to cap the workers to the number of files +# RUN: env "LIT_WINDOWS_MAX_WORKERS_PER_POOL=15" %{lit} -s %t.dir/ -j100 > %t.out 2>&1 +# CHECK: Using 2 pools balancing 20 workers total distributed as [10, 10] +# CHECK: Passed: 20 + +# 5 workers per pool max, 17 workers total max +# RUN: env "LIT_WINDOWS_MAX_WORKERS_PER_POOL=5" %{lit} -s %t.dir/ -j17 >> %t.out 2>&1 +# CHECK: Using 4 pools balancing 17 workers total distributed as [5, 4, 4, 4] +# CHECK: Passed: 20 + +# 19 workers per pool max, 19 workers total max +# RUN: env "LIT_WINDOWS_MAX_WORKERS_PER_POOL=19" %{lit} -s %t.dir/ -j19 >> %t.out 2>&1 +# CHECK-NOT: workers total distributed as +# CHECK: Passed: 20 + +# RUN: cat %t.out | FileCheck %s