-
-
Notifications
You must be signed in to change notification settings - Fork 33k
Description
Bug report
Bug description:
GH-130398 introduced estimation of stack size (for the purpose of recursion-limit management) via pthread_getattr_np()
on platforms that support it. This non-portable function is not available on macOS, and so the following fallback codepath is used:
Lines 485 to 487 in 4fb338d
_tstate->c_stack_top = _Py_SIZE_ROUND_UP(here_addr, 4096); | |
_tstate->c_stack_soft_limit = _tstate->c_stack_top - Py_C_STACK_SIZE; | |
_tstate->c_stack_hard_limit = _tstate->c_stack_top - (Py_C_STACK_SIZE + _PyOS_STACK_MARGIN_BYTES); |
with Py_C_STACK_SIZE being set to 4000000 bytes (roughly 4 MB) on macOS:
Lines 369 to 381 in 4fb338d
#if defined(__s390x__) | |
# define Py_C_STACK_SIZE 320000 | |
#elif defined(_WIN32) | |
// Don't define Py_C_STACK_SIZE, ask the O/S | |
#elif defined(__ANDROID__) | |
# define Py_C_STACK_SIZE 1200000 | |
#elif defined(__sparc__) | |
# define Py_C_STACK_SIZE 1600000 | |
#elif defined(__hppa__) || defined(__powerpc64__) | |
# define Py_C_STACK_SIZE 2000000 | |
#else | |
# define Py_C_STACK_SIZE 4000000 | |
#endif |
However, this is too conservative, as Python's build script explicitly increases the stack size to 16 MB:
Lines 3601 to 3628 in 4fb338d
Darwin/*|iOS/*) | |
LINKFORSHARED="$extra_undefs -framework CoreFoundation" | |
# Issue #18075: the default maximum stack size (8MBytes) is too | |
# small for the default recursion limit. Increase the stack size | |
# to ensure that tests don't crash | |
stack_size="1000000" # 16 MB | |
if test "$with_ubsan" = "yes" | |
then | |
# Undefined behavior sanitizer requires an even deeper stack | |
stack_size="4000000" # 64 MB | |
fi | |
AC_DEFINE_UNQUOTED([THREAD_STACK_SIZE], | |
[0x$stack_size], | |
[Custom thread stack size depending on chosen sanitizer runtimes.]) | |
if test $ac_sys_system = "Darwin"; then | |
LINKFORSHARED="-Wl,-stack_size,$stack_size $LINKFORSHARED" | |
if test "$enable_framework"; then | |
LINKFORSHARED="$LINKFORSHARED "'$(PYTHONFRAMEWORKDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)' | |
fi | |
LINKFORSHARED="$LINKFORSHARED" | |
elif test $ac_sys_system = "iOS"; then | |
LINKFORSHARED="-Wl,-stack_size,$stack_size $LINKFORSHARED "'$(PYTHONFRAMEWORKDIR)/$(PYTHONFRAMEWORK)' | |
fi | |
;; |
This was observed in GH-131543, but was not investigated further, because the primary issue there turned out to be excessive stack consumption due to inlining (GH-137573).
Nevertheless, the under-estimated stack size still negatively impacts the recursion limit.
Using a slightly modified version of the reproducer from #131543 (comment), which keeps dynamically increasing the recursion limit as it recurses:
// stackpointer.c
//
// gcc -shared -fpic -o stackpointer.dylib stackpointer.c
//
// import ctypes
// sp = ctypes.CDLL('./stackpointer.dylib')
// address = sp.get_machine_stack_pointer()
#include <stdint.h>
// Return the frame address of this function, as reported by the compiler
// builtin __builtin_frame_address(0), converted to an integer.
//
// The Python test script calls this through ctypes and uses the result as
// an approximation of the current machine stack pointer, subtracting it
// from the thread's stack base address to estimate the stack depth.
uintptr_t get_machine_stack_pointer(void)
{
return (uintptr_t)__builtin_frame_address(0);
}
# recursion_limit_test.py
import sys
import ctypes
import ctypes.util
# Query the calling thread's stack base address and stack size through the
# non-portable pthread_get_stackaddr_np() / pthread_get_stacksize_np()
# functions of the C library.
libc = ctypes.CDLL(ctypes.util.find_library('c'))
libc.pthread_self.restype = ctypes.c_void_p
for func, result_type in (
    (libc.pthread_get_stackaddr_np, ctypes.c_void_p),
    (libc.pthread_get_stacksize_np, ctypes.c_ulonglong),
):
    # Both accessors take the handle returned by pthread_self() as their
    # only argument.
    func.argtypes = [ctypes.c_void_p]
    func.restype = result_type

this_thread = libc.pthread_self()
stack_address = libc.pthread_get_stackaddr_np(this_thread)
stack_size = libc.pthread_get_stacksize_np(this_thread)
print(f"Stack address: {stack_address} = 0x{stack_address:X}")
print(f"Stack size: {stack_size} = {stack_size / 1024.0} kB = {stack_size / 1024**2} MB")

# Load the helper library built from stackpointer.c; it reports the current
# stack pointer so the depth relative to the stack base can be printed.
splib = ctypes.CDLL('./stackpointer.dylib')
splib.get_machine_stack_pointer.restype = ctypes.c_void_p
stack_pointer = splib.get_machine_stack_pointer()
print(f"Stack pointer: 0x{stack_pointer:X}, depth: {(stack_address - stack_pointer)/1024:.2f} kB")

# Module-level state for the recursion limit test: the number of recursion
# levels entered so far and the recursion limit currently in effect.
counter = 0
limit = sys.getrecursionlimit()
class A:
    """Object whose attribute lookup recurses without bound.

    Every attribute access re-enters ``__getattribute__``, so reading any
    attribute keeps recursing until the interpreter raises RecursionError.
    """

    def __getattribute__(self, name):
        """Log the current recursion level and stack depth, then recurse.

        The module-level recursion limit is doubled whenever the next level
        would reach it, so the configured limit is never what stops the
        recursion.
        """
        global counter, limit
        counter += 1
        stack_pointer = splib.get_machine_stack_pointer()
        print(f"Recursion level: {counter}, stack pointer: 0x{stack_pointer:X}, depth: {(stack_address - stack_pointer)/1024:.2f} kB")

        # Raise the limit just before the next level would hit it.
        if limit <= counter + 1:
            limit *= 2
            print(f"Increasing recursion limit: {limit}")
            sys.setrecursionlimit(limit)

        # Looking the attribute up on self re-enters this method.
        return getattr(self, name)
print("Testing Recursion Limit")
print(f"Initial limit: {limit}")

# Any attribute access on an A instance starts the unbounded recursion; the
# test is over once the interpreter raises RecursionError.
a = A()
try:
    a.test
except RecursionError:
    print(f"Recursion Limit ok (reached level {counter})")
Running with python 3.13:
% python3.13 recursion_limit_test.py
Stack address: 6101024768 = 0x16BA64000
Stack size: 16777216 = 16384.0 kB = 16.0 MB
Stack pointer: 0x16BA622A0, depth: 7.34 kB
Testing Recursion Limit
Initial limit: 1000
Recursion level: 1, stack pointer: 0x16BA61CD0, depth: 8.80 kB
Recursion level: 2, stack pointer: 0x16BA616A0, depth: 10.34 kB
Recursion level: 3, stack pointer: 0x16BA610B0, depth: 11.83 kB
...
Recursion level: 4997, stack pointer: 0x16B323CD0, depth: 7424.80 kB
Recursion level: 4998, stack pointer: 0x16B3236E0, depth: 7426.28 kB
Recursion Limit ok (reached level 4999)
Note that even with the old approach, only about half of the actual 16 MB stack was used before the recursion limit kicked in (as is evident from the estimated stack depth).
With 3.14(.0rc2):
% python3.14 recursion_limit_test.py
Stack address: 6091063296 = 0x16B0E4000
Stack size: 16777216 = 16384.0 kB = 16.0 MB
Stack pointer: 0x16B0E1CC0, depth: 8.81 kB
Testing Recursion Limit
Initial limit: 1000
Recursion level: 1, stack pointer: 0x16B0E1120, depth: 11.72 kB
Recursion level: 2, stack pointer: 0x16B0E0550, depth: 14.67 kB
...
Recursion level: 1320, stack pointer: 0x16AD13470, depth: 3906.89 kB
Recursion level: 1321, stack pointer: 0x16AD128A0, depth: 3909.84 kB
Recursion Limit ok (reached level 1322)
With 3.14, we do not get far above the initial limit of 1000 before the recursion limit kicks in, because the stack is assumed to be only 4 MB.
CPython versions tested on:
3.14
Operating systems tested on:
macOS