Skip to content
Open
26 changes: 26 additions & 0 deletions Doc/c-api/init.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1353,6 +1353,32 @@ All of the following functions must be called after :c:func:`Py_Initialize`.
.. versionadded:: 3.11
.. c:function:: int PyUnstable_ThreadState_SetStack(PyThreadState *tstate, void *stack_start_addr, size_t stack_size)
Set the stack start address and stack size of a Python thread state.
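*stack_size* must be at least the minimum stack size required by the interpreter (three times its internal stack margin); a smaller value raises :exc:`ValueError`.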
On success, return ``0``.
On failure, set an exception and return ``-1``.
.. seealso::
The :c:func:`PyUnstable_ThreadState_ResetStack` function.
.. versionadded:: next
.. c:function:: void PyUnstable_ThreadState_ResetStack(PyThreadState *tstate)
Reset the stack start address and stack size of a Python thread state to
the operating system defaults.
.. seealso::
The :c:func:`PyUnstable_ThreadState_SetStack` function.
.. versionadded:: next
.. c:function:: PyInterpreterState* PyInterpreterState_Get(void)
Get the current interpreter.
Expand Down
5 changes: 5 additions & 0 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2981,6 +2981,11 @@ New features in the C API
as arguments to C API functions.
(Contributed by Sam Gross in :gh:`133164`.)

* Add :c:func:`PyUnstable_ThreadState_SetStack` and
:c:func:`PyUnstable_ThreadState_ResetStack` functions to set the stack base
address and stack size of a Python thread state.
(Contributed by Victor Stinner in :gh:`139653`.)


Limited C API changes
---------------------
Expand Down
10 changes: 10 additions & 0 deletions Include/cpython/pystate.h
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,16 @@ PyAPI_FUNC(int) PyGILState_Check(void);
*/
PyAPI_FUNC(PyObject*) _PyThread_CurrentFrames(void);

// Set the stack start address and stack size of a Python thread state.
// Return 0 on success. If stack_size is smaller than the minimum accepted
// size, set a ValueError exception and return -1.
PyAPI_FUNC(int) PyUnstable_ThreadState_SetStack(
PyThreadState *tstate,
void *stack_start_addr, // Stack start address (low end: top is start + size)
size_t stack_size); // Stack size (in bytes)

// Reset the stack start address and stack size of a Python thread state.
// The values are queried from the platform on the first call and cached in
// the thread state; later calls restore the cached values.
PyAPI_FUNC(void) PyUnstable_ThreadState_ResetStack(
PyThreadState *tstate);

/* Routines for advanced debuggers, requested by David Beazley.
Don't use unless you know what you are doing! */
PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_Main(void);
Expand Down
2 changes: 0 additions & 2 deletions Include/internal/pycore_ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,6 @@ static inline void _Py_LeaveRecursiveCallTstate(PyThreadState *tstate) {
(void)tstate;
}

PyAPI_FUNC(void) _Py_InitializeRecursionLimits(PyThreadState *tstate);

static inline int _Py_ReachedRecursionLimit(PyThreadState *tstate) {
uintptr_t here_addr = _Py_get_machine_stack_pointer();
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
Expand Down
4 changes: 4 additions & 0 deletions Include/internal/pycore_tstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ typedef struct _PyThreadStateImpl {
uintptr_t c_stack_soft_limit;
uintptr_t c_stack_hard_limit;

// PyUnstable_ThreadState_ResetStack() values
uintptr_t c_stack_init_start;
size_t c_stack_init_size;

PyObject *asyncio_running_loop; // Strong reference
PyObject *asyncio_running_task; // Strong reference

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Add :c:func:`PyUnstable_ThreadState_SetStack` and
:c:func:`PyUnstable_ThreadState_ResetStack` functions to set the stack base
address and stack size of a Python thread state. Patch by Victor Stinner.
50 changes: 50 additions & 0 deletions Modules/_testinternalcapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -2418,6 +2418,55 @@ set_vectorcall_nop(PyObject *self, PyObject *func)
Py_RETURN_NONE;
}

// Call PyUnstable_ThreadState_SetStack() with a valid (start, size) pair and
// check that the C stack limits stored in the thread state match the request.
static void
check_threadstate_set_stack(PyThreadState *tstate, void *start, size_t size)
{
    // Keep the call outside of assert(): an assert() argument is not
    // evaluated when NDEBUG is defined, which would skip the call entirely.
    int res = PyUnstable_ThreadState_SetStack(tstate, start, size);
    assert(res == 0);
    assert(!PyErr_Occurred());

    _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate;
    assert(ts->c_stack_hard_limit == (uintptr_t)start + _PyOS_STACK_MARGIN_BYTES);
    assert(ts->c_stack_top == (uintptr_t)start + size);
    assert(ts->c_stack_soft_limit >= ts->c_stack_hard_limit);
    assert(ts->c_stack_soft_limit < ts->c_stack_top);

    // Only read by assertions: avoid unused-variable warnings with NDEBUG.
    (void)res;
    (void)ts;
}


// Exercise PyUnstable_ThreadState_SetStack() and
// PyUnstable_ThreadState_ResetStack() on the current thread state.
// Returns None; failures are reported through assert().
static PyObject *
test_threadstate_set_stack(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyThreadState *tstate = PyThreadState_GET();
    _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate;
    assert(!PyErr_Occurred());

    // Remember the cached initial values: they are what a reset restores.
    uintptr_t init_start = ts->c_stack_init_start;
    size_t init_size = ts->c_stack_init_size;

    // Test the minimum stack size
    size_t size = _PyOS_STACK_MARGIN_BYTES * 3;
    void *start = (void*)(_Py_get_machine_stack_pointer() - size);
    check_threadstate_set_stack(tstate, start, size);

    // Test a larger size
    size = 7654321;
    start = (void*)(_Py_get_machine_stack_pointer() - size);
    check_threadstate_set_stack(tstate, start, size);

    // Test invalid size (too small).
    // The call is kept outside of assert() so that it still runs when
    // NDEBUG is defined.
    size = 5;
    start = (void*)(_Py_get_machine_stack_pointer() - size);
    int res = PyUnstable_ThreadState_SetStack(tstate, start, size);
    assert(res == -1);
    assert(PyErr_ExceptionMatches(PyExc_ValueError));
    PyErr_Clear();

    // Test PyUnstable_ThreadState_ResetStack(): the cached initial values
    // must be unchanged and the limits must be derived from them again.
    PyUnstable_ThreadState_ResetStack(tstate);
    assert(ts->c_stack_init_start == init_start);
    assert(ts->c_stack_init_size == init_size);
    assert(ts->c_stack_hard_limit == init_start + _PyOS_STACK_MARGIN_BYTES);
    assert(ts->c_stack_top == init_start + init_size);

    // Only read by assertions: avoid unused-variable warnings with NDEBUG.
    (void)res;
    (void)init_start;
    (void)init_size;

    Py_RETURN_NONE;
}

static PyMethodDef module_functions[] = {
{"get_configs", get_configs, METH_NOARGS},
{"get_recursion_depth", get_recursion_depth, METH_NOARGS},
Expand Down Expand Up @@ -2527,6 +2576,7 @@ static PyMethodDef module_functions[] = {
#endif
{"simple_pending_call", simple_pending_call, METH_O},
{"set_vectorcall_nop", set_vectorcall_nop, METH_O},
{"test_threadstate_set_stack", test_threadstate_set_stack, METH_NOARGS},
{NULL, NULL} /* sentinel */
};

Expand Down
128 changes: 96 additions & 32 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ _Py_ReachedRecursionLimitWithMargin(PyThreadState *tstate, int margin_count)
return 0;
}
if (_tstate->c_stack_hard_limit == 0) {
_Py_InitializeRecursionLimits(tstate);
PyUnstable_ThreadState_ResetStack(tstate);
}
return here_addr <= _tstate->c_stack_soft_limit + margin_count * _PyOS_STACK_MARGIN_BYTES;
}
Expand Down Expand Up @@ -439,62 +439,126 @@ int pthread_attr_destroy(pthread_attr_t *a)
#endif


void
_Py_InitializeRecursionLimits(PyThreadState *tstate)
// Store the C stack limits of a thread state, derived from the lowest stack
// address (stack_start_addr) and the stack size in bytes (stack_size).
// The caller must pass a size of at least three stack margins.
static void
tstate_set_stack(PyThreadState *tstate,
                 void *stack_start_addr, size_t stack_size)
{
    assert(stack_size > 0);
    assert(stack_size >= (_PyOS_STACK_MARGIN_BYTES * 3));

    _PyThreadStateImpl *impl = (_PyThreadStateImpl *)tstate;
    const uintptr_t base = (uintptr_t)stack_start_addr;

    // The hard limit sits one margin above the start of the stack.
    const uintptr_t hard_limit = base + _PyOS_STACK_MARGIN_BYTES;

#ifdef _Py_THREAD_SANITIZER
    // Thread sanitizer crashes if we use a bit more than half the stack.
    const uintptr_t soft_limit = hard_limit + (stack_size / 2);
#else
    // The soft limit sits one more margin above the hard limit.
    const uintptr_t soft_limit = hard_limit + _PyOS_STACK_MARGIN_BYTES;
#endif

    impl->c_stack_hard_limit = hard_limit;
    impl->c_stack_top = base + stack_size;
    impl->c_stack_soft_limit = soft_limit;

    // Sanity checks: hard limit <= soft limit < top
    assert(impl->c_stack_hard_limit <= impl->c_stack_soft_limit);
    assert(impl->c_stack_soft_limit < impl->c_stack_top);
}


// Set the stack start address and stack size of a Python thread state.
//
// stack_start_addr is the lowest address of the stack and stack_size its
// size in bytes; the top of the stack is stack_start_addr + stack_size.
//
// Return 0 on success. Set a ValueError exception and return -1 if
// stack_size is smaller than three stack margins, or if
// stack_start_addr + stack_size does not fit in the address space.
int
PyUnstable_ThreadState_SetStack(PyThreadState *tstate,
                                void *stack_start_addr, size_t stack_size)
{
    // Force the type to size_t: "%zu" requires exactly a size_t argument.
    const size_t min_size = (size_t)(_PyOS_STACK_MARGIN_BYTES * 3);
    if (stack_size < min_size) {
        PyErr_Format(PyExc_ValueError,
                     "stack_size must be at least %zu bytes",
                     min_size);
        return -1;
    }

    // Reject a range that wraps around the address space. The sanity checks
    // of tstate_set_stack() are assertions, which are compiled out in
    // release builds, so inconsistent limits would otherwise be stored.
    const uintptr_t start = (uintptr_t)stack_start_addr;
    const uintptr_t top = start + (uintptr_t)stack_size;
    if (top < start) {
        PyErr_Format(PyExc_ValueError,
                     "stack_start_addr + stack_size overflows");
        return -1;
    }

    tstate_set_stack(tstate, stack_start_addr, stack_size);
    return 0;
}


// Get the stack start address and stack size (in bytes) of the current
// thread. The results are stored in *start and *size:
// - WIN32: the range reported by GetCurrentThreadStackLimits(), with the
//   value reported by SetThreadStackGuarantee() excluded from the low end;
// - __APPLE__: pthread_get_stackaddr_np() (top of the stack) minus
//   pthread_get_stacksize_np();
// - otherwise: the pthread attributes when available (guard area excluded),
//   else an estimate of Py_C_STACK_SIZE bytes below the current stack
//   pointer rounded up to a multiple of 4096.
static void
get_stack(uintptr_t *start, size_t *size)
{
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
#ifdef WIN32
ULONG_PTR low, high;
GetCurrentThreadStackLimits(&low, &high);
_tstate->c_stack_top = (uintptr_t)high;

ULONG guarantee = 0;
SetThreadStackGuarantee(&guarantee);
_tstate->c_stack_hard_limit = ((uintptr_t)low) + guarantee + _PyOS_STACK_MARGIN_BYTES;
_tstate->c_stack_soft_limit = _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES;

*start = (uintptr_t)low + guarantee;
*size = (uintptr_t)high - *start;

#elif defined(__APPLE__)
pthread_t this_thread = pthread_self();
void *stack_addr = pthread_get_stackaddr_np(this_thread); // top of the stack
size_t stack_size = pthread_get_stacksize_np(this_thread);
_tstate->c_stack_top = (uintptr_t)stack_addr;
_tstate->c_stack_hard_limit = _tstate->c_stack_top - stack_size;
_tstate->c_stack_soft_limit = _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES;
void *top = pthread_get_stackaddr_np(this_thread); // top of the stack
*size = pthread_get_stacksize_np(this_thread);
*start = (uintptr_t)top - *size;

#else
uintptr_t here_addr = _Py_get_machine_stack_pointer();
/// XXX musl supports HAVE_PTHREAD_GETATTR_NP, but the resulting stack size
/// (on alpine at least) is much smaller than expected and imposes undue limits
/// compared to the old stack size estimation. (We assume musl is not glibc.)
// XXX musl supports HAVE_PTHREAD_GETATTR_NP, but the resulting stack size
// (on alpine at least) is much smaller than expected and imposes undue limits
// compared to the old stack size estimation. (We assume musl is not glibc.)
# if defined(HAVE_PTHREAD_GETATTR_NP) && !defined(_AIX) && \
!defined(__NetBSD__) && (defined(__GLIBC__) || !defined(__linux__))
size_t stack_size, guard_size;
void *stack_addr;
pthread_attr_t attr;
size_t guard_size, stack_size;
void *stack_addr;
int err = pthread_getattr_np(pthread_self(), &attr);
if (err == 0) {
// Any failing call leaves a non-zero value in err.
err = pthread_attr_getguardsize(&attr, &guard_size);
err |= pthread_attr_getstack(&attr, &stack_addr, &stack_size);
err |= pthread_attr_destroy(&attr);
}
if (err == 0) {
uintptr_t base = ((uintptr_t)stack_addr) + guard_size;
_tstate->c_stack_top = base + stack_size;
#ifdef _Py_THREAD_SANITIZER
// Thread sanitizer crashes if we use a bit more than half the stack.
_tstate->c_stack_soft_limit = base + (stack_size / 2);
#else
_tstate->c_stack_soft_limit = base + _PyOS_STACK_MARGIN_BYTES * 2;
// Exclude the guard area from the low end of the reported range.
*start = (uintptr_t)stack_addr + guard_size;
*size = stack_size - guard_size;
}
else
# endif
{
// Fallback: estimate the stack range from the current stack pointer.
uintptr_t here_addr = _Py_get_machine_stack_pointer();
uintptr_t top = _Py_SIZE_ROUND_UP(here_addr, 4096);
*size = Py_C_STACK_SIZE;
*start = top - *size;
}
#endif
_tstate->c_stack_hard_limit = base + _PyOS_STACK_MARGIN_BYTES;
assert(_tstate->c_stack_soft_limit < here_addr);
assert(here_addr < _tstate->c_stack_top);
}

// Reset the stack start address and stack size of a Python thread state.
//
// If initial values are already cached in the thread state
// (c_stack_init_start != 0), they are applied again. Otherwise the values
// are queried with get_stack(), applied, and cached for later calls.
void
PyUnstable_ThreadState_ResetStack(PyThreadState *tstate)
{
_PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate;
// Fast path: reuse the values cached by a previous call.
if (ts->c_stack_init_start != 0) {
tstate_set_stack(tstate,
(void*)ts->c_stack_init_start,
ts->c_stack_init_size);
return;
}
# endif
_tstate->c_stack_top = _Py_SIZE_ROUND_UP(here_addr, 4096);
_tstate->c_stack_soft_limit = _tstate->c_stack_top - Py_C_STACK_SIZE;
_tstate->c_stack_hard_limit = _tstate->c_stack_top - (Py_C_STACK_SIZE + _PyOS_STACK_MARGIN_BYTES);

// First call: query the stack range, apply it and cache it.
uintptr_t start;
size_t size;
get_stack(&start, &size);
tstate_set_stack(tstate, (void*)start, size);
ts->c_stack_init_start = start;
ts->c_stack_init_size = size;

// Test the stack pointer: it must lie between the soft limit and the top
// (only checked when assertions are enabled, and not on WASI).
#if !defined(NDEBUG) && !defined(__wasi__)
uintptr_t here_addr = _Py_get_machine_stack_pointer();
assert(ts->c_stack_soft_limit < here_addr);
assert(here_addr < ts->c_stack_top);
#endif
}


/* The function _Py_EnterRecursiveCallTstate() only calls _Py_CheckRecursiveCall()
if the recursion_depth reaches recursion_limit. */
int
Expand Down
2 changes: 1 addition & 1 deletion Python/pylifecycle.c
Original file line number Diff line number Diff line change
Expand Up @@ -868,7 +868,7 @@ pycore_interp_init(PyThreadState *tstate)
{
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
if (_tstate->c_stack_hard_limit == 0) {
_Py_InitializeRecursionLimits(tstate);
PyUnstable_ThreadState_ResetStack(tstate);
}
PyInterpreterState *interp = tstate->interp;
PyStatus status;
Expand Down
5 changes: 4 additions & 1 deletion Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -1490,6 +1490,9 @@ init_threadstate(_PyThreadStateImpl *_tstate,
_tstate->c_stack_top = 0;
_tstate->c_stack_hard_limit = 0;

_tstate->c_stack_init_start = 0;
_tstate->c_stack_init_size = 0;

_tstate->asyncio_running_loop = NULL;
_tstate->asyncio_running_task = NULL;

Expand Down Expand Up @@ -2093,7 +2096,7 @@ _PyThreadState_Attach(PyThreadState *tstate)
}
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
if (_tstate->c_stack_hard_limit == 0) {
_Py_InitializeRecursionLimits(tstate);
PyUnstable_ThreadState_ResetStack(tstate);
}

while (1) {
Expand Down
Loading