Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Doc/c-api/exceptions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -984,6 +984,9 @@ because the :ref:`call protocol <call>` takes care of recursion handling.
be concatenated to the :exc:`RecursionError` message caused by the recursion
depth limit.

.. seealso::
The :c:func:`PyUnstable_ThreadState_SetStackProtection` function.

.. versionchanged:: 3.9
This function is now also available in the :ref:`limited API <limited-c-api>`.

Expand Down
37 changes: 37 additions & 0 deletions Doc/c-api/init.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1531,6 +1531,43 @@ All of the following functions must be called after :c:func:`Py_Initialize`.
.. versionadded:: 3.11


.. c:function:: int PyUnstable_ThreadState_SetStackProtection(PyThreadState *tstate, void *stack_start_addr, size_t stack_size)

Set the stack protection start address and stack protection size
of a Python thread state.

On success, return ``0``.
On failure, set an exception and return ``-1``.

CPython implements :ref:`recursion control <recursion>` for C code by raising
:py:exc:`RecursionError` when it notices that the machine execution stack is close
to overflow. See for example the :c:func:`Py_EnterRecursiveCall` function.
For this, it needs to know the location of the current thread's stack, which it
normally gets from the operating system.
When the stack is changed, for example using context switching techniques like the
Boost library's ``boost::context``, you must call
:c:func:`~PyUnstable_ThreadState_SetStackProtection` to inform CPython of the change.

Call :c:func:`~PyUnstable_ThreadState_SetStackProtection` either before
or after changing the stack.
Do not call any other Python C API between the call and the stack
change.

See :c:func:`PyUnstable_ThreadState_ResetStackProtection` for undoing this operation.

.. versionadded:: next


.. c:function:: void PyUnstable_ThreadState_ResetStackProtection(PyThreadState *tstate)

Reset the stack protection start address and stack protection size
of a Python thread state to the operating system defaults.

See :c:func:`PyUnstable_ThreadState_SetStackProtection` for an explanation.

.. versionadded:: next


.. c:function:: PyInterpreterState* PyInterpreterState_Get(void)

Get the current interpreter.
Expand Down
12 changes: 12 additions & 0 deletions Include/cpython/pystate.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,18 @@ PyAPI_FUNC(int) PyGILState_Check(void);
*/
PyAPI_FUNC(PyObject*) _PyThread_CurrentFrames(void);

// Set the stack protection start address and stack protection size
// of a Python thread state.
// On success, return 0.
// On failure (e.g. stack_size too small), set an exception and return -1.
PyAPI_FUNC(int) PyUnstable_ThreadState_SetStackProtection(
PyThreadState *tstate,
void *stack_start_addr, // Stack start address
size_t stack_size); // Stack size (in bytes)

// Reset the stack protection start address and stack protection size
// of a Python thread state to the operating system defaults.
PyAPI_FUNC(void) PyUnstable_ThreadState_ResetStackProtection(
PyThreadState *tstate);

/* Routines for advanced debuggers, requested by David Beazley.
Don't use unless you know what you are doing! */
PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_Main(void);
Expand Down
7 changes: 5 additions & 2 deletions Include/internal/pycore_ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,10 +201,13 @@ extern void _PyEval_DeactivateOpCache(void);
// Return non-zero if the machine stack pointer is past the soft limit and
// a recursion check should be made.
// NOTE: the markerless diff paste had left both the old unconditional
// check and the new bounded check in each branch; only the new bounded
// check is kept here (the old return made the new one unreachable).
static inline int _Py_MakeRecCheck(PyThreadState *tstate) {
    uintptr_t here_addr = _Py_get_machine_stack_pointer();
    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
    // Overflow if stack pointer is between soft limit and the base of the hardware stack.
    // If it is below the hardware stack base, assume that we have the wrong stack limits, and do nothing.
    // We could have the wrong stack limits because of limited platform support, or user-space threads.
#if _Py_STACK_GROWS_DOWN
    return here_addr < _tstate->c_stack_soft_limit
        && here_addr >= _tstate->c_stack_soft_limit - 2 * _PyOS_STACK_MARGIN_BYTES;
#else
    return here_addr > _tstate->c_stack_soft_limit
        && here_addr <= _tstate->c_stack_soft_limit + 2 * _PyOS_STACK_MARGIN_BYTES;
#endif
}

Expand Down
6 changes: 6 additions & 0 deletions Include/internal/pycore_pythonrun.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ extern const char* _Py_SourceAsString(
# define _PyOS_STACK_MARGIN_SHIFT (_PyOS_LOG2_STACK_MARGIN + 2)
#endif

// Minimum stack size (in bytes) accepted by
// PyUnstable_ThreadState_SetStackProtection().
#ifdef _Py_THREAD_SANITIZER
// Thread sanitizer builds reserve half of the stack (see
// tstate_set_stack() in Python/ceval.c), so require a larger minimum.
# define _PyOS_MIN_STACK_SIZE (_PyOS_STACK_MARGIN_BYTES * 6)
#else
# define _PyOS_MIN_STACK_SIZE (_PyOS_STACK_MARGIN_BYTES * 3)
#endif


#ifdef __cplusplus
}
Expand Down
4 changes: 4 additions & 0 deletions Include/internal/pycore_tstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ typedef struct _PyThreadStateImpl {
uintptr_t c_stack_soft_limit;
uintptr_t c_stack_hard_limit;

// PyUnstable_ThreadState_ResetStackProtection() values
uintptr_t c_stack_init_base;
uintptr_t c_stack_init_top;

PyObject *asyncio_running_loop; // Strong reference
PyObject *asyncio_running_task; // Strong reference

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Add :c:func:`PyUnstable_ThreadState_SetStackProtection` and
:c:func:`PyUnstable_ThreadState_ResetStackProtection` functions to set the
stack protection start address and stack protection size of a Python thread
state. Patch by Victor Stinner.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Only raise a ``RecursionError`` or trigger a fatal error if the stack
pointer is both below the limit pointer *and* above the stack base. If the
stack pointer is outside these bounds, assume the recorded stack limits are
wrong and do nothing. This prevents false positives when user-space threads
swap stacks.
53 changes: 53 additions & 0 deletions Modules/_testinternalcapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -2408,6 +2408,57 @@ set_vectorcall_nop(PyObject *self, PyObject *func)
Py_RETURN_NONE;
}

// Install the stack protection range [start, start + size) on tstate and
// sanity-check the resulting thread state stack limits.
//
// The API call is made outside of assert() so that it is still executed
// when asserts are compiled out (NDEBUG): a call inside assert() would
// silently disappear, leaving the protection unset.
static void
check_threadstate_set_stack_protection(PyThreadState *tstate,
                                       void *start, size_t size)
{
    int res = PyUnstable_ThreadState_SetStackProtection(tstate, start, size);
    assert(res == 0);
    (void)res;
    assert(!PyErr_Occurred());

    _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate;
    assert(ts->c_stack_top == (uintptr_t)start + size);
    assert(ts->c_stack_hard_limit <= ts->c_stack_soft_limit);
    assert(ts->c_stack_soft_limit < ts->c_stack_top);
    (void)ts;
}


// Exercise PyUnstable_ThreadState_SetStackProtection() and
// PyUnstable_ThreadState_ResetStackProtection() on the current thread.
// Return None on success; relies on assert() for checking.
static PyObject *
test_threadstate_set_stack_protection(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyThreadState *tstate = PyThreadState_GET();
    _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate;
    assert(!PyErr_Occurred());

    // Remember the initial (OS default) range so the reset can be verified.
    // Both fields are uintptr_t (see pycore_tstate.h).
    uintptr_t init_base = ts->c_stack_init_base;
    uintptr_t init_top = ts->c_stack_init_top;

    // Test the minimum stack size.
    // NOTE(review): the start address computed from the current stack
    // pointer assumes a downward-growing stack -- TODO confirm on
    // _Py_STACK_GROWS_DOWN == 0 platforms.
    size_t size = _PyOS_MIN_STACK_SIZE;
    void *start = (void*)(_Py_get_machine_stack_pointer() - size);
    check_threadstate_set_stack_protection(tstate, start, size);

    // Test a larger size
    size = 7654321;
    assert(size > _PyOS_MIN_STACK_SIZE);
    start = (void*)(_Py_get_machine_stack_pointer() - size);
    check_threadstate_set_stack_protection(tstate, start, size);

    // Test invalid size (too small): the call must fail with ValueError.
    // Call outside of assert() so the call still runs under NDEBUG.
    size = 5;
    start = (void*)(_Py_get_machine_stack_pointer() - size);
    int res = PyUnstable_ThreadState_SetStackProtection(tstate, start, size);
    assert(res == -1);
    (void)res;
    assert(PyErr_ExceptionMatches(PyExc_ValueError));
    PyErr_Clear();

    // Test PyUnstable_ThreadState_ResetStackProtection(): the recorded
    // initial range must be unchanged.
    PyUnstable_ThreadState_ResetStackProtection(tstate);
    assert(ts->c_stack_init_base == init_base);
    assert(ts->c_stack_init_top == init_top);
    (void)init_base;
    (void)init_top;

    Py_RETURN_NONE;
}


static PyMethodDef module_functions[] = {
{"get_configs", get_configs, METH_NOARGS},
{"get_recursion_depth", get_recursion_depth, METH_NOARGS},
Expand Down Expand Up @@ -2516,6 +2567,8 @@ static PyMethodDef module_functions[] = {
{"emscripten_set_up_async_input_device", emscripten_set_up_async_input_device, METH_NOARGS},
#endif
{"set_vectorcall_nop", set_vectorcall_nop, METH_O},
{"test_threadstate_set_stack_protection",
test_threadstate_set_stack_protection, METH_NOARGS},
{NULL, NULL} /* sentinel */
};

Expand Down
147 changes: 107 additions & 40 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -344,9 +344,11 @@ _Py_ReachedRecursionLimitWithMargin(PyThreadState *tstate, int margin_count)
_Py_InitializeRecursionLimits(tstate);
}
#if _Py_STACK_GROWS_DOWN
return here_addr <= _tstate->c_stack_soft_limit + margin_count * _PyOS_STACK_MARGIN_BYTES;
return here_addr <= _tstate->c_stack_soft_limit + margin_count * _PyOS_STACK_MARGIN_BYTES &&
here_addr >= _tstate->c_stack_soft_limit - 2 * _PyOS_STACK_MARGIN_BYTES;
#else
return here_addr > _tstate->c_stack_soft_limit - margin_count * _PyOS_STACK_MARGIN_BYTES;
return here_addr > _tstate->c_stack_soft_limit - margin_count * _PyOS_STACK_MARGIN_BYTES &&
here_addr <= _tstate->c_stack_soft_limit + 2 * _PyOS_STACK_MARGIN_BYTES;
#endif
}

Expand Down Expand Up @@ -436,24 +438,26 @@ int pthread_attr_destroy(pthread_attr_t *a)

#endif


void
_Py_InitializeRecursionLimits(PyThreadState *tstate)
static void
hardware_stack_limits(uintptr_t *base, uintptr_t *top, uintptr_t sp)
{
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
#ifdef WIN32
ULONG_PTR low, high;
GetCurrentThreadStackLimits(&low, &high);
_tstate->c_stack_top = (uintptr_t)high;
*top = (uintptr_t)high;
ULONG guarantee = 0;
SetThreadStackGuarantee(&guarantee);
_tstate->c_stack_hard_limit = ((uintptr_t)low) + guarantee + _PyOS_STACK_MARGIN_BYTES;
_tstate->c_stack_soft_limit = _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES;
*base = (uintptr_t)low + guarantee;
#elif defined(__APPLE__)
pthread_t this_thread = pthread_self();
void *stack_addr = pthread_get_stackaddr_np(this_thread); // top of the stack
size_t stack_size = pthread_get_stacksize_np(this_thread);
*top = (uintptr_t)stack_addr;
*base = ((uintptr_t)stack_addr) - stack_size;
#else
uintptr_t here_addr = _Py_get_machine_stack_pointer();
/// XXX musl supports HAVE_PTHRED_GETATTR_NP, but the resulting stack size
/// (on alpine at least) is much smaller than expected and imposes undue limits
/// compared to the old stack size estimation. (We assume musl is not glibc.)
/// XXX musl supports HAVE_PTHRED_GETATTR_NP, but the resulting stack size
/// (on alpine at least) is much smaller than expected and imposes undue limits
/// compared to the old stack size estimation. (We assume musl is not glibc.)
# if defined(HAVE_PTHREAD_GETATTR_NP) && !defined(_AIX) && \
!defined(__NetBSD__) && (defined(__GLIBC__) || !defined(__linux__))
size_t stack_size, guard_size;
Expand All @@ -466,40 +470,101 @@ _Py_InitializeRecursionLimits(PyThreadState *tstate)
err |= pthread_attr_destroy(&attr);
}
if (err == 0) {
uintptr_t base = ((uintptr_t)stack_addr) + guard_size;
uintptr_t top = base + stack_size;
# ifdef _Py_THREAD_SANITIZER
// Thread sanitizer crashes if we use a bit more than half the stack.
# if _Py_STACK_GROWS_DOWN
base += stack_size / 2;
# else
top -= stack_size / 2;
# endif
# endif
# if _Py_STACK_GROWS_DOWN
_tstate->c_stack_top = top;
_tstate->c_stack_hard_limit = base + _PyOS_STACK_MARGIN_BYTES;
_tstate->c_stack_soft_limit = base + _PyOS_STACK_MARGIN_BYTES * 2;
assert(_tstate->c_stack_soft_limit < here_addr);
assert(here_addr < _tstate->c_stack_top);
# else
_tstate->c_stack_top = base;
_tstate->c_stack_hard_limit = top - _PyOS_STACK_MARGIN_BYTES;
_tstate->c_stack_soft_limit = top - _PyOS_STACK_MARGIN_BYTES * 2;
assert(here_addr > base);
assert(here_addr < _tstate->c_stack_soft_limit);
# endif
*base = ((uintptr_t)stack_addr) + guard_size;
*top = (uintptr_t)stack_addr + stack_size;
return;
}
# endif
_tstate->c_stack_top = _Py_SIZE_ROUND_UP(here_addr, 4096);
_tstate->c_stack_soft_limit = _tstate->c_stack_top - Py_C_STACK_SIZE;
_tstate->c_stack_hard_limit = _tstate->c_stack_top - (Py_C_STACK_SIZE + _PyOS_STACK_MARGIN_BYTES);
// Add some space for caller function then round to minimum page size
// This is a guess at the top of the stack, but should be a reasonably
// good guess if called from _PyThreadState_Attach when creating a thread.
// If the thread is attached deep in a call stack, then the guess will be poor.
#if _Py_STACK_GROWS_DOWN
uintptr_t top_addr = _Py_SIZE_ROUND_UP(sp + 8*sizeof(void*), SYSTEM_PAGE_SIZE);
*top = top_addr;
*base = top_addr - Py_C_STACK_SIZE;
# else
uintptr_t base_addr = _Py_SIZE_ROUND_DOWN(sp - 8*sizeof(void*), SYSTEM_PAGE_SIZE);
*base = base_addr;
*top = base_addr + Py_C_STACK_SIZE;
#endif
#endif
}

// Install the stack limits of a thread state from the usable machine
// stack range [base, top).  The hard and soft limits are placed one and
// two guard margins above the base, respectively.
static void
tstate_set_stack(PyThreadState *tstate,
                 uintptr_t base, uintptr_t top)
{
    assert(base < top);
    assert((top - base) >= _PyOS_MIN_STACK_SIZE);

#ifdef _Py_THREAD_SANITIZER
    // Thread sanitizer crashes if we use more than half the stack.
    base += (top - base) / 2;
#endif
    _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate;
    ts->c_stack_top = top;
    ts->c_stack_hard_limit = base + _PyOS_STACK_MARGIN_BYTES;
    ts->c_stack_soft_limit = base + _PyOS_STACK_MARGIN_BYTES * 2;

#ifndef NDEBUG
    // Sanity checks: hard limit <= soft limit < top.
    assert(ts->c_stack_hard_limit <= ts->c_stack_soft_limit);
    assert(ts->c_stack_soft_limit < ts->c_stack_top);
#endif
}

// (Re)compute the C stack limits of `tstate` from the operating system
// defaults, and record the hardware range so that
// PyUnstable_ThreadState_ResetStackProtection() can restore it later.
void
_Py_InitializeRecursionLimits(PyThreadState *tstate)
{
    uintptr_t sp = _Py_get_machine_stack_pointer();
    uintptr_t base;
    uintptr_t top;
    hardware_stack_limits(&base, &top, sp);
    assert(top != 0);

    tstate_set_stack(tstate, base, top);

    // Remember the OS-provided range for a later reset.
    _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate;
    ts->c_stack_init_base = base;
    ts->c_stack_init_top = top;
}

// Set the stack protection range of a Python thread state to the
// user-provided stack [stack_start_addr, stack_start_addr + stack_size).
//
// Return 0 on success.  Set ValueError and return -1 if stack_size is
// smaller than the minimum needed for the guard margins.
int
PyUnstable_ThreadState_SetStackProtection(PyThreadState *tstate,
                                          void *stack_start_addr, size_t stack_size)
{
    if (stack_size < _PyOS_MIN_STACK_SIZE) {
        // Cast to size_t: _PyOS_MIN_STACK_SIZE is a macro expression whose
        // type is not necessarily size_t, and %zu requires exactly size_t
        // (mismatched printf-style arguments are undefined behavior).
        PyErr_Format(PyExc_ValueError,
                     "stack_size must be at least %zu bytes",
                     (size_t)_PyOS_MIN_STACK_SIZE);
        return -1;
    }

    uintptr_t base = (uintptr_t)stack_start_addr;
    uintptr_t top = base + stack_size;
    tstate_set_stack(tstate, base, top);
    return 0;
}


// Restore the stack protection range of `tstate` to the operating system
// defaults recorded by _Py_InitializeRecursionLimits().
void
PyUnstable_ThreadState_ResetStackProtection(PyThreadState *tstate)
{
    _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate;
    if (ts->c_stack_init_top == 0) {
        // No recorded hardware range yet: query the OS again.
        _Py_InitializeRecursionLimits(tstate);
        return;
    }
    tstate_set_stack(tstate, ts->c_stack_init_base, ts->c_stack_init_top);
}


/* The function _Py_EnterRecursiveCallTstate() only calls _Py_CheckRecursiveCall()
if the recursion_depth reaches recursion_limit. */
if the stack pointer is between the stack base and c_stack_hard_limit. */
int
_Py_CheckRecursiveCall(PyThreadState *tstate, const char *where)
{
Expand All @@ -508,10 +573,12 @@ _Py_CheckRecursiveCall(PyThreadState *tstate, const char *where)
assert(_tstate->c_stack_soft_limit != 0);
assert(_tstate->c_stack_hard_limit != 0);
#if _Py_STACK_GROWS_DOWN
assert(here_addr >= _tstate->c_stack_hard_limit - _PyOS_STACK_MARGIN_BYTES);
if (here_addr < _tstate->c_stack_hard_limit) {
/* Overflowing while handling an overflow. Give up. */
int kbytes_used = (int)(_tstate->c_stack_top - here_addr)/1024;
#else
assert(here_addr <= _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES);
if (here_addr > _tstate->c_stack_hard_limit) {
/* Overflowing while handling an overflow. Give up. */
int kbytes_used = (int)(here_addr - _tstate->c_stack_top)/1024;
Expand Down
3 changes: 3 additions & 0 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -1583,6 +1583,9 @@ init_threadstate(_PyThreadStateImpl *_tstate,
_tstate->c_stack_top = 0;
_tstate->c_stack_hard_limit = 0;

_tstate->c_stack_init_base = 0;
_tstate->c_stack_init_top = 0;

_tstate->asyncio_running_loop = NULL;
_tstate->asyncio_running_task = NULL;

Expand Down
Loading