Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions Include/internal/pycore_ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,10 +201,13 @@ extern void _PyEval_DeactivateOpCache(void);
/* Fast check used to decide whether the C stack has overflowed.
 *
 * tstate: the thread state; it is viewed as a _PyThreadStateImpl in order to
 *         read c_stack_soft_limit.
 *
 * Returns non-zero only when the current machine stack pointer lies in the
 * window of 2 * _PyOS_STACK_MARGIN_BYTES immediately past the soft limit,
 * and zero otherwise.
 *
 * The window is bounded on purpose: a stack pointer beyond it means the
 * recorded limits do not describe the stack that is actually in use, so no
 * overflow is reported. An unbounded comparison against the soft limit alone
 * would report a false overflow in that situation.
 */
static inline int _Py_MakeRecCheck(PyThreadState *tstate) {
    uintptr_t here_addr = _Py_get_machine_stack_pointer();
    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
    // Overflow if stack pointer is between soft limit and the base of the hardware stack.
    // If it is below the hardware stack base, assume that we have the wrong stack limits, and do nothing.
    // We could have the wrong stack limits because of limited platform support, or user-space threads.
#if _Py_STACK_GROWS_DOWN
    return here_addr < _tstate->c_stack_soft_limit &&
           here_addr >= _tstate->c_stack_soft_limit - 2 * _PyOS_STACK_MARGIN_BYTES;
#else
    return here_addr > _tstate->c_stack_soft_limit &&
           here_addr <= _tstate->c_stack_soft_limit + 2 * _PyOS_STACK_MARGIN_BYTES;
#endif
}

Expand Down
68 changes: 68 additions & 0 deletions InternalDocs/stack_protection.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Stack Protection

CPython protects against stack overflow in the form of runaway, or just very deep, recursion by raising a `RecursionError` instead of just crashing.
Protection against pure Python stack recursion has existed since very early versions of CPython, but in 3.12 we added protection against stack overflow
in C code. This was initially implemented using a counter and later improved in 3.14 to use the actual stack depth.
For those platforms that support it (Windows, Mac, and most Linuxes) we query the operating system to find the stack bounds.
For other platforms we use conservative estimates.


The C stack looks like this:

```
  +-------+  <--- Top of machine stack
  |       |
  |       |

     ~~

  |       |
  |       |
  +-------+  <--- Soft limit
  |       |
  |       |  _PyOS_STACK_MARGIN_BYTES
  |       |
  +-------+  <--- Hard limit
  |       |
  |       |  _PyOS_STACK_MARGIN_BYTES
  |       |
  +-------+  <--- Bottom of machine stack
```


We get the current stack pointer using compiler intrinsics where available, or by taking the address of a C local variable. See `_Py_get_machine_stack_pointer()`.

The soft and hard limit pointers are set by calling `_Py_InitializeRecursionLimits()` during thread initialization.

Recursion checks are performed by `_Py_EnterRecursiveCall()` or `_Py_EnterRecursiveCallTstate()`, which compare the stack pointer to the soft limit. If the stack pointer is lower than the soft limit, then `_Py_CheckRecursiveCall()` is called, which checks against both the hard and soft limits:

```python
kb_used = (stack_top - stack_pointer)>>10
if stack_pointer < bottom_of_machine_stack:
    pass  # Our stack limits could be wrong so it is safest to do nothing.
elif stack_pointer < hard_limit:
    FatalError(f"Unrecoverable stack overflow (used {kb_used} kB)")
elif stack_pointer < soft_limit:
    raise RecursionError(f"Stack overflow (used {kb_used} kB)")
```

### User space threads and other oddities

Some libraries provide user-space threads. These will change the C stack at runtime.
To guard against this we only raise if the stack pointer is in the window between the expected stack base and the soft limit.

### Diagnosing and fixing stack overflows

For stack protection to work correctly the amount of stack consumed between calls to `_Py_EnterRecursiveCall()` must be less than `_PyOS_STACK_MARGIN_BYTES`.

If you see a traceback ending in: `RecursionError: Stack overflow (used ... kB)` then the stack protection is working as intended. If you don't expect to see the error, then check the amount of stack used. If it seems low then CPython may not be configured properly.

However, if you see a fatal error or crash, then something is not right.
Either a recursive call is not checking `_Py_EnterRecursiveCall()`, or the amount of C stack consumed by a single call exceeds `_PyOS_STACK_MARGIN_BYTES`. If a hard crash occurs, it probably means that the amount of C stack consumed is more than double `_PyOS_STACK_MARGIN_BYTES`.

Likely causes:
* Recursive code is not calling `_Py_EnterRecursiveCall()`.
* `-O0` compilation flags, especially for Clang. With no optimization, C calls can consume a lot of stack space.
* Giant, complex functions in third-party C extensions. This is unlikely as the function in question would need to be more complicated than the bytecode interpreter.
* `_PyOS_STACK_MARGIN_BYTES` is just too low.
* `_Py_InitializeRecursionLimits()` is not setting the soft and hard limits correctly for that platform.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Only raise a ``RecursionError`` or trigger a fatal error if the stack
pointer is both below the limit pointer *and* above the stack base. If it is
outside these bounds, assume that it is OK. This prevents false positives
when user-space threads swap stacks.
28 changes: 21 additions & 7 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -344,9 +344,11 @@ _Py_ReachedRecursionLimitWithMargin(PyThreadState *tstate, int margin_count)
_Py_InitializeRecursionLimits(tstate);
}
#if _Py_STACK_GROWS_DOWN
return here_addr <= _tstate->c_stack_soft_limit + margin_count * _PyOS_STACK_MARGIN_BYTES;
return here_addr <= _tstate->c_stack_soft_limit + margin_count * _PyOS_STACK_MARGIN_BYTES &&
here_addr >= _tstate->c_stack_soft_limit - 2 * _PyOS_STACK_MARGIN_BYTES;
#else
return here_addr > _tstate->c_stack_soft_limit - margin_count * _PyOS_STACK_MARGIN_BYTES;
return here_addr > _tstate->c_stack_soft_limit - margin_count * _PyOS_STACK_MARGIN_BYTES &&
here_addr <= _tstate->c_stack_soft_limit + 2 * _PyOS_STACK_MARGIN_BYTES;
#endif
}

Expand Down Expand Up @@ -437,7 +439,7 @@ int pthread_attr_destroy(pthread_attr_t *a)
#endif

static void
hardware_stack_limits(uintptr_t *base, uintptr_t *top)
hardware_stack_limits(uintptr_t *base, uintptr_t *top, uintptr_t sp)
{
#ifdef WIN32
ULONG_PTR low, high;
Expand Down Expand Up @@ -473,10 +475,19 @@ hardware_stack_limits(uintptr_t *base, uintptr_t *top)
return;
}
# endif
uintptr_t here_addr = _Py_get_machine_stack_pointer();
uintptr_t top_addr = _Py_SIZE_ROUND_UP(here_addr, 4096);
// Add some space for caller function then round to minimum page size
// This is a guess at the top of the stack, but should be a reasonably
// good guess if called from _PyThreadState_Attach when creating a thread.
// If the thread is attached deep in a call stack, then the guess will be poor.
#if _Py_STACK_GROWS_DOWN
uintptr_t top_addr = _Py_SIZE_ROUND_UP(sp + 8*sizeof(void*), SYSTEM_PAGE_SIZE);
*top = top_addr;
*base = top_addr - Py_C_STACK_SIZE;
# else
uintptr_t base_addr = _Py_SIZE_ROUND_DOWN(sp - 8*sizeof(void*), SYSTEM_PAGE_SIZE);
*base = base_addr;
*top = base_addr + Py_C_STACK_SIZE;
#endif
#endif
}

Expand Down Expand Up @@ -525,7 +536,8 @@ void
_Py_InitializeRecursionLimits(PyThreadState *tstate)
{
uintptr_t base, top;
hardware_stack_limits(&base, &top);
uintptr_t here_addr = _Py_get_machine_stack_pointer();
hardware_stack_limits(&base, &top, here_addr);
assert(top != 0);

tstate_set_stack(tstate, base, top);
Expand Down Expand Up @@ -569,7 +581,7 @@ PyUnstable_ThreadState_ResetStackProtection(PyThreadState *tstate)


/* The function _Py_EnterRecursiveCallTstate() only calls _Py_CheckRecursiveCall()
if the recursion_depth reaches recursion_limit. */
if the stack pointer is between the stack base and c_stack_hard_limit. */
int
_Py_CheckRecursiveCall(PyThreadState *tstate, const char *where)
{
Expand All @@ -578,10 +590,12 @@ _Py_CheckRecursiveCall(PyThreadState *tstate, const char *where)
assert(_tstate->c_stack_soft_limit != 0);
assert(_tstate->c_stack_hard_limit != 0);
#if _Py_STACK_GROWS_DOWN
assert(here_addr >= _tstate->c_stack_hard_limit - _PyOS_STACK_MARGIN_BYTES);
if (here_addr < _tstate->c_stack_hard_limit) {
/* Overflowing while handling an overflow. Give up. */
int kbytes_used = (int)(_tstate->c_stack_top - here_addr)/1024;
#else
assert(here_addr <= _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES);
if (here_addr > _tstate->c_stack_hard_limit) {
/* Overflowing while handling an overflow. Give up. */
int kbytes_used = (int)(here_addr - _tstate->c_stack_top)/1024;
Expand Down
Loading