Skip to content
11 changes: 1 addition & 10 deletions Include/internal/pycore_ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -249,16 +249,7 @@ static inline void _Py_LeaveRecursiveCallTstate(PyThreadState *tstate) {

PyAPI_FUNC(void) _Py_InitializeRecursionLimits(PyThreadState *tstate);

/* Report whether the C stack of the given thread has reached its soft limit.
 *
 * Samples the current machine stack pointer and compares it against
 * tstate's c_stack_soft_limit.  The comparison direction follows
 * _Py_STACK_GROWS_DOWN.
 *
 * Returns nonzero when the soft limit has been reached or passed,
 * zero otherwise.
 */
static inline int
_Py_ReachedRecursionLimit(PyThreadState *tstate)
{
    _PyThreadStateImpl *impl = (_PyThreadStateImpl *)tstate;
    uintptr_t sp = _Py_get_machine_stack_pointer();

    /* The limits must have been set up before this is called
       (presumably by _Py_InitializeRecursionLimits -- confirm). */
    assert(impl->c_stack_hard_limit != 0);

    int reached;
#if _Py_STACK_GROWS_DOWN
    /* Stack grows towards lower addresses: at or below the limit. */
    reached = (sp <= impl->c_stack_soft_limit);
#else
    /* Stack grows towards higher addresses: at or above the limit. */
    reached = (sp >= impl->c_stack_soft_limit);
#endif
    return reached;
}
PyAPI_FUNC(int) _Py_ReachedRecursionLimit(PyThreadState *tstate);

// Export for test_peg_generator
PyAPI_FUNC(int) _Py_ReachedRecursionLimitWithMargin(
Expand Down
13 changes: 12 additions & 1 deletion Include/internal/pycore_pystate.h
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,18 @@ static uintptr_t return_pointer_as_int(char* p) {
}
#endif

PyAPI_DATA(uintptr_t) _Py_get_machine_stack_pointer(void);
/* Return an address taken from the current stack frame, as an integer.
 *
 * The source of the address depends on the compiler:
 *   - __builtin_frame_address(0) when that builtin is available or the
 *     compiler defines __GNUC__;
 *   - _AddressOfReturnAddress() under MSVC;
 *   - otherwise, the address of a local variable.
 */
static inline uintptr_t
_Py_get_machine_stack_pointer(void)
{
    uintptr_t sp;
#if _Py__has_builtin(__builtin_frame_address) || defined(__GNUC__)
    sp = (uintptr_t)__builtin_frame_address(0);
#elif defined(_MSC_VER)
    sp = (uintptr_t)_AddressOfReturnAddress();
#else
    char here;
    /* Go through a helper to avoid a compiler warning about returning
       a stack address. */
    sp = return_pointer_as_int(&here);
#endif
    return sp;
}

static inline intptr_t
_Py_RecursionLimit_GetMargin(PyThreadState *tstate)
Expand Down
13 changes: 13 additions & 0 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -1201,6 +1201,19 @@ _PyEval_GetIter(_PyStackRef iterable, _PyStackRef *index_or_null, int yield_from
return PyStackRef_FromPyObjectSteal(iter_o);
}

/* Out-of-line check for whether the thread's C stack has hit its soft limit.
 *
 * Reads the current machine stack pointer and tests it against
 * tstate's c_stack_soft_limit, in the direction given by
 * _Py_STACK_GROWS_DOWN.
 *
 * Returns nonzero if the soft limit has been reached or passed, else zero.
 */
Py_NO_INLINE int
_Py_ReachedRecursionLimit(PyThreadState *tstate)
{
    const uintptr_t current_sp = _Py_get_machine_stack_pointer();
    _PyThreadStateImpl *ts_impl = (_PyThreadStateImpl *)tstate;

    /* A zero hard limit would mean the limits were never set up. */
    assert(ts_impl->c_stack_hard_limit != 0);

#if _Py_STACK_GROWS_DOWN
    if (current_sp <= ts_impl->c_stack_soft_limit) {
        return 1;
    }
#else
    if (current_sp >= ts_impl->c_stack_soft_limit) {
        return 1;
    }
#endif
    return 0;
}


#if (defined(__GNUC__) && __GNUC__ >= 10 && !defined(__clang__)) && defined(__x86_64__)
/*
* gh-129987: The SLP autovectorizer can cause poor code generation for
Expand Down
13 changes: 0 additions & 13 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -3286,16 +3286,3 @@ _Py_GetMainConfig(void)
}
return _PyInterpreterState_GetConfig(interp);
}

/* Return an address taken from the current stack frame, as an integer.
 *
 * Uses __builtin_frame_address(0) when the builtin is available or
 * __GNUC__ is defined, _AddressOfReturnAddress() under MSVC, and the
 * address of a local variable on any other compiler.
 */
uintptr_t
_Py_get_machine_stack_pointer(void)
{
    uintptr_t result;
#if _Py__has_builtin(__builtin_frame_address) || defined(__GNUC__)
    result = (uintptr_t)__builtin_frame_address(0);
#elif defined(_MSC_VER)
    result = (uintptr_t)_AddressOfReturnAddress();
#else
    char here;
    /* The helper hides the local's address from the compiler so it does
       not warn about returning a stack address. */
    result = return_pointer_as_int(&here);
#endif
    return result;
}
17 changes: 16 additions & 1 deletion Tools/jit/_optimizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ class Optimizer:
label_prefix: str
symbol_prefix: str
re_global: re.Pattern[str]
frame_pointers: bool
# The first block in the linked list:
_root: _Block = dataclasses.field(init=False, default_factory=_Block)
_labels: dict[str, _Block] = dataclasses.field(init=False, default_factory=dict)
Expand Down Expand Up @@ -193,6 +194,7 @@ class Optimizer:
_re_small_const_1 = _RE_NEVER_MATCH
_re_small_const_2 = _RE_NEVER_MATCH
const_reloc = "<Not supported>"
_frame_pointer_modify: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH

def __post_init__(self) -> None:
# Split the code into a linked list of basic blocks. A basic block is an
Expand Down Expand Up @@ -553,6 +555,16 @@ def _small_const_2(self, inst: Instruction) -> tuple[str, Instruction | None]:
def _small_consts_match(self, inst1: Instruction, inst2: Instruction) -> bool:
raise NotImplementedError()

def _validate(self) -> None:
    """Sanity-check the instructions of every block.

    When ``self.frame_pointers`` is set, each instruction's text is matched
    against ``self._frame_pointer_modify``; a match trips an assertion,
    since the frame pointer should not be modified.
    """
    for block in self._blocks():
        # An empty block has nothing to check; the loop simply doesn't run.
        for inst in block.instructions or ():
            if not self.frame_pointers:
                continue
            touches_fp = self._frame_pointer_modify.match(inst.text) is not None
            assert not touches_fp, "Frame pointer should not be modified"

def run(self) -> None:
"""Run this optimizer."""
self._insert_continue_label()
Expand All @@ -565,6 +577,7 @@ def run(self) -> None:
self._remove_unreachable()
self._fixup_external_labels()
self._fixup_constants()
self._validate()
self.path.write_text(self._body())


Expand Down Expand Up @@ -595,6 +608,7 @@ class OptimizerAArch64(Optimizer): # pylint: disable = too-few-public-methods
r"\s*(?P<instruction>ldr)\s+.*(?P<value>_JIT_OP(ARG|ERAND(0|1))_(16|32)).*"
)
const_reloc = "CUSTOM_AARCH64_CONST"
_frame_pointer_modify = re.compile(r"\s*stp\s+x29.*")

def _get_reg(self, inst: Instruction) -> str:
_, rest = inst.text.split(inst.name)
Expand Down Expand Up @@ -649,4 +663,5 @@ class OptimizerX86(Optimizer): # pylint: disable = too-few-public-methods
# https://www.felixcloutier.com/x86/jmp
_re_jump = re.compile(r"\s*jmp\s+(?P<target>[\w.]+)")
# https://www.felixcloutier.com/x86/ret
_re_return = re.compile(r"\s*ret\b")
_re_return = re.compile(r"\s*retq?\b")
_frame_pointer_modify = re.compile(r"\s*movq?\s+%(\w+),\s+%rbp.*")
17 changes: 10 additions & 7 deletions Tools/jit/_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,21 +176,24 @@ async def _compile(
f"{s}",
f"{c}",
]
is_shim = opname == "shim"
if self.frame_pointers:
frame_pointer = "all" if opname == "shim" else "reserved"
frame_pointer = "all" if is_shim else "reserved"
args_s += ["-Xclang", f"-mframe-pointer={frame_pointer}"]
args_s += self.args
# Allow user-provided CFLAGS to override any defaults
args_s += shlex.split(self.cflags)
await _llvm.run(
"clang", args_s, echo=self.verbose, llvm_version=self.llvm_version
)
self.optimizer(
s,
label_prefix=self.label_prefix,
symbol_prefix=self.symbol_prefix,
re_global=self.re_global,
).run()
if not is_shim:
self.optimizer(
s,
label_prefix=self.label_prefix,
symbol_prefix=self.symbol_prefix,
re_global=self.re_global,
frame_pointers=self.frame_pointers,
).run()
args_o = [f"--target={self.triple}", "-c", "-o", f"{o}", f"{s}"]
await _llvm.run(
"clang", args_o, echo=self.verbose, llvm_version=self.llvm_version
Expand Down
Loading