From 7a6d81929582dff1a0a58933b3579d4dfbe3b110 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Tue, 15 Jul 2025 09:35:33 -0700 Subject: [PATCH 01/11] Test the JIT stencils build process --- Lib/test/test_jit_stencils.py | 49 +++++ Tools/jit/_targets.py | 23 ++- Tools/jit/build.py | 8 + Tools/jit/test/test_executor_cases.c.h | 27 +++ .../test_jit_stencils-aarch64-apple-darwin.h | 0 ...est_jit_stencils-aarch64-pc-windows-msvc.h | 0 ...t_jit_stencils-aarch64-unknown-linux-gnu.h | 192 ++++++++++++++++++ .../test_jit_stencils-i686-pc-windows-msvc.h | 0 .../test_jit_stencils-x86_64-apple-darwin.h | 0 ...test_jit_stencils-x86_64-pc-windows-msvc.h | 0 ...st_jit_stencils-x86_64-unknown-linux-gnu.h | 154 ++++++++++++++ 11 files changed, 446 insertions(+), 7 deletions(-) create mode 100644 Lib/test/test_jit_stencils.py create mode 100644 Tools/jit/test/test_executor_cases.c.h create mode 100644 Tools/jit/test/test_jit_stencils-aarch64-apple-darwin.h create mode 100644 Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h create mode 100644 Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h create mode 100644 Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h create mode 100644 Tools/jit/test/test_jit_stencils-x86_64-apple-darwin.h create mode 100644 Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h create mode 100644 Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h diff --git a/Lib/test/test_jit_stencils.py b/Lib/test/test_jit_stencils.py new file mode 100644 index 00000000000000..2a96f24d744d25 --- /dev/null +++ b/Lib/test/test_jit_stencils.py @@ -0,0 +1,49 @@ + +import pathlib +import shlex +import sys +import sysconfig +import tempfile +import test.support +import unittest + +import test.support.script_helper + + +_CPYTHON = pathlib.Path(test.support.REPO_ROOT).resolve() +_TOOLS_JIT = _CPYTHON / "Tools" / "jit" +_TOOLS_JIT_TEST = _TOOLS_JIT / "test" +_TOOLS_JIT_BUILD_PY = _TOOLS_JIT / "build.py" + +@unittest.skipIf(test.support.Py_DEBUG, "XXX") +@unittest.skipUnless(sys._jit.is_available(), "XXX") +@unittest.skipIf(test.support.Py_GIL_DISABLED, "XXX") +@unittest.skipUnless(sysconfig.is_python_build(), "XXX") +class TestJITStencils(unittest.TestCase): + + def test_jit_stencils(self): + self.maxDiff = None + found = False + pyconfig_dir = pathlib.Path(sysconfig.get_config_h_filename()).parent + with tempfile.TemporaryDirectory() as work: + output_dir = pathlib.Path(work).resolve() + for test_jit_stencils_h in sorted(_TOOLS_JIT_TEST.glob("test_jit_stencils-*.h")): + target = test_jit_stencils_h.stem.removeprefix("test_jit_stencils-") + jit_stencils_h = output_dir / f"jit_stencils-{target}.h" + with self.subTest(target): + # relative = jit_stencils_h.relative_to(_CPYTHON) + result, args = test.support.script_helper.run_python_until_end( + _TOOLS_JIT_BUILD_PY, + "--input-file", _TOOLS_JIT_TEST / "test_executor_cases.c.h", + "--output-dir", output_dir, + "--pyconfig-dir", pyconfig_dir, + target, + __isolated=False + ) + if result.rc: + self.skipTest(shlex.join(map(str, args))) + found = True + expected = test_jit_stencils_h.read_text() + actual = "".join(jit_stencils_h.read_text().splitlines(True)[3:]) + self.assertEqual(expected, actual) + self.assertTrue(found, "No JIT stencil tests run!") diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 3883671e92aa39..b14ef6670d76b4 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -51,6 +51,7 @@ class _Target(typing.Generic[_S, _R]): verbose: bool = False cflags: str = "" known_symbols: dict[str, int] = dataclasses.field(default_factory=dict) + input_file: pathlib.Path = PYTHON_EXECUTOR_CASES_C_H pyconfig_dir: pathlib.Path = pathlib.Path.cwd().resolve() def _get_nop(self) -> bytes: @@ -68,7 +69,7 @@ def _compute_digest(self) -> str: hasher.update(self.debug.to_bytes()) hasher.update(self.cflags.encode()) # These dependencies are also reflected in _JITSources in regen.targets: - hasher.update(PYTHON_EXECUTOR_CASES_C_H.read_bytes()) + hasher.update(self.input_file.read_bytes()) hasher.update((self.pyconfig_dir / "pyconfig.h").read_bytes()) for dirpath, _, filenames in sorted(os.walk(TOOLS_JIT)): for filename in filenames: @@ -82,10 +83,16 @@ async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup: if output is not None: # Make sure that full paths don't leak out (for reproducibility): long, short = str(path), str(path.name) - group.code.disassembly.extend( - line.expandtabs().strip().replace(long, short) - for line in output.splitlines() - ) + lines = output.splitlines() + started = False + for line in lines: + if not started: + if "_JIT_ENTRY" not in line: + continue + started = True + cleaned = line.replace(long, short).expandtabs().strip() + if cleaned: + group.code.disassembly.append(cleaned) args = [ "--elf-output-style=JSON", "--expand-relocs", @@ -181,10 +188,12 @@ async def _compile( return await self._parse(o) async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: - generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text() + generated_cases = self.input_file.read_text() cases_and_opnames = sorted( re.findall( - r"\n {8}(case (\w+): \{\n.*?\n {8}\})", generated_cases, flags=re.DOTALL + r"^ {8}(case (\w+): \{\n.*?\n {8}\})", + generated_cases, + flags=re.DOTALL | re.MULTILINE, ) ) tasks = [] diff --git a/Tools/jit/build.py b/Tools/jit/build.py index a0733005929bf2..b0122850e2de3a 100644 --- a/Tools/jit/build.py +++ b/Tools/jit/build.py @@ -22,6 +22,12 @@ parser.add_argument( "-f", "--force", action="store_true", help="force the entire JIT to be rebuilt" ) + parser.add_argument( + "-i", + "--input-file", + help="where to find the generated executor cases", + type=lambda p: pathlib.Path(p).resolve(), + ) parser.add_argument( "-o", "--output-dir", @@ -48,6 +54,8 @@ target.force = args.force target.verbose = args.verbose target.cflags = args.cflags + if args.input_file is not None: + target.input_file = args.input_file target.pyconfig_dir = args.pyconfig_dir target.build( comment=comment, diff --git a/Tools/jit/test/test_executor_cases.c.h b/Tools/jit/test/test_executor_cases.c.h new file mode 100644 index 00000000000000..7ce624682a5ee3 --- /dev/null +++ b/Tools/jit/test/test_executor_cases.c.h @@ -0,0 +1,27 @@ + case 0: { + break; + } + + case 1: { + if (CURRENT_OPARG()) { + JUMP_TO_JUMP_TARGET(); + } + break; + } + + case 2: { + if (CURRENT_OPARG()) { + JUMP_TO_ERROR(); + } + break; + } + + case 3: { + GOTO_TIER_ONE((void *)CURRENT_OPERAND0() + CURRENT_TARGET()); + break; + } + + case 4: { + GOTO_TIER_TWO((void *)CURRENT_OPERAND1()); + break; + } \ No newline at end of file diff --git a/Tools/jit/test/test_jit_stencils-aarch64-apple-darwin.h b/Tools/jit/test/test_jit_stencils-aarch64-apple-darwin.h new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h b/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h b/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h new file mode 100644 index 00000000000000..6dd8486e1f0942 --- /dev/null +++ b/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h @@ -0,0 +1,192 @@ +void +emit_shim( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 6db63bef stp d15, d14, [sp, #-0xa0]! + // 4: a90857f6 stp x22, x21, [sp, #0x80] + // 8: aa0103f5 mov x21, x1 + // c: aa0203f6 mov x22, x2 + // 10: a9094ff4 stp x20, x19, [sp, #0x90] + // 14: aa0003f4 mov x20, x0 + // 18: 6d0133ed stp d13, d12, [sp, #0x10] + // 1c: 6d022beb stp d11, d10, [sp, #0x20] + // 20: 6d0323e9 stp d9, d8, [sp, #0x30] + // 24: a9047bfd stp x29, x30, [sp, #0x40] + // 28: 910103fd add x29, sp, #0x40 + // 2c: a9056ffc stp x28, x27, [sp, #0x50] + // 30: a90667fa stp x26, x25, [sp, #0x60] + // 34: a9075ff8 stp x24, x23, [sp, #0x70] + // 38: 9400000c bl 0x68 <_JIT_ENTRY+0x68> + // 3c: a9494ff4 ldp x20, x19, [sp, #0x90] + // 40: a94857f6 ldp x22, x21, [sp, #0x80] + // 44: a9475ff8 ldp x24, x23, [sp, #0x70] + // 48: a94667fa ldp x26, x25, [sp, #0x60] + // 4c: a9456ffc ldp x28, x27, [sp, #0x50] + // 50: a9447bfd ldp x29, x30, [sp, #0x40] + // 54: 6d4323e9 ldp d9, d8, [sp, #0x30] + // 58: 6d422beb ldp d11, d10, [sp, #0x20] + // 5c: 6d4133ed ldp d13, d12, [sp, #0x10] + // 60: 6cca3bef ldp d15, d14, [sp], #0xa0 + // 64: d65f03c0 ret + const unsigned char code_body[104] = { + 0xef, 0x3b, 0xb6, 0x6d, 0xf6, 0x57, 0x08, 0xa9, + 0xf5, 0x03, 0x01, 0xaa, 0xf6, 0x03, 0x02, 0xaa, + 0xf4, 0x4f, 0x09, 0xa9, 0xf4, 0x03, 0x00, 0xaa, + 0xed, 0x33, 0x01, 0x6d, 0xeb, 0x2b, 0x02, 0x6d, + 0xe9, 0x23, 0x03, 0x6d, 0xfd, 0x7b, 0x04, 0xa9, + 0xfd, 0x03, 0x01, 0x91, 0xfc, 0x6f, 0x05, 0xa9, + 0xfa, 0x67, 0x06, 0xa9, 0xf8, 0x5f, 0x07, 0xa9, + 0x0c, 0x00, 0x00, 0x94, 0xf4, 0x4f, 0x49, 0xa9, + 0xf6, 0x57, 0x48, 0xa9, 0xf8, 0x5f, 0x47, 0xa9, + 0xfa, 0x67, 0x46, 0xa9, 0xfc, 0x6f, 0x45, 0xa9, + 0xfd, 0x7b, 0x44, 0xa9, 0xe9, 0x23, 0x43, 0x6d, + 0xeb, 0x2b, 0x42, 0x6d, 0xed, 0x33, 0x41, 0x6d, + 0xef, 0x3b, 0xca, 0x6c, 0xc0, 0x03, 0x5f, 0xd6, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_0( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ +} + +void +emit_1( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000000: R_AARCH64_ADR_GOT_PAGE _JIT_OPARG + // 4: f9400108 ldr x8, [x8] + // 0000000000000004: R_AARCH64_LD64_GOT_LO12_NC _JIT_OPARG + // 8: 72003d1f tst w8, #0xffff + // c: 54000040 b.eq 0x14 <_JIT_ENTRY+0x14> + // 10: 14000000 b 0x10 <_JIT_ENTRY+0x10> + // 0000000000000010: R_AARCH64_JUMP26 _JIT_JUMP_TARGET + const unsigned char code_body[20] = { + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, + 0x1f, 0x3d, 0x00, 0x72, 0x40, 0x00, 0x00, 0x54, + 0x00, 0x00, 0x00, 0x14, + }; + // 0: OPARG + patch_64(data + 0x0, instruction->oparg); + memcpy(code, code_body, sizeof(code_body)); + patch_aarch64_33rx(code + 0x0, (uintptr_t)data); + patch_aarch64_26r(code + 0x10, state->instruction_starts[instruction->jump_target]); +} + +void +emit_2( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000000: R_AARCH64_ADR_GOT_PAGE _JIT_OPARG + // 4: f9400108 ldr x8, [x8] + // 0000000000000004: R_AARCH64_LD64_GOT_LO12_NC _JIT_OPARG + // 8: 72003d1f tst w8, #0xffff + // c: 54000040 b.eq 0x14 <_JIT_ENTRY+0x14> + // 10: 14000000 b 0x10 <_JIT_ENTRY+0x10> + // 0000000000000010: R_AARCH64_JUMP26 _JIT_ERROR_TARGET + const unsigned char code_body[20] = { + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, + 0x1f, 0x3d, 0x00, 0x72, 0x40, 0x00, 0x00, 0x54, + 0x00, 0x00, 0x00, 0x14, + }; + // 0: OPARG + patch_64(data + 0x0, instruction->oparg); + memcpy(code, code_body, sizeof(code_body)); + patch_aarch64_33rx(code + 0x0, (uintptr_t)data); + patch_aarch64_26r(code + 0x10, state->instruction_starts[instruction->error_target]); +} + +void +emit_3( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000000: R_AARCH64_ADR_GOT_PAGE _JIT_TARGET + // 4: 90000009 adrp x9, 0x0 <_JIT_ENTRY> + // 0000000000000004: R_AARCH64_ADR_GOT_PAGE _JIT_OPERAND0 + // 8: f9400108 ldr x8, [x8] + // 0000000000000008: R_AARCH64_LD64_GOT_LO12_NC _JIT_TARGET + // c: f9400129 ldr x9, [x9] + // 000000000000000c: R_AARCH64_LD64_GOT_LO12_NC _JIT_OPERAND0 + // 10: f9008adf str xzr, [x22, #0x110] + // 14: f9002295 str x21, [x20, #0x40] + // 18: 8b284120 add x0, x9, w8, uxtw + // 1c: d65f03c0 ret + const unsigned char code_body[32] = { + 0x08, 0x00, 0x00, 0x90, 0x09, 0x00, 0x00, 0x90, + 0x08, 0x01, 0x40, 0xf9, 0x29, 0x01, 0x40, 0xf9, + 0xdf, 0x8a, 0x00, 0xf9, 0x95, 0x22, 0x00, 0xf9, + 0x20, 0x41, 0x28, 0x8b, 0xc0, 0x03, 0x5f, 0xd6, + }; + // 0: TARGET + // 8: OPERAND0 + patch_64(data + 0x0, instruction->target); + patch_64(data + 0x8, instruction->operand0); + memcpy(code, code_body, sizeof(code_body)); + patch_aarch64_21rx(code + 0x0, (uintptr_t)data); + patch_aarch64_21rx(code + 0x4, (uintptr_t)data + 0x8); + patch_aarch64_12x(code + 0x8, (uintptr_t)data); + patch_aarch64_12x(code + 0xc, (uintptr_t)data + 0x8); +} + +void +emit_4( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000000: R_AARCH64_ADR_GOT_PAGE _JIT_OPERAND1 + // 4: f9400108 ldr x8, [x8] + // 0000000000000004: R_AARCH64_LD64_GOT_LO12_NC _JIT_OPERAND1 + // 8: f9403d00 ldr x0, [x8, #0x78] + // c: f9008ac8 str x8, [x22, #0x110] + // 10: d61f0000 br x0 + const unsigned char code_body[20] = { + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, + 0x00, 0x3d, 0x40, 0xf9, 0xc8, 0x8a, 0x00, 0xf9, + 0x00, 0x00, 0x1f, 0xd6, + }; + // 0: OPERAND1 + patch_64(data + 0x0, instruction->operand1); + memcpy(code, code_body, sizeof(code_body)); + patch_aarch64_33rx(code + 0x0, (uintptr_t)data); +} + +static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); + +typedef struct { + void (*emit)( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state); + size_t code_size; + size_t data_size; + symbol_mask trampoline_mask; +} StencilGroup; + +static const StencilGroup shim = {emit_shim, 104, 0, {0}}; + +static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { + [0] = {emit_0, 0, 0, {0}}, + [1] = {emit_1, 20, 8, {0}}, + [2] = {emit_2, 20, 8, {0}}, + [3] = {emit_3, 32, 16, {0}}, + [4] = {emit_4, 20, 8, {0}}, +}; + +static const void * const symbols_map[1] = { + 0 +}; diff --git a/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h b/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Tools/jit/test/test_jit_stencils-x86_64-apple-darwin.h b/Tools/jit/test/test_jit_stencils-x86_64-apple-darwin.h new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h b/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h b/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h new file mode 100644 index 00000000000000..fb2decfc97497d --- /dev/null +++ b/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h @@ -0,0 +1,154 @@ +void +emit_shim( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 41 57 pushq %r15 + // 2: 41 56 pushq %r14 + // 4: 41 55 pushq %r13 + // 6: 41 54 pushq %r12 + // 8: 53 pushq %rbx + // 9: 49 89 fc movq %rdi, %r12 + // c: 49 89 f5 movq %rsi, %r13 + // f: 49 89 d6 movq %rdx, %r14 + // 12: e8 0a 00 00 00 callq 0x21 <_JIT_ENTRY+0x21> + // 17: 5b popq %rbx + // 18: 41 5c popq %r12 + // 1a: 41 5d popq %r13 + // 1c: 41 5e popq %r14 + // 1e: 41 5f popq %r15 + // 20: c3 retq + const unsigned char code_body[33] = { + 0x41, 0x57, 0x41, 0x56, 0x41, 0x55, 0x41, 0x54, + 0x53, 0x49, 0x89, 0xfc, 0x49, 0x89, 0xf5, 0x49, + 0x89, 0xd6, 0xe8, 0x0a, 0x00, 0x00, 0x00, 0x5b, + 0x41, 0x5c, 0x41, 0x5d, 0x41, 0x5e, 0x41, 0x5f, + 0xc3, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_0( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ +} + +void +emit_1( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax + // 0000000000000002: R_X86_64_64 _JIT_OPARG + // a: 66 85 c0 testw %ax, %ax + // d: 0f 85 00 00 00 00 jne 0x13 <_JIT_ENTRY+0x13> + // 000000000000000f: R_X86_64_PLT32 _JIT_JUMP_TARGET-0x4 + const unsigned char code_body[19] = { + 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x66, 0x85, 0xc0, 0x0f, 0x85, + }; + memcpy(code, code_body, sizeof(code_body)); + patch_64(code + 0x2, instruction->oparg); + patch_32r(code + 0xf, state->instruction_starts[instruction->jump_target] + -0x4); +} + +void +emit_2( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax + // 0000000000000002: R_X86_64_64 _JIT_OPARG + // a: 66 85 c0 testw %ax, %ax + // d: 0f 85 00 00 00 00 jne 0x13 <_JIT_ENTRY+0x13> + // 000000000000000f: R_X86_64_PLT32 _JIT_ERROR_TARGET-0x4 + const unsigned char code_body[19] = { + 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x66, 0x85, 0xc0, 0x0f, 0x85, + }; + memcpy(code, code_body, sizeof(code_body)); + patch_64(code + 0x2, instruction->oparg); + patch_32r(code + 0xf, state->instruction_starts[instruction->error_target] + -0x4); +} + +void +emit_3( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 49 c7 86 10 01 00 00 00 00 00 00 movq $0x0, 0x110(%r14) + // b: 4d 89 6c 24 40 movq %r13, 0x40(%r12) + // 10: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax + // 0000000000000012: R_X86_64_64 _JIT_TARGET + // 1a: 89 c1 movl %eax, %ecx + // 1c: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax + // 000000000000001e: R_X86_64_64 _JIT_OPERAND0 + // 26: 48 01 c8 addq %rcx, %rax + // 29: c3 retq + const unsigned char code_body[42] = { + 0x49, 0xc7, 0x86, 0x10, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x4d, 0x89, 0x6c, 0x24, 0x40, + 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x89, 0xc1, 0x48, 0xb8, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x01, + 0xc8, 0xc3, + }; + memcpy(code, code_body, sizeof(code_body)); + patch_64(code + 0x12, instruction->target); + patch_64(code + 0x1e, instruction->operand0); +} + +void +emit_4( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax + // 0000000000000002: R_X86_64_64 _JIT_OPERAND1 + // a: 49 89 86 10 01 00 00 movq %rax, 0x110(%r14) + // 11: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax + // 0000000000000013: R_X86_64_64 _JIT_OPERAND1+0x78 + // 1b: 48 8b 00 movq (%rax), %rax + // 1e: ff e0 jmpq *%rax + const unsigned char code_body[32] = { + 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x49, 0x89, 0x86, 0x10, 0x01, 0x00, + 0x00, 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x48, 0x8b, 0x00, 0xff, 0xe0, + }; + memcpy(code, code_body, sizeof(code_body)); + patch_64(code + 0x2, instruction->operand1); + patch_64(code + 0x13, instruction->operand1 + 0x78); +} + +static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); + +typedef struct { + void (*emit)( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state); + size_t code_size; + size_t data_size; + symbol_mask trampoline_mask; +} StencilGroup; + +static const StencilGroup shim = {emit_shim, 33, 0, {0}}; + +static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { + [0] = {emit_0, 0, 0, {0}}, + [1] = {emit_1, 19, 0, {0}}, + [2] = {emit_2, 19, 0, {0}}, + [3] = {emit_3, 42, 0, {0}}, + [4] = {emit_4, 32, 0, {0}}, +}; + +static const void * const symbols_map[1] = { + 0 +}; From a322ad46cdf45e5a94ef92e476a2e47d64e26a1e Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 16 Jul 2025 13:23:08 -0700 Subject: [PATCH 02/11] Cleanup --- Lib/test/test_jit_stencils.py | 80 ++++++---- Tools/jit/_targets.py | 7 + Tools/jit/test/test_executor_cases.c.h | 28 ++-- ...t_jit_stencils-aarch64-unknown-linux-gnu.h | 150 +++++++----------- ...st_jit_stencils-x86_64-unknown-linux-gnu.h | 105 ++++-------- 5 files changed, 166 insertions(+), 204 deletions(-) diff --git a/Lib/test/test_jit_stencils.py b/Lib/test/test_jit_stencils.py index 2a96f24d744d25..94beecc9eed8d3 100644 --- a/Lib/test/test_jit_stencils.py +++ b/Lib/test/test_jit_stencils.py @@ -1,49 +1,67 @@ - import pathlib import shlex import sys import sysconfig import tempfile import test.support -import unittest - import test.support.script_helper - +import unittest _CPYTHON = pathlib.Path(test.support.REPO_ROOT).resolve() _TOOLS_JIT = _CPYTHON / "Tools" / "jit" _TOOLS_JIT_TEST = _TOOLS_JIT / "test" +_TOOLS_JIT_TEST_TEST_EXECUTOR_CASES_C_H = _TOOLS_JIT_TEST / "test_executor_cases.c.h" _TOOLS_JIT_BUILD_PY = _TOOLS_JIT / "build.py" -@unittest.skipIf(test.support.Py_DEBUG, "XXX") -@unittest.skipUnless(sys._jit.is_available(), "XXX") -@unittest.skipIf(test.support.Py_GIL_DISABLED, "XXX") -@unittest.skipUnless(sysconfig.is_python_build(), "XXX") + +@test.support.cpython_only +@unittest.skipIf(test.support.Py_DEBUG, "Debug stencils aren't tested.") +@unittest.skipIf(test.support.Py_GIL_DISABLED, "Free-threaded stencils aren't tested.") +@unittest.skipUnless(sysconfig.is_python_build(), "Requires a local Python build.") class TestJITStencils(unittest.TestCase): + def _build_jit_stencils(self, target: str) -> str: + with tempfile.TemporaryDirectory() as work: + jit_stencils_h = pathlib.Path(work, f"jit_stencils-{target}.h").resolve() + pyconfig_h = pathlib.Path(sysconfig.get_config_h_filename()).resolve() + result, args = test.support.script_helper.run_python_until_end( + _TOOLS_JIT_BUILD_PY, + "--input-file", _TOOLS_JIT_TEST_TEST_EXECUTOR_CASES_C_H, + "--output-dir", jit_stencils_h.parent, + "--pyconfig-dir", pyconfig_h.parent, + target, + __isolated=False, + ) + if result.rc: + self.skipTest(f"Build failed: {shlex.join(map(str, args))}") + body = jit_stencils_h.read_text() + # Strip out two lines of header comments: + _, _, body = body.split("\n", 2) + return body + + def _check_jit_stencils( + self, expected: str, actual: str, test_jit_stencils_h: pathlib.Path + ) -> None: + try: + self.assertEqual(expected.strip("\n"), actual.strip("\n")) + except AssertionError as e: + # Make it easy to re-validate the expected output: + relative = test_jit_stencils_h.relative_to(_CPYTHON) + message = f"If this is expected, replace {relative} with:" + banner = "=" * len(message) + e.add_note("\n".join([banner, message, banner])) + e.add_note(actual) + raise + def test_jit_stencils(self): self.maxDiff = None found = False - pyconfig_dir = pathlib.Path(sysconfig.get_config_h_filename()).parent - with tempfile.TemporaryDirectory() as work: - output_dir = pathlib.Path(work).resolve() - for test_jit_stencils_h in sorted(_TOOLS_JIT_TEST.glob("test_jit_stencils-*.h")): - target = test_jit_stencils_h.stem.removeprefix("test_jit_stencils-") - jit_stencils_h = output_dir / f"jit_stencils-{target}.h" - with self.subTest(target): - # relative = jit_stencils_h.relative_to(_CPYTHON) - result, args = test.support.script_helper.run_python_until_end( - _TOOLS_JIT_BUILD_PY, - "--input-file", _TOOLS_JIT_TEST / "test_executor_cases.c.h", - "--output-dir", output_dir, - "--pyconfig-dir", pyconfig_dir, - target, - __isolated=False - ) - if result.rc: - self.skipTest(shlex.join(map(str, args))) - found = True - expected = test_jit_stencils_h.read_text() - actual = "".join(jit_stencils_h.read_text().splitlines(True)[3:]) - self.assertEqual(expected, actual) - self.assertTrue(found, "No JIT stencil tests run!") + for test_jit_stencils_h in _TOOLS_JIT_TEST.glob("test_jit_stencils-*.h"): + target = test_jit_stencils_h.stem.removeprefix("test_jit_stencils-") + with self.subTest(target): + expected = test_jit_stencils_h.read_text() + actual = self._build_jit_stencils(target) + found = True + self._check_jit_stencils(expected, actual, test_jit_stencils_h) + # This is a local build. If the JIT is available, at least one test should run: + assert found or not sys._jit.is_available(), "No JIT stencils built!" diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index b14ef6670d76b4..60850377d5486e 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -566,36 +566,43 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO: optimizer: type[_optimizers.Optimizer] target: _COFF32 | _COFF64 | _ELF | _MachO if re.fullmatch(r"aarch64-apple-darwin.*", host): + host = "aarch64-apple-darwin" condition = "defined(__aarch64__) && defined(__APPLE__)" optimizer = _optimizers.OptimizerAArch64 target = _MachO(host, condition, optimizer=optimizer) elif re.fullmatch(r"aarch64-pc-windows-msvc", host): + host = "aarch64-pc-windows-msvc" args = ["-fms-runtime-lib=dll", "-fplt"] condition = "defined(_M_ARM64)" optimizer = _optimizers.OptimizerAArch64 target = _COFF64(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"aarch64-.*-linux-gnu", host): + host = "aarch64-unknown-linux-gnu" # -mno-outline-atomics: Keep intrinsics from being emitted. args = ["-fpic", "-mno-outline-atomics"] condition = "defined(__aarch64__) && defined(__linux__)" optimizer = _optimizers.OptimizerAArch64 target = _ELF(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"i686-pc-windows-msvc", host): + host = "i686-pc-windows-msvc" # -Wno-ignored-attributes: __attribute__((preserve_none)) is not supported here. args = ["-DPy_NO_ENABLE_SHARED", "-Wno-ignored-attributes"] optimizer = _optimizers.OptimizerX86 condition = "defined(_M_IX86)" target = _COFF32(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"x86_64-apple-darwin.*", host): + host = "x86_64-apple-darwin" condition = "defined(__x86_64__) && defined(__APPLE__)" optimizer = _optimizers.OptimizerX86 target = _MachO(host, condition, optimizer=optimizer) elif re.fullmatch(r"x86_64-pc-windows-msvc", host): + host = "x86_64-pc-windows-msvc" args = ["-fms-runtime-lib=dll"] condition = "defined(_M_X64)" optimizer = _optimizers.OptimizerX86 target = _COFF64(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"x86_64-.*-linux-gnu", host): + host = "x86_64-unknown-linux-gnu" args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"] condition = "defined(__x86_64__) && defined(__linux__)" optimizer = _optimizers.OptimizerX86 diff --git a/Tools/jit/test/test_executor_cases.c.h b/Tools/jit/test/test_executor_cases.c.h index 7ce624682a5ee3..496380cfc081a5 100644 --- a/Tools/jit/test/test_executor_cases.c.h +++ b/Tools/jit/test/test_executor_cases.c.h @@ -1,27 +1,29 @@ case 0: { + // Zero-length jumps should be removed: break; } case 1: { - if (CURRENT_OPARG()) { - JUMP_TO_JUMP_TARGET(); + // -Os duplicates less code than -O3: + PyAPI_DATA(bool) sausage; + PyAPI_DATA(bool) spammed; + PyAPI_FUNC(void) order_eggs_and_bacon(void); + PyAPI_FUNC(void) order_eggs_sausage_and_bacon(void); + if (!sausage) { + order_eggs_and_bacon(); } + else { + order_eggs_sausage_and_bacon(); + } + spammed = false; break; } case 2: { - if (CURRENT_OPARG()) { + // The assembly optimizer inverts hot branches: + PyAPI_DATA(bool) spam; + if (spam) { JUMP_TO_ERROR(); } break; } - - case 3: { - GOTO_TIER_ONE((void *)CURRENT_OPERAND0() + CURRENT_TARGET()); - break; - } - - case 4: { - GOTO_TIER_TWO((void *)CURRENT_OPERAND1()); - break; - } \ No newline at end of file diff --git a/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h b/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h index 6dd8486e1f0942..42b36c0b8b7d6a 100644 --- a/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h +++ b/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h @@ -61,109 +61,81 @@ emit_1( const _PyUOpInstruction *instruction, jit_state *state) { // 0000000000000000 <_JIT_ENTRY>: - // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> - // 0000000000000000: R_AARCH64_ADR_GOT_PAGE _JIT_OPARG - // 4: f9400108 ldr x8, [x8] - // 0000000000000004: R_AARCH64_LD64_GOT_LO12_NC _JIT_OPARG - // 8: 72003d1f tst w8, #0xffff - // c: 54000040 b.eq 0x14 <_JIT_ENTRY+0x14> - // 10: 14000000 b 0x10 <_JIT_ENTRY+0x10> - // 0000000000000010: R_AARCH64_JUMP26 _JIT_JUMP_TARGET - const unsigned char code_body[20] = { + // 0: a9bf7bfd stp x29, x30, [sp, #-0x10]! + // 4: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000004: R_AARCH64_ADR_GOT_PAGE sausage + // 8: 910003fd mov x29, sp + // c: f9400108 ldr x8, [x8] + // 000000000000000c: R_AARCH64_LD64_GOT_LO12_NC sausage + // 10: 39400108 ldrb w8, [x8] + // 14: 36000088 tbz w8, #0x0, 0x24 <_JIT_ENTRY+0x24> + // 18: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000018: R_AARCH64_ADR_GOT_PAGE order_eggs_sausage_and_bacon + // 1c: f9400108 ldr x8, [x8] + // 000000000000001c: R_AARCH64_LD64_GOT_LO12_NC order_eggs_sausage_and_bacon + // 20: 14000003 b 0x2c <_JIT_ENTRY+0x2c> + // 24: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000024: R_AARCH64_ADR_GOT_PAGE order_eggs_and_bacon + // 28: f9400108 ldr x8, [x8] + // 0000000000000028: R_AARCH64_LD64_GOT_LO12_NC order_eggs_and_bacon + // 2c: d63f0100 blr x8 + // 30: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000030: R_AARCH64_ADR_GOT_PAGE spammed + // 34: f9400108 ldr x8, [x8] + // 0000000000000034: R_AARCH64_LD64_GOT_LO12_NC spammed + // 38: 3900011f strb wzr, [x8] + // 3c: a8c17bfd ldp x29, x30, [sp], #0x10 + const unsigned char code_body[64] = { + 0xfd, 0x7b, 0xbf, 0xa9, 0x08, 0x00, 0x00, 0x90, + 0xfd, 0x03, 0x00, 0x91, 0x08, 0x01, 0x40, 0xf9, + 0x08, 0x01, 0x40, 0x39, 0x88, 0x00, 0x00, 0x36, 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, - 0x1f, 0x3d, 0x00, 0x72, 0x40, 0x00, 0x00, 0x54, - 0x00, 0x00, 0x00, 0x14, - }; - // 0: OPARG - patch_64(data + 0x0, instruction->oparg); - memcpy(code, code_body, sizeof(code_body)); - patch_aarch64_33rx(code + 0x0, (uintptr_t)data); - patch_aarch64_26r(code + 0x10, state->instruction_starts[instruction->jump_target]); -} - -void -emit_2( - unsigned char *code, unsigned char *data, _PyExecutorObject *executor, - const _PyUOpInstruction *instruction, jit_state *state) -{ - // 0000000000000000 <_JIT_ENTRY>: - // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> - // 0000000000000000: R_AARCH64_ADR_GOT_PAGE _JIT_OPARG - // 4: f9400108 ldr x8, [x8] - // 0000000000000004: R_AARCH64_LD64_GOT_LO12_NC _JIT_OPARG - // 8: 72003d1f tst w8, #0xffff - // c: 54000040 b.eq 0x14 <_JIT_ENTRY+0x14> - // 10: 14000000 b 0x10 <_JIT_ENTRY+0x10> - // 0000000000000010: R_AARCH64_JUMP26 _JIT_ERROR_TARGET - const unsigned char code_body[20] = { + 0x03, 0x00, 0x00, 0x14, 0x08, 0x00, 0x00, 0x90, + 0x08, 0x01, 0x40, 0xf9, 0x00, 0x01, 0x3f, 0xd6, 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, - 0x1f, 0x3d, 0x00, 0x72, 0x40, 0x00, 0x00, 0x54, - 0x00, 0x00, 0x00, 0x14, + 0x1f, 0x01, 0x00, 0x39, 0xfd, 0x7b, 0xc1, 0xa8, }; - // 0: OPARG - patch_64(data + 0x0, instruction->oparg); + // 0: &sausage+0x0 + // 8: &order_eggs_sausage_and_bacon+0x0 + // 10: &order_eggs_and_bacon+0x0 + // 18: &spammed+0x0 + patch_64(data + 0x0, (uintptr_t)&sausage); + patch_64(data + 0x8, (uintptr_t)&order_eggs_sausage_and_bacon); + patch_64(data + 0x10, (uintptr_t)&order_eggs_and_bacon); + patch_64(data + 0x18, (uintptr_t)&spammed); memcpy(code, code_body, sizeof(code_body)); - patch_aarch64_33rx(code + 0x0, (uintptr_t)data); - patch_aarch64_26r(code + 0x10, state->instruction_starts[instruction->error_target]); + patch_aarch64_21rx(code + 0x4, (uintptr_t)data); + patch_aarch64_12x(code + 0xc, (uintptr_t)data); + patch_aarch64_33rx(code + 0x18, (uintptr_t)data + 0x8); + patch_aarch64_33rx(code + 0x24, (uintptr_t)data + 0x10); + patch_aarch64_33rx(code + 0x30, (uintptr_t)data + 0x18); } void -emit_3( - unsigned char *code, unsigned char *data, _PyExecutorObject *executor, - const _PyUOpInstruction *instruction, jit_state *state) -{ - // 0000000000000000 <_JIT_ENTRY>: - // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> - // 0000000000000000: R_AARCH64_ADR_GOT_PAGE _JIT_TARGET - // 4: 90000009 adrp x9, 0x0 <_JIT_ENTRY> - // 0000000000000004: R_AARCH64_ADR_GOT_PAGE _JIT_OPERAND0 - // 8: f9400108 ldr x8, [x8] - // 0000000000000008: R_AARCH64_LD64_GOT_LO12_NC _JIT_TARGET - // c: f9400129 ldr x9, [x9] - // 000000000000000c: R_AARCH64_LD64_GOT_LO12_NC _JIT_OPERAND0 - // 10: f9008adf str xzr, [x22, #0x110] - // 14: f9002295 str x21, [x20, #0x40] - // 18: 8b284120 add x0, x9, w8, uxtw - // 1c: d65f03c0 ret - const unsigned char code_body[32] = { - 0x08, 0x00, 0x00, 0x90, 0x09, 0x00, 0x00, 0x90, - 0x08, 0x01, 0x40, 0xf9, 0x29, 0x01, 0x40, 0xf9, - 0xdf, 0x8a, 0x00, 0xf9, 0x95, 0x22, 0x00, 0xf9, - 0x20, 0x41, 0x28, 0x8b, 0xc0, 0x03, 0x5f, 0xd6, - }; - // 0: TARGET - // 8: OPERAND0 - patch_64(data + 0x0, instruction->target); - patch_64(data + 0x8, instruction->operand0); - memcpy(code, code_body, sizeof(code_body)); - patch_aarch64_21rx(code + 0x0, (uintptr_t)data); - patch_aarch64_21rx(code + 0x4, (uintptr_t)data + 0x8); - patch_aarch64_12x(code + 0x8, (uintptr_t)data); - patch_aarch64_12x(code + 0xc, (uintptr_t)data + 0x8); -} - -void -emit_4( +emit_2( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { // 0000000000000000 <_JIT_ENTRY>: // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> - // 0000000000000000: R_AARCH64_ADR_GOT_PAGE _JIT_OPERAND1 + // 0000000000000000: R_AARCH64_ADR_GOT_PAGE spam // 4: f9400108 ldr x8, [x8] - // 0000000000000004: R_AARCH64_LD64_GOT_LO12_NC _JIT_OPERAND1 - // 8: f9403d00 ldr x0, [x8, #0x78] - // c: f9008ac8 str x8, [x22, #0x110] - // 10: d61f0000 br x0 - const unsigned char code_body[20] = { + // 0000000000000004: R_AARCH64_LD64_GOT_LO12_NC spam + // 8: 39400108 ldrb w8, [x8] + // c: 7100051f cmp w8, #0x1 + // 10: 54000041 b.ne 0x18 <_JIT_ENTRY+0x18> + // 14: 14000000 b 0x14 <_JIT_ENTRY+0x14> + // 0000000000000014: R_AARCH64_JUMP26 _JIT_ERROR_TARGET + const unsigned char code_body[24] = { 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, - 0x00, 0x3d, 0x40, 0xf9, 0xc8, 0x8a, 0x00, 0xf9, - 0x00, 0x00, 0x1f, 0xd6, + 0x08, 0x01, 0x40, 0x39, 0x1f, 0x05, 0x00, 0x71, + 0x41, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x14, }; - // 0: OPERAND1 - patch_64(data + 0x0, instruction->operand1); + // 0: &spam+0x0 + patch_64(data + 0x0, (uintptr_t)&spam); memcpy(code, code_body, sizeof(code_body)); patch_aarch64_33rx(code + 0x0, (uintptr_t)data); + patch_aarch64_26r(code + 0x14, state->instruction_starts[instruction->error_target]); } static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); @@ -181,10 +153,8 @@ static const StencilGroup shim = {emit_shim, 104, 0, {0}}; static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { [0] = {emit_0, 0, 0, {0}}, - [1] = {emit_1, 20, 8, {0}}, - [2] = {emit_2, 20, 8, {0}}, - [3] = {emit_3, 32, 16, {0}}, - [4] = {emit_4, 20, 8, {0}}, + [1] = {emit_1, 64, 32, {0}}, + [2] = {emit_2, 24, 8, {0}}, }; static const void * const symbols_map[1] = { diff --git a/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h b/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h index fb2decfc97497d..e4de3a1dfb6b8f 100644 --- a/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h +++ b/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h @@ -42,18 +42,37 @@ emit_1( const _PyUOpInstruction *instruction, jit_state *state) { // 0000000000000000 <_JIT_ENTRY>: - // 0: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax - // 0000000000000002: R_X86_64_64 _JIT_OPARG - // a: 66 85 c0 testw %ax, %ax - // d: 0f 85 00 00 00 00 jne 0x13 <_JIT_ENTRY+0x13> - // 000000000000000f: R_X86_64_PLT32 _JIT_JUMP_TARGET-0x4 - const unsigned char code_body[19] = { - 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x66, 0x85, 0xc0, 0x0f, 0x85, + // 0: 50 pushq %rax + // 1: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax + // 0000000000000003: R_X86_64_64 sausage + // b: 80 38 00 cmpb $0x0, (%rax) + // e: 74 08 je 0x18 <_JIT_ENTRY+0x18> + // 10: ff 15 00 00 00 00 callq *(%rip) # 0x16 <_JIT_ENTRY+0x16> + // 0000000000000012: R_X86_64_GOTPCRELX order_eggs_sausage_and_bacon-0x4 + // 16: eb 06 jmp 0x1e <_JIT_ENTRY+0x1e> + // 18: ff 15 00 00 00 00 callq *(%rip) # 0x1e <_JIT_ENTRY+0x1e> + // 000000000000001a: R_X86_64_GOTPCRELX order_eggs_and_bacon-0x4 + // 1e: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax + // 0000000000000020: R_X86_64_64 spammed + // 28: c6 00 00 movb $0x0, (%rax) + // 2b: 58 popq %rax + const unsigned char code_body[44] = { + 0x50, 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x80, 0x38, 0x00, 0x74, 0x08, + 0xff, 0x15, 0x00, 0x00, 0x00, 0x00, 0xeb, 0x06, + 0xff, 0x15, 0x00, 0x00, 0x00, 0x00, 0x48, 0xb8, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xc6, 0x00, 0x00, 0x58, }; + // 0: &order_eggs_sausage_and_bacon+0x0 + // 8: &order_eggs_and_bacon+0x0 + patch_64(data + 0x0, (uintptr_t)&order_eggs_sausage_and_bacon); + patch_64(data + 0x8, (uintptr_t)&order_eggs_and_bacon); memcpy(code, code_body, sizeof(code_body)); - patch_64(code + 0x2, instruction->oparg); - patch_32r(code + 0xf, state->instruction_starts[instruction->jump_target] + -0x4); + patch_64(code + 0x3, (uintptr_t)&sausage); + patch_x86_64_32rx(code + 0x12, (uintptr_t)data + -0x4); + patch_x86_64_32rx(code + 0x1a, (uintptr_t)data + 0x4); + patch_64(code + 0x20, (uintptr_t)&spammed); } void @@ -63,71 +82,19 @@ emit_2( { // 0000000000000000 <_JIT_ENTRY>: // 0: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax - // 0000000000000002: R_X86_64_64 _JIT_OPARG - // a: 66 85 c0 testw %ax, %ax - // d: 0f 85 00 00 00 00 jne 0x13 <_JIT_ENTRY+0x13> + // 0000000000000002: R_X86_64_64 spam + // a: 80 38 01 cmpb $0x1, (%rax) + // d: 0f 84 00 00 00 00 je 0x13 <_JIT_ENTRY+0x13> // 000000000000000f: R_X86_64_PLT32 _JIT_ERROR_TARGET-0x4 const unsigned char code_body[19] = { 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x66, 0x85, 0xc0, 0x0f, 0x85, + 0x00, 0x00, 0x80, 0x38, 0x01, 0x0f, 0x84, }; memcpy(code, code_body, sizeof(code_body)); - patch_64(code + 0x2, instruction->oparg); + patch_64(code + 0x2, (uintptr_t)&spam); patch_32r(code + 0xf, state->instruction_starts[instruction->error_target] + -0x4); } -void -emit_3( - unsigned char *code, unsigned char *data, _PyExecutorObject *executor, - const _PyUOpInstruction *instruction, jit_state *state) -{ - // 0000000000000000 <_JIT_ENTRY>: - // 0: 49 c7 86 10 01 00 00 00 00 00 00 movq $0x0, 0x110(%r14) - // b: 4d 89 6c 24 40 movq %r13, 0x40(%r12) - // 10: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax - // 0000000000000012: R_X86_64_64 _JIT_TARGET - // 1a: 89 c1 movl %eax, %ecx - // 1c: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax - // 000000000000001e: R_X86_64_64 _JIT_OPERAND0 - // 26: 48 01 c8 addq %rcx, %rax - // 29: c3 retq - const unsigned char code_body[42] = { - 0x49, 0xc7, 0x86, 0x10, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x4d, 0x89, 0x6c, 0x24, 0x40, - 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x89, 0xc1, 0x48, 0xb8, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x01, - 0xc8, 0xc3, - }; - memcpy(code, code_body, sizeof(code_body)); - patch_64(code + 0x12, instruction->target); - patch_64(code + 0x1e, instruction->operand0); -} - -void -emit_4( - unsigned char *code, unsigned char *data, _PyExecutorObject *executor, - const _PyUOpInstruction *instruction, jit_state *state) -{ - // 0000000000000000 <_JIT_ENTRY>: - // 0: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax - // 0000000000000002: R_X86_64_64 _JIT_OPERAND1 - // a: 49 89 86 10 01 00 00 movq %rax, 0x110(%r14) - // 11: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax - // 0000000000000013: R_X86_64_64 _JIT_OPERAND1+0x78 - // 1b: 48 8b 00 movq (%rax), %rax - // 1e: ff e0 jmpq *%rax - const unsigned char code_body[32] = { - 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x49, 0x89, 0x86, 0x10, 0x01, 0x00, - 0x00, 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x48, 0x8b, 0x00, 0xff, 0xe0, - }; - memcpy(code, code_body, sizeof(code_body)); - patch_64(code + 0x2, instruction->operand1); - patch_64(code + 0x13, instruction->operand1 + 0x78); -} - static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); typedef struct { @@ -143,10 +110,8 @@ static const StencilGroup shim = {emit_shim, 33, 0, {0}}; static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { [0] = {emit_0, 0, 0, {0}}, - [1] = {emit_1, 19, 0, {0}}, + [1] = {emit_1, 44, 16, {0}}, [2] = {emit_2, 19, 0, {0}}, - [3] = {emit_3, 42, 0, {0}}, - [4] = {emit_4, 32, 0, {0}}, }; static const void * const symbols_map[1] = { From e1eb85d459b9f207f8b44a1778f2e7cbcf768694 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 16 Jul 2025 13:37:33 -0700 Subject: [PATCH 03/11] Add expected output for Windows --- ...est_jit_stencils-aarch64-pc-windows-msvc.h | 159 ++++++++++++++++ .../test_jit_stencils-i686-pc-windows-msvc.h | 128 +++++++++++++ ...test_jit_stencils-x86_64-pc-windows-msvc.h | 169 ++++++++++++++++++ 3 files changed, 456 insertions(+) diff --git a/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h b/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h index e69de29bb2d1d6..a9e71cc52d7136 100644 --- a/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h +++ b/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h @@ -0,0 +1,159 @@ +void +emit_shim( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 6db63bef stp d15, d14, [sp, #-0xa0]! + // 4: a90857f6 stp x22, x21, [sp, #0x80] + // 8: aa0103f5 mov x21, x1 + // c: aa0203f6 mov x22, x2 + // 10: a9094ff4 stp x20, x19, [sp, #0x90] + // 14: aa0003f4 mov x20, x0 + // 18: 6d0133ed stp d13, d12, [sp, #0x10] + // 1c: 6d022beb stp d11, d10, [sp, #0x20] + // 20: 6d0323e9 stp d9, d8, [sp, #0x30] + // 24: f90023fe str x30, [sp, #0x40] + // 28: a9056ffc stp x28, x27, [sp, #0x50] + // 2c: a90667fa stp x26, x25, [sp, #0x60] + // 30: a9075ff8 stp x24, x23, [sp, #0x70] + // 34: 9400000c bl 0x64 <_JIT_ENTRY+0x64> + // 38: a9494ff4 ldp x20, x19, [sp, #0x90] + // 3c: f94023fe ldr x30, [sp, #0x40] + // 40: a94857f6 ldp x22, x21, [sp, #0x80] + // 44: a9475ff8 ldp x24, x23, [sp, #0x70] + // 48: a94667fa ldp x26, x25, [sp, #0x60] + // 4c: a9456ffc ldp x28, x27, [sp, #0x50] + // 50: 6d4323e9 ldp d9, d8, [sp, #0x30] + // 54: 6d422beb ldp d11, d10, [sp, #0x20] + // 58: 6d4133ed ldp d13, d12, [sp, #0x10] + // 5c: 6cca3bef ldp d15, d14, [sp], #0xa0 + // 60: d65f03c0 ret + const unsigned char code_body[100] = { + 0xef, 0x3b, 0xb6, 0x6d, 0xf6, 0x57, 0x08, 0xa9, + 0xf5, 0x03, 0x01, 0xaa, 0xf6, 0x03, 0x02, 0xaa, + 0xf4, 0x4f, 0x09, 0xa9, 0xf4, 0x03, 0x00, 0xaa, + 0xed, 0x33, 0x01, 0x6d, 0xeb, 0x2b, 0x02, 0x6d, + 0xe9, 0x23, 0x03, 0x6d, 0xfe, 0x23, 0x00, 0xf9, + 0xfc, 0x6f, 0x05, 0xa9, 0xfa, 0x67, 0x06, 0xa9, + 0xf8, 0x5f, 0x07, 0xa9, 0x0c, 0x00, 0x00, 0x94, + 0xf4, 0x4f, 0x49, 0xa9, 0xfe, 0x23, 0x40, 0xf9, + 0xf6, 0x57, 0x48, 0xa9, 0xf8, 0x5f, 0x47, 0xa9, + 0xfa, 0x67, 0x46, 0xa9, 0xfc, 0x6f, 0x45, 0xa9, + 0xe9, 0x23, 0x43, 0x6d, 0xeb, 0x2b, 0x42, 0x6d, + 0xed, 0x33, 0x41, 0x6d, 0xef, 0x3b, 0xca, 0x6c, + 0xc0, 0x03, 0x5f, 0xd6, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_0( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ +} + +void +emit_1( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: f81f0ffe str x30, [sp, #-0x10]! + // 4: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000004: IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_sausage + // 8: f9400108 ldr x8, [x8] + // 0000000000000008: IMAGE_REL_ARM64_PAGEOFFSET_12L __imp_sausage + // c: 39400108 ldrb w8, [x8] + // 10: 36000088 tbz w8, #0x0, 0x20 <_JIT_ENTRY+0x20> + // 14: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000014: IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_order_eggs_sausage_and_bacon + // 18: f9400108 ldr x8, [x8] + // 0000000000000018: IMAGE_REL_ARM64_PAGEOFFSET_12L __imp_order_eggs_sausage_and_bacon + // 1c: 14000003 b 0x28 <_JIT_ENTRY+0x28> + // 20: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000020: IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_order_eggs_and_bacon + // 24: f9400108 ldr x8, [x8] + // 0000000000000024: IMAGE_REL_ARM64_PAGEOFFSET_12L __imp_order_eggs_and_bacon + // 28: d63f0100 blr x8 + // 2c: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 000000000000002c: IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_spammed + // 30: f9400108 ldr x8, [x8] + // 0000000000000030: IMAGE_REL_ARM64_PAGEOFFSET_12L __imp_spammed + // 34: 3900011f strb wzr, [x8] + // 38: f84107fe ldr x30, [sp], #0x10 + const unsigned char code_body[60] = { + 0xfe, 0x0f, 0x1f, 0xf8, 0x08, 0x00, 0x00, 0x90, + 0x08, 0x01, 0x40, 0xf9, 0x08, 0x01, 0x40, 0x39, + 0x88, 0x00, 0x00, 0x36, 0x08, 0x00, 0x00, 0x90, + 0x08, 0x01, 0x40, 0xf9, 0x03, 0x00, 0x00, 0x14, + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, + 0x00, 0x01, 0x3f, 0xd6, 0x08, 0x00, 0x00, 0x90, + 0x08, 0x01, 0x40, 0xf9, 0x1f, 0x01, 0x00, 0x39, + 0xfe, 0x07, 0x41, 0xf8, + }; + // 0: &sausage+0x0 + // 8: &order_eggs_sausage_and_bacon+0x0 + // 10: &order_eggs_and_bacon+0x0 + // 18: &spammed+0x0 + patch_64(data + 0x0, (uintptr_t)&sausage); + patch_64(data + 0x8, (uintptr_t)&order_eggs_sausage_and_bacon); + patch_64(data + 0x10, (uintptr_t)&order_eggs_and_bacon); + patch_64(data + 0x18, (uintptr_t)&spammed); + memcpy(code, code_body, sizeof(code_body)); + patch_aarch64_33rx(code + 0x4, (uintptr_t)data); + patch_aarch64_33rx(code + 0x14, (uintptr_t)data + 0x8); + patch_aarch64_33rx(code + 0x20, (uintptr_t)data + 0x10); + patch_aarch64_33rx(code + 0x2c, (uintptr_t)data + 0x18); +} + +void +emit_2( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000000: IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_spam + // 4: f9400108 ldr x8, [x8] + // 0000000000000004: IMAGE_REL_ARM64_PAGEOFFSET_12L __imp_spam + // 8: 39400108 ldrb w8, [x8] + // c: 7100051f cmp w8, #0x1 + // 10: 54000041 b.ne 0x18 <_JIT_ENTRY+0x18> + // 14: 14000000 b 0x14 <_JIT_ENTRY+0x14> + // 0000000000000014: IMAGE_REL_ARM64_BRANCH26 _JIT_ERROR_TARGET + const unsigned char code_body[24] = { + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, + 0x08, 0x01, 0x40, 0x39, 0x1f, 0x05, 0x00, 0x71, + 0x41, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x14, + }; + // 0: &spam+0x0 + patch_64(data + 0x0, (uintptr_t)&spam); + memcpy(code, code_body, sizeof(code_body)); + patch_aarch64_33rx(code + 0x0, (uintptr_t)data); + patch_aarch64_26r(code + 0x14, state->instruction_starts[instruction->error_target]); +} + +static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); + +typedef struct { + void (*emit)( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state); + size_t code_size; + size_t data_size; + symbol_mask trampoline_mask; +} StencilGroup; + +static const StencilGroup shim = {emit_shim, 100, 0, {0}}; + +static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { + [0] = {emit_0, 0, 0, {0}}, + [1] = {emit_1, 60, 32, {0}}, + [2] = {emit_2, 24, 8, {0}}, +}; + +static const void * const symbols_map[1] = { + 0 +}; diff --git a/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h b/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h index e69de29bb2d1d6..2ea27265604e4c 100644 --- a/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h +++ b/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h @@ -0,0 +1,128 @@ +void +emit_shim( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ +} + +void +emit_0( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 00000000 <__JIT_ENTRY>: + // 0: 8b 44 24 0c movl 0xc(%esp), %eax + // 4: 8b 4c 24 08 movl 0x8(%esp), %ecx + // 8: 8b 54 24 04 movl 0x4(%esp), %edx + // c: 89 54 24 04 movl %edx, 0x4(%esp) + // 10: 89 4c 24 08 movl %ecx, 0x8(%esp) + // 14: 89 44 24 0c movl %eax, 0xc(%esp) + const unsigned char code_body[24] = { + 0x8b, 0x44, 0x24, 0x0c, 0x8b, 0x4c, 0x24, 0x08, + 0x8b, 0x54, 0x24, 0x04, 0x89, 0x54, 0x24, 0x04, + 0x89, 0x4c, 0x24, 0x08, 0x89, 0x44, 0x24, 0x0c, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_1( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 00000000 <__JIT_ENTRY>: + // 0: 53 pushl %ebx + // 1: 57 pushl %edi + // 2: 56 pushl %esi + // 3: 8b 74 24 18 movl 0x18(%esp), %esi + // 7: 8b 7c 24 14 movl 0x14(%esp), %edi + // b: 8b 5c 24 10 movl 0x10(%esp), %ebx + // f: 80 3d 00 00 00 00 00 cmpb $0x0, 0x0 + // 00000011: IMAGE_REL_I386_DIR32 _sausage + // 16: 74 07 je 0x1f <__JIT_ENTRY+0x1f> + // 18: e8 00 00 00 00 calll 0x1d <__JIT_ENTRY+0x1d> + // 00000019: IMAGE_REL_I386_REL32 _order_eggs_sausage_and_bacon + // 1d: eb 05 jmp 0x24 <__JIT_ENTRY+0x24> + // 1f: e8 00 00 00 00 calll 0x24 <__JIT_ENTRY+0x24> + // 00000020: IMAGE_REL_I386_REL32 _order_eggs_and_bacon + // 24: c6 05 00 00 00 00 00 movb $0x0, 0x0 + // 00000026: IMAGE_REL_I386_DIR32 _spammed + // 2b: 89 5c 24 10 movl %ebx, 0x10(%esp) + // 2f: 89 7c 24 14 movl %edi, 0x14(%esp) + // 33: 89 74 24 18 movl %esi, 0x18(%esp) + // 37: 5e popl %esi + // 38: 5f popl %edi + // 39: 5b popl %ebx + const unsigned char code_body[58] = { + 0x53, 0x57, 0x56, 0x8b, 0x74, 0x24, 0x18, 0x8b, + 0x7c, 0x24, 0x14, 0x8b, 0x5c, 0x24, 0x10, 0x80, + 0x3d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x74, 0x07, + 0xe8, 0x00, 0x00, 0x00, 0x00, 0xeb, 0x05, 0xe8, + 0x00, 0x00, 0x00, 0x00, 0xc6, 0x05, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x89, 0x5c, 0x24, 0x10, 0x89, + 0x7c, 0x24, 0x14, 0x89, 0x74, 0x24, 0x18, 0x5e, + 0x5f, 0x5b, + }; + memcpy(code, code_body, sizeof(code_body)); + patch_32(code + 0x11, (uintptr_t)&sausage); + patch_x86_64_32rx(code + 0x19, (uintptr_t)&order_eggs_sausage_and_bacon + -0x4); + patch_x86_64_32rx(code + 0x20, (uintptr_t)&order_eggs_and_bacon + -0x4); + patch_32(code + 0x26, (uintptr_t)&spammed); +} + +void +emit_2( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 00000000 <__JIT_ENTRY>: + // 0: 8b 54 24 0c movl 0xc(%esp), %edx + // 4: 8b 4c 24 08 movl 0x8(%esp), %ecx + // 8: 8b 44 24 04 movl 0x4(%esp), %eax + // c: 80 3d 00 00 00 00 01 cmpb $0x1, 0x0 + // 0000000e: IMAGE_REL_I386_DIR32 _spam + // 13: 75 11 jne 0x26 <__JIT_ENTRY+0x26> + // 15: 89 54 24 0c movl %edx, 0xc(%esp) + // 19: 89 4c 24 08 movl %ecx, 0x8(%esp) + // 1d: 89 44 24 04 movl %eax, 0x4(%esp) + // 21: e9 00 00 00 00 jmp 0x26 <__JIT_ENTRY+0x26> + // 00000022: IMAGE_REL_I386_REL32 __JIT_ERROR_TARGET + // 26: 89 54 24 0c movl %edx, 0xc(%esp) + // 2a: 89 4c 24 08 movl %ecx, 0x8(%esp) + // 2e: 89 44 24 04 movl %eax, 0x4(%esp) + const unsigned char code_body[50] = { + 0x8b, 0x54, 0x24, 0x0c, 0x8b, 0x4c, 0x24, 0x08, + 0x8b, 0x44, 0x24, 0x04, 0x80, 0x3d, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x75, 0x11, 0x89, 0x54, 0x24, + 0x0c, 0x89, 0x4c, 0x24, 0x08, 0x89, 0x44, 0x24, + 0x04, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x89, 0x54, + 0x24, 0x0c, 0x89, 0x4c, 0x24, 0x08, 0x89, 0x44, + 0x24, 0x04, + }; + memcpy(code, code_body, sizeof(code_body)); + patch_32(code + 0xe, (uintptr_t)&spam); + patch_x86_64_32rx(code + 0x22, state->instruction_starts[instruction->error_target] + -0x4); +} + +static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); + +typedef struct { + void (*emit)( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state); + size_t code_size; + size_t data_size; + symbol_mask trampoline_mask; +} StencilGroup; + +static const StencilGroup shim = {emit_shim, 0, 0, {0}}; + +static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { + [0] = {emit_0, 24, 0, {0}}, + [1] = {emit_1, 58, 0, {0}}, + [2] = {emit_2, 50, 0, {0}}, +}; + +static const void * const symbols_map[1] = { + 0 +}; diff --git a/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h b/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h index e69de29bb2d1d6..8a4f58d9e397ac 100644 --- a/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h +++ b/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h @@ -0,0 +1,169 @@ +void +emit_shim( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 41 57 pushq %r15 + // 2: 41 56 pushq %r14 + // 4: 41 55 pushq %r13 + // 6: 41 54 pushq %r12 + // 8: 56 pushq %rsi + // 9: 57 pushq %rdi + // a: 53 pushq %rbx + // b: 48 81 ec a0 00 00 00 subq $0xa0, %rsp + // 12: 44 0f 29 bc 24 90 00 00 00 movaps %xmm15, 0x90(%rsp) + // 1b: 44 0f 29 b4 24 80 00 00 00 movaps %xmm14, 0x80(%rsp) + // 24: 44 0f 29 6c 24 70 movaps %xmm13, 0x70(%rsp) + // 2a: 44 0f 29 64 24 60 movaps %xmm12, 0x60(%rsp) + // 30: 44 0f 29 5c 24 50 movaps %xmm11, 0x50(%rsp) + // 36: 44 0f 29 54 24 40 movaps %xmm10, 0x40(%rsp) + // 3c: 44 0f 29 4c 24 30 movaps %xmm9, 0x30(%rsp) + // 42: 44 0f 29 44 24 20 movaps %xmm8, 0x20(%rsp) + // 48: 0f 29 7c 24 10 movaps %xmm7, 0x10(%rsp) + // 4d: 0f 29 34 24 movaps %xmm6, (%rsp) + // 51: 49 89 cc movq %rcx, %r12 + // 54: 49 89 d5 movq %rdx, %r13 + // 57: 4d 89 c6 movq %r8, %r14 + // 5a: e8 52 00 00 00 callq 0xb1 <_JIT_ENTRY+0xb1> + // 5f: 0f 28 34 24 movaps (%rsp), %xmm6 + // 63: 0f 28 7c 24 10 movaps 0x10(%rsp), %xmm7 + // 68: 44 0f 28 44 24 20 movaps 0x20(%rsp), %xmm8 + // 6e: 44 0f 28 4c 24 30 movaps 0x30(%rsp), %xmm9 + // 74: 44 0f 28 54 24 40 movaps 0x40(%rsp), %xmm10 + // 7a: 44 0f 28 5c 24 50 movaps 0x50(%rsp), %xmm11 + // 80: 44 0f 28 64 24 60 movaps 0x60(%rsp), %xmm12 + // 86: 44 0f 28 6c 24 70 movaps 0x70(%rsp), %xmm13 + // 8c: 44 0f 28 b4 24 80 00 00 00 movaps 0x80(%rsp), %xmm14 + // 95: 44 0f 28 bc 24 90 00 00 00 movaps 0x90(%rsp), %xmm15 + // 9e: 48 81 c4 a0 00 00 00 addq $0xa0, %rsp + // a5: 5b popq %rbx + // a6: 5f popq %rdi + // a7: 5e popq %rsi + // a8: 41 5c popq %r12 + // aa: 41 5d popq %r13 + // ac: 41 5e popq %r14 + // ae: 41 5f popq %r15 + // b0: c3 retq + const unsigned char code_body[177] = { + 0x41, 0x57, 0x41, 0x56, 0x41, 0x55, 0x41, 0x54, + 0x56, 0x57, 0x53, 0x48, 0x81, 0xec, 0xa0, 0x00, + 0x00, 0x00, 0x44, 0x0f, 0x29, 0xbc, 0x24, 0x90, + 0x00, 0x00, 0x00, 0x44, 0x0f, 0x29, 0xb4, 0x24, + 0x80, 0x00, 0x00, 0x00, 0x44, 0x0f, 0x29, 0x6c, + 0x24, 0x70, 0x44, 0x0f, 0x29, 0x64, 0x24, 0x60, + 0x44, 0x0f, 0x29, 0x5c, 0x24, 0x50, 0x44, 0x0f, + 0x29, 0x54, 0x24, 0x40, 0x44, 0x0f, 0x29, 0x4c, + 0x24, 0x30, 0x44, 0x0f, 0x29, 0x44, 0x24, 0x20, + 0x0f, 0x29, 0x7c, 0x24, 0x10, 0x0f, 0x29, 0x34, + 0x24, 0x49, 0x89, 0xcc, 0x49, 0x89, 0xd5, 0x4d, + 0x89, 0xc6, 0xe8, 0x52, 0x00, 0x00, 0x00, 0x0f, + 0x28, 0x34, 0x24, 0x0f, 0x28, 0x7c, 0x24, 0x10, + 0x44, 0x0f, 0x28, 0x44, 0x24, 0x20, 0x44, 0x0f, + 0x28, 0x4c, 0x24, 0x30, 0x44, 0x0f, 0x28, 0x54, + 0x24, 0x40, 0x44, 0x0f, 0x28, 0x5c, 0x24, 0x50, + 0x44, 0x0f, 0x28, 0x64, 0x24, 0x60, 0x44, 0x0f, + 0x28, 0x6c, 0x24, 0x70, 0x44, 0x0f, 0x28, 0xb4, + 0x24, 0x80, 0x00, 0x00, 0x00, 0x44, 0x0f, 0x28, + 0xbc, 0x24, 0x90, 0x00, 0x00, 0x00, 0x48, 0x81, + 0xc4, 0xa0, 0x00, 0x00, 0x00, 0x5b, 0x5f, 0x5e, + 0x41, 0x5c, 0x41, 0x5d, 0x41, 0x5e, 0x41, 0x5f, + 0xc3, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_0( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ +} + +void +emit_1( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 48 83 ec 28 subq $0x28, %rsp + // 4: 48 8b 05 00 00 00 00 movq (%rip), %rax # 0xb <_JIT_ENTRY+0xb> + // 0000000000000007: IMAGE_REL_AMD64_REL32 __imp_sausage + // b: 80 38 00 cmpb $0x0, (%rax) + // e: 74 08 je 0x18 <_JIT_ENTRY+0x18> + // 10: ff 15 00 00 00 00 callq *(%rip) # 0x16 <_JIT_ENTRY+0x16> + // 0000000000000012: IMAGE_REL_AMD64_REL32 __imp_order_eggs_sausage_and_bacon + // 16: eb 06 jmp 0x1e <_JIT_ENTRY+0x1e> + // 18: ff 15 00 00 00 00 callq *(%rip) # 0x1e <_JIT_ENTRY+0x1e> + // 000000000000001a: IMAGE_REL_AMD64_REL32 __imp_order_eggs_and_bacon + // 1e: 48 8b 05 00 00 00 00 movq (%rip), %rax # 0x25 <_JIT_ENTRY+0x25> + // 0000000000000021: IMAGE_REL_AMD64_REL32 __imp_spammed + // 25: c6 00 00 movb $0x0, (%rax) + // 28: 48 83 c4 28 addq $0x28, %rsp + const unsigned char code_body[44] = { + 0x48, 0x83, 0xec, 0x28, 0x48, 0x8b, 0x05, 0x00, + 0x00, 0x00, 0x00, 0x80, 0x38, 0x00, 0x74, 0x08, + 0xff, 0x15, 0x00, 0x00, 0x00, 0x00, 0xeb, 0x06, + 0xff, 0x15, 0x00, 0x00, 0x00, 0x00, 0x48, 0x8b, + 0x05, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, + 0x48, 0x83, 0xc4, 0x28, + }; + // 0: &sausage+0x0 + // 8: &order_eggs_sausage_and_bacon+0x0 + // 10: &order_eggs_and_bacon+0x0 + // 18: &spammed+0x0 + patch_64(data + 0x0, (uintptr_t)&sausage); + patch_64(data + 0x8, (uintptr_t)&order_eggs_sausage_and_bacon); + patch_64(data + 0x10, (uintptr_t)&order_eggs_and_bacon); + patch_64(data + 0x18, (uintptr_t)&spammed); + memcpy(code, code_body, sizeof(code_body)); + patch_x86_64_32rx(code + 0x7, (uintptr_t)data + -0x4); + patch_x86_64_32rx(code + 0x12, (uintptr_t)data + 0x4); + patch_x86_64_32rx(code + 0x1a, (uintptr_t)data + 0xc); + patch_x86_64_32rx(code + 0x21, (uintptr_t)data + 0x14); +} + +void +emit_2( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 48 8b 05 00 00 00 00 movq (%rip), %rax # 0x7 <_JIT_ENTRY+0x7> + // 0000000000000003: IMAGE_REL_AMD64_REL32 __imp_spam + // 7: 80 38 01 cmpb $0x1, (%rax) + // a: 0f 84 00 00 00 00 je 0x10 <_JIT_ENTRY+0x10> + // 000000000000000c: IMAGE_REL_AMD64_REL32 _JIT_ERROR_TARGET + const unsigned char code_body[16] = { + 0x48, 0x8b, 0x05, 0x00, 0x00, 0x00, 0x00, 0x80, + 0x38, 0x01, 0x0f, 0x84, + }; + // 0: &spam+0x0 + patch_64(data + 0x0, (uintptr_t)&spam); + memcpy(code, code_body, sizeof(code_body)); + patch_x86_64_32rx(code + 0x3, (uintptr_t)data + -0x4); + patch_x86_64_32rx(code + 0xc, state->instruction_starts[instruction->error_target] + -0x4); +} + +static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); + +typedef struct { + void (*emit)( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state); + size_t code_size; + size_t data_size; + symbol_mask trampoline_mask; +} StencilGroup; + +static const StencilGroup shim = {emit_shim, 177, 0, {0}}; + +static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { + [0] = {emit_0, 0, 0, {0}}, + [1] = {emit_1, 44, 32, {0}}, + [2] = {emit_2, 16, 8, {0}}, +}; + +static const void * const symbols_map[1] = { + 0 +}; From f4c05b3d8346b701e82aeba634d35078fa492efd Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 16 Jul 2025 13:56:03 -0700 Subject: [PATCH 04/11] Add expected output for macOS --- Lib/test/test_jit_stencils.py | 4 + Tools/jit/_targets.py | 11 +- .../test_jit_stencils-aarch64-apple-darwin.h | 149 ++++++++++++++++++ ...est_jit_stencils-aarch64-pc-windows-msvc.h | 3 - ...t_jit_stencils-aarch64-unknown-linux-gnu.h | 3 - .../test_jit_stencils-i686-pc-windows-msvc.h | 3 - .../test_jit_stencils-x86_64-apple-darwin.h | 142 +++++++++++++++++ ...test_jit_stencils-x86_64-pc-windows-msvc.h | 3 - ...st_jit_stencils-x86_64-unknown-linux-gnu.h | 3 - 9 files changed, 300 insertions(+), 21 deletions(-) diff --git a/Lib/test/test_jit_stencils.py b/Lib/test/test_jit_stencils.py index 94beecc9eed8d3..72dee974f8c6c6 100644 --- a/Lib/test/test_jit_stencils.py +++ b/Lib/test/test_jit_stencils.py @@ -65,3 +65,7 @@ def test_jit_stencils(self): self._check_jit_stencils(expected, actual, test_jit_stencils_h) # This is a local build. If the JIT is available, at least one test should run: assert found or not sys._jit.is_available(), "No JIT stencils built!" + + +if __name__ == "__main__": + unittest.main() diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 60850377d5486e..2666d31665c37b 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -86,13 +86,12 @@ async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup: lines = output.splitlines() started = False for line in lines: - if not started: - if "_JIT_ENTRY" not in line: - continue + if line.lstrip().startswith("0:"): started = True - cleaned = line.replace(long, short).expandtabs().strip() - if cleaned: - group.code.disassembly.append(cleaned) + if started: + cleaned = line.replace(long, short).expandtabs().strip() + if cleaned: + group.code.disassembly.append(cleaned) args = [ "--elf-output-style=JSON", "--expand-relocs", diff --git a/Tools/jit/test/test_jit_stencils-aarch64-apple-darwin.h b/Tools/jit/test/test_jit_stencils-aarch64-apple-darwin.h index e69de29bb2d1d6..c26310b9ab4cc4 100644 --- a/Tools/jit/test/test_jit_stencils-aarch64-apple-darwin.h +++ b/Tools/jit/test/test_jit_stencils-aarch64-apple-darwin.h @@ -0,0 +1,149 @@ +void +emit_shim( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 6db63bef stp d15, d14, [sp, #-0xa0]! + // 4: 6d0133ed stp d13, d12, [sp, #0x10] + // 8: 6d022beb stp d11, d10, [sp, #0x20] + // c: 6d0323e9 stp d9, d8, [sp, #0x30] + // 10: a9046ffc stp x28, x27, [sp, #0x40] + // 14: a90567fa stp x26, x25, [sp, #0x50] + // 18: a9065ff8 stp x24, x23, [sp, #0x60] + // 1c: a90757f6 stp x22, x21, [sp, #0x70] + // 20: a9084ff4 stp x20, x19, [sp, #0x80] + // 24: a9097bfd stp x29, x30, [sp, #0x90] + // 28: 910243fd add x29, sp, #0x90 + // 2c: aa0003f4 mov x20, x0 + // 30: aa0103f5 mov x21, x1 + // 34: aa0203f6 mov x22, x2 + // 38: 9400000c bl 0x68 + // 3c: a9497bfd ldp x29, x30, [sp, #0x90] + // 40: a9484ff4 ldp x20, x19, [sp, #0x80] + // 44: a94757f6 ldp x22, x21, [sp, #0x70] + // 48: a9465ff8 ldp x24, x23, [sp, #0x60] + // 4c: a94567fa ldp x26, x25, [sp, #0x50] + // 50: a9446ffc ldp x28, x27, [sp, #0x40] + // 54: 6d4323e9 ldp d9, d8, [sp, #0x30] + // 58: 6d422beb ldp d11, d10, [sp, #0x20] + // 5c: 6d4133ed ldp d13, d12, [sp, #0x10] + // 60: 6cca3bef ldp d15, d14, [sp], #0xa0 + // 64: d65f03c0 ret + const unsigned char code_body[104] = { + 0xef, 0x3b, 0xb6, 0x6d, 0xed, 0x33, 0x01, 0x6d, + 0xeb, 0x2b, 0x02, 0x6d, 0xe9, 0x23, 0x03, 0x6d, + 0xfc, 0x6f, 0x04, 0xa9, 0xfa, 0x67, 0x05, 0xa9, + 0xf8, 0x5f, 0x06, 0xa9, 0xf6, 0x57, 0x07, 0xa9, + 0xf4, 0x4f, 0x08, 0xa9, 0xfd, 0x7b, 0x09, 0xa9, + 0xfd, 0x43, 0x02, 0x91, 0xf4, 0x03, 0x00, 0xaa, + 0xf5, 0x03, 0x01, 0xaa, 0xf6, 0x03, 0x02, 0xaa, + 0x0c, 0x00, 0x00, 0x94, 0xfd, 0x7b, 0x49, 0xa9, + 0xf4, 0x4f, 0x48, 0xa9, 0xf6, 0x57, 0x47, 0xa9, + 0xf8, 0x5f, 0x46, 0xa9, 0xfa, 0x67, 0x45, 0xa9, + 0xfc, 0x6f, 0x44, 0xa9, 0xe9, 0x23, 0x43, 0x6d, + 0xeb, 0x2b, 0x42, 0x6d, 0xed, 0x33, 0x41, 0x6d, + 0xef, 0x3b, 0xca, 0x6c, 0xc0, 0x03, 0x5f, 0xd6, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_0( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ +} + +void +emit_1( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: a9bf7bfd stp x29, x30, [sp, #-0x10]! + // 4: 910003fd mov x29, sp + // 8: 90000008 adrp x8, 0x0 + // 0000000000000008: ARM64_RELOC_GOT_LOAD_PAGE21 _sausage + // c: f9400108 ldr x8, [x8] + // 000000000000000c: ARM64_RELOC_GOT_LOAD_PAGEOFF12 _sausage + // 10: 39400108 ldrb w8, [x8] + // 14: 36000068 tbz w8, #0x0, 0x20 + // 18: 94000000 bl 0x18 + // 0000000000000018: ARM64_RELOC_BRANCH26 _order_eggs_sausage_and_bacon + // 1c: 14000002 b 0x24 + // 20: 94000000 bl 0x20 + // 0000000000000020: ARM64_RELOC_BRANCH26 _order_eggs_and_bacon + // 24: 90000008 adrp x8, 0x0 + // 0000000000000024: ARM64_RELOC_GOT_LOAD_PAGE21 _spammed + // 28: f9400108 ldr x8, [x8] + // 0000000000000028: ARM64_RELOC_GOT_LOAD_PAGEOFF12 _spammed + // 2c: 3900011f strb wzr, [x8] + // 30: a8c17bfd ldp x29, x30, [sp], #0x10 + const unsigned char code_body[52] = { + 0xfd, 0x7b, 0xbf, 0xa9, 0xfd, 0x03, 0x00, 0x91, + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, + 0x08, 0x01, 0x40, 0x39, 0x68, 0x00, 0x00, 0x36, + 0x00, 0x00, 0x00, 0x94, 0x02, 0x00, 0x00, 0x14, + 0x00, 0x00, 0x00, 0x94, 0x08, 0x00, 0x00, 0x90, + 0x08, 0x01, 0x40, 0xf9, 0x1f, 0x01, 0x00, 0x39, + 0xfd, 0x7b, 0xc1, 0xa8, + }; + // 0: &spammed+0x0 + // 8: &sausage+0x0 + patch_64(data + 0x0, (uintptr_t)&spammed); + patch_64(data + 0x8, (uintptr_t)&sausage); + memcpy(code, code_body, sizeof(code_body)); + patch_aarch64_33rx(code + 0x8, (uintptr_t)data + 0x8); + patch_aarch64_trampoline(code + 0x18, 0x1, state); + patch_aarch64_trampoline(code + 0x20, 0x0, state); + patch_aarch64_33rx(code + 0x24, (uintptr_t)data); +} + +void +emit_2( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 90000008 adrp x8, 0x0 + // 0000000000000000: ARM64_RELOC_GOT_LOAD_PAGE21 _spam + // 4: f9400108 ldr x8, [x8] + // 0000000000000004: ARM64_RELOC_GOT_LOAD_PAGEOFF12 _spam + // 8: 39400108 ldrb w8, [x8] + // c: 7100051f cmp w8, #0x1 + // 10: 54000041 b.ne 0x18 + // 14: 14000000 b 0x14 + // 0000000000000014: ARM64_RELOC_BRANCH26 __JIT_ERROR_TARGET + const unsigned char code_body[24] = { + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, + 0x08, 0x01, 0x40, 0x39, 0x1f, 0x05, 0x00, 0x71, + 0x41, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x14, + }; + // 0: &spam+0x0 + patch_64(data + 0x0, (uintptr_t)&spam); + memcpy(code, code_body, sizeof(code_body)); + patch_aarch64_33rx(code + 0x0, (uintptr_t)data); + patch_aarch64_26r(code + 0x14, state->instruction_starts[instruction->error_target]); +} + +static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); + +typedef struct { + void (*emit)( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state); + size_t code_size; + size_t data_size; + symbol_mask trampoline_mask; +} StencilGroup; + +static const StencilGroup shim = {emit_shim, 104, 0, {0}}; + +static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { + [0] = {emit_0, 0, 0, {0}}, + [1] = {emit_1, 52, 16, {0x03}}, + [2] = {emit_2, 24, 8, {0}}, +}; + +static const void * const symbols_map[2] = { + [0] = &order_eggs_and_bacon, + [1] = &order_eggs_sausage_and_bacon, +}; diff --git a/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h b/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h index a9e71cc52d7136..b8a3afbaee133e 100644 --- a/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h +++ b/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h @@ -3,7 +3,6 @@ emit_shim( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: 6db63bef stp d15, d14, [sp, #-0xa0]! // 4: a90857f6 stp x22, x21, [sp, #0x80] // 8: aa0103f5 mov x21, x1 @@ -59,7 +58,6 @@ emit_1( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: f81f0ffe str x30, [sp, #-0x10]! // 4: 90000008 adrp x8, 0x0 <_JIT_ENTRY> // 0000000000000004: IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_sausage @@ -113,7 +111,6 @@ emit_2( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> // 0000000000000000: IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_spam // 4: f9400108 ldr x8, [x8] diff --git a/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h b/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h index 42b36c0b8b7d6a..5e2ed0db8cc2d5 100644 --- a/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h +++ b/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h @@ -3,7 +3,6 @@ emit_shim( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: 6db63bef stp d15, d14, [sp, #-0xa0]! // 4: a90857f6 stp x22, x21, [sp, #0x80] // 8: aa0103f5 mov x21, x1 @@ -60,7 +59,6 @@ emit_1( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: a9bf7bfd stp x29, x30, [sp, #-0x10]! // 4: 90000008 adrp x8, 0x0 <_JIT_ENTRY> // 0000000000000004: R_AARCH64_ADR_GOT_PAGE sausage @@ -116,7 +114,6 @@ emit_2( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> // 0000000000000000: R_AARCH64_ADR_GOT_PAGE spam // 4: f9400108 ldr x8, [x8] diff --git a/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h b/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h index 2ea27265604e4c..2f8e7d768cd78f 100644 --- a/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h +++ b/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h @@ -10,7 +10,6 @@ emit_0( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 00000000 <__JIT_ENTRY>: // 0: 8b 44 24 0c movl 0xc(%esp), %eax // 4: 8b 4c 24 08 movl 0x8(%esp), %ecx // 8: 8b 54 24 04 movl 0x4(%esp), %edx @@ -30,7 +29,6 @@ emit_1( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 00000000 <__JIT_ENTRY>: // 0: 53 pushl %ebx // 1: 57 pushl %edi // 2: 56 pushl %esi @@ -75,7 +73,6 @@ emit_2( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 00000000 <__JIT_ENTRY>: // 0: 8b 54 24 0c movl 0xc(%esp), %edx // 4: 8b 4c 24 08 movl 0x8(%esp), %ecx // 8: 8b 44 24 04 movl 0x4(%esp), %eax diff --git a/Tools/jit/test/test_jit_stencils-x86_64-apple-darwin.h b/Tools/jit/test/test_jit_stencils-x86_64-apple-darwin.h index e69de29bb2d1d6..4d6ee50c4c1f41 100644 --- a/Tools/jit/test/test_jit_stencils-x86_64-apple-darwin.h +++ b/Tools/jit/test/test_jit_stencils-x86_64-apple-darwin.h @@ -0,0 +1,142 @@ +void +emit_shim( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 55 pushq %rbp + // 1: 48 89 e5 movq %rsp, %rbp + // 4: 41 57 pushq %r15 + // 6: 41 56 pushq %r14 + // 8: 41 55 pushq %r13 + // a: 41 54 pushq %r12 + // c: 53 pushq %rbx + // d: 50 pushq %rax + // e: 49 89 fc movq %rdi, %r12 + // 11: 49 89 f5 movq %rsi, %r13 + // 14: 49 89 d6 movq %rdx, %r14 + // 17: e8 0f 00 00 00 callq 0x2b <__JIT_ENTRY+0x2b> + // 1c: 48 83 c4 08 addq $0x8, %rsp + // 20: 5b popq %rbx + // 21: 41 5c popq %r12 + // 23: 41 5d popq %r13 + // 25: 41 5e popq %r14 + // 27: 41 5f popq %r15 + // 29: 5d popq %rbp + // 2a: c3 retq + const unsigned char code_body[43] = { + 0x55, 0x48, 0x89, 0xe5, 0x41, 0x57, 0x41, 0x56, + 0x41, 0x55, 0x41, 0x54, 0x53, 0x50, 0x49, 0x89, + 0xfc, 0x49, 0x89, 0xf5, 0x49, 0x89, 0xd6, 0xe8, + 0x0f, 0x00, 0x00, 0x00, 0x48, 0x83, 0xc4, 0x08, + 0x5b, 0x41, 0x5c, 0x41, 0x5d, 0x41, 0x5e, 0x41, + 0x5f, 0x5d, 0xc3, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_0( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 55 pushq %rbp + // 1: 48 89 e5 movq %rsp, %rbp + // 4: 5d popq %rbp + const unsigned char code_body[5] = { + 0x55, 0x48, 0x89, 0xe5, 0x5d, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_1( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 55 pushq %rbp + // 1: 48 89 e5 movq %rsp, %rbp + // 4: 48 8b 05 00 00 00 00 movq (%rip), %rax ## 0xb <__JIT_ENTRY+0xb> + // 0000000000000007: X86_64_RELOC_GOT_LOAD _sausage@GOTPCREL + // b: 80 38 00 cmpb $0x0, (%rax) + // e: 74 08 je 0x18 <__JIT_ENTRY+0x18> + // 10: ff 15 00 00 00 00 callq *(%rip) ## 0x16 <__JIT_ENTRY+0x16> + // 0000000000000012: X86_64_RELOC_GOT _order_eggs_sausage_and_bacon@GOTPCREL + // 16: eb 06 jmp 0x1e <__JIT_ENTRY+0x1e> + // 18: ff 15 00 00 00 00 callq *(%rip) ## 0x1e <__JIT_ENTRY+0x1e> + // 000000000000001a: X86_64_RELOC_GOT _order_eggs_and_bacon@GOTPCREL + // 1e: 48 8b 05 00 00 00 00 movq (%rip), %rax ## 0x25 <__JIT_ENTRY+0x25> + // 0000000000000021: X86_64_RELOC_GOT_LOAD _spammed@GOTPCREL + // 25: c6 00 00 movb $0x0, (%rax) + // 28: 5d popq %rbp + const unsigned char code_body[41] = { + 0x55, 0x48, 0x89, 0xe5, 0x48, 0x8b, 0x05, 0x00, + 0x00, 0x00, 0x00, 0x80, 0x38, 0x00, 0x74, 0x08, + 0xff, 0x15, 0x00, 0x00, 0x00, 0x00, 0xeb, 0x06, + 0xff, 0x15, 0x00, 0x00, 0x00, 0x00, 0x48, 0x8b, + 0x05, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, + 0x5d, + }; + // 0: &spammed+0x0 + // 8: &order_eggs_and_bacon+0x0 + // 10: &order_eggs_sausage_and_bacon+0x0 + // 18: &sausage+0x0 + patch_64(data + 0x0, (uintptr_t)&spammed); + patch_64(data + 0x8, (uintptr_t)&order_eggs_and_bacon); + patch_64(data + 0x10, (uintptr_t)&order_eggs_sausage_and_bacon); + patch_64(data + 0x18, (uintptr_t)&sausage); + memcpy(code, code_body, sizeof(code_body)); + patch_x86_64_32rx(code + 0x7, (uintptr_t)data + 0x14); + patch_x86_64_32rx(code + 0x12, (uintptr_t)data + 0xc); + patch_x86_64_32rx(code + 0x1a, (uintptr_t)data + 0x4); + patch_x86_64_32rx(code + 0x21, (uintptr_t)data + -0x4); +} + +void +emit_2( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 55 pushq %rbp + // 1: 48 89 e5 movq %rsp, %rbp + // 4: 48 8b 05 00 00 00 00 movq (%rip), %rax ## 0xb <__JIT_ENTRY+0xb> + // 0000000000000007: X86_64_RELOC_GOT_LOAD _spam@GOTPCREL + // b: 80 38 01 cmpb $0x1, (%rax) + // e: 75 06 jne 0x16 <__JIT_ENTRY+0x16> + // 10: 5d popq %rbp + // 11: e9 00 00 00 00 jmp 0x16 <__JIT_ENTRY+0x16> + // 0000000000000012: X86_64_RELOC_BRANCH __JIT_ERROR_TARGET + // 16: 5d popq %rbp + const unsigned char code_body[23] = { + 0x55, 0x48, 0x89, 0xe5, 0x48, 0x8b, 0x05, 0x00, + 0x00, 0x00, 0x00, 0x80, 0x38, 0x01, 0x75, 0x06, + 0x5d, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x5d, + }; + // 0: &spam+0x0 + patch_64(data + 0x0, (uintptr_t)&spam); + memcpy(code, code_body, sizeof(code_body)); + patch_x86_64_32rx(code + 0x7, (uintptr_t)data + -0x4); + patch_32r(code + 0x12, state->instruction_starts[instruction->error_target] + -0x4); +} + +static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); + +typedef struct { + void (*emit)( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state); + size_t code_size; + size_t data_size; + symbol_mask trampoline_mask; +} StencilGroup; + +static const StencilGroup shim = {emit_shim, 43, 0, {0}}; + +static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { + [0] = {emit_0, 5, 0, {0}}, + [1] = {emit_1, 41, 32, {0}}, + [2] = {emit_2, 23, 8, {0}}, +}; + +static const void * const symbols_map[1] = { + 0 +}; diff --git a/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h b/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h index 8a4f58d9e397ac..356055584e4d61 100644 --- a/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h +++ b/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h @@ -3,7 +3,6 @@ emit_shim( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: 41 57 pushq %r15 // 2: 41 56 pushq %r14 // 4: 41 55 pushq %r13 @@ -85,7 +84,6 @@ emit_1( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: 48 83 ec 28 subq $0x28, %rsp // 4: 48 8b 05 00 00 00 00 movq (%rip), %rax # 0xb <_JIT_ENTRY+0xb> // 0000000000000007: IMAGE_REL_AMD64_REL32 __imp_sausage @@ -128,7 +126,6 @@ emit_2( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: 48 8b 05 00 00 00 00 movq (%rip), %rax # 0x7 <_JIT_ENTRY+0x7> // 0000000000000003: IMAGE_REL_AMD64_REL32 __imp_spam // 7: 80 38 01 cmpb $0x1, (%rax) diff --git a/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h b/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h index e4de3a1dfb6b8f..075ecac8be499b 100644 --- a/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h +++ b/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h @@ -3,7 +3,6 @@ emit_shim( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: 41 57 pushq %r15 // 2: 41 56 pushq %r14 // 4: 41 55 pushq %r13 @@ -41,7 +40,6 @@ emit_1( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: 50 pushq %rax // 1: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax // 0000000000000003: R_X86_64_64 sausage @@ -80,7 +78,6 @@ emit_2( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax // 0000000000000002: R_X86_64_64 spam // a: 80 38 01 cmpb $0x1, (%rax) From 00cd7e389ac9be4cbdd12328970f0a35f587b262 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 16 Jul 2025 15:38:05 -0700 Subject: [PATCH 05/11] Is it BSS? --- Tools/jit/_targets.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 2666d31665c37b..1e4415ec1f4692 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -270,6 +270,8 @@ def _handle_section( else: # Zeroed BSS data, seen with printf debugging calls: section_data_bytes = [0] * section["RawDataSize"] + # XXX + assert section["RawDataSize"] == 0, section["RawDataSize"] if "IMAGE_SCN_MEM_EXECUTE" in flags: value = _stencils.HoleValue.CODE stencil = group.code From 202fb6f9e102f782d3c960424b5d7dc53ac755bd Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 16 Jul 2025 18:41:22 -0700 Subject: [PATCH 06/11] It *is* BSS! --- Tools/jit/_targets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 1e4415ec1f4692..96e4de5934ec1c 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -271,7 +271,7 @@ def _handle_section( # Zeroed BSS data, seen with printf debugging calls: section_data_bytes = [0] * section["RawDataSize"] # XXX - assert section["RawDataSize"] == 0, section["RawDataSize"] + assert section["RawDataSize"] == 0, (group.symbols, section["Symbols"]) if "IMAGE_SCN_MEM_EXECUTE" in flags: value = _stencils.HoleValue.CODE stencil = group.code From 4e5554cf3c504ff41ef28e35335dd4e63cf40e0d Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 17 Jul 2025 12:25:56 -0700 Subject: [PATCH 07/11] Optimistically strip writable data --- Tools/jit/_stencils.py | 23 +++++++++++++++++------ Tools/jit/_targets.py | 17 ++++++++++++----- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 1d82f5366f6ce0..840bf312383aa7 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -19,12 +19,16 @@ class HoleValue(enum.Enum): CODE = enum.auto() # The base address of the read-only data for this uop: DATA = enum.auto() + # The base address of the machine code for the error jump target (exposed as _JIT_ERROR_TARGET): + ERROR_TARGET = enum.auto() # The address of the current executor (exposed as _JIT_EXECUTOR): EXECUTOR = enum.auto() # The base address of the "global" offset table located in the read-only data. # Shouldn't be present in the final stencils, since these are all replaced with # equivalent DATA values: GOT = enum.auto() + # The base address of the machine code for the jump target (exposed as _JIT_JUMP_TARGET): + JUMP_TARGET = enum.auto() # The current uop's oparg (exposed as _JIT_OPARG): OPARG = enum.auto() # The current uop's operand0 on 64-bit platforms (exposed as _JIT_OPERAND0): @@ -39,10 +43,9 @@ class HoleValue(enum.Enum): OPERAND1_LO = enum.auto() # The current uop's target (exposed as _JIT_TARGET): TARGET = enum.auto() - # The base address of the machine code for the jump target (exposed as _JIT_JUMP_TARGET): - JUMP_TARGET = enum.auto() - # The base address of the machine code for the error jump target (exposed as _JIT_ERROR_TARGET): - ERROR_TARGET = enum.auto() + # Writable data, which we don't support! Optimistically remove their data + # from the stencil, and raise later if they're actually used: + WRITABLE = enum.auto() # A hardcoded value of zero (used for symbol lookups): ZERO = enum.auto() @@ -96,9 +99,11 @@ class HoleValue(enum.Enum): _HOLE_EXPRS = { HoleValue.CODE: "(uintptr_t)code", HoleValue.DATA: "(uintptr_t)data", + HoleValue.ERROR_TARGET: "state->instruction_starts[instruction->error_target]", HoleValue.EXECUTOR: "(uintptr_t)executor", # These should all have been turned into DATA values by process_relocations: # HoleValue.GOT: "", + HoleValue.JUMP_TARGET: "state->instruction_starts[instruction->jump_target]", HoleValue.OPARG: "instruction->oparg", HoleValue.OPERAND0: "instruction->operand0", HoleValue.OPERAND0_HI: "(instruction->operand0 >> 32)", @@ -107,8 +112,8 @@ class HoleValue(enum.Enum): HoleValue.OPERAND1_HI: "(instruction->operand1 >> 32)", HoleValue.OPERAND1_LO: "(instruction->operand1 & UINT32_MAX)", HoleValue.TARGET: "instruction->target", - HoleValue.JUMP_TARGET: "state->instruction_starts[instruction->jump_target]", - HoleValue.ERROR_TARGET: "state->instruction_starts[instruction->error_target]", + # These should all have raised an error if they were actually used: + # HoleValue.WRITABLE: "", HoleValue.ZERO: "", } @@ -246,6 +251,12 @@ def process_relocations(self, known_symbols: dict[str, int]) -> None: self.data.pad(8) for stencil in [self.code, self.data]: for hole in stencil.holes: + if hole.symbol in self.symbols: + value, _ = self.symbols[hole.symbol] + if value is HoleValue.WRITABLE: + raise ValueError( + f"Writable data ({hole.symbol}) is not supported!" + ) if hole.value is HoleValue.GOT: assert hole.symbol is not None hole.value = HoleValue.DATA diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 96e4de5934ec1c..d598419823f8ac 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -268,10 +268,8 @@ def _handle_section( if "SectionData" in section: section_data_bytes = section["SectionData"]["Bytes"] else: - # Zeroed BSS data, seen with printf debugging calls: + # Zeroed BSS data: section_data_bytes = [0] * section["RawDataSize"] - # XXX - assert section["RawDataSize"] == 0, (group.symbols, section["Symbols"]) if "IMAGE_SCN_MEM_EXECUTE" in flags: value = _stencils.HoleValue.CODE stencil = group.code @@ -280,6 +278,10 @@ def _handle_section( stencil = group.data else: return + if "IMAGE_SCN_MEM_WRITE" in flags: + assert value is _stencils.HoleValue.DATA + value = _stencils.HoleValue.WRITABLE + section_data_bytes = [] base = len(stencil.body) group.symbols[section["Number"]] = value, base stencil.body.extend(section_data_bytes) @@ -382,7 +384,7 @@ def _handle_section( if value is _stencils.HoleValue.CODE: stencil = group.code else: - assert value is _stencils.HoleValue.DATA + assert value in (_stencils.HoleValue.DATA, _stencils.HoleValue.WRITABLE) stencil = group.data for wrapped_relocation in section["Relocations"]: relocation = wrapped_relocation["Relocation"] @@ -397,6 +399,11 @@ def _handle_section( else: value = _stencils.HoleValue.DATA stencil = group.data + section_data_bytes = section["SectionData"]["Bytes"] + if "SHF_WRITE" in flags: + assert value is _stencils.HoleValue.DATA + value = _stencils.HoleValue.WRITABLE + section_data_bytes = [] group.symbols[section["Index"]] = value, len(stencil.body) for wrapped_symbol in section["Symbols"]: symbol = wrapped_symbol["Symbol"] @@ -404,7 +411,7 @@ def _handle_section( name = symbol["Name"]["Name"] name = name.removeprefix(self.symbol_prefix) group.symbols[name] = value, offset - stencil.body.extend(section["SectionData"]["Bytes"]) + stencil.body.extend(section_data_bytes) assert not section["Relocations"] else: assert section_type in { From 9db056350efd7cad9ab24f083690acf06362bf2c Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 17 Jul 2025 12:37:21 -0700 Subject: [PATCH 08/11] Ditto for ELF --- Tools/jit/_schema.py | 1 + Tools/jit/_targets.py | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Tools/jit/_schema.py b/Tools/jit/_schema.py index 228fc389584dd7..674c09027f11b1 100644 --- a/Tools/jit/_schema.py +++ b/Tools/jit/_schema.py @@ -102,6 +102,7 @@ class ELFSection(typing.TypedDict): Info: int Relocations: list[dict[typing.Literal["Relocation"], ELFRelocation]] SectionData: dict[typing.Literal["Bytes"], list[int]] + Size: int Symbols: list[dict[typing.Literal["Symbol"], _ELFSymbol]] Type: dict[typing.Literal["Name"], str] diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index d598419823f8ac..9ac37c2dfbd7fe 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -390,7 +390,7 @@ def _handle_section( relocation = wrapped_relocation["Relocation"] hole = self._handle_relocation(base, relocation, stencil.body) stencil.holes.append(hole) - elif section_type == "SHT_PROGBITS": + elif section_type in {"SHT_PROGBITS", "SHT_NOBITS"}: if "SHF_ALLOC" not in flags: return if "SHF_EXECINSTR" in flags: @@ -399,7 +399,11 @@ def _handle_section( else: value = _stencils.HoleValue.DATA stencil = group.data - section_data_bytes = section["SectionData"]["Bytes"] + if section_type == "SHT_PROGBITS": + section_data_bytes = section["SectionData"]["Bytes"] + else: + # Zeroed BSS data: + section_data_bytes = [0] * section["Size"] if "SHF_WRITE" in flags: assert value is _stencils.HoleValue.DATA value = _stencils.HoleValue.WRITABLE From c4c3cccfa1df8672df442fff16d8f865c2a25a57 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 17 Jul 2025 15:28:24 -0700 Subject: [PATCH 09/11] Ditto for Mach-O, and *way* simplify the parsing! --- Tools/jit/_schema.py | 4 +++- Tools/jit/_targets.py | 27 ++++++++++++++++----------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/Tools/jit/_schema.py b/Tools/jit/_schema.py index 674c09027f11b1..8f96305dbfa979 100644 --- a/Tools/jit/_schema.py +++ b/Tools/jit/_schema.py @@ -101,7 +101,7 @@ class ELFSection(typing.TypedDict): Index: int Info: int Relocations: list[dict[typing.Literal["Relocation"], ELFRelocation]] - SectionData: dict[typing.Literal["Bytes"], list[int]] + SectionData: typing.NotRequired[dict[typing.Literal["Bytes"], list[int]]] Size: int Symbols: list[dict[typing.Literal["Symbol"], _ELFSymbol]] Type: dict[typing.Literal["Name"], str] @@ -118,4 +118,6 @@ class MachOSection(typing.TypedDict): list[dict[typing.Literal["Relocation"], MachORelocation]] ] SectionData: typing.NotRequired[dict[typing.Literal["Bytes"], list[int]]] + Segment: dict[typing.Literal["Value"], str] + Size: int Symbols: typing.NotRequired[list[dict[typing.Literal["Symbol"], _MachOSymbol]]] diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 9ac37c2dfbd7fe..bc57c623b0acd8 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -400,6 +400,7 @@ def _handle_section( value = _stencils.HoleValue.DATA stencil = group.data if section_type == "SHT_PROGBITS": + assert "SectionData" in section section_data_bytes = section["SectionData"]["Bytes"] else: # Zeroed BSS data: @@ -474,7 +475,11 @@ def _handle_section( self, section: _schema.MachOSection, group: _stencils.StencilGroup ) -> None: assert section["Address"] >= len(group.code.body) - assert "SectionData" in section + if "SectionData" in section: + section_data_bytes = section["SectionData"]["Bytes"] + else: + # Zeroed BSS data: + section_data_bytes = [0] * section["Size"] flags = {flag["Name"] for flag in section["Attributes"]["Flags"]} name = section["Name"]["Value"] name = name.removeprefix(self.symbol_prefix) @@ -483,23 +488,23 @@ def _handle_section( if "PureInstructions" in flags: value = _stencils.HoleValue.CODE stencil = group.code - start_address = 0 - group.symbols[name] = value, section["Address"] - start_address else: value = _stencils.HoleValue.DATA stencil = group.data - start_address = len(group.code.body) - group.symbols[name] = value, len(group.code.body) - base = section["Address"] - start_address + segment = section["Segment"]["Value"] + if segment == "__DATA": + value = _stencils.HoleValue.WRITABLE + section_data_bytes = [] + else: + assert segment == "__TEXT", segment + base = len(stencil.body) + group.symbols[name] = value, base group.symbols[section["Index"]] = value, base - stencil.body.extend( - [0] * (section["Address"] - len(group.code.body) - len(group.data.body)) - ) - stencil.body.extend(section["SectionData"]["Bytes"]) + stencil.body.extend(section_data_bytes) assert "Symbols" in section for wrapped_symbol in section["Symbols"]: symbol = wrapped_symbol["Symbol"] - offset = symbol["Value"] - start_address + offset = symbol["Value"] - section["Address"] + base name = symbol["Name"]["Name"] name = name.removeprefix(self.symbol_prefix) group.symbols[name] = value, offset From c6fc7bd8a2d5c1982ad5bb290ec1356e9e90ded8 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 17 Jul 2025 16:26:21 -0700 Subject: [PATCH 10/11] Rework tests --- Lib/test/test_jit_stencils.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_jit_stencils.py b/Lib/test/test_jit_stencils.py index 72dee974f8c6c6..2a7a19a057d90f 100644 --- a/Lib/test/test_jit_stencils.py +++ b/Lib/test/test_jit_stencils.py @@ -1,9 +1,10 @@ +import asyncio import pathlib import shlex -import sys import sysconfig import tempfile import test.support +import test.test_tools import test.support.script_helper import unittest @@ -13,11 +14,13 @@ _TOOLS_JIT_TEST_TEST_EXECUTOR_CASES_C_H = _TOOLS_JIT_TEST / "test_executor_cases.c.h" _TOOLS_JIT_BUILD_PY = _TOOLS_JIT / "build.py" +test.test_tools.skip_if_missing("jit") +with test.test_tools.imports_under_tool("jit"): + import _llvm @test.support.cpython_only @unittest.skipIf(test.support.Py_DEBUG, "Debug stencils aren't tested.") @unittest.skipIf(test.support.Py_GIL_DISABLED, "Free-threaded stencils aren't tested.") -@unittest.skipUnless(sysconfig.is_python_build(), "Requires a local Python build.") class TestJITStencils(unittest.TestCase): def _build_jit_stencils(self, target: str) -> str: @@ -31,6 +34,9 @@ def _build_jit_stencils(self, target: str) -> str: "--pyconfig-dir", pyconfig_h.parent, target, __isolated=False, + # Windows leaks temporary files on failure because the JIT build + # process is async. This forces it to be "sync" for this test: + PYTHON_CPU_COUNT="1", ) if result.rc: self.skipTest(f"Build failed: {shlex.join(map(str, args))}") @@ -54,6 +60,8 @@ def _check_jit_stencils( raise def test_jit_stencils(self): + if not asyncio.run(_llvm._find_tool("clang")): + self.skipTest(f"LLVM {_llvm._LLVM_VERSION} isn't installed.") self.maxDiff = None found = False for test_jit_stencils_h in _TOOLS_JIT_TEST.glob("test_jit_stencils-*.h"): @@ -64,7 +72,7 @@ def test_jit_stencils(self): found = True self._check_jit_stencils(expected, actual, test_jit_stencils_h) # This is a local build. If the JIT is available, at least one test should run: - assert found or not sys._jit.is_available(), "No JIT stencils built!" + assert found, "No JIT stencils built!" if __name__ == "__main__": From 867a686038cdf7ffe85f7d0e39cf64e689c8a3d2 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Fri, 18 Jul 2025 12:06:33 -0700 Subject: [PATCH 11/11] Cleanup for PR --- Lib/test/test_jit_stencils.py | 3 +-- Tools/jit/_targets.py | 40 +++++++++++++++++------------------ 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/Lib/test/test_jit_stencils.py b/Lib/test/test_jit_stencils.py index 2a7a19a057d90f..5a8c4ecf28ca98 100644 --- a/Lib/test/test_jit_stencils.py +++ b/Lib/test/test_jit_stencils.py @@ -71,8 +71,7 @@ def test_jit_stencils(self): actual = self._build_jit_stencils(target) found = True self._check_jit_stencils(expected, actual, test_jit_stencils_h) - # This is a local build. If the JIT is available, at least one test should run: - assert found, "No JIT stencils built!" + self.assertTrue(found, "No JIT stencils built!") if __name__ == "__main__": diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index bc57c623b0acd8..29c5cad9e7eedc 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -393,30 +393,30 @@ def _handle_section( elif section_type in {"SHT_PROGBITS", "SHT_NOBITS"}: if "SHF_ALLOC" not in flags: return + if "SectionData" in section: + section_data_bytes = section["SectionData"]["Bytes"] + else: + # Zeroed BSS data: + section_data_bytes = [0] * section["Size"] if "SHF_EXECINSTR" in flags: value = _stencils.HoleValue.CODE stencil = group.code else: value = _stencils.HoleValue.DATA stencil = group.data - if section_type == "SHT_PROGBITS": - assert "SectionData" in section - section_data_bytes = section["SectionData"]["Bytes"] - else: - # Zeroed BSS data: - section_data_bytes = [0] * section["Size"] if "SHF_WRITE" in flags: assert value is _stencils.HoleValue.DATA value = _stencils.HoleValue.WRITABLE section_data_bytes = [] - group.symbols[section["Index"]] = value, len(stencil.body) + base = len(stencil.body) + group.symbols[section["Index"]] = value, base + stencil.body.extend(section_data_bytes) for wrapped_symbol in section["Symbols"]: symbol = wrapped_symbol["Symbol"] - offset = len(stencil.body) + symbol["Value"] + offset = base + symbol["Value"] name = symbol["Name"]["Name"] name = name.removeprefix(self.symbol_prefix) group.symbols[name] = value, offset - stencil.body.extend(section_data_bytes) assert not section["Relocations"] else: assert section_type in { @@ -474,15 +474,12 @@ class _MachO( def _handle_section( self, section: _schema.MachOSection, group: _stencils.StencilGroup ) -> None: - assert section["Address"] >= len(group.code.body) if "SectionData" in section: section_data_bytes = section["SectionData"]["Bytes"] else: # Zeroed BSS data: section_data_bytes = [0] * section["Size"] flags = {flag["Name"] for flag in section["Attributes"]["Flags"]} - name = section["Name"]["Value"] - name = name.removeprefix(self.symbol_prefix) if "Debug" in flags: return if "PureInstructions" in flags: @@ -492,19 +489,20 @@ def _handle_section( value = _stencils.HoleValue.DATA stencil = group.data segment = section["Segment"]["Value"] + assert segment in {"__DATA", "__TEXT"}, segment if segment == "__DATA": value = _stencils.HoleValue.WRITABLE section_data_bytes = [] - else: - assert segment == "__TEXT", segment base = len(stencil.body) - group.symbols[name] = value, base group.symbols[section["Index"]] = value, base stencil.body.extend(section_data_bytes) + name = section["Name"]["Value"] + name = name.removeprefix(self.symbol_prefix) + group.symbols[name] = value, base assert "Symbols" in section for wrapped_symbol in section["Symbols"]: symbol = wrapped_symbol["Symbol"] - offset = symbol["Value"] - section["Address"] + base + offset = base + symbol["Value"] - section["Address"] name = symbol["Name"]["Name"] name = name.removeprefix(self.symbol_prefix) group.symbols[name] = value, offset @@ -589,23 +587,23 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO: target = _MachO(host, condition, optimizer=optimizer) elif re.fullmatch(r"aarch64-pc-windows-msvc", host): host = "aarch64-pc-windows-msvc" - args = ["-fms-runtime-lib=dll", "-fplt"] condition = "defined(_M_ARM64)" + args = ["-fms-runtime-lib=dll", "-fplt"] optimizer = _optimizers.OptimizerAArch64 target = _COFF64(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"aarch64-.*-linux-gnu", host): host = "aarch64-unknown-linux-gnu" + condition = "defined(__aarch64__) && defined(__linux__)" # -mno-outline-atomics: Keep intrinsics from being emitted. args = ["-fpic", "-mno-outline-atomics"] - condition = "defined(__aarch64__) && defined(__linux__)" optimizer = _optimizers.OptimizerAArch64 target = _ELF(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"i686-pc-windows-msvc", host): host = "i686-pc-windows-msvc" + condition = "defined(_M_IX86)" # -Wno-ignored-attributes: __attribute__((preserve_none)) is not supported here. args = ["-DPy_NO_ENABLE_SHARED", "-Wno-ignored-attributes"] optimizer = _optimizers.OptimizerX86 - condition = "defined(_M_IX86)" target = _COFF32(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"x86_64-apple-darwin.*", host): host = "x86_64-apple-darwin" @@ -614,14 +612,14 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO: target = _MachO(host, condition, optimizer=optimizer) elif re.fullmatch(r"x86_64-pc-windows-msvc", host): host = "x86_64-pc-windows-msvc" - args = ["-fms-runtime-lib=dll"] condition = "defined(_M_X64)" + args = ["-fms-runtime-lib=dll"] optimizer = _optimizers.OptimizerX86 target = _COFF64(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"x86_64-.*-linux-gnu", host): host = "x86_64-unknown-linux-gnu" - args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"] condition = "defined(__x86_64__) && defined(__linux__)" + args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"] optimizer = _optimizers.OptimizerX86 target = _ELF(host, condition, args=args, optimizer=optimizer) else: