From 11185017b6cdf51543e4481593b12b5653ce47c3 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Wed, 17 Sep 2025 16:33:55 +0200 Subject: [PATCH] gh-128627: Use __builtin_wasm_test_function_pointer_signature for Emscripten trampoline (GH-137470) With https://github.com/llvm/llvm-project/pull/150201 being merged, there is now a better way to generate the Emscripten trampoline, instead of including hand-generated binary WASM content. Requires Emscripten 4.0.12. (cherry picked from commit 2629ee4eb0e9f66f28772cb4b4cc44ebe6de79ad) Co-authored-by: Hood Chatham --- Include/internal/pycore_runtime_structs.h | 6 - Makefile.pre.in | 6 + Python/emscripten_trampoline.c | 211 +++--------------- Python/emscripten_trampoline_inner.c | 38 ++++ Python/pystate.c | 6 - Tools/c-analyzer/cpython/_parser.py | 1 + .../wasm/emscripten/prepare_external_wasm.py | 54 +++++ configure | 4 +- configure.ac | 4 +- 9 files changed, 129 insertions(+), 201 deletions(-) create mode 100644 Python/emscripten_trampoline_inner.c create mode 100644 Tools/wasm/emscripten/prepare_external_wasm.py diff --git a/Include/internal/pycore_runtime_structs.h b/Include/internal/pycore_runtime_structs.h index 6bf3aae7175a97..c34d7e7edc0715 100644 --- a/Include/internal/pycore_runtime_structs.h +++ b/Include/internal/pycore_runtime_structs.h @@ -279,12 +279,6 @@ struct pyruntimestate { struct _types_runtime_state types; struct _Py_time_runtime_state time; -#if defined(__EMSCRIPTEN__) && defined(PY_CALL_TRAMPOLINE) - // Used in "Python/emscripten_trampoline.c" to choose between type - // reflection trampoline and EM_JS trampoline. - int (*emscripten_count_args_function)(PyCFunctionWithKeywords func); -#endif - /* All the objects that are shared by the runtime's interpreters. */ struct _Py_cached_objects cached_objects; struct _Py_static_objects static_objects; diff --git a/Makefile.pre.in b/Makefile.pre.in index 01e10d1ab209ae..764eef5be3ed2b 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -3101,6 +3101,12 @@ config.status: $(srcdir)/configure Python/asm_trampoline.o: $(srcdir)/Python/asm_trampoline.S $(CC) -c $(PY_CORE_CFLAGS) -o $@ $< +Python/emscripten_trampoline_inner.wasm: $(srcdir)/Python/emscripten_trampoline_inner.c + # emcc has a path that ends with emsdk/upstream/emscripten/emcc, we're looking for emsdk/upstream/bin/clang. + $$(dirname $$(dirname $(CC)))/bin/clang -o $@ $< -mgc -O2 -Wl,--no-entry -Wl,--import-table -Wl,--import-memory -target wasm32-unknown-unknown -nostdlib + +Python/emscripten_trampoline_wasm.c: Python/emscripten_trampoline_inner.wasm + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/wasm/emscripten/prepare_external_wasm.py $< $@ getWasmTrampolineModule JIT_DEPS = \ $(srcdir)/Tools/jit/*.c \ diff --git a/Python/emscripten_trampoline.c b/Python/emscripten_trampoline.c index 75b98a047234d8..d61146504d0959 100644 --- a/Python/emscripten_trampoline.c +++ b/Python/emscripten_trampoline.c @@ -2,75 +2,20 @@ #include // EM_JS, EM_JS_DEPS #include -#include "pycore_runtime.h" // _PyRuntime -typedef int (*CountArgsFunc)(PyCFunctionWithKeywords func); - -// Offset of emscripten_count_args_function in _PyRuntimeState. There's a couple -// of alternatives: -// 1. Just make emscripten_count_args_function a real C global variable instead -// of a field of _PyRuntimeState. This would violate our rule against mutable -// globals. -// 2. #define a preprocessor constant equal to a hard coded number and make a -// _Static_assert(offsetof(_PyRuntimeState, emscripten_count_args_function) -// == OURCONSTANT) This has the disadvantage that we have to update the hard -// coded constant when _PyRuntimeState changes -// -// So putting the mutable constant in _PyRuntime and using a immutable global to -// record the offset so we can access it from JS is probably the best way. -EMSCRIPTEN_KEEPALIVE const int _PyEM_EMSCRIPTEN_COUNT_ARGS_OFFSET = offsetof(_PyRuntimeState, emscripten_count_args_function); - -EM_JS(CountArgsFunc, _PyEM_GetCountArgsPtr, (), { - return Module._PyEM_CountArgsPtr; // initialized below +EM_JS( +PyObject*, +_PyEM_TrampolineCall_inner, (int* success, + PyCFunctionWithKeywords func, + PyObject *arg1, + PyObject *arg2, + PyObject *arg3), { + // JavaScript fallback trampoline + return wasmTable.get(func)(arg1, arg2, arg3); } -// Binary module for the checks. It has to be done in web assembly because -// clang/llvm have no support yet for the reference types yet. In fact, the wasm -// binary toolkit doesn't yet support the ref.test instruction either. To -// convert the following textual wasm to a binary, you can build wabt from this -// branch: https://github.com/WebAssembly/wabt/pull/2529 and then use that -// wat2wasm binary. -// -// (module -// (type $type0 (func (param) (result i32))) -// (type $type1 (func (param i32) (result i32))) -// (type $type2 (func (param i32 i32) (result i32))) -// (type $type3 (func (param i32 i32 i32) (result i32))) -// (type $blocktype (func (param) (result))) -// (table $funcs (import "e" "t") 0 funcref) -// (export "f" (func $f)) -// (func $f (param $fptr i32) (result i32) -// (local $fref funcref) -// local.get $fptr -// table.get $funcs -// local.tee $fref -// ref.test $type3 -// if $blocktype -// i32.const 3 -// return -// end -// local.get $fref -// ref.test $type2 -// if $blocktype -// i32.const 2 -// return -// end -// local.get $fref -// ref.test $type1 -// if $blocktype -// i32.const 1 -// return -// end -// local.get $fref -// ref.test $type0 -// if $blocktype -// i32.const 0 -// return -// end -// i32.const -1 -// ) -// ) - -function getPyEMCountArgsPtr() { +// Try to replace the JS definition of _PyEM_TrampolineCall_inner with a wasm +// version. +(function () { // Starting with iOS 18.3.1, WebKit on iOS has an issue with the garbage // collector that breaks the call trampoline. See #130418 and // https://bugs.webkit.org/show_bug.cgi?id=293113 for details. @@ -82,137 +27,33 @@ function getPyEMCountArgsPtr() { (navigator.platform === 'MacIntel' && typeof navigator.maxTouchPoints !== 'undefined' && navigator.maxTouchPoints > 1) ); if (isIOS) { - return 0; + return; } - - // Try to initialize countArgsFunc - const code = new Uint8Array([ - 0x00, 0x61, 0x73, 0x6d, // \0asm magic number - 0x01, 0x00, 0x00, 0x00, // version 1 - 0x01, 0x1a, // Type section, body is 0x1a bytes - 0x05, // 6 entries - 0x60, 0x00, 0x01, 0x7f, // (type $type0 (func (param) (result i32))) - 0x60, 0x01, 0x7f, 0x01, 0x7f, // (type $type1 (func (param i32) (result i32))) - 0x60, 0x02, 0x7f, 0x7f, 0x01, 0x7f, // (type $type2 (func (param i32 i32) (result i32))) - 0x60, 0x03, 0x7f, 0x7f, 0x7f, 0x01, 0x7f, // (type $type3 (func (param i32 i32 i32) (result i32))) - 0x60, 0x00, 0x00, // (type $blocktype (func (param) (result))) - 0x02, 0x09, // Import section, 0x9 byte body - 0x01, // 1 import (table $funcs (import "e" "t") 0 funcref) - 0x01, 0x65, // "e" - 0x01, 0x74, // "t" - 0x01, // importing a table - 0x70, // of entry type funcref - 0x00, 0x00, // table limits: no max, min of 0 - 0x03, 0x02, // Function section - 0x01, 0x01, // We're going to define one function of type 1 (func (param i32) (result i32)) - 0x07, 0x05, // export section - 0x01, // 1 export - 0x01, 0x66, // called "f" - 0x00, // a function - 0x00, // at index 0 - - 0x0a, 56, // Code section, - 0x01, 54, // one entry of length 54 - 0x01, 0x01, 0x70, // one local of type funcref - // Body of the function - 0x20, 0x00, // local.get $fptr - 0x25, 0x00, // table.get $funcs - 0x22, 0x01, // local.tee $fref - 0xfb, 0x14, 0x03, // ref.test $type3 - 0x04, 0x04, // if (type $blocktype) - 0x41, 0x03, // i32.const 3 - 0x0f, // return - 0x0b, // end block - - 0x20, 0x01, // local.get $fref - 0xfb, 0x14, 0x02, // ref.test $type2 - 0x04, 0x04, // if (type $blocktype) - 0x41, 0x02, // i32.const 2 - 0x0f, // return - 0x0b, // end block - - 0x20, 0x01, // local.get $fref - 0xfb, 0x14, 0x01, // ref.test $type1 - 0x04, 0x04, // if (type $blocktype) - 0x41, 0x01, // i32.const 1 - 0x0f, // return - 0x0b, // end block - - 0x20, 0x01, // local.get $fref - 0xfb, 0x14, 0x00, // ref.test $type0 - 0x04, 0x04, // if (type $blocktype) - 0x41, 0x00, // i32.const 0 - 0x0f, // return - 0x0b, // end block - - 0x41, 0x7f, // i32.const -1 - 0x0b // end function - ]); try { - const mod = new WebAssembly.Module(code); - const inst = new WebAssembly.Instance(mod, { e: { t: wasmTable } }); - return addFunction(inst.exports.f); + const trampolineModule = getWasmTrampolineModule(); + const trampolineInstance = new WebAssembly.Instance(trampolineModule, { + env: { __indirect_function_table: wasmTable, memory: wasmMemory }, + }); + _PyEM_TrampolineCall_inner = trampolineInstance.exports.trampoline_call; } catch (e) { - // If something goes wrong, we'll null out _PyEM_CountFuncParams and fall - // back to the JS trampoline. - return 0; + // Compilation error due to missing wasm-gc support, fall back to JS + // trampoline } -} - -addOnPreRun(() => { - const ptr = getPyEMCountArgsPtr(); - Module._PyEM_CountArgsPtr = ptr; - const offset = HEAP32[__PyEM_EMSCRIPTEN_COUNT_ARGS_OFFSET / 4]; - HEAP32[(__PyRuntime + offset) / 4] = ptr; -}); +})(); ); -void -_Py_EmscriptenTrampoline_Init(_PyRuntimeState *runtime) -{ - runtime->emscripten_count_args_function = _PyEM_GetCountArgsPtr(); -} - -// We have to be careful to work correctly with memory snapshots. Even if we are -// loading a memory snapshot, we need to perform the JS initialization work. -// That means we can't call the initialization code from C. Instead, we export -// this function pointer to JS and then fill it in a preRun function which runs -// unconditionally. -/** - * Backwards compatible trampoline works with all JS runtimes - */ -EM_JS(PyObject*, _PyEM_TrampolineCall_JS, (PyCFunctionWithKeywords func, PyObject *arg1, PyObject *arg2, PyObject *arg3), { - return wasmTable.get(func)(arg1, arg2, arg3); -}); - -typedef PyObject* (*zero_arg)(void); -typedef PyObject* (*one_arg)(PyObject*); -typedef PyObject* (*two_arg)(PyObject*, PyObject*); -typedef PyObject* (*three_arg)(PyObject*, PyObject*, PyObject*); - PyObject* _PyEM_TrampolineCall(PyCFunctionWithKeywords func, PyObject* self, PyObject* args, PyObject* kw) { - CountArgsFunc count_args = _PyRuntime.emscripten_count_args_function; - if (count_args == 0) { - return _PyEM_TrampolineCall_JS(func, self, args, kw); - } - switch (count_args(func)) { - case 0: - return ((zero_arg)func)(); - case 1: - return ((one_arg)func)(self); - case 2: - return ((two_arg)func)(self, args); - case 3: - return ((three_arg)func)(self, args, kw); - default: - PyErr_SetString(PyExc_SystemError, "Handler takes too many arguments"); - return NULL; + int success = 1; + PyObject *result = _PyEM_TrampolineCall_inner(&success, func, self, args, kw); + if (!success) { + PyErr_SetString(PyExc_SystemError, "Handler takes too many arguments"); } + return result; } #endif diff --git a/Python/emscripten_trampoline_inner.c b/Python/emscripten_trampoline_inner.c new file mode 100644 index 00000000000000..a2bad4857ed089 --- /dev/null +++ b/Python/emscripten_trampoline_inner.c @@ -0,0 +1,38 @@ +// This file must be compiled with -mgc to enable the extra wasm-gc +// instructions. It has to be compiled separately because not enough JS runtimes +// support wasm-gc yet. If the JS runtime does not support wasm-gc (or has buggy +// support like iOS), we will use the JS trampoline fallback. + +// We can't import Python.h here because it is compiled/linked with -nostdlib. +// We don't need to know what's inside PyObject* anyways. We could just call it +// void* everywhere. There are two reasons to do this: +// 1. to improve readability +// 2. eventually when we are comfortable requiring wasm-gc, we can merge this +// into emscripten_trampoline.c without worrying about it. +typedef void PyObject; + +typedef PyObject* (*three_arg)(PyObject*, PyObject*, PyObject*); +typedef PyObject* (*two_arg)(PyObject*, PyObject*); +typedef PyObject* (*one_arg)(PyObject*); +typedef PyObject* (*zero_arg)(void); + +#define TRY_RETURN_CALL(ty, args...) \ + if (__builtin_wasm_test_function_pointer_signature((ty)func)) { \ + return ((ty)func)(args); \ + } + +__attribute__((export_name("trampoline_call"))) PyObject* +trampoline_call(int* success, + void* func, + PyObject* self, + PyObject* args, + PyObject* kw) +{ + *success = 1; + TRY_RETURN_CALL(three_arg, self, args, kw); + TRY_RETURN_CALL(two_arg, self, args); + TRY_RETURN_CALL(one_arg, self); + TRY_RETURN_CALL(zero_arg); + *success = 0; + return 0; +} diff --git a/Python/pystate.c b/Python/pystate.c index 02621f36d91c8a..ed4d50850468b9 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -8,7 +8,6 @@ #include "pycore_codecs.h" // _PyCodec_Fini() #include "pycore_critical_section.h" // _PyCriticalSection_Resume() #include "pycore_dtoa.h" // _dtoa_state_INIT() -#include "pycore_emscripten_trampoline.h" // _Py_EmscriptenTrampoline_Init() #include "pycore_freelist.h" // _PyObject_ClearFreeLists() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interpframe.h" // _PyThreadState_HasStackSpace() @@ -434,11 +433,6 @@ init_runtime(_PyRuntimeState *runtime, runtime->main_thread = PyThread_get_thread_ident(); runtime->unicode_state.ids.next_index = unicode_next_index; - -#if defined(__EMSCRIPTEN__) && defined(PY_CALL_TRAMPOLINE) - _Py_EmscriptenTrampoline_Init(runtime); -#endif - runtime->_initialized = 1; } diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index 1e754040eaf1cc..7201e76c43a1d4 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -67,6 +67,7 @@ def clean_lines(text): Python/dynload_hpux.c # dl.h Python/emscripten_signal.c Python/emscripten_syscalls.c +Python/emscripten_trampoline_inner.c Python/thread_pthread.h Python/thread_pthread_stubs.h diff --git a/Tools/wasm/emscripten/prepare_external_wasm.py b/Tools/wasm/emscripten/prepare_external_wasm.py new file mode 100644 index 00000000000000..960e5aefd24eb5 --- /dev/null +++ b/Tools/wasm/emscripten/prepare_external_wasm.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 + +import argparse +import sys +from pathlib import Path + +JS_TEMPLATE = """ +#include "emscripten.h" + +EM_JS(void, {function_name}, (void), {{ + return new WebAssembly.Module(hexStringToUTF8Array("{hex_string}")); +}} +function hexStringToUTF8Array(hex) {{ + const bytes = []; + for (let i = 0; i < hex.length; i += 2) {{ + bytes.push(parseInt(hex.substr(i, 2), 16)); + }} + return new Uint8Array(bytes); +}}); +""" + +def prepare_wasm(input_file, output_file, function_name): + # Read the compiled WASM as binary and convert to hex + wasm_bytes = Path(input_file).read_bytes() + + hex_string = "".join(f"{byte:02x}" for byte in wasm_bytes) + + # Generate JavaScript module + js_content = JS_TEMPLATE.format( + function_name=function_name, hex_string=hex_string + ) + Path(output_file).write_text(js_content) + + print( + f"Successfully compiled {input_file} and generated {output_file}" + ) + return 0 + + +def main(): + parser = argparse.ArgumentParser( + description="Compile WebAssembly text files using wasm-as" + ) + parser.add_argument("input_file", help="Input .wat file to compile") + parser.add_argument("output_file", help="Output file name") + parser.add_argument("function_name", help="Name of the export function") + + args = parser.parse_args() + + return prepare_wasm(args.input_file, args.output_file, args.function_name) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/configure b/configure index 345822a6c6df76..6383271b477fd1 100755 --- a/configure +++ b/configure @@ -9604,7 +9604,7 @@ fi as_fn_append LINKFORSHARED " -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js" as_fn_append LINKFORSHARED " -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV,HEAPU32,TTY" - as_fn_append LINKFORSHARED " -sEXPORTED_FUNCTIONS=_main,_Py_Version,__PyRuntime,__PyEM_EMSCRIPTEN_COUNT_ARGS_OFFSET,_PyGILState_GetThisThreadState,__Py_DumpTraceback" + as_fn_append LINKFORSHARED " -sEXPORTED_FUNCTIONS=_main,_Py_Version,__PyRuntime,_PyGILState_GetThisThreadState,__Py_DumpTraceback" as_fn_append LINKFORSHARED " -sSTACK_SIZE=5MB" as_fn_append LINKFORSHARED " -sTEXTDECODER=2" @@ -19077,7 +19077,7 @@ PLATFORM_OBJS= case $ac_sys_system in #( Emscripten) : - as_fn_append PLATFORM_OBJS ' Python/emscripten_signal.o Python/emscripten_trampoline.o Python/emscripten_syscalls.o' + as_fn_append PLATFORM_OBJS ' Python/emscripten_signal.o Python/emscripten_trampoline.o Python/emscripten_trampoline_wasm.o Python/emscripten_syscalls.o' as_fn_append PLATFORM_HEADERS ' $(srcdir)/Include/internal/pycore_emscripten_signal.h $(srcdir)/Include/internal/pycore_emscripten_trampoline.h' ;; #( *) : diff --git a/configure.ac b/configure.ac index d6059471771871..42d94776cc7ced 100644 --- a/configure.ac +++ b/configure.ac @@ -2336,7 +2336,7 @@ AS_CASE([$ac_sys_system], dnl Include file system support AS_VAR_APPEND([LINKFORSHARED], [" -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js"]) AS_VAR_APPEND([LINKFORSHARED], [" -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV,HEAPU32,TTY"]) - AS_VAR_APPEND([LINKFORSHARED], [" -sEXPORTED_FUNCTIONS=_main,_Py_Version,__PyRuntime,__PyEM_EMSCRIPTEN_COUNT_ARGS_OFFSET,_PyGILState_GetThisThreadState,__Py_DumpTraceback"]) + AS_VAR_APPEND([LINKFORSHARED], [" -sEXPORTED_FUNCTIONS=_main,_Py_Version,__PyRuntime,_PyGILState_GetThisThreadState,__Py_DumpTraceback"]) AS_VAR_APPEND([LINKFORSHARED], [" -sSTACK_SIZE=5MB"]) dnl Avoid bugs in JS fallback string decoding path AS_VAR_APPEND([LINKFORSHARED], [" -sTEXTDECODER=2"]) @@ -5131,7 +5131,7 @@ PLATFORM_OBJS= AS_CASE([$ac_sys_system], [Emscripten], [ - AS_VAR_APPEND([PLATFORM_OBJS], [' Python/emscripten_signal.o Python/emscripten_trampoline.o Python/emscripten_syscalls.o']) + AS_VAR_APPEND([PLATFORM_OBJS], [' Python/emscripten_signal.o Python/emscripten_trampoline.o Python/emscripten_trampoline_wasm.o Python/emscripten_syscalls.o']) AS_VAR_APPEND([PLATFORM_HEADERS], [' $(srcdir)/Include/internal/pycore_emscripten_signal.h $(srcdir)/Include/internal/pycore_emscripten_trampoline.h']) ], )