diff --git a/Include/cpython/sysmodule.h b/Include/cpython/sysmodule.h
index df12ae440f024b..9fd7cc0cb43931 100644
--- a/Include/cpython/sysmodule.h
+++ b/Include/cpython/sysmodule.h
@@ -21,3 +21,6 @@ PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(
     unsigned int code_size,
     const char *entry_name);
 PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void);
+PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename);
+PyAPI_FUNC(int) PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *);
+PyAPI_FUNC(int) PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable);
diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h
index 1717ec4f41c36b..072bbcda0c3c82 100644
--- a/Include/internal/pycore_ceval_state.h
+++ b/Include/internal/pycore_ceval_state.h
@@ -55,6 +55,7 @@ struct _ceval_runtime_state {
         struct code_arena_st *code_arena;
         struct trampoline_api_st trampoline_api;
         FILE *map_file;
+        Py_ssize_t persist_after_fork;
 #else
         int _not_used;
 #endif
@@ -68,6 +69,7 @@ struct _ceval_runtime_state {
     { \
         .status = PERF_STATUS_NO_INIT, \
         .extra_code_index = -1, \
+        .persist_after_fork = 0, \
     }
 #else
 # define _PyEval_RUNTIME_PERF_INIT {0}
diff --git a/Include/sysmodule.h b/Include/sysmodule.h
index 7406513ec1439a..7b14f72ee2e494 100644
--- a/Include/sysmodule.h
+++ b/Include/sysmodule.h
@@ -1,6 +1,3 @@
-
-/* System module interface */
-
 #ifndef Py_SYSMODULE_H
 #define Py_SYSMODULE_H
 #ifdef __cplusplus
diff --git a/Lib/test/test_perf_profiler.py b/Lib/test/test_perf_profiler.py
index fe8707a156e9dc..040be63da11447 100644
--- a/Lib/test/test_perf_profiler.py
+++ b/Lib/test/test_perf_profiler.py
@@ -353,6 +353,82 @@ def baz(n):
             self.assertNotIn(f"py::bar:{script}", stdout)
             self.assertNotIn(f"py::baz:{script}", stdout)
 
+    def test_pre_fork_compile(self):
+        code = """if 1:
+                import sys
+                import os
+                import sysconfig
+                from _testinternalcapi import (
+                    compile_perf_trampoline_entry,
+                    perf_trampoline_set_persist_after_fork,
+                )
+
+                def foo_fork():
+                    pass
+
+                def bar_fork():
+                    foo_fork()
+
+                def foo():
+                    pass
+
+                def bar():
+                    foo()
+
+                def compile_trampolines_for_all_functions():
+                    perf_trampoline_set_persist_after_fork(1)
+                    for _, obj in globals().items():
+                        if callable(obj) and hasattr(obj, '__code__'):
+                            compile_perf_trampoline_entry(obj.__code__)
+
+                if __name__ == "__main__":
+                    compile_trampolines_for_all_functions()
+                    pid = os.fork()
+                    if pid == 0:
+                        print(os.getpid())
+                        bar_fork()
+                    else:
+                        bar()
+                """
+
+        with temp_dir() as script_dir:
+            script = make_script(script_dir, "perftest", code)
+            with subprocess.Popen(
+                [sys.executable, "-Xperf", script],
+                universal_newlines=True,
+                stderr=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+            ) as process:
+                stdout, stderr = process.communicate()
+
+        self.assertEqual(process.returncode, 0)
+        self.assertNotIn("Error:", stderr)
+        child_pid = int(stdout.strip())
+        perf_file = pathlib.Path(f"/tmp/perf-{process.pid}.map")
+        perf_child_file = pathlib.Path(f"/tmp/perf-{child_pid}.map")
+        self.assertTrue(perf_file.exists())
+        self.assertTrue(perf_child_file.exists())
+
+        perf_file_contents = perf_file.read_text()
+        self.assertIn(f"py::foo:{script}", perf_file_contents)
+        self.assertIn(f"py::bar:{script}", perf_file_contents)
+        self.assertIn(f"py::foo_fork:{script}", perf_file_contents)
+        self.assertIn(f"py::bar_fork:{script}", perf_file_contents)
+
+        child_perf_file_contents = perf_child_file.read_text()
+        self.assertIn(f"py::foo_fork:{script}", child_perf_file_contents)
+        self.assertIn(f"py::bar_fork:{script}", child_perf_file_contents)
+
+        # Pre-compiled perf-map entries of a forked process must be
+        # identical in both the parent and child perf-map files.
+        perf_file_lines = perf_file_contents.split("\n")
+        for line in perf_file_lines:
+            if (
+                f"py::foo_fork:{script}" in line
+                or f"py::bar_fork:{script}" in line
+            ):
+                self.assertIn(line, child_perf_file_contents)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/C API/2023-10-02-23-08-53.gh-issue-109587.UqqnDY.rst b/Misc/NEWS.d/next/C API/2023-10-02-23-08-53.gh-issue-109587.UqqnDY.rst
new file mode 100644
index 00000000000000..c6fa24f024c20c
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2023-10-02-23-08-53.gh-issue-109587.UqqnDY.rst
@@ -0,0 +1,2 @@
+Introduced :c:func:`PyUnstable_PerfTrampoline_CompileCode`, :c:func:`PyUnstable_PerfTrampoline_SetPersistAfterFork` and
+:c:func:`PyUnstable_CopyPerfMapFile`. These functions allow extension modules to initialize trampolines eagerly, after the application is "warmed up". This makes it possible to have perf-trampolines running in an always-enabled fashion.
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 4ead1b6bea7fae..1869f48c2b1fbf 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -1556,6 +1556,36 @@ _testinternalcapi_test_long_numbits_impl(PyObject *module)
     Py_RETURN_NONE;
 }
 
+static PyObject *
+compile_perf_trampoline_entry(PyObject *self, PyObject *args)
+{
+    PyObject *co;
+    if (!PyArg_ParseTuple(args, "O!", &PyCode_Type, &co)) {
+        return NULL;
+    }
+    int ret = PyUnstable_PerfTrampoline_CompileCode((PyCodeObject *)co);
+    if (ret != 0) {
+        PyErr_SetString(PyExc_AssertionError, "Failed to compile trampoline");
+        return NULL;
+    }
+    return PyLong_FromLong(ret);
+}
+
+static PyObject *
+perf_trampoline_set_persist_after_fork(PyObject *self, PyObject *args)
+{
+    int enable;
+    if (!PyArg_ParseTuple(args, "i", &enable)) {
+        return NULL;
+    }
+    int ret = PyUnstable_PerfTrampoline_SetPersistAfterFork(enable);
+    if (ret == 0) {
+        PyErr_SetString(PyExc_AssertionError, "Failed to set persist_after_fork");
+        return NULL;
+    }
+    return PyLong_FromLong(ret);
+}
+
 
 static PyMethodDef module_functions[] = {
     {"get_configs", get_configs, METH_NOARGS},
@@ -1613,6 +1643,8 @@ static PyMethodDef module_functions[] = {
     {"run_in_subinterp_with_config",
      _PyCFunction_CAST(run_in_subinterp_with_config),
      METH_VARARGS | METH_KEYWORDS},
+    {"compile_perf_trampoline_entry", compile_perf_trampoline_entry, METH_VARARGS},
+    {"perf_trampoline_set_persist_after_fork", perf_trampoline_set_persist_after_fork, METH_VARARGS},
     _TESTINTERNALCAPI_WRITE_UNRAISABLE_EXC_METHODDEF
     _TESTINTERNALCAPI_TEST_LONG_NUMBITS_METHODDEF
     {NULL, NULL} /* sentinel */
diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c
index 209a23b6c1cbc7..491223924ed7f2 100644
--- a/Python/perf_trampoline.c
+++ b/Python/perf_trampoline.c
@@ -193,7 +193,7 @@ typedef struct trampoline_api_st trampoline_api_t;
 #define perf_code_arena _PyRuntime.ceval.perf.code_arena
 #define trampoline_api _PyRuntime.ceval.perf.trampoline_api
 #define perf_map_file _PyRuntime.ceval.perf.map_file
-
+#define persist_after_fork _PyRuntime.ceval.perf.persist_after_fork
 
 static void
 perf_map_write_entry(void *state, const void *code_addr,
@@ -361,6 +361,32 @@ py_trampoline_evaluator(PyThreadState *ts, _PyInterpreterFrame *frame,
 }
 #endif // PY_HAVE_PERF_TRAMPOLINE
 
+/* Compile a perf trampoline for `co` and store it in the code object's
+ * extra slot, unless one is already stored there.
+ *
+ * Returns 0 on success (and on builds without PY_HAVE_PERF_TRAMPOLINE),
+ * nonzero if the trampoline could not be compiled or stored. */
+int PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *co)
+{
+#ifdef PY_HAVE_PERF_TRAMPOLINE
+    py_trampoline f = NULL;
+    assert(extra_code_index != -1);
+    int ret = _PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f);
+    if (ret != 0 || f == NULL) {
+        py_trampoline new_trampoline = compile_trampoline();
+        if (new_trampoline == NULL) {
+            // Report the failure; returning 0 here would claim success.
+            return -1;
+        }
+        trampoline_api.write_state(trampoline_api.state, new_trampoline,
+                                   perf_code_arena->code_size, co);
+        return _PyCode_SetExtra((PyObject *)co, extra_code_index,
+                                (void *)new_trampoline);
+    }
+#endif // PY_HAVE_PERF_TRAMPOLINE
+    return 0;
+}
+
 int
 _PyIsPerfTrampolineActive(void)
 {
@@ -448,16 +474,38 @@ _PyPerfTrampoline_Fini(void)
     return 0;
 }
 
+/* Select what a forked child does with perf maps: nonzero makes
+ * _PyPerfTrampoline_AfterFork_Child copy the parent's map file, zero makes
+ * it restart the trampoline if it was active.  Returns the new setting, or
+ * 0 on builds without PY_HAVE_PERF_TRAMPOLINE. */
+int
+PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable){
+#ifdef PY_HAVE_PERF_TRAMPOLINE
+    persist_after_fork = enable;
+    return persist_after_fork;
+#endif
+    return 0;
+}
+
 PyStatus
 _PyPerfTrampoline_AfterFork_Child(void)
 {
 #ifdef PY_HAVE_PERF_TRAMPOLINE
-    // Restart trampoline in file in child.
-    int was_active = _PyIsPerfTrampolineActive();
-    _PyPerfTrampoline_Fini();
     PyUnstable_PerfMapState_Fini();
-    if (was_active) {
-        _PyPerfTrampoline_Init(1);
+    if (persist_after_fork) {
+        char filename[256];
+        pid_t parent_pid = getppid();
+        snprintf(filename, sizeof(filename), "/tmp/perf-%d.map", parent_pid);
+        if (PyUnstable_CopyPerfMapFile(filename) != 0) {
+            return PyStatus_Error("Failed to copy perf map file.");
+        }
+    } else {
+        // Restart trampoline in file in child.
+        int was_active = _PyIsPerfTrampolineActive();
+        _PyPerfTrampoline_Fini();
+        if (was_active) {
+            _PyPerfTrampoline_Init(1);
+        }
     }
 #endif
     return PyStatus_Ok();
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 3debe7f7c139c6..4008a28ad7bd8a 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -2361,7 +2361,7 @@ PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(
 #ifndef MS_WINDOWS
     if (perf_map_state.perf_map == NULL) {
         int ret = PyUnstable_PerfMapState_Init();
-        if(ret != 0){
+        if (ret != 0) {
             return ret;
         }
     }
@@ -2388,6 +2388,57 @@ PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void) {
 #endif
 }
 
+/* Copy the contents of `parent_filename` into this process's perf map file,
+ * initializing the perf map state first if needed.
+ *
+ * Returns 0 on success (always on Windows), nonzero on failure. */
+PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename) {
+#ifndef MS_WINDOWS
+    // Set up the destination before opening the source so that an
+    // initialization failure cannot leak the source FILE*.
+    if (perf_map_state.perf_map == NULL) {
+        int ret = PyUnstable_PerfMapState_Init();
+        if (ret != 0) {
+            return ret;
+        }
+    }
+    FILE* from = fopen(parent_filename, "r");
+    if (!from) {
+        return -1;
+    }
+    char buf[4096];
+    int result = 0;
+    PyThread_acquire_lock(perf_map_state.map_lock, 1);
+    while (1) {
+        size_t bytes_read = fread(buf, 1, sizeof(buf), from);
+        if (bytes_read > 0
+            && fwrite(buf, 1, bytes_read, perf_map_state.perf_map) < bytes_read) {
+            result = -1;
+            break;
+        }
+        if (bytes_read < sizeof(buf)) {
+            // A short read is either a clean end-of-file (possibly after
+            // zero bytes) or a read error; only the latter is a failure.
+            if (ferror(from)) {
+                result = -1;
+            }
+            break;
+        }
+    }
+    if (fflush(perf_map_state.perf_map) != 0) {
+        result = -1;
+    }
+    fclose(from);
+    PyThread_release_lock(perf_map_state.map_lock);
+    return result;
+#endif
+    return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
 
 static PyMethodDef sys_methods[] = {
     /* Might as well keep this in alphabetic order */