-
Notifications
You must be signed in to change notification settings - Fork 2.3k
Destruct internals during interpreter finalization #5958
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
6f8308b
50c319d
cb5a5a5
f5676ea
d12be34
959b1e1
717e99f
5f9c30b
2848fd6
75bd292
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -103,7 +103,7 @@ class thread_specific_storage { | |
| // However, in GraalPy (as of v24.2 or older), TSS is implemented by Java and this call | ||
| // requires a living Python interpreter. | ||
| #ifdef GRAALVM_PYTHON | ||
| if (!Py_IsInitialized() || _Py_IsFinalizing()) { | ||
| if (Py_IsInitialized() == 0 || _Py_IsFinalizing() != 0) { | ||
| return; | ||
| } | ||
| #endif | ||
|
|
@@ -195,6 +195,14 @@ struct override_hash { | |
|
|
||
| using instance_map = std::unordered_multimap<const void *, instance *>; | ||
|
|
||
| inline bool is_interpreter_alive() { | ||
| #if PY_VERSION_HEX < 0x030D0000 | ||
| return Py_IsInitialized() != 0 || _Py_IsFinalizing() != 0; | ||
| #else | ||
| return Py_IsInitialized() != 0 || Py_IsFinalizing() != 0; | ||
| #endif | ||
| } | ||
|
|
||
| #ifdef Py_GIL_DISABLED | ||
| // Wrapper around PyMutex to provide BasicLockable semantics | ||
| class pymutex { | ||
|
|
@@ -308,7 +316,17 @@ struct internals { | |
| internals(internals &&other) = delete; | ||
| internals &operator=(const internals &other) = delete; | ||
| internals &operator=(internals &&other) = delete; | ||
| ~internals() = default; | ||
| ~internals() { | ||
| // Normally this destructor runs during interpreter finalization and it may DECREF things. | ||
| // In odd finalization scenarios it might end up running after the interpreter has | ||
| // completely shut down, In that case, we should not decref these objects because pymalloc | ||
| // is gone. | ||
| if (is_interpreter_alive()) { | ||
| Py_CLEAR(instance_base); | ||
| Py_CLEAR(default_metaclass); | ||
| Py_CLEAR(static_property_type); | ||
| } | ||
| } | ||
| }; | ||
|
|
||
| // the internals struct (above) is shared between all the modules. local_internals are only | ||
|
|
@@ -325,6 +343,16 @@ struct local_internals { | |
|
|
||
| std::forward_list<ExceptionTranslator> registered_exception_translators; | ||
| PyTypeObject *function_record_py_type = nullptr; | ||
|
|
||
| ~local_internals() { | ||
| // Normally this destructor runs during interpreter finalization and it may DECREF things. | ||
| // In odd finalization scenarios it might end up running after the interpreter has | ||
| // completely shut down, In that case, we should not decref these objects because pymalloc | ||
| // is gone. | ||
| if (is_interpreter_alive()) { | ||
| Py_CLEAR(function_record_py_type); | ||
| } | ||
| } | ||
| }; | ||
|
|
||
| enum class holder_enum_t : uint8_t { | ||
|
|
@@ -569,7 +597,7 @@ inline object get_python_state_dict() { | |
| // The bool follows std::map::insert convention: true = created, false = existed. | ||
| template <typename Payload> | ||
| std::pair<Payload *, bool> atomic_get_or_create_in_state_dict(const char *key, | ||
| bool clear_destructor = false) { | ||
| void (*dtor)(PyObject *) = nullptr) { | ||
| error_scope err_scope; // preserve any existing Python error states | ||
|
|
||
| auto state_dict = reinterpret_borrow<dict>(get_python_state_dict()); | ||
|
|
@@ -586,16 +614,13 @@ std::pair<Payload *, bool> atomic_get_or_create_in_state_dict(const char *key, | |
| // Use unique_ptr for exception safety: if capsule creation throws, the storage is | ||
| // automatically deleted. | ||
| auto storage_ptr = std::unique_ptr<Payload>(new Payload{}); | ||
| // Create capsule with destructor to clean up when the interpreter shuts down. | ||
| auto new_capsule = capsule( | ||
| storage_ptr.get(), | ||
| // The destructor will be called when the capsule is GC'ed. | ||
| // - If our capsule is inserted into the dict below, it will be kept alive until | ||
| // interpreter shutdown, so the destructor will be called at that time. | ||
| // - If our capsule is NOT inserted (another thread inserted first), it will be | ||
| // destructed when going out of scope here, so the destructor will be called | ||
| // immediately, which will also free the storage. | ||
| /*destructor=*/[](void *ptr) -> void { delete static_cast<Payload *>(ptr); }); | ||
| auto new_capsule | ||
| = capsule(storage_ptr.get(), | ||
| // The destructor will be called when the capsule is GC'ed. | ||
| // If the insert below fails (entry already in the dict), then this | ||
| // destructor will be called on the newly created capsule at the end of this | ||
| // function, and we want to just release this memory. | ||
| /*destructor=*/[](void *v) { delete static_cast<Payload *>(v); }); | ||
| // At this point, the capsule object is created successfully. | ||
| // Release the unique_ptr and let the capsule object own the storage to avoid double-free. | ||
| (void) storage_ptr.release(); | ||
|
|
@@ -613,17 +638,16 @@ std::pair<Payload *, bool> atomic_get_or_create_in_state_dict(const char *key, | |
| throw error_already_set(); | ||
| } | ||
| created = (capsule_obj == new_capsule.ptr()); | ||
| if (clear_destructor && created) { | ||
| // Our capsule was inserted. | ||
| // Remove the destructor to leak the storage on interpreter shutdown. | ||
| if (PyCapsule_SetDestructor(capsule_obj, nullptr) < 0) { | ||
| // - If key already existed, our `new_capsule` is not inserted, it will be destructed when | ||
| // going out of scope here, and will call the destructor set above. | ||
| // - Otherwise, our `new_capsule` is now in the dict, and it owns the storage and the state | ||
| // dict will incref it. We need to set the caller's destructor on it, which will be | ||
| // called when the interpreter shuts down. | ||
| if (created && dtor) { | ||
| if (PyCapsule_SetDestructor(capsule_obj, dtor) < 0) { | ||
| throw error_already_set(); | ||
| } | ||
| } | ||
| // - If key already existed, our `new_capsule` is not inserted, it will be destructed when | ||
| // going out of scope here, which will also free the storage. | ||
| // - Otherwise, our `new_capsule` is now in the dict, and it owns the storage and the state | ||
| // dict will incref it. | ||
| } | ||
|
|
||
| // Get the storage pointer from the capsule. | ||
|
|
@@ -707,14 +731,27 @@ class internals_pp_manager { | |
| internals_pp_manager(char const *id, on_fetch_function *on_fetch) | ||
| : holder_id_(id), on_fetch_(on_fetch) {} | ||
|
|
||
| static void internals_shutdown(PyObject *capsule) { | ||
| auto *pp = static_cast<std::unique_ptr<InternalsType> *>( | ||
| PyCapsule_GetPointer(capsule, nullptr)); | ||
| if (pp) { | ||
| pp->reset(); | ||
| } | ||
| // We reset the unique_ptr's contents but cannot delete the unique_ptr itself here. | ||
| // The pp_manager in this module (and possibly other modules sharing internals) holds | ||
| // a raw pointer to this unique_ptr, and that pointer would dangle if we deleted it now. | ||
| // | ||
| // For pybind11-owned interpreters (via embed.h or subinterpreter.h), destroy() is | ||
| // called after Py_Finalize/Py_EndInterpreter completes, which safely deletes the | ||
| // unique_ptr. For interpreters not owned by pybind11 (e.g., a pybind11 extension | ||
| // loaded into an external interpreter), destroy() is never called and the unique_ptr | ||
| // shell (8 bytes, not its contents) is leaked. | ||
| // (See PR #5958 for ideas to eliminate this leak.) | ||
| } | ||
|
|
||
| std::unique_ptr<InternalsType> *get_or_create_pp_in_state_dict() { | ||
| // The `unique_ptr<InternalsType>` output is leaked on interpreter shutdown. Once an | ||
| // instance is created, it will never be deleted until the process exits (compare to | ||
| // interpreter shutdown in multiple-interpreter scenarios). | ||
| // Because we cannot guarantee the order of destruction of capsules in the interpreter | ||
| // state dict, leaking avoids potential use-after-free issues during interpreter shutdown. | ||
|
Comment on lines
-711
to
-715
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Freeing the unique_ptr content is causing use-after-free segmentation faults. Because the order of GC is not guaranteed. https://github.com/metaopt/optree/actions/runs/21203776041/job/60995252956?pr=245#step:18:229 libc++abi: terminating due to uncaught exception of type pybind11::cast_error: Unable to cast Python instance of type <class 'optree.PyTreeSpec'> to C++ type '?' #7 0x00007fb9cc4a5a55 in std::terminate() () from /lib/x86_64-linux-gnu/libstdc++.so.6
No symbol table info available.
#8 0x00007fb9cc4bb391 in __cxa_throw () from /lib/x86_64-linux-gnu/libstdc++.so.6
No symbol table info available.
#9 0x00007fb9cc7b3990 in pybind11::detail::load_type<optree::PyTreeSpec, void> (conv=...,
handle=...) at /usr/include/c++/13/bits/allocator.h:184
No locals.
#10 0x00007fb9cc7cdfca in pybind11::detail::load_type<optree::PyTreeSpec&> (handle=...)
at /tmp/pip-build-env-5okmu3ac/overlay/lib/python3.13t/site-packages/pybind11/include/pybind11/cast.h:1655
conv = {<pybind11::detail::type_caster_base<optree::PyTreeSpec>> = {<pybind11::detail::type_caster_generic> = {typeinfo = 0x0, cpptype = 0x7fb9cc83a580 <typeinfo for optree::PyTreeSpec>,
value = 0x0}, static name = {text = "%"}}, <No data fields>}
#11 0x00007fb9cc7cdffb in pybind11::cast<optree::PyTreeSpec&, 0> (handle=...)
at /tmp/pip-build-env-5okmu3ac/overlay/lib/python3.13t/site-packages/pybind11/include/pybind11/cast.h:1680
is_enum_cast = false
#12 0x00007fb9cc7ce04d in thread_safe_cast<optree::PyTreeSpec&> (handle=...)
at /home/runner/work/optree/optree/include/optree/synchronization.h:182
No locals.
#13 0x00007fb9cc7f5876 in optree::PyTreeSpec::PyTpTraverse (self_base=0x20002ce0630,
visit=0x7fb9ceba61c2 <visit_decref>, arg=0x0)
at /home/runner/work/optree/optree/src/treespec/gc.cpp:40
self = <optimized out>
__PRETTY_FUNCTION__ = "static int optree::PyTreeSpec::PyTpTraverse(PyObject*, visitproc, void*)"See also: https://github.com/pybind/pybind11/blob/master/docs/advanced/classes.rst#custom-type-setup
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @XuehaiPan could you help by extracting a reproducer from your situation? That'll be worth gold. Without reproducers these mind-bending issues will just keep coming back.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Running this same stack with My guess is that the This is not a use-after-free segv, this is an unhandled/uncaught exception.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have found that a version bump is necessary for the internal version (used in the key in the interpreter state dict). In [1]: import env
In [2]: from pybind11_tests import custom_type_setup as m
In [3]: import torch
In [4]: obj = m.OwnsPythonObjects()
In [5]: obj.value = obj
In [6]: exit
[1] 85789 segmentation fault ipython
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The problem is that
Each interpreter must not have more than two Here is my patch: diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h
index e2dee77e..26cc8cb7 100644
--- a/include/pybind11/detail/internals.h
+++ b/include/pybind11/detail/internals.h
@@ -20,6 +20,7 @@
#include <atomic>
#include <cstdint>
#include <exception>
+#include <fstream>
#include <limits>
#include <mutex>
#include <thread>
@@ -294,6 +295,11 @@ struct internals {
internals()
: static_property_type(make_static_property_type()),
default_metaclass(make_default_metaclass()) {
+ // Debug logging
+ {
+ std::ofstream dbg("debug.txt", std::ios::app);
+ dbg << "internals::internals() ctor called, this=" << this << "\n";
+ }
tstate.set(nullptr); // See PR #5870
PyThreadState *cur_tstate = PyThreadState_Get();
@@ -317,6 +323,11 @@ struct internals {
internals &operator=(const internals &other) = delete;
internals &operator=(internals &&other) = delete;
~internals() {
+ // Debug logging
+ {
+ std::ofstream dbg("debug.txt", std::ios::app);
+ dbg << "internals::~internals() dtor called, this=" << this << "\n";
+ }
// Normally this destructor runs during interpreter finalization and it may DECREF things.
// In odd finalization scenarios it might end up running after the interpreter has
// completely shut down, In that case, we should not decref these objects because pymallocOutput: internals::internals() ctor called, this=0xca4c508c0 # Original created
internals::~internals() dtor called, this=0xca4c508c0 # Destroyed by pp->reset()
internals::internals() ctor called, this=0xca1e3f480 # NEW empty one created!
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is expected behavior of this change whenever something accesses We could do a We could decide to back out some of this change and stay with a slightly more leaky behavior of pybind11 internals. We did not address all of the leaks yet, |
||
| auto result = atomic_get_or_create_in_state_dict<std::unique_ptr<InternalsType>>( | ||
| holder_id_, /*clear_destructor=*/true); | ||
| holder_id_, &internals_shutdown); | ||
| auto *pp = result.first; | ||
| bool created = result.second; | ||
| // Only call on_fetch_ when fetching existing internals, not when creating new ones. | ||
|
|
@@ -834,6 +871,17 @@ inline auto with_internals(const F &cb) -> decltype(cb(get_internals())) { | |
| return cb(internals); | ||
| } | ||
|
|
||
| template <typename F> | ||
| inline void with_internals_if_internals(const F &cb) { | ||
| auto &ppmgr = get_internals_pp_manager(); | ||
| auto &internals_ptr = *ppmgr.get_pp(); | ||
| if (internals_ptr) { | ||
| auto &internals = *internals_ptr; | ||
| PYBIND11_LOCK_INTERNALS(internals); | ||
| cb(internals); | ||
| } | ||
| } | ||
|
|
||
| template <typename F> | ||
| inline auto with_exception_translators(const F &cb) | ||
| -> decltype(cb(get_internals().registered_exception_translators, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am sad to report that (AFAICT) this is still not quite correct.
Py_IsInitializedandPy_IsFinalizingcheck whether the whole runtime is initialized/finalizing, not whether a specific interpreter is. It can be invalid to DECREF an object from a particular interpreter even though the runtime is still active.Py_IsFinalizingremains true even afterPy_Finalizereturns. It does not reset to false unlessPy_Initializeis later called again.I think the correct solution is a
internals::leak_detach()method which will set the relevant members to NULL, without decref'ing them, so that a later destructor invocation won't call into the CPython API. Theninternals_pp_manager::destroy()can call that method if the internals_pp still contains a valid pointer, before destroying it.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll point Cursor to this, the pybind11 sources, and the CPython 3.14 sources, and ask it to look very thoroughly to figure out what is the best achievable solution in terms of avoiding UB but leaking as little as possible. I'll report here what it finds.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
d'ooh, I should've caught this before merging it. I just opened #5965 for a possible solution to this by checking the current interpreter instead of the "is alive" check.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All details and the results are posted here: PR #5966
@b-pass please take what's useful for your PR #5965 (I haven't looked there yet)