Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Plug leaking function_records in cpp_function initialization in case of exceptions (found by Valgrind in #2746) #2756

Merged
merged 9 commits into from
Jan 14, 2021
98 changes: 74 additions & 24 deletions include/pybind11/pybind11.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,16 @@ class cpp_function : public function {
object name() const { return attr("__name__"); }

protected:
struct InitializingFunctionRecordDeleter {
// `destruct(function_record, false)`: `initialize_generic` copies strings and
// takes care of cleaning up in case of exceptions. So pass `false` to `free_strings`.
void operator()(detail::function_record * rec) { destruct(rec, false); }
};
using unique_function_record = std::unique_ptr<detail::function_record, InitializingFunctionRecordDeleter>;

/// Space optimization: don't inline this frequently instantiated fragment
PYBIND11_NOINLINE detail::function_record *make_function_record() {
return new detail::function_record();
PYBIND11_NOINLINE unique_function_record make_function_record() {
return unique_function_record(new detail::function_record());
}

/// Special internal constructor for functors, lambda functions, etc.
Expand All @@ -126,7 +133,9 @@ class cpp_function : public function {
struct capture { remove_reference_t<Func> f; };

/* Store the function including any extra state it might have (e.g. a lambda capture object) */
auto rec = make_function_record();
// The unique_ptr makes sure nothing is leaked in case of an exception.
auto unique_rec = make_function_record();
YannickJadoul marked this conversation as resolved.
Show resolved Hide resolved
auto rec = unique_rec.get();

/* Store the capture object directly in the function record if there is enough space */
if (sizeof(capture) <= sizeof(rec->data)) {
Expand Down Expand Up @@ -207,7 +216,8 @@ class cpp_function : public function {
PYBIND11_DESCR_CONSTEXPR auto types = decltype(signature)::types();

/* Register the function with Python from generic (non-templated) code */
initialize_generic(rec, signature.text, types.data(), sizeof...(Args));
// Pass on the ownership over the `unique_rec` to `initialize_generic`. `rec` stays valid.
YannickJadoul marked this conversation as resolved.
Show resolved Hide resolved
initialize_generic(std::move(unique_rec), signature.text, types.data(), sizeof...(Args));

if (cast_in::has_args) rec->has_args = true;
if (cast_in::has_kwargs) rec->has_kwargs = true;
Expand All @@ -223,20 +233,51 @@ class cpp_function : public function {
}
}

// Utility class that keeps track of all duplicated strings, and cleans them up in its destructor,
// unless they are released. Basically a RAII-solution to deal with exceptions along the way.
class strdup_guard {
public:
~strdup_guard() {
for (auto s : strings)
std::free(s);
}
char *operator()(const char *s) {
auto t = strdup(s);
strings.push_back(t);
return t;
}
void release() {
strings.clear();
YannickJadoul marked this conversation as resolved.
Show resolved Hide resolved
}
private:
std::vector<char *> strings;
};

/// Register a function call with Python (generic non-templated code goes here)
void initialize_generic(detail::function_record *rec, const char *text,
void initialize_generic(unique_function_record &&unique_rec, const char *text,
YannickJadoul marked this conversation as resolved.
Show resolved Hide resolved
const std::type_info *const *types, size_t args) {
// Do NOT receive `unique_rec` by value. If this function fails to move out the unique_ptr,
// we do not want this to destuct the pointer. `initialize` (the caller) still relies on the
// pointee being alive after this call. Only move out if a `capsule` is going to keep it alive.
auto rec = unique_rec.get();

// Keep track of strdup'ed strings, and clean them up as long as the function's capsule
// has not taken ownership yet (when `unique_rec.release()` is called).
// Note: This cannot easily be fixed by a `unique_ptr` with custom deleter, because the strings
// are only referenced before strdup'ing. So only *after* the following block could `destruct`
// safely be called, but even then, `repr` could still throw in the middle of copying all strings.
strdup_guard guarded_strdup;

/* Create copies of all referenced C-style strings */
rec->name = strdup(rec->name ? rec->name : "");
if (rec->doc) rec->doc = strdup(rec->doc);
rec->name = guarded_strdup(rec->name ? rec->name : "");
if (rec->doc) rec->doc = guarded_strdup(rec->doc);
for (auto &a: rec->args) {
if (a.name)
a.name = strdup(a.name);
a.name = guarded_strdup(a.name);
if (a.descr)
a.descr = strdup(a.descr);
a.descr = guarded_strdup(a.descr);
else if (a.value)
a.descr = strdup(repr(a.value).cast<std::string>().c_str());
a.descr = guarded_strdup(repr(a.value).cast<std::string>().c_str());
}

rec->is_constructor = !strcmp(rec->name, "__init__") || !strcmp(rec->name, "__setstate__");
Expand Down Expand Up @@ -319,13 +360,13 @@ class cpp_function : public function {
#if PY_MAJOR_VERSION < 3
if (strcmp(rec->name, "__next__") == 0) {
std::free(rec->name);
rec->name = strdup("next");
rec->name = guarded_strdup("next");
} else if (strcmp(rec->name, "__bool__") == 0) {
std::free(rec->name);
rec->name = strdup("__nonzero__");
rec->name = guarded_strdup("__nonzero__");
}
#endif
rec->signature = strdup(signature.c_str());
rec->signature = guarded_strdup(signature.c_str());
rec->args.shrink_to_fit();
rec->nargs = (std::uint16_t) args;

Expand Down Expand Up @@ -356,9 +397,10 @@ class cpp_function : public function {
rec->def->ml_meth = reinterpret_cast<PyCFunction>(reinterpret_cast<void (*) (void)>(*dispatcher));
rec->def->ml_flags = METH_VARARGS | METH_KEYWORDS;

capsule rec_capsule(rec, [](void *ptr) {
capsule rec_capsule(unique_rec.release(), [](void *ptr) {
destruct((detail::function_record *) ptr);
});
guarded_strdup.release();

object scope_module;
if (rec->scope) {
Expand Down Expand Up @@ -393,13 +435,15 @@ class cpp_function : public function {
chain_start = rec;
rec->next = chain;
auto rec_capsule = reinterpret_borrow<capsule>(((PyCFunctionObject *) m_ptr)->m_self);
rec_capsule.set_pointer(rec);
rec_capsule.set_pointer(unique_rec.release());
guarded_strdup.release();
} else {
// Or end of chain (normal behavior)
chain_start = chain;
while (chain->next)
chain = chain->next;
chain->next = rec;
chain->next = unique_rec.release();
guarded_strdup.release();
}
}

Expand Down Expand Up @@ -452,7 +496,7 @@ class cpp_function : public function {
}

/// When a cpp_function is GCed, release any memory allocated by pybind11
static void destruct(detail::function_record *rec) {
static void destruct(detail::function_record *rec, bool free_strings = true) {
// If on Python 3.9, check the interpreter "MICRO" (patch) version.
// If this is running on 3.9.0, we have to work around a bug.
#if !defined(PYPY_VERSION) && PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION == 9
Expand All @@ -463,14 +507,20 @@ class cpp_function : public function {
detail::function_record *next = rec->next;
if (rec->free_data)
rec->free_data(rec);
std::free((char *) rec->name);
std::free((char *) rec->doc);
std::free((char *) rec->signature);
for (auto &arg: rec->args) {
std::free(const_cast<char *>(arg.name));
std::free(const_cast<char *>(arg.descr));
arg.value.dec_ref();
// During initialization, these strings might not have been copied yet,
// so they cannot be freed. Once the function has been created, they can.
// Check `make_function_record` for more details.
if (free_strings) {
std::free((char *) rec->name);
std::free((char *) rec->doc);
std::free((char *) rec->signature);
for (auto &arg: rec->args) {
std::free(const_cast<char *>(arg.name));
std::free(const_cast<char *>(arg.descr));
}
}
for (auto &arg: rec->args)
arg.value.dec_ref();
if (rec->def) {
std::free(const_cast<char *>(rec->def->ml_doc));
// Python 3.9.0 decref's these in the wrong order; rec->def
Expand Down
15 changes: 15 additions & 0 deletions tests/test_constants_and_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,4 +124,19 @@ TEST_SUBMODULE(constants_and_functions, m) {
m.def("f2", f2);
m.def("f3", f3);
m.def("f4", f4);

// test_function_record_leaks
struct LargeCapture {
// This should always be enough to trigger the alternative branch
// where `sizeof(capture) > sizeof(rec->data)`
uint64_t zeros[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
};
m.def("register_large_capture_with_invalid_arguments", [](py::module_ m) {
LargeCapture capture; // VS 2015's MSVC is acting up if we create the array here
m.def("should_raise", [capture](int) { return capture.zeros[9] + 33; }, py::kw_only(), py::arg());
});
m.def("register_with_raising_repr", [](py::module_ m, py::object default_value) {
m.def("should_raise", [](int, int, py::object) { return 42; }, "some docstring",
py::arg_v("x", 42), py::arg_v("y", 42, "<the answer>"), py::arg_v("z", default_value));
});
}
11 changes: 11 additions & 0 deletions tests/test_constants_and_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,14 @@ def test_exception_specifiers():
assert m.f2(53) == 55
assert m.f3(86) == 89
assert m.f4(140) == 144


def test_function_record_leaks():
class RaisingRepr:
def __repr__(self):
raise RuntimeError("Surprise!")

with pytest.raises(RuntimeError):
m.register_large_capture_with_invalid_arguments(m)
with pytest.raises(RuntimeError):
m.register_with_raising_repr(m, RaisingRepr())