From 3334e29668ad4570f73cb9e8d7b0d6bae2a50ef5 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sun, 5 Oct 2025 17:37:42 +0200 Subject: [PATCH 1/3] gh-139400: Make sure that parent parsers outlive their subparsers in `pyexpat` (#139403) * Modules/pyexpat.c: Disallow collection of in-use parent parsers. Within libexpat, a parser created via `XML_ExternalEntityParserCreate` is relying on its parent parser throughout its entire lifetime. Prior to this fix, is was possible for the parent parser to be garbage-collected too early. (cherry picked from commit 6edb2ddb5f3695cf4938979d645f31d7fba43ec8) --- Lib/test/test_pyexpat.py | 36 +++++++++++++++++++ ...-09-29-00-01-28.gh-issue-139400.X2T-jO.rst | 4 +++ Modules/pyexpat.c | 27 +++++++++++++- 3 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2025-09-29-00-01-28.gh-issue-139400.X2T-jO.rst diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 5212c7a704725e..4e269d4ad3945c 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -730,6 +730,42 @@ def resolve_entity(context, base, system_id, public_id): self.assertEqual(handler_call_args, [("bar", "baz")]) +class ParentParserLifetimeTest(unittest.TestCase): + """ + Subparsers make use of their parent XML_Parser inside of Expat. + As a result, parent parsers need to outlive subparsers. + + See https://github.com/python/cpython/issues/139400. + """ + + def test_parent_parser_outlives_its_subparsers__single(self): + parser = expat.ParserCreate() + subparser = parser.ExternalEntityParserCreate(None) + + # Now try to cause garbage collection of the parent parser + # while it's still being referenced by a related subparser. + del parser + + def test_parent_parser_outlives_its_subparsers__multiple(self): + parser = expat.ParserCreate() + subparser_one = parser.ExternalEntityParserCreate(None) + subparser_two = parser.ExternalEntityParserCreate(None) + + # Now try to cause garbage collection of the parent parser + # while it's still being referenced by a related subparser. + del parser + + def test_parent_parser_outlives_its_subparsers__chain(self): + parser = expat.ParserCreate() + subparser = parser.ExternalEntityParserCreate(None) + subsubparser = subparser.ExternalEntityParserCreate(None) + + # Now try to cause garbage collection of the parent parsers + # while they are still being referenced by a related subparser. + del parser + del subparser + + class ReparseDeferralTest(unittest.TestCase): def test_getter_setter_round_trip(self): parser = expat.ParserCreate() diff --git a/Misc/NEWS.d/next/Core and Builtins/2025-09-29-00-01-28.gh-issue-139400.X2T-jO.rst b/Misc/NEWS.d/next/Core and Builtins/2025-09-29-00-01-28.gh-issue-139400.X2T-jO.rst new file mode 100644 index 00000000000000..a5dea3b5f8147a --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2025-09-29-00-01-28.gh-issue-139400.X2T-jO.rst @@ -0,0 +1,4 @@ +:mod:`xml.parsers.expat`: Make sure that parent Expat parsers are only +garbage-collected once they are no longer referenced by subparsers created +by :meth:`~xml.parsers.expat.xmlparser.ExternalEntityParserCreate`. +Patch by Sebastian Pipping. diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index e0c3e100d51d57..426a951d92998b 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -58,6 +58,15 @@ typedef struct { PyObject_HEAD XML_Parser itself; + /* + * Strong reference to a parent `xmlparseobject` if this parser + * is a child parser. Set to NULL if this parser is a root parser. + * This is needed to keep the parent parser alive as long as it has + * at least one child parser. + * + * See https://github.com/python/cpython/issues/139400 for details. + */ + PyObject *parent; int ordered_attributes; /* Return attributes as a list. */ int specified_attributes; /* Report only specified attributes. */ int in_callback; /* Is a callback active? */ @@ -967,6 +976,12 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self, new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype); if (new_parser == NULL) return NULL; + + // The new subparser will make use of the parent XML_Parser inside of Expat. + // So we need to take subparsers into account with the reference counting + // of their parent parser. + Py_INCREF(self); + new_parser->buffer_size = self->buffer_size; new_parser->buffer_used = 0; new_parser->buffer = NULL; @@ -976,6 +991,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self, new_parser->ns_prefixes = self->ns_prefixes; new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context, encoding); + new_parser->parent = (PyObject *)self; new_parser->handlers = 0; new_parser->intern = self->intern; Py_XINCREF(new_parser->intern); @@ -984,11 +1000,13 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self, new_parser->buffer = PyMem_Malloc(new_parser->buffer_size); if (new_parser->buffer == NULL) { Py_DECREF(new_parser); + Py_DECREF(self); return PyErr_NoMemory(); } } if (!new_parser->itself) { Py_DECREF(new_parser); + Py_DECREF(self); return PyErr_NoMemory(); } @@ -1001,6 +1019,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self, new_parser->handlers = PyMem_New(PyObject *, i); if (!new_parser->handlers) { Py_DECREF(new_parser); + Py_DECREF(self); return PyErr_NoMemory(); } clear_handlers(new_parser, 1); @@ -1175,6 +1194,7 @@ newxmlparseobject(const char *encoding, const char *namespace_separator, PyObjec /* namespace_separator is either NULL or contains one char + \0 */ self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler, namespace_separator); + self->parent = NULL; if (self->itself == NULL) { PyErr_SetString(PyExc_RuntimeError, "XML_ParserCreate failed"); @@ -1204,7 +1224,6 @@ newxmlparseobject(const char *encoding, const char *namespace_separator, PyObjec return (PyObject*)self; } - static void xmlparse_dealloc(xmlparseobject *self) { @@ -1213,6 +1232,7 @@ xmlparse_dealloc(xmlparseobject *self) if (self->itself != NULL) XML_ParserFree(self->itself); self->itself = NULL; + Py_CLEAR(self->parent); if (self->handlers != NULL) { for (i = 0; handler_info[i].name != NULL; i++) @@ -1499,6 +1519,7 @@ xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg) int i; for (i = 0; handler_info[i].name != NULL; i++) Py_VISIT(op->handlers[i]); + Py_VISIT(op->parent); return 0; } @@ -1507,6 +1528,10 @@ xmlparse_clear(xmlparseobject *op) { clear_handlers(op, 0); Py_CLEAR(op->intern); + // NOTE: We cannot call Py_CLEAR(op->parent) prior to calling + // XML_ParserFree(op->itself), or a subparser could lose its parent + // XML_Parser while still making use of it internally. + // https://github.com/python/cpython/issues/139400 return 0; } From f72a0886cf13949df2ca146a88f0ac8e20e04a74 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sun, 5 Oct 2025 18:47:28 +0200 Subject: [PATCH 2/3] Restore blank line, deletion was not intended --- Modules/pyexpat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 426a951d92998b..50d74c65ba0be6 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1224,6 +1224,7 @@ newxmlparseobject(const char *encoding, const char *namespace_separator, PyObjec return (PyObject*)self; } + static void xmlparse_dealloc(xmlparseobject *self) { From 33b6a7f4d0ddd6101b4eb36b9c12df9e75cd6aba Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Mon, 6 Oct 2025 00:53:34 +0200 Subject: [PATCH 3/3] Move news item to from section "Core and Builtins" to section "Security" --- .../2025-09-29-00-01-28.gh-issue-139400.X2T-jO.rst | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Misc/NEWS.d/next/{Core and Builtins => Security}/2025-09-29-00-01-28.gh-issue-139400.X2T-jO.rst (100%) diff --git a/Misc/NEWS.d/next/Core and Builtins/2025-09-29-00-01-28.gh-issue-139400.X2T-jO.rst b/Misc/NEWS.d/next/Security/2025-09-29-00-01-28.gh-issue-139400.X2T-jO.rst similarity index 100% rename from Misc/NEWS.d/next/Core and Builtins/2025-09-29-00-01-28.gh-issue-139400.X2T-jO.rst rename to Misc/NEWS.d/next/Security/2025-09-29-00-01-28.gh-issue-139400.X2T-jO.rst