From 5e04f8ac73c42f6afa33a19a8dcf30b219c76293 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 25 Oct 2025 21:26:18 +0200 Subject: [PATCH 1/7] pyexpat: Add news item for issue 140593 --- .../Library/2025-10-25-21-26-16.gh-issue-140593.OxlLc9.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-10-25-21-26-16.gh-issue-140593.OxlLc9.rst diff --git a/Misc/NEWS.d/next/Library/2025-10-25-21-26-16.gh-issue-140593.OxlLc9.rst b/Misc/NEWS.d/next/Library/2025-10-25-21-26-16.gh-issue-140593.OxlLc9.rst new file mode 100644 index 00000000000000..612ad82dc64309 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-10-25-21-26-16.gh-issue-140593.OxlLc9.rst @@ -0,0 +1,3 @@ +:mod:`xml.parsers.expat`: Fix a memory leak that could affect users with +:meth:`~xml.parsers.expat.xmlparser.ElementDeclHandler` set to a custom +element declaration handler. Patch by Sebastian Pipping. From caae331a8af920cdd7394d7bb72b6d645f07e49b Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 25 Oct 2025 21:27:55 +0200 Subject: [PATCH 2/7] pyexpat: Fix mistaken bypass of call to XML_FreeContentModel --- Modules/pyexpat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 9c252be9cf22b2..e9255038eee5b5 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -642,7 +642,7 @@ my_ElementDeclHandler(void *userData, PyObject *modelobj, *nameobj; if (PyErr_Occurred()) - return; + goto finally; if (flush_character_buffer(self) < 0) goto finally; From b1ddc3b50cf1409608326afb186bddccbaa0971d Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 25 Oct 2025 21:37:42 +0200 Subject: [PATCH 3/7] pyexpat: Add test for pytest memleak issue 140593 --- Lib/test/test_pyexpat.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index b4ce72dfd51774..7ff5bef05d2490 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -718,6 +718,24 @@ def test_expaterror(self): self.assertEqual(e.code, errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN]) +class ElementDeclHandlerCleanUpLeakTest(unittest.TestCase): + + def test_trigger_leak(self): + # Unfixed, this test would leak 32 to 56 bytes of memory + # https://github.com/python/cpython/issues/140593 + data = textwrap.dedent('''\ + + ]> + + ''').encode('UTF-8') + + parser = expat.ParserCreate() + parser.NotStandaloneHandler = lambda: 1.234 # arbitrary float + parser.ElementDeclHandler = lambda _1, _2: None + with self.assertRaises(TypeError): + parser.Parse(data, True) + class ForeignDTDTests(unittest.TestCase): """ From fcf2f31431d8a90b1c63157299e03f8ef0046d2c Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sun, 26 Oct 2025 14:09:15 +0100 Subject: [PATCH 4/7] Move and rename the test class + add full stop to comment --- Lib/test/test_pyexpat.py | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 7ff5bef05d2490..adcce8ed6bf0e0 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -684,6 +684,23 @@ def test_change_size_2(self): parser.Parse(xml2, True) self.assertEqual(self.n, 4) +class ElementDeclHandlerTest(unittest.TestCase): + def test_trigger_leak(self): + # Unfixed, this test would leak 32 to 56 bytes of memory. + # https://github.com/python/cpython/issues/140593 + data = textwrap.dedent('''\ + + ]> + + ''').encode('UTF-8') + + parser = expat.ParserCreate() + parser.NotStandaloneHandler = lambda: 1.234 # arbitrary float + parser.ElementDeclHandler = lambda _1, _2: None + with self.assertRaises(TypeError): + parser.Parse(data, True) + class MalformedInputTest(unittest.TestCase): def test1(self): xml = b"\0\r\n" @@ -718,24 +735,6 @@ def test_expaterror(self): self.assertEqual(e.code, errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN]) -class ElementDeclHandlerCleanUpLeakTest(unittest.TestCase): - - def test_trigger_leak(self): - # Unfixed, this test would leak 32 to 56 bytes of memory - # https://github.com/python/cpython/issues/140593 - data = textwrap.dedent('''\ - - ]> - - ''').encode('UTF-8') - - parser = expat.ParserCreate() - parser.NotStandaloneHandler = lambda: 1.234 # arbitrary float - parser.ElementDeclHandler = lambda _1, _2: None - with self.assertRaises(TypeError): - parser.Parse(data, True) - class ForeignDTDTests(unittest.TestCase): """ From afb6b42459039455edd51055fa4bd03ba7e25b1f Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sun, 26 Oct 2025 14:11:39 +0100 Subject: [PATCH 5/7] Refactor self.assertRaises call I'm not in support of this - readability goes down - but it was requested. --- Lib/test/test_pyexpat.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index adcce8ed6bf0e0..355d7be06bcc6d 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -698,8 +698,7 @@ def test_trigger_leak(self): parser = expat.ParserCreate() parser.NotStandaloneHandler = lambda: 1.234 # arbitrary float parser.ElementDeclHandler = lambda _1, _2: None - with self.assertRaises(TypeError): - parser.Parse(data, True) + self.assertRaises(TypeError, parser.Parse, data, True) class MalformedInputTest(unittest.TestCase): def test1(self): From b6e9fab5746129c329e97184409e232231ffc2a7 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sun, 26 Oct 2025 14:12:58 +0100 Subject: [PATCH 6/7] Add "See " and another full stop for the new sentence. As requested. --- Lib/test/test_pyexpat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 355d7be06bcc6d..98ca564135c4e9 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -687,7 +687,7 @@ def test_change_size_2(self): class ElementDeclHandlerTest(unittest.TestCase): def test_trigger_leak(self): # Unfixed, this test would leak 32 to 56 bytes of memory. - # https://github.com/python/cpython/issues/140593 + # See https://github.com/python/cpython/issues/140593. data = textwrap.dedent('''\ From a56f66885e3533002efe11fd733102fc6c12ec29 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sun, 26 Oct 2025 14:17:35 +0100 Subject: [PATCH 7/7] Improve comment on the nature of the leak --- Lib/test/test_pyexpat.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 98ca564135c4e9..74a75458289b4d 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -686,7 +686,8 @@ def test_change_size_2(self): class ElementDeclHandlerTest(unittest.TestCase): def test_trigger_leak(self): - # Unfixed, this test would leak 32 to 56 bytes of memory. + # Unfixed, this test would leak the memory of the so-called + # "content model" in function ``my_ElementDeclHandler`` of pyexpat. # See https://github.com/python/cpython/issues/140593. data = textwrap.dedent('''\