From 75c5addf4140bfca46ee5d992589ca4cd0a3d677 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 13 Nov 2025 13:37:01 +0200 Subject: [PATCH] gh-140601: Refactor ElementTree.iterparse() tests (GH-141499) Split existing tests on smaller methods and move them to separate class. Rename variable "content" to "it". Use BytesIO instead of StringIO. Add few more tests. (cherry picked from commit 2fbd39666663cb5ca1c0e3021ce2e7bc72331020) Co-authored-by: Serhiy Storchaka --- Lib/test/test_xml_etree.py | 430 ++++++++++++++++++++----------------- 1 file changed, 228 insertions(+), 202 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index f65baa0cfae2ad..25c084c8b9c9eb 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -574,208 +574,6 @@ def test_parseliteral(self): self.assertEqual(len(ids), 1) self.assertEqual(ids["body"].tag, 'body') - def test_iterparse(self): - # Test iterparse interface. - - iterparse = ET.iterparse - - context = iterparse(SIMPLE_XMLFILE) - self.assertIsNone(context.root) - action, elem = next(context) - self.assertIsNone(context.root) - self.assertEqual((action, elem.tag), ('end', 'element')) - self.assertEqual([(action, elem.tag) for action, elem in context], [ - ('end', 'element'), - ('end', 'empty-element'), - ('end', 'root'), - ]) - self.assertEqual(context.root.tag, 'root') - - context = iterparse(SIMPLE_NS_XMLFILE) - self.assertEqual([(action, elem.tag) for action, elem in context], [ - ('end', '{namespace}element'), - ('end', '{namespace}element'), - ('end', '{namespace}empty-element'), - ('end', '{namespace}root'), - ]) - - with open(SIMPLE_XMLFILE, 'rb') as source: - context = iterparse(source) - action, elem = next(context) - self.assertEqual((action, elem.tag), ('end', 'element')) - self.assertEqual([(action, elem.tag) for action, elem in context], [ - ('end', 'element'), - ('end', 'empty-element'), - ('end', 'root'), - ]) - self.assertEqual(context.root.tag, 'root') - - events = () - context = iterparse(SIMPLE_XMLFILE, events) - self.assertEqual([(action, elem.tag) for action, elem in context], []) - - events = () - context = iterparse(SIMPLE_XMLFILE, events=events) - self.assertEqual([(action, elem.tag) for action, elem in context], []) - - events = ("start", "end") - context = iterparse(SIMPLE_XMLFILE, events) - self.assertEqual([(action, elem.tag) for action, elem in context], [ - ('start', 'root'), - ('start', 'element'), - ('end', 'element'), - ('start', 'element'), - ('end', 'element'), - ('start', 'empty-element'), - ('end', 'empty-element'), - ('end', 'root'), - ]) - - events = ("start", "end", "start-ns", "end-ns") - context = iterparse(SIMPLE_NS_XMLFILE, events) - self.assertEqual([(action, elem.tag) if action in ("start", "end") - else (action, elem) - for action, elem in context], [ - ('start-ns', ('', 'namespace')), - ('start', '{namespace}root'), - ('start', '{namespace}element'), - ('end', '{namespace}element'), - ('start', '{namespace}element'), - ('end', '{namespace}element'), - ('start', '{namespace}empty-element'), - ('end', '{namespace}empty-element'), - ('end', '{namespace}root'), - ('end-ns', None), - ]) - - events = ('start-ns', 'end-ns') - context = iterparse(io.StringIO(r""), events) - res = [action for action, elem in context] - self.assertEqual(res, ['start-ns', 'end-ns']) - - events = ("start", "end", "bogus") - with open(SIMPLE_XMLFILE, "rb") as f: - with self.assertRaises(ValueError) as cm: - iterparse(f, events) - self.assertFalse(f.closed) - self.assertEqual(str(cm.exception), "unknown event 'bogus'") - - with warnings_helper.check_no_resource_warning(self): - with self.assertRaises(ValueError) as cm: - iterparse(SIMPLE_XMLFILE, events) - self.assertEqual(str(cm.exception), "unknown event 'bogus'") - del cm - - source = io.BytesIO( - b"\n" - b"text\n") - events = ("start-ns",) - context = iterparse(source, events) - self.assertEqual([(action, elem) for action, elem in context], [ - ('start-ns', ('', 'http://\xe9ffbot.org/ns')), - ('start-ns', ('cl\xe9', 'http://effbot.org/ns')), - ]) - - source = io.StringIO("junk") - it = iterparse(source) - action, elem = next(it) - self.assertEqual((action, elem.tag), ('end', 'document')) - with self.assertRaises(ET.ParseError) as cm: - next(it) - self.assertEqual(str(cm.exception), - 'junk after document element: line 1, column 12') - - self.addCleanup(os_helper.unlink, TESTFN) - with open(TESTFN, "wb") as f: - f.write(b"junk") - it = iterparse(TESTFN) - action, elem = next(it) - self.assertEqual((action, elem.tag), ('end', 'document')) - with warnings_helper.check_no_resource_warning(self): - with self.assertRaises(ET.ParseError) as cm: - next(it) - self.assertEqual(str(cm.exception), - 'junk after document element: line 1, column 12') - del cm, it - - # Not exhausting the iterator still closes the resource (bpo-43292) - with warnings_helper.check_no_resource_warning(self): - it = iterparse(SIMPLE_XMLFILE) - del it - - with warnings_helper.check_no_resource_warning(self): - it = iterparse(SIMPLE_XMLFILE) - it.close() - del it - - with warnings_helper.check_no_resource_warning(self): - it = iterparse(SIMPLE_XMLFILE) - action, elem = next(it) - self.assertEqual((action, elem.tag), ('end', 'element')) - del it, elem - - with warnings_helper.check_no_resource_warning(self): - it = iterparse(SIMPLE_XMLFILE) - action, elem = next(it) - it.close() - self.assertEqual((action, elem.tag), ('end', 'element')) - del it, elem - - with self.assertRaises(FileNotFoundError): - iterparse("nonexistent") - - def test_iterparse_close(self): - iterparse = ET.iterparse - - it = iterparse(SIMPLE_XMLFILE) - it.close() - with self.assertRaises(StopIteration): - next(it) - it.close() # idempotent - - with open(SIMPLE_XMLFILE, 'rb') as source: - it = iterparse(source) - it.close() - self.assertFalse(source.closed) - with self.assertRaises(StopIteration): - next(it) - it.close() # idempotent - - it = iterparse(SIMPLE_XMLFILE) - action, elem = next(it) - self.assertEqual((action, elem.tag), ('end', 'element')) - it.close() - with self.assertRaises(StopIteration): - next(it) - it.close() # idempotent - - with open(SIMPLE_XMLFILE, 'rb') as source: - it = iterparse(source) - action, elem = next(it) - self.assertEqual((action, elem.tag), ('end', 'element')) - it.close() - self.assertFalse(source.closed) - with self.assertRaises(StopIteration): - next(it) - it.close() # idempotent - - it = iterparse(SIMPLE_XMLFILE) - list(it) - it.close() - with self.assertRaises(StopIteration): - next(it) - it.close() # idempotent - - with open(SIMPLE_XMLFILE, 'rb') as source: - it = iterparse(source) - list(it) - it.close() - self.assertFalse(source.closed) - with self.assertRaises(StopIteration): - next(it) - it.close() # idempotent - def test_writefile(self): elem = ET.Element("tag") elem.text = "text" @@ -1499,6 +1297,234 @@ def test_attlist_default(self): {'{http://www.w3.org/XML/1998/namespace}lang': 'eng'}) +class IterparseTest(unittest.TestCase): + # Test iterparse interface. + + def test_basic(self): + iterparse = ET.iterparse + + it = iterparse(SIMPLE_XMLFILE) + self.assertIsNone(it.root) + action, elem = next(it) + self.assertIsNone(it.root) + self.assertEqual((action, elem.tag), ('end', 'element')) + self.assertEqual([(action, elem.tag) for action, elem in it], [ + ('end', 'element'), + ('end', 'empty-element'), + ('end', 'root'), + ]) + self.assertEqual(it.root.tag, 'root') + it.close() + + it = iterparse(SIMPLE_NS_XMLFILE) + self.assertEqual([(action, elem.tag) for action, elem in it], [ + ('end', '{namespace}element'), + ('end', '{namespace}element'), + ('end', '{namespace}empty-element'), + ('end', '{namespace}root'), + ]) + it.close() + + def test_external_file(self): + with open(SIMPLE_XMLFILE, 'rb') as source: + it = ET.iterparse(source) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'element')) + self.assertEqual([(action, elem.tag) for action, elem in it], [ + ('end', 'element'), + ('end', 'empty-element'), + ('end', 'root'), + ]) + self.assertEqual(it.root.tag, 'root') + + def test_events(self): + iterparse = ET.iterparse + + events = () + it = iterparse(SIMPLE_XMLFILE, events) + self.assertEqual([(action, elem.tag) for action, elem in it], []) + it.close() + + events = () + it = iterparse(SIMPLE_XMLFILE, events=events) + self.assertEqual([(action, elem.tag) for action, elem in it], []) + it.close() + + events = ("start", "end") + it = iterparse(SIMPLE_XMLFILE, events) + self.assertEqual([(action, elem.tag) for action, elem in it], [ + ('start', 'root'), + ('start', 'element'), + ('end', 'element'), + ('start', 'element'), + ('end', 'element'), + ('start', 'empty-element'), + ('end', 'empty-element'), + ('end', 'root'), + ]) + it.close() + + def test_namespace_events(self): + iterparse = ET.iterparse + + events = ("start", "end", "start-ns", "end-ns") + it = iterparse(SIMPLE_NS_XMLFILE, events) + self.assertEqual([(action, elem.tag) if action in ("start", "end") + else (action, elem) + for action, elem in it], [ + ('start-ns', ('', 'namespace')), + ('start', '{namespace}root'), + ('start', '{namespace}element'), + ('end', '{namespace}element'), + ('start', '{namespace}element'), + ('end', '{namespace}element'), + ('start', '{namespace}empty-element'), + ('end', '{namespace}empty-element'), + ('end', '{namespace}root'), + ('end-ns', None), + ]) + it.close() + + events = ('start-ns', 'end-ns') + it = iterparse(io.BytesIO(br""), events) + res = [action for action, elem in it] + self.assertEqual(res, ['start-ns', 'end-ns']) + it.close() + + def test_unknown_events(self): + iterparse = ET.iterparse + + events = ("start", "end", "bogus") + with open(SIMPLE_XMLFILE, "rb") as f: + with self.assertRaises(ValueError) as cm: + iterparse(f, events) + self.assertFalse(f.closed) + self.assertEqual(str(cm.exception), "unknown event 'bogus'") + + with warnings_helper.check_no_resource_warning(self): + with self.assertRaises(ValueError) as cm: + iterparse(SIMPLE_XMLFILE, events) + self.assertEqual(str(cm.exception), "unknown event 'bogus'") + del cm + gc_collect() + + def test_non_utf8(self): + source = io.BytesIO( + b"\n" + b"text\n") + events = ("start-ns",) + it = ET.iterparse(source, events) + self.assertEqual([(action, elem) for action, elem in it], [ + ('start-ns', ('', 'http://\xe9ffbot.org/ns')), + ('start-ns', ('cl\xe9', 'http://effbot.org/ns')), + ]) + + def test_parsing_error(self): + source = io.BytesIO(b"junk") + it = ET.iterparse(source) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'document')) + with self.assertRaises(ET.ParseError) as cm: + next(it) + self.assertEqual(str(cm.exception), + 'junk after document element: line 1, column 12') + + def test_nonexistent_file(self): + with self.assertRaises(FileNotFoundError): + ET.iterparse("nonexistent") + + def test_resource_warnings_not_exhausted(self): + # Not exhausting the iterator still closes the underlying file (bpo-43292) + it = ET.iterparse(SIMPLE_XMLFILE) + with warnings_helper.check_no_resource_warning(self): + del it + gc_collect() + + it = ET.iterparse(SIMPLE_XMLFILE) + with warnings_helper.check_no_resource_warning(self): + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'element')) + del it, elem + gc_collect() + + def test_resource_warnings_failed_iteration(self): + self.addCleanup(os_helper.unlink, TESTFN) + with open(TESTFN, "wb") as f: + f.write(b"junk") + + it = ET.iterparse(TESTFN) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'document')) + with warnings_helper.check_no_resource_warning(self): + with self.assertRaises(ET.ParseError) as cm: + next(it) + self.assertEqual(str(cm.exception), + 'junk after document element: line 1, column 12') + del cm, it + gc_collect() + + def test_resource_warnings_exhausted(self): + it = ET.iterparse(SIMPLE_XMLFILE) + with warnings_helper.check_no_resource_warning(self): + list(it) + del it + gc_collect() + + def test_close_not_exhausted(self): + iterparse = ET.iterparse + + it = iterparse(SIMPLE_XMLFILE) + it.close() + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + with open(SIMPLE_XMLFILE, 'rb') as source: + it = iterparse(source) + it.close() + self.assertFalse(source.closed) + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + it = iterparse(SIMPLE_XMLFILE) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'element')) + it.close() + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + with open(SIMPLE_XMLFILE, 'rb') as source: + it = iterparse(source) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'element')) + it.close() + self.assertFalse(source.closed) + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + def test_close_exhausted(self): + iterparse = ET.iterparse + it = iterparse(SIMPLE_XMLFILE) + list(it) + it.close() + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + with open(SIMPLE_XMLFILE, 'rb') as source: + it = iterparse(source) + list(it) + it.close() + self.assertFalse(source.closed) + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + class XMLPullParserTest(unittest.TestCase): def _feed(self, parser, data, chunk_size=None, flush=False):