From 4a2d1d8ab1d628de599a6d05e55b712975dca331 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 2 Jul 2021 13:19:27 +0300 Subject: [PATCH] gh-69893: Add the close() method for xml.etree.ElementTree.iterparse() iterator --- Doc/library/xml.etree.elementtree.rst | 5 + Doc/whatsnew/3.13.rst | 8 ++ Lib/test/test_xml_etree.py | 91 ++++++++++++++++++- Lib/xml/etree/ElementTree.py | 8 +- ...4-01-24-17-25-18.gh-issue-69893.PQq5fR.rst | 2 + 5 files changed, 110 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-24-17-25-18.gh-issue-69893.PQq5fR.rst diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index fe92400fb08dfd..b2f6ce15ccd453 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -625,6 +625,8 @@ Functions target. Returns an :term:`iterator` providing ``(event, elem)`` pairs; it has a ``root`` attribute that references the root element of the resulting XML tree once *source* is fully read. + The iterator has the :meth:`!close` method that closes the internal + file object if *source* is a filename. Note that while :func:`iterparse` builds the tree incrementally, it issues blocking reads on *source* (or the file it names). As such, it's unsuitable @@ -647,6 +649,9 @@ Functions .. versionchanged:: 3.8 The ``comment`` and ``pi`` events were added. + .. versionchanged:: 3.13 + Added the :meth:`!close` method. + .. function:: parse(source, parser=None) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 40f0cd37fe9318..0710335f12724c 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -456,6 +456,14 @@ warnings warning may also be emitted when a decorated function or class is used at runtime. See :pep:`702`. (Contributed by Jelle Zijlstra in :gh:`104003`.) +xml.etree.ElementTree +--------------------- + +* Add the :meth:`!close` method for the iterator returned by + :func:`~xml.etree.ElementTree.iterparse` for explicit cleaning up. + (Contributed by Serhiy Storchaka in :gh:`69893`.) + + Optimizations ============= diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index b9e7937b0bbc00..44131230be26a8 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -553,6 +553,17 @@ def test_iterparse(self): ('end', '{namespace}root'), ]) + with open(SIMPLE_XMLFILE, 'rb') as source: + context = iterparse(source) + action, elem = next(context) + self.assertEqual((action, elem.tag), ('end', 'element')) + self.assertEqual([(action, elem.tag) for action, elem in context], [ + ('end', 'element'), + ('end', 'empty-element'), + ('end', 'root'), + ]) + self.assertEqual(context.root.tag, 'root') + events = () context = iterparse(SIMPLE_XMLFILE, events) self.assertEqual([(action, elem.tag) for action, elem in context], []) @@ -608,6 +619,7 @@ def test_iterparse(self): iterparse(SIMPLE_XMLFILE, events) self.assertEqual(str(cm.exception), "unknown event 'bogus'") del cm + gc_collect() source = io.BytesIO( b"\n" @@ -641,15 +653,89 @@ def test_iterparse(self): self.assertEqual(str(cm.exception), 'junk after document element: line 1, column 12') del cm, it + gc_collect() # Not exhausting the iterator still closes the resource (bpo-43292) with warnings_helper.check_no_resource_warning(self): - it = iterparse(TESTFN) + it = iterparse(SIMPLE_XMLFILE) del it + gc_collect() + + with warnings_helper.check_no_resource_warning(self): + it = iterparse(SIMPLE_XMLFILE) + it.close() + del it + gc_collect() + + with warnings_helper.check_no_resource_warning(self): + it = iterparse(SIMPLE_XMLFILE) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'element')) + del it, elem + gc_collect() + + with warnings_helper.check_no_resource_warning(self): + it = iterparse(SIMPLE_XMLFILE) + action, elem = next(it) + it.close() + self.assertEqual((action, elem.tag), ('end', 'element')) + del it, elem + gc_collect() with self.assertRaises(FileNotFoundError): iterparse("nonexistent") + def test_iterparse_close(self): + iterparse = ET.iterparse + + it = iterparse(SIMPLE_XMLFILE) + it.close() + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + with open(SIMPLE_XMLFILE, 'rb') as source: + it = iterparse(source) + it.close() + self.assertFalse(source.closed) + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + it = iterparse(SIMPLE_XMLFILE) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'element')) + it.close() + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + with open(SIMPLE_XMLFILE, 'rb') as source: + it = iterparse(source) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'element')) + it.close() + self.assertFalse(source.closed) + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + it = iterparse(SIMPLE_XMLFILE) + list(it) + it.close() + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + with open(SIMPLE_XMLFILE, 'rb') as source: + it = iterparse(source) + list(it) + it.close() + self.assertFalse(source.closed) + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + def test_writefile(self): elem = ET.Element("tag") elem.text = "text" @@ -3042,8 +3128,7 @@ def test_basic(self): # With an explicit parser too (issue #9708) sourcefile = serialize(doc, to_string=False) parser = ET.XMLParser(target=ET.TreeBuilder()) - self.assertEqual(next(ET.iterparse(sourcefile, parser=parser))[0], - 'end') + self.assertEqual(next(ET.iterparse(sourcefile, parser=parser))[0], 'end') tree = ET.ElementTree(None) self.assertRaises(AttributeError, tree.iter) diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index ae6575028be11c..b4fdb95299dc0f 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1248,10 +1248,16 @@ def iterator(source): if close_source: source.close() + gen = iterator(source) class IterParseIterator(collections.abc.Iterator): - __next__ = iterator(source).__next__ + __next__ = gen.__next__ + def close(self): + if close_source: + source.close() + gen.close() def __del__(self): + # TODO: Emit a ResourceWarning if it was not explicitly closed. if close_source: source.close() diff --git a/Misc/NEWS.d/next/Library/2024-01-24-17-25-18.gh-issue-69893.PQq5fR.rst b/Misc/NEWS.d/next/Library/2024-01-24-17-25-18.gh-issue-69893.PQq5fR.rst new file mode 100644 index 00000000000000..1ebf434c33187b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-24-17-25-18.gh-issue-69893.PQq5fR.rst @@ -0,0 +1,2 @@ +Add the :meth:`!close` method for the iterator returned by +:func:`xml.etree.ElementTree.iterparse`.