Skip to content

Commit

Permalink
bpo-14465: Add an indent() function to xml.etree.ElementTree to prett…
Browse files Browse the repository at this point in the history
…y-print XML trees (GH-15200)
  • Loading branch information
scoder committed Aug 23, 2019
1 parent 81446fd commit b5d3cee
Show file tree
Hide file tree
Showing 4 changed files with 183 additions and 1 deletion.
12 changes: 12 additions & 0 deletions Doc/library/xml.etree.elementtree.rst
Expand Up @@ -572,6 +572,18 @@ Functions
.. versionadded:: 3.2


.. function:: indent(tree, space="  ", level=0)

Appends whitespace to the subtree to indent the tree visually.
This can be used to generate pretty-printed XML output.
*tree* can be an Element or ElementTree. *space* is the whitespace
string that will be inserted for each indentation level, two space
characters by default. For indenting partial subtrees inside of an
already indented tree, pass the initial indentation level as *level*.

.. versionadded:: 3.9


.. function:: iselement(element)

Checks if an object appears to be a valid element object. *element* is an
Expand Down
117 changes: 117 additions & 0 deletions Lib/test/test_xml_etree.py
Expand Up @@ -788,6 +788,123 @@ def test_writestring(self):
elem = ET.fromstring("<html><body>text</body></html>")
self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')

def test_indent(self):
    """Check ET.indent() with the default two-space indentation.

    Covers: a childless root (left untouched), a single child, existing
    whitespace-only text/tail (replaced), non-whitespace tail (kept),
    mixed whitespace between siblings, and mixed content inside <p>.
    """
    # A root without children has nothing to indent.
    elem = ET.XML("<root></root>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<root />')

    elem = ET.XML("<html><body>text</body></html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')

    # Whitespace-only text and tail are overwritten by the indentation.
    elem = ET.XML("<html> <body>text</body> </html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')

    # A tail carrying real content must be preserved verbatim.
    elem = ET.XML("<html><body>text</body>tail</html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>tail</html>')

    elem = ET.XML("<html><body><p>par</p>\n<p>text</p>\t<p><br/></p></body></html>")
    ET.indent(elem)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'    <p>par</p>\n'
        b'    <p>text</p>\n'
        b'    <p>\n'
        b'      <br />\n'
        b'    </p>\n'
        b'  </body>\n'
        b'</html>'
    )

    # Mixed content ("pre", "post") inside <p> must not be reformatted.
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
    ET.indent(elem)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'    <p>pre<br />post</p>\n'
        b'    <p>text</p>\n'
        b'  </body>\n'
        b'</html>'
    )

def test_indent_space(self):
    """Check ET.indent() with a custom *space* string (tab, then empty)."""
    markup = "<html><body><p>pre<br/>post</p><p>text</p></body></html>"

    # One tab per nesting level.
    tabbed = ET.XML(markup)
    ET.indent(tabbed, space='\t')
    expected_tabbed = (
        b'<html>\n'
        b'\t<body>\n'
        b'\t\t<p>pre<br />post</p>\n'
        b'\t\t<p>text</p>\n'
        b'\t</body>\n'
        b'</html>'
    )
    self.assertEqual(ET.tostring(tabbed), expected_tabbed)

    # An empty indentation string still inserts the line breaks.
    flat = ET.XML(markup)
    ET.indent(flat, space='')
    expected_flat = (
        b'<html>\n'
        b'<body>\n'
        b'<p>pre<br />post</p>\n'
        b'<p>text</p>\n'
        b'</body>\n'
        b'</html>'
    )
    self.assertEqual(ET.tostring(flat), expected_flat)

def test_indent_space_caching(self):
    """Check that ET.indent() reuses one string object per indentation depth."""
    elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p><p /></body></html>")
    ET.indent(elem)

    # Tails: the root has none, <body> gets the bare newline, the last
    # <p> dedents to two spaces, the other <p>s and <br> use four spaces.
    self.assertEqual(
        {el.tail for el in elem.iter()},
        {None, "\n", "\n  ", "\n    "}
    )
    # Texts: one indentation string per depth plus the real content.
    self.assertEqual(
        {el.text for el in elem.iter()},
        {None, "\n  ", "\n    ", "\n      ", "par", "text"}
    )
    # As many distinct objects as distinct values means equal indentation
    # strings are shared rather than rebuilt for every element.
    self.assertEqual(
        len({el.tail for el in elem.iter()}),
        len({id(el.tail) for el in elem.iter()}),
    )

def test_indent_level(self):
    """Check the *level* argument of ET.indent(), including rejection of
    negative values."""
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
    with self.assertRaises(ValueError):
        ET.indent(elem, level=-1)
    # The failed call must not have modified the tree.
    self.assertEqual(
        ET.tostring(elem),
        b"<html><body><p>pre<br />post</p><p>text</p></body></html>"
    )

    # level=2 with the default two-space unit: the closing root tag sits
    # at 4 spaces, its children at 6, grandchildren at 8.
    ET.indent(elem, level=2)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'      <body>\n'
        b'        <p>pre<br />post</p>\n'
        b'        <p>text</p>\n'
        b'      </body>\n'
        b'    </html>'
    )

    # level=1 with a single-space unit: 1 / 2 / 3 spaces per depth.
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
    ET.indent(elem, level=1, space=' ')
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'   <p>pre<br />post</p>\n'
        b'   <p>text</p>\n'
        b'  </body>\n'
        b' </html>'
    )

def test_tostring_default_namespace(self):
elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
self.assertEqual(
Expand Down
53 changes: 52 additions & 1 deletion Lib/xml/etree/ElementTree.py
Expand Up @@ -76,7 +76,7 @@
"dump",
"Element", "ElementTree",
"fromstring", "fromstringlist",
"iselement", "iterparse",
"indent", "iselement", "iterparse",
"parse", "ParseError",
"PI", "ProcessingInstruction",
"QName",
Expand Down Expand Up @@ -1185,6 +1185,57 @@ def dump(elem):
if not tail or tail[-1] != "\n":
sys.stdout.write("\n")


def indent(tree, space="  ", level=0):
    """Indent an XML document by inserting newlines and indentation space
    after elements.

    *tree* is the ElementTree or Element to modify.  The tail of the (root)
    element itself will not be changed, but its text and the text and tail
    of all elements in its subtree will be adapted wherever they are empty
    or contain only whitespace.  Text and tails with real content are left
    untouched.

    *space* is the whitespace to insert for each indentation level, two
    space characters by default.

    *level* is the initial indentation level.  Setting this to a higher
    value than 0 can be used for indenting subtrees that are more deeply
    nested inside of a document.

    Raises ValueError if *level* is negative.  The tree is modified in
    place and nothing is returned.
    """
    if isinstance(tree, ElementTree):
        tree = tree.getroot()
    if level < 0:
        raise ValueError(f"Initial indentation level must be >= 0, got {level}")
    if not len(tree):
        # No children: there is nothing to put on separate lines.
        return

    # Reduce the memory consumption by reusing indentation strings.
    # Index 0 holds the indentation of the root itself (at *level*); deeper
    # entries are appended lazily, so indices are relative to the root.
    indentations = ["\n" + level * space]

    def _indent_children(elem, level):
        # Start a new indentation level for the first child.
        child_level = level + 1
        try:
            child_indentation = indentations[child_level]
        except IndexError:
            # First visit to this depth: build and cache its string.
            child_indentation = indentations[level] + space
            indentations.append(child_indentation)

        if not elem.text or not elem.text.strip():
            elem.text = child_indentation

        for child in elem:
            if len(child):
                _indent_children(child, child_level)
            if not child.tail or not child.tail.strip():
                child.tail = child_indentation

        # Dedent after the last child by overwriting the previous indentation.
        # *elem* always has at least one child here, so *child* is bound.
        if not child.tail.strip():
            child.tail = indentations[level]

    # The relative depth starts at 0; *level* is already baked into
    # indentations[0].
    _indent_children(tree, 0)


# --------------------------------------------------------------------
# parsing

Expand Down
@@ -0,0 +1,2 @@
Add an xml.etree.ElementTree.indent() function for pretty-printing XML trees.
Contributed by Stefan Behnel.

0 comments on commit b5d3cee

Please sign in to comment.