Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions Lib/test/test_xml_etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -1261,6 +1261,17 @@ def test_unknown_event(self):
</document>
""".format(html.escape(SIMPLE_XMLFILE, True))

# Fixture: a document that includes C1.xml four times in a row.  Used by
# test_xinclude_repeated to check that repeating the same href at one level
# is processed without error.
XINCLUDE["include_c1_repeated.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>The following is the source code of Recursive1.xml:</p>
<xi:include href="C1.xml"/>
<xi:include href="C1.xml"/>
<xi:include href="C1.xml"/>
<xi:include href="C1.xml"/>
</document>
"""

#
# badly formatted xi:include tags

Expand All @@ -1281,6 +1292,31 @@ def test_unknown_event(self):
</div>
"""

# Fixtures forming an include cycle: Recursive1.xml includes Recursive2.xml,
# which includes Recursive3.xml, which includes Recursive1.xml again.
# Used to exercise recursion detection and the max_depth limit.
XINCLUDE["Recursive1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>The following is the source code of Recursive2.xml:</p>
<xi:include href="Recursive2.xml"/>
</document>
"""

XINCLUDE["Recursive2.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>The following is the source code of Recursive3.xml:</p>
<xi:include href="Recursive3.xml"/>
</document>
"""

XINCLUDE["Recursive3.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>The following is the source code of Recursive1.xml:</p>
<xi:include href="Recursive1.xml"/>
</document>
"""


class XIncludeTest(unittest.TestCase):

def xinclude_loader(self, href, parse="xml", encoding=None):
Expand Down Expand Up @@ -1382,6 +1418,13 @@ def test_xinclude(self):
' </ns0:include>\n'
'</div>') # C5

# Including the same href several times from one document is not a recursive
# include: each xi:include must be expanded without raising.
def test_xinclude_repeated(self):
from xml.etree import ElementInclude

document = self.xinclude_loader("include_c1_repeated.xml")
ElementInclude.include(document, self.xinclude_loader)
# Expected <p> count: 1 from the including document itself plus 2 for each
# of the 4 expanded includes (presumably C1.xml contributes two <p>
# elements -- C1.xml is not visible here, confirm against its fixture).
self.assertEqual(1+4*2, len(document.findall(".//p")))

def test_xinclude_failures(self):
from xml.etree import ElementInclude

Expand Down Expand Up @@ -1414,6 +1457,45 @@ def test_xinclude_failures(self):
"xi:fallback tag must be child of xi:include "
"('{http://www.w3.org/2001/XInclude}fallback')")

# Test infinitely recursive includes.
document = self.xinclude_loader("Recursive1.xml")
with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
ElementInclude.include(document, self.xinclude_loader)
self.assertEqual(str(cm.exception),
"recursive include of Recursive2.xml")

# Test 'max_depth' limitation.
document = self.xinclude_loader("Recursive1.xml")
with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
ElementInclude.include(document, self.xinclude_loader, max_depth=None)
self.assertEqual(str(cm.exception),
"recursive include of Recursive2.xml")

document = self.xinclude_loader("Recursive1.xml")
with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm:
ElementInclude.include(document, self.xinclude_loader, max_depth=0)
self.assertEqual(str(cm.exception),
"maximum xinclude depth reached when including file Recursive2.xml")

document = self.xinclude_loader("Recursive1.xml")
with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm:
ElementInclude.include(document, self.xinclude_loader, max_depth=1)
self.assertEqual(str(cm.exception),
"maximum xinclude depth reached when including file Recursive3.xml")

document = self.xinclude_loader("Recursive1.xml")
with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm:
ElementInclude.include(document, self.xinclude_loader, max_depth=2)
self.assertEqual(str(cm.exception),
"maximum xinclude depth reached when including file Recursive1.xml")

document = self.xinclude_loader("Recursive1.xml")
with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
ElementInclude.include(document, self.xinclude_loader, max_depth=3)
self.assertEqual(str(cm.exception),
"recursive include of Recursive2.xml")


# --------------------------------------------------------------------
# reported bugs

Expand Down
56 changes: 49 additions & 7 deletions Lib/xml/etree/ElementInclude.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,28 @@

import copy
from . import ElementTree
from urllib.parse import urljoin

# XInclude namespace URI wrapped in braces, i.e. the prefix of a qualified
# tag name in the "{namespace-uri}localname" form compared against e.tag.
XINCLUDE = "{http://www.w3.org/2001/XInclude}"

# Fully qualified tag names of the two XInclude elements handled here.
XINCLUDE_INCLUDE = XINCLUDE + "include"
XINCLUDE_FALLBACK = XINCLUDE + "fallback"

# For security reasons, the inclusion depth is limited to this read-only value by default.
# It is the default of include()'s max_depth parameter.
DEFAULT_MAX_INCLUSION_DEPTH = 6


##
# Fatal include error.  Raised, for example, when a loader returns None
# for a resource, when an include refers back to a document that is
# currently being included, or when an xi:fallback element is not a
# child of xi:include.

class FatalIncludeError(SyntaxError):
pass


##
# Maximum inclusion depth reached.  Raised when an xi:include would have
# to be expanded although the remaining max_depth budget is already 0.
# Subclass of FatalIncludeError, so existing handlers still catch it.

class LimitedRecursiveIncludeError(FatalIncludeError):
pass


##
# Default loader. This loader reads an included resource from disk.
#
Expand Down Expand Up @@ -92,28 +102,58 @@ def default_loader(href, parse, encoding=None):
# @param loader Optional resource loader. If omitted, it defaults
# to {@link default_loader}. If given, it should be a callable
# that implements the same interface as <b>default_loader</b>.
# @param base_url The base URL of the original file, to resolve
# relative include file references.
# @param max_depth The maximum number of recursive inclusions.
# Limited to reduce the risk of malicious content explosion.
# Pass None to disable the limitation; a negative value raises ValueError.
# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
# @throws FatalIncludeError If the function fails to include a given
# resource, or if the tree contains malformed XInclude elements.
# @throws OSError If the function fails to load a given resource.
# @throws IOError If the function fails to load a given resource.
# @returns None. The tree rooted at elem is modified in place.

def include(elem, loader=None):
def include(elem, loader=None, base_url=None,
max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
# Public entry point: validates max_depth, fills in defaults and then
# delegates to _include(), which expands xi:include children of elem
# in place.  Nothing is returned.
if max_depth is None:
# None means "no depth limit".  _include() only raises when the counter
# equals 0 and decrements it per nesting level, so starting at -1 never
# reaches 0.
max_depth = -1
elif max_depth < 0:
raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)

# Accept a tree-like object (anything exposing getroot()) as well as an
# element.
if hasattr(elem, 'getroot'):
elem = elem.getroot()
if loader is None:
loader = default_loader

# The fresh set records the hrefs currently being expanded so that
# _include() can detect include cycles.
_include(elem, loader, base_url, max_depth, set())


def _include(elem, loader, base_url, max_depth, _parent_hrefs):
# look for xinclude elements
i = 0
while i < len(elem):
e = elem[i]
if e.tag == XINCLUDE_INCLUDE:
# process xinclude directive
href = e.get("href")
if base_url:
href = urljoin(base_url, href)
parse = e.get("parse", "xml")
if parse == "xml":
if href in _parent_hrefs:
raise FatalIncludeError("recursive include of %s" % href)
if max_depth == 0:
raise LimitedRecursiveIncludeError(
"maximum xinclude depth reached when including file %s" % href)
_parent_hrefs.add(href)
node = loader(href, parse)
if node is None:
raise FatalIncludeError(
"cannot load %r as %r" % (href, parse)
)
node = copy.copy(node)
node = copy.copy(node) # FIXME: this makes little sense with recursive includes
_include(node, loader, href, max_depth - 1, _parent_hrefs)
_parent_hrefs.remove(href)
if e.tail:
node.tail = (node.tail or "") + e.tail
elem[i] = node
Expand All @@ -123,11 +163,13 @@ def include(elem, loader=None):
raise FatalIncludeError(
"cannot load %r as %r" % (href, parse)
)
if e.tail:
text += e.tail
if i:
node = elem[i-1]
node.tail = (node.tail or "") + text + (e.tail or "")
node.tail = (node.tail or "") + text
else:
elem.text = (elem.text or "") + text + (e.tail or "")
elem.text = (elem.text or "") + text
del elem[i]
continue
else:
Expand All @@ -139,5 +181,5 @@ def include(elem, loader=None):
"xi:fallback tag must be child of xi:include (%r)" % e.tag
)
else:
include(e, loader)
i = i + 1
_include(e, loader, base_url, max_depth, _parent_hrefs)
i += 1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ElementTree supports recursive XInclude processing. Patch by Stefan Behnel.