Skip to content
Permalink
Browse files

bpo-36676: Namespace prefix aware parsing support for the ET.XMLParser target (GH-12885)

* bpo-36676: Implement namespace prefix aware parsing support for the XMLParser target in ElementTree.
  • Loading branch information
scoder committed May 1, 2019
1 parent 43851a2 commit dde3eebdaa8d2c51971ca704d53af7cbcda8bb34
@@ -1086,7 +1086,7 @@ TreeBuilder Objects


In addition, a custom :class:`TreeBuilder` object can provide the
following method:
following methods:

.. method:: doctype(name, pubid, system)

@@ -1096,6 +1096,23 @@ TreeBuilder Objects

.. versionadded:: 3.2

.. method:: start_ns(prefix, uri)

Called whenever the parser encounters a new namespace declaration,
before the ``start()`` callback for the opening element that defines it.
*prefix* is ``''`` for the default namespace; otherwise it is the declared
namespace prefix name. *uri* is the namespace URI.

.. versionadded:: 3.8

.. method:: end_ns(prefix)

Called after the ``end()`` callback of an element that declared
a namespace prefix mapping, with the name of the *prefix* that went
out of scope. *prefix* is ``''`` for the default namespace.

.. versionadded:: 3.8


.. _elementtree-xmlparser-objects:

@@ -1131,7 +1148,8 @@ XMLParser Objects

:meth:`XMLParser.feed` calls *target*\'s ``start(tag, attrs_dict)`` method
for each opening tag, its ``end(tag)`` method for each closing tag, and data
is processed by method ``data(data)``. :meth:`XMLParser.close` calls
is processed by method ``data(data)``. For further supported callback
methods, see the :class:`TreeBuilder` class. :meth:`XMLParser.close` calls
*target*\'s method ``close()``. :class:`XMLParser` can be used not only for
building a tree structure. This is an example of counting the maximum depth
of an XML file::
@@ -14,12 +14,13 @@
import operator
import pickle
import sys
import textwrap
import types
import unittest
import warnings
import weakref

from itertools import product
from itertools import product, islice
from test import support
from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr

@@ -694,12 +695,17 @@ def pi(self, target, data):
self.append(("pi", target, data))
def comment(self, data):
self.append(("comment", data))
def start_ns(self, prefix, uri):
self.append(("start-ns", prefix, uri))
def end_ns(self, prefix):
self.append(("end-ns", prefix))
builder = Builder()
parser = ET.XMLParser(target=builder)
parser.feed(data)
self.assertEqual(builder, [
('pi', 'pi', 'data'),
('comment', ' comment '),
('start-ns', '', 'namespace'),
('start', '{namespace}root'),
('start', '{namespace}element'),
('end', '{namespace}element'),
@@ -708,8 +714,30 @@ def comment(self, data):
('start', '{namespace}empty-element'),
('end', '{namespace}empty-element'),
('end', '{namespace}root'),
('end-ns', ''),
])

def test_custom_builder_only_end_ns(self):
    # A target that implements end_ns() and none of the other callbacks
    # must still be notified of every prefix that goes out of scope.
    class EndNsRecorder(list):
        def end_ns(self, prefix):
            self.append(("end-ns", prefix))

    document = textwrap.dedent("""\
<?pi data?>
<!-- comment -->
<root xmlns='namespace' xmlns:p='pns' xmlns:a='ans'>
<a:element key='value'>text</a:element>
<p:element>text</p:element>tail
<empty-element/>
</root>
""")

    recorder = EndNsRecorder()
    xml_parser = ET.XMLParser(target=recorder)
    xml_parser.feed(document)

    # The three prefixes declared on <root> are expected in the reverse
    # of their declaration order, the default namespace ('') last.
    prefixes_closed = [
        ('end-ns', 'a'),
        ('end-ns', 'p'),
        ('end-ns', ''),
    ]
    self.assertEqual(recorder, prefixes_closed)

# Element.getchildren() and ElementTree.getiterator() are deprecated.
@checkwarnings(("This method will be removed in future versions. "
@@ -1194,14 +1222,19 @@ def _feed(self, parser, data, chunk_size=None):
for i in range(0, len(data), chunk_size):
parser.feed(data[i:i+chunk_size])

def assert_events(self, parser, expected):
def assert_events(self, parser, expected, max_events=None):
self.assertEqual(
[(event, (elem.tag, elem.text))
for event, elem in parser.read_events()],
for event, elem in islice(parser.read_events(), max_events)],
expected)

def assert_event_tags(self, parser, expected):
events = parser.read_events()
def assert_event_tuples(self, parser, expected, max_events=None):
    # Compare the raw (event, payload) tuples queued on *parser* against
    # *expected*.  At most *max_events* tuples are consumed; None means
    # everything that is currently available.
    pending = parser.read_events()
    observed = list(islice(pending, max_events))
    self.assertEqual(observed, expected)

def assert_event_tags(self, parser, expected, max_events=None):
    # Compare (event, element tag) pairs read from *parser* against
    # *expected*.  At most *max_events* events are consumed; None means
    # everything that is currently available.
    observed = []
    for action, elem in islice(parser.read_events(), max_events):
        observed.append((action, elem.tag))
    self.assertEqual(observed, expected)

@@ -1276,6 +1309,56 @@ def test_ns_events(self):
self.assertEqual(list(parser.read_events()), [('end-ns', None)])
self.assertIsNone(parser.close())

def test_ns_events_start(self):
    # 'start-ns' is requested without 'end-ns': namespace declarations
    # are expected ahead of the declaring element's 'start' event, and
    # nothing is expected after the final 'end' event.
    pull_parser = ET.XMLPullParser(events=('start-ns', 'start', 'end'))

    self._feed(pull_parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
    declarations = [
        ('start-ns', ('', 'abc')),
        ('start-ns', ('p', 'xyz')),
    ]
    self.assert_event_tuples(pull_parser, declarations, max_events=2)
    root_opened = [('start', '{abc}tag')]
    self.assert_event_tags(pull_parser, root_opened, max_events=1)

    self._feed(pull_parser, "<child />\n")
    child_events = [
        ('start', '{abc}child'),
        ('end', '{abc}child'),
    ]
    self.assert_event_tags(pull_parser, child_events)

    self._feed(pull_parser, "</tag>\n")
    pull_parser.close()
    # No max_events here, so every remaining event is compared.
    root_closed = [('end', '{abc}tag')]
    self.assert_event_tags(pull_parser, root_closed)

def test_ns_events_start_end(self):
    # Both 'start-ns' and 'end-ns' are requested: declarations are
    # expected ahead of the element's 'start' event, and one 'end-ns'
    # event per declaration is expected after its 'end' event.
    requested = ('start-ns', 'start', 'end', 'end-ns')
    pull_parser = ET.XMLPullParser(events=requested)

    self._feed(pull_parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
    declarations = [
        ('start-ns', ('', 'abc')),
        ('start-ns', ('p', 'xyz')),
    ]
    self.assert_event_tuples(pull_parser, declarations, max_events=2)
    root_opened = [('start', '{abc}tag')]
    self.assert_event_tags(pull_parser, root_opened, max_events=1)

    self._feed(pull_parser, "<child />\n")
    child_events = [
        ('start', '{abc}child'),
        ('end', '{abc}child'),
    ]
    self.assert_event_tags(pull_parser, child_events)

    self._feed(pull_parser, "</tag>\n")
    pull_parser.close()
    root_closed = [('end', '{abc}tag')]
    self.assert_event_tags(pull_parser, root_closed, max_events=1)
    # The 'end-ns' events are expected to carry None as their payload.
    scopes_closed = [
        ('end-ns', None),
        ('end-ns', None),
    ]
    self.assert_event_tuples(pull_parser, scopes_closed)

def test_events(self):
parser = ET.XMLPullParser(events=())
self._feed(parser, "<root/>\n")
@@ -1518,6 +1518,10 @@ def __init__(self, *, target=None, encoding=None):
parser.StartElementHandler = self._start
if hasattr(target, 'end'):
parser.EndElementHandler = self._end
if hasattr(target, 'start_ns'):
parser.StartNamespaceDeclHandler = self._start_ns
if hasattr(target, 'end_ns'):
parser.EndNamespaceDeclHandler = self._end_ns
if hasattr(target, 'data'):
parser.CharacterDataHandler = target.data
# miscellaneous callbacks
@@ -1559,12 +1563,24 @@ def handler(tag, event=event_name, append=append,
append((event, end(tag)))
parser.EndElementHandler = handler
elif event_name == "start-ns":
def handler(prefix, uri, event=event_name, append=append):
append((event, (prefix or "", uri or "")))
# TreeBuilder does not implement .start_ns()
if hasattr(self.target, "start_ns"):
def handler(prefix, uri, event=event_name, append=append,
start_ns=self._start_ns):
append((event, start_ns(prefix, uri)))
else:
def handler(prefix, uri, event=event_name, append=append):
append((event, (prefix or '', uri or '')))
parser.StartNamespaceDeclHandler = handler
elif event_name == "end-ns":
def handler(prefix, event=event_name, append=append):
append((event, None))
# TreeBuilder does not implement .end_ns()
if hasattr(self.target, "end_ns"):
def handler(prefix, event=event_name, append=append,
end_ns=self._end_ns):
append((event, end_ns(prefix)))
else:
def handler(prefix, event=event_name, append=append):
append((event, None))
parser.EndNamespaceDeclHandler = handler
elif event_name == 'comment':
def handler(text, event=event_name, append=append, self=self):
@@ -1595,6 +1611,12 @@ def _fixname(self, key):
self._names[key] = name
return name

def _start_ns(self, prefix, uri):
    # Forward a namespace declaration to the target's start_ns() and
    # return whatever it returns.  Falsy arguments are replaced by ''
    # (presumably the low-level parser reports the default namespace
    # with a None prefix -- confirm against the expat handler docs).
    if not prefix:
        prefix = ''
    if not uri:
        uri = ''
    return self.target.start_ns(prefix, uri)

def _end_ns(self, prefix):
    # Forward the end of a namespace scope to the target's end_ns() and
    # return whatever it returns.  A falsy prefix is replaced by ''.
    if not prefix:
        prefix = ''
    return self.target.end_ns(prefix)

def _start(self, tag, attr_list):
# Handler for expat's StartElementHandler. Since ordered_attributes
# is set, the attributes are reported as a list of alternating
@@ -0,0 +1,3 @@
The XMLParser() in xml.etree.ElementTree provides namespace prefix context to the
parser target if it defines the callback methods "start_ns()" and/or "end_ns()".
Patch by Stefan Behnel.

0 comments on commit dde3eeb

Please sign in to comment.
You can’t perform that action at this time.