Skip to content

Commit

Permalink
Let ElementTreeProducer use the available namespaces
Browse files Browse the repository at this point in the history
ElementTreeProducer would ignore the namespace prefixes that were available in the element tree, and always generate new prefixes like ns00, ns01 etc.
  • Loading branch information
regebro committed Oct 17, 2018
1 parent 0cf95e6 commit 18d7a6d
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 8 deletions.
4 changes: 4 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ Bugs fixed
and the parser participates in a reference cycle.
Original patch by Julien Greard.

* ElementTreeProducer no longer ignores the namespace prefixes that were available
in the element tree, and now only generates nsXX prefixes if undefined prefixes
are encountered.


4.2.1 (2018-03-21)
==================
Expand Down
29 changes: 21 additions & 8 deletions src/lxml/sax.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,19 @@ def _recursive_saxify(self, element, prefixes):
content_handler.characters(element.tail)
return

# Get a new copy in this call, so changes don't propagate upwards
prefixes = prefixes.copy()
new_prefixes = []
for prefix, ns_uri in element.nsmap.items():
if prefixes.get(prefix) != ns_uri:
# New or updated namespace
new_prefixes.append( (prefix, ns_uri) )
if ns_uri in prefixes and prefixes[ns_uri] is None:
# This URI is the default URI,
# don't use the assigned prefix
continue
prefixes[ns_uri] = prefix

build_qname = self._build_qname
attribs = element.items()
if attribs:
Expand All @@ -210,13 +222,13 @@ def _recursive_saxify(self, element, prefixes):
attr_ns_tuple = _getNsTag(attr_ns_name)
attr_values[attr_ns_tuple] = value
attr_qnames[attr_ns_tuple] = build_qname(
attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
attr_ns_tuple[0], attr_ns_tuple[1], prefixes)
sax_attributes = self._attr_class(attr_values, attr_qnames)
else:
sax_attributes = self._empty_attributes

ns_uri, local_name = _getNsTag(tag)
qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)
qname = build_qname(ns_uri, local_name, prefixes)

for prefix, uri in new_prefixes:
content_handler.startPrefixMapping(prefix, uri)
Expand All @@ -232,14 +244,15 @@ def _recursive_saxify(self, element, prefixes):
if element.tail:
content_handler.characters(element.tail)

def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes):
def _build_qname(self, ns_uri, local_name, prefixes):
if ns_uri is None:
return local_name
try:
prefix = prefixes[ns_uri]
except KeyError:
prefix = prefixes[ns_uri] = 'ns%02d' % len(prefixes)
new_prefixes.append( (prefix, ns_uri) )

prefix = prefixes[ns_uri]

if prefix is None:
# Default namespace
return local_name
return prefix + ':' + local_name

def saxify(element_or_tree, content_handler):
Expand Down
31 changes: 31 additions & 0 deletions src/lxml/tests/test_sax.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,37 @@ def test_element_sax_ns(self):
self.assertEqual(0,
len(root))

def test_element_sax_ns_prefix(self):
# The name of the prefix should be preserved
tree = self.parse('<a:a xmlns:a="blaA"><b/><c:c xmlns:c="blaC">'
'<d/></c:c></a:a>')
a = tree.getroot()

self.assertEqual(b'<a:a xmlns:a="blaA"><b/><c:c xmlns:c="blaC">'
b'<d/></c:c></a:a>',
self._saxify_serialize(a))

def test_element_sax_default_ns_prefix(self):
# Default prefixes should also not get a generated prefix
tree = self.parse('<a xmlns="blaA"><b/><c:c xmlns:c="blaC">'
'<d/></c:c></a>')
a = tree.getroot()

self.assertEqual(b'<a xmlns="blaA"><b/><c:c xmlns:c="blaC">'
b'<d/></c:c></a>',
self._saxify_serialize(a))

def test_element_sax_unknown_ns_prefix(self):
# Make an element with an unregistered prefix
tree = self.parse('<a xmlns="blaA"><b/><c:c xmlns:c="blaC">'
'<d/></c:c></a>')
a = tree.getroot()
a.append(a.makeelement('{blaE}e'))

self.assertEqual(b'<a xmlns="blaA"><b/><c:c xmlns:c="blaC">'
b'<d/></c:c><ns0:e xmlns:ns0="blaE"/></a>',
self._saxify_serialize(a))

def test_etree_sax_handler_default_ns(self):
handler = sax.ElementTreeContentHandler()
handler.startDocument()
Expand Down

0 comments on commit 18d7a6d

Please sign in to comment.