Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not blindly copy all of the namespaces when tostring():ing a subtree. #184

Merged
merged 9 commits into from Aug 20, 2016
47 changes: 39 additions & 8 deletions src/lxml/proxy.pxi
Expand Up @@ -81,10 +81,11 @@ cdef inline int _unregisterProxy(_Element proxy) except -1:
# temporarily make a node the root node of its document

cdef xmlDoc* _fakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node) except NULL:
return _plainFakeRootDoc(c_base_doc, c_node, 1)
return _plainFakeRootDoc(c_base_doc=c_base_doc, c_node=c_node,
with_siblings=1, used_only=0)

cdef xmlDoc* _plainFakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node,
bint with_siblings) except NULL:
bint with_siblings, bint used_only=1) except NULL:
# build a temporary document that has the given node as root node
# note that copy and original must not be modified during its lifetime!!
# always call _destroyFakeDoc() after use!
Expand All @@ -101,7 +102,7 @@ cdef xmlDoc* _plainFakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node,
c_doc = _copyDoc(c_base_doc, 0) # non recursive!
c_new_root = tree.xmlDocCopyNode(c_node, c_doc, 2) # non recursive!
tree.xmlDocSetRootElement(c_doc, c_new_root)
_copyParentNamespaces(c_node, c_new_root)
_copyParentNamespaces(c_node, c_new_root, used_only)

c_new_root.children = c_node.children
c_new_root.last = c_node.last
Expand Down Expand Up @@ -223,20 +224,50 @@ cdef int canDeallocateChildNodes(xmlNode* c_parent):
################################################################################
# fix _Document references and namespaces when a node changes documents

cdef void _copyParentNamespaces(xmlNode* c_from_node, xmlNode* c_to_node) nogil:
u"""Copy the namespaces of all ancestors of c_from_node to c_to_node.
cdef void _copyParentNamespaces(xmlNode* c_from_node, xmlNode* c_to_node, bint used_only=1) nogil:
u"""Copy the namespaces of all ancestors of c_from_node to c_to_node that are used by c_to_node.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

... used by c_from_node, it seems.

"""
cdef xmlNode* c_parent
cdef xmlNode* c_node
cdef xmlNs* c_ns
cdef xmlNs* c_new_ns
cdef int prefix_known
c_parent = c_from_node.parent
cdef _nscache c_ns_cache = [NULL, 0, 0]
cdef _ns_update_map c_ns_entry
cdef bint should_copy = 0

if used_only:
# Build up a cache for each of the ns prefixes used in the elements we will
# copy over, then use prescence in the cache as a basis for allowing the
# copy of the namespace.
c_node = c_from_node.children
tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_from_node, c_node, 1)
if c_node.ns is not NULL:
c_ns = NULL
for c_ns_entry in c_ns_cache.ns_map[:c_ns_cache.last]:
if c_node.ns is c_ns_entry.old:
c_ns = c_ns_entry.old
break

if c_ns is NULL:
with gil:
_appendToNsCache(&c_ns_cache, c_node.ns, NULL)
tree.END_FOR_EACH_ELEMENT_FROM(c_node)

while c_parent and (tree._isElementOrXInclude(c_parent) or
c_parent.type == tree.XML_DOCUMENT_NODE):
c_new_ns = c_parent.nsDef
while c_new_ns:
# libxml2 will check if the prefix is already defined
tree.xmlNewNs(c_to_node, c_new_ns.href, c_new_ns.prefix)
if used_only:
should_copy = 0
for c_ns_entry in c_ns_cache.ns_map[:c_ns_cache.last]:
if c_new_ns is c_ns_entry.old:
should_copy = 1
break

if not used_only or should_copy:
# libxml2 will check if the prefix is already defined
tree.xmlNewNs(c_to_node, c_new_ns.href, c_new_ns.prefix)
c_new_ns = c_new_ns.next
c_parent = c_parent.parent

Expand Down
5 changes: 3 additions & 2 deletions src/lxml/serializer.pxi
Expand Up @@ -159,7 +159,8 @@ cdef bytes _tostringC14N(element_or_tree, bint exclusive, bint with_comments, in
if isinstance(element_or_tree, _Element):
_assertValidNode(<_Element>element_or_tree)
doc = (<_Element>element_or_tree)._doc
c_doc = _plainFakeRootDoc(doc._c_doc, (<_Element>element_or_tree)._c_node, 0)
c_doc = _plainFakeRootDoc(c_base_doc=doc._c_doc, c_node=(<_Element>element_or_tree)._c_node,
with_siblings=0, used_only=1)
else:
doc = _documentOrRaise(element_or_tree)
_assertValidDoc(doc)
Expand Down Expand Up @@ -234,7 +235,7 @@ cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer,
if not c_nsdecl_node:
c_buffer.error = xmlerror.XML_ERR_NO_MEMORY
return
_copyParentNamespaces(c_node, c_nsdecl_node)
_copyParentNamespaces(c_from_node=c_node, c_to_node=c_nsdecl_node, used_only=0)

c_nsdecl_node.parent = c_node.parent
c_nsdecl_node.children = c_node.children
Expand Down
4 changes: 2 additions & 2 deletions src/lxml/tests/test_etree.py
Expand Up @@ -4194,14 +4194,14 @@ def test_c14n_element_tostring_exclusive(self):
s)

s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=False)
self.assertEqual(_bytes('<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
self.assertEqual(_bytes('<z:b xmlns:z="http://cde"></z:b>'),
s)
s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True)
self.assertEqual(_bytes('<z:b xmlns:z="http://cde"></z:b>'),
s)

s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
self.assertEqual(_bytes('<z:b xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
self.assertEqual(_bytes('<z:b xmlns:z="http://cde"></z:b>'),
s)

def test_c14n_tostring_inclusive_ns_prefixes(self):
Expand Down