Skip to content

Commit

Permalink
Fix bug #55294 and #47530 and #47847: namespace reconciliation issues
Browse files Browse the repository at this point in the history
We'll use libxml2's DOM wrapper namespace reconciliation instead of the regular one.
It conforms to the behaviour we expect of DOM.
Most of this patch is tests.

I based and extended the tests on the code attached with the aforementioned
bug reports. Therefore the credits for the tests:
Co-authored-by: hilse at web dot de
Co-authored-by: robin2008 at altruists dot org
Co-authored-by: sgunderson at bigfoot dot com

We'll also change the searching point of the internal reconciliation to
start at the top of the added tree to avoid redundant work now that the
function is changed.

Closes GH-11454.
  • Loading branch information
nielsdos committed Jun 15, 2023
1 parent 9b18466 commit b30be40
Show file tree
Hide file tree
Showing 5 changed files with 233 additions and 10 deletions.
2 changes: 2 additions & 0 deletions NEWS
Expand Up @@ -36,6 +36,8 @@ PHP NEWS
. Fixed bug #78577 (Crash in DOMNameSpace debug info handlers). (nielsdos)
. Fix lifetime issue with getAttributeNodeNS(). (nielsdos)
. Fix "invalid state error" with cloned namespace declarations. (nielsdos)
. Fixed bug #55294 and #47530 and #47847 (various namespace reconciliation
issues). (nielsdos)

- Opcache:
. Fix allocation loop in zend_shared_alloc_startup(). (nielsdos)
Expand Down
33 changes: 23 additions & 10 deletions ext/dom/php_dom.c
Expand Up @@ -1441,7 +1441,7 @@ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns) {
}
/* }}} end dom_set_old_ns */

static void dom_reconcile_ns_internal(xmlDocPtr doc, xmlNodePtr nodep)
static void dom_reconcile_ns_internal(xmlDocPtr doc, xmlNodePtr nodep, xmlNodePtr search_parent)
{
xmlNsPtr nsptr, nsdftptr, curns, prevns = NULL;

Expand All @@ -1451,7 +1451,7 @@ static void dom_reconcile_ns_internal(xmlDocPtr doc, xmlNodePtr nodep)
while (curns) {
nsdftptr = curns->next;
if (curns->href != NULL) {
if((nsptr = xmlSearchNsByHref(doc, nodep->parent, curns->href)) &&
if((nsptr = xmlSearchNsByHref(doc, search_parent, curns->href)) &&
(curns->prefix == NULL || xmlStrEqual(nsptr->prefix, curns->prefix))) {
curns->next = NULL;
if (prevns == NULL) {
Expand All @@ -1469,23 +1469,34 @@ static void dom_reconcile_ns_internal(xmlDocPtr doc, xmlNodePtr nodep)
}
}

/* Runs libxml2's DOM-wrapper namespace reconciliation (xmlDOMWrapReconcileNamespaces())
 * on nodep, using a zero-initialized context and no options.
 * The return value of the libxml2 call is not inspected. */
static void dom_libxml_reconcile_ensure_namespaces_are_declared(xmlNodePtr nodep)
{
/* The context is put on the stack to avoid an allocation.
* Although libxml2 currently does not use it for the reconciliation, it still
* makes sense to pass one in case libxml2's internals change in the future. */
xmlDOMWrapCtxt dummy_ctxt = {0};
xmlDOMWrapReconcileNamespaces(&dummy_ctxt, nodep, /* options */ 0);
}

void dom_reconcile_ns(xmlDocPtr doc, xmlNodePtr nodep) /* {{{ */
{
/* Although the node type will be checked by the libxml2 API,
* we still want to do the internal reconciliation conditionally. */
if (nodep->type == XML_ELEMENT_NODE) {
dom_reconcile_ns_internal(doc, nodep);
xmlReconciliateNs(doc, nodep);
dom_reconcile_ns_internal(doc, nodep, nodep->parent);
dom_libxml_reconcile_ensure_namespaces_are_declared(nodep);
}
}
/* }}} */

static void dom_reconcile_ns_list_internal(xmlDocPtr doc, xmlNodePtr nodep, xmlNodePtr last)
static void dom_reconcile_ns_list_internal(xmlDocPtr doc, xmlNodePtr nodep, xmlNodePtr last, xmlNodePtr search_parent)
{
ZEND_ASSERT(nodep != NULL);
while (true) {
if (nodep->type == XML_ELEMENT_NODE) {
dom_reconcile_ns_internal(doc, nodep);
dom_reconcile_ns_internal(doc, nodep, search_parent);
if (nodep->children) {
dom_reconcile_ns_list_internal(doc, nodep->children, nodep->last /* process the whole children list */);
dom_reconcile_ns_list_internal(doc, nodep->children, nodep->last /* process the whole children list */, search_parent);
}
}
if (nodep == last) {
Expand All @@ -1497,10 +1508,12 @@ static void dom_reconcile_ns_list_internal(xmlDocPtr doc, xmlNodePtr nodep, xmlN

void dom_reconcile_ns_list(xmlDocPtr doc, xmlNodePtr nodep, xmlNodePtr last)
{
dom_reconcile_ns_list_internal(doc, nodep, last);
/* Outside of the recursion above because xmlReconciliateNs() performs its own recursion. */
dom_reconcile_ns_list_internal(doc, nodep, last, nodep->parent);
/* The loop is outside of the recursion in the above call because
* dom_libxml_reconcile_ensure_namespaces_are_declared() performs its own recursion. */
while (true) {
xmlReconciliateNs(doc, nodep);
/* The internal libxml2 call will already check the node type, no need for us to do it here. */
dom_libxml_reconcile_ensure_namespaces_are_declared(nodep);
if (nodep == last) {
break;
}
Expand Down
152 changes: 152 additions & 0 deletions ext/dom/tests/bug47530.phpt
@@ -0,0 +1,152 @@
--TEST--
Bug #47530 (Importing objects into document fragments creates bogus "default" namespace)
--EXTENSIONS--
dom
--FILE--
<?php

// Passes the root's first child (<element>, which carries the namespaced attribute ns:foo)
// through importNode() on the same document, places the result in a document fragment,
// appends that fragment to the root element and prints the serialized document.
function test_document_fragment_with_import() {
$doc = new DOMDocument;
$doc->loadXML('<html xmlns="https://php.net/something" xmlns:ns="https://php.net/whatever"><element ns:foo="https://php.net/bar"/></html>');
$root = $doc->documentElement;
$frag = $doc->createDocumentFragment();
$frag->appendChild($doc->importNode($root->firstChild));
$root->appendChild($frag);
echo $doc->saveXML();
}

// Builds a fragment from two freshly created elements named "bar" (one in the
// https://php.net/bar namespace, one created with an empty namespace URI), appends the
// fragment to <element>, prints the document, and then inspects the first appended child
// after both the fragment and the document variables have been unset.
function test_document_fragment_without_import() {
$doc = new DOMDocument;
$doc->loadXML('<html xmlns=""><element xmlns:foo="https://php.net/bar"/></html>');
$frag = $doc->createDocumentFragment();
$frag->appendChild($doc->createElementNS('https://php.net/bar', 'bar'));
$frag->appendChild($doc->createElementNS('', 'bar'));
$element = $doc->documentElement->firstChild;
$element->appendChild($frag);
unset($frag); // Free fragment, should not break getting the namespaceURI below
echo $doc->saveXML();
// $element is still referenced here, so its children are read after $doc is unset.
unset($doc);
var_dump($element->firstChild->tagName);
var_dump($element->firstChild->namespaceURI);
}

// Imports the root of a parsed Atom feed, whose child <div> declares its own default
// namespace, into a new empty document and prints the serialized result.
function test_document_import() {
$xml = <<<XML
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<div xmlns="http://www.w3.org/1999/xhtml">
<p>Test-Text</p>
</div>
</feed>
XML;

$dom = new DOMDocument();
$dom->loadXML($xml);

$dom2 = new DOMDocument();
// Second argument true: import the element together with its descendants.
$importedNode = $dom2->importNode($dom->documentElement, true);
$dom2->appendChild($importedNode);

echo $dom2->saveXML();
}

// Imports a whole source tree (with a default namespace plus the "test" and "example"
// prefixes) underneath the root of a second document. That target root already declares
// the "test" prefix with the same URI, and uses the source's "example" URI as its
// default namespace. The source document is released before the target is serialized.
function test_partial_document_import() {
$xml = <<<XML
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/1999/xhtml" xmlns:test="https://php.net/test" xmlns:example="https://php.net/example">
<div>
<p>Test-Text</p>
<example:p>More test text</example:p>
<test:p>Even more test text</test:p>
</div>
</feed>
XML;

$dom = new DOMDocument();
$dom->loadXML($xml);

$dom2 = new DOMDocument();
$dom2->loadXML('<?xml version="1.0"?><container xmlns:test="https://php.net/test" xmlns="https://php.net/example"/>');
// Second argument true: import the element together with its descendants.
$importedNode = $dom2->importNode($dom->documentElement, true);
$dom2->documentElement->appendChild($importedNode);

// Freeing the original document shouldn't break the other document
unset($importedNode);
unset($dom);

echo $dom2->saveXML();
}

// Imports <p> (which has the prefixed attribute example:test) and then its sibling <i>
// from the first document into a second document whose root binds the "example" prefix
// to a different URI. The target document is printed after each append.
function test_document_import_with_attributes() {
$dom = new DOMDocument();
$dom->loadXML('<?xml version="1.0"?><div xmlns="https://php.net/default" xmlns:example="https://php.net/example"><p example:test="test"/><i/></div>');
$dom2 = new DOMDocument();
$dom2->loadXML('<?xml version="1.0"?><div xmlns:example="https://php.net/somethingelse"/>');
// importNode() is called without the deep flag here; <p> has no child nodes.
$dom2->documentElement->appendChild($dom2->importNode($dom->documentElement->firstChild));
echo $dom2->saveXML(), "\n";

// Append <i> below the <p> element that was imported in the previous step.
$dom2->documentElement->firstChild->appendChild($dom2->importNode($dom->documentElement->firstChild->nextSibling));
echo $dom2->saveXML(), "\n";
}

// Detaches <b> from the container and re-appends it under its former sibling <a>.
// Both <a> and <b> declare the prefix "foo", but bound to different URIs, and <b>'s
// descendants use both the "foo" prefix and the container's "default" prefix.
function test_appendChild_with_shadowing() {
$dom = new DOMDocument();
$dom->loadXML('<?xml version="1.0"?><container xmlns:default="http://php.net/default"><a xmlns:foo="http://php.net/bar"/><b xmlns:foo="http://php.net/foo"><default:test foo:bar=""/><foo:test2/></b></container>');

$a = $dom->documentElement->firstElementChild;
// The input has no whitespace between <a/> and <b>, so nextSibling is the <b> element.
$b = $a->nextSibling;
$b->remove();
$a->appendChild($b);

echo $dom->saveXML(), "\n";
}

// Run every scenario in a fixed order, printing a header line before each one so the
// sections can be told apart in the expected output.
echo "-- Test document fragment with import --\n";
test_document_fragment_with_import();
echo "-- Test document fragment without import --\n";
test_document_fragment_without_import();
echo "-- Test document import --\n";
test_document_import();
echo "-- Test partial document import --\n";
test_partial_document_import();
echo "-- Test document import with attributes --\n";
test_document_import_with_attributes();
echo "-- Test appendChild with shadowing --\n";
test_appendChild_with_shadowing();

?>
--EXPECT--
-- Test document fragment with import --
<?xml version="1.0"?>
<html xmlns="https://php.net/something" xmlns:ns="https://php.net/whatever"><element ns:foo="https://php.net/bar"/></html>
-- Test document fragment without import --
<?xml version="1.0"?>
<html xmlns=""><element xmlns:foo="https://php.net/bar"><foo:bar/><bar xmlns=""/></element></html>
string(7) "foo:bar"
string(19) "https://php.net/bar"
-- Test document import --
<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<div xmlns="http://www.w3.org/1999/xhtml">
<p>Test-Text</p>
</div>
</feed>
-- Test partial document import --
<?xml version="1.0"?>
<container xmlns:test="https://php.net/test" xmlns="https://php.net/example"><feed xmlns="http://www.w3.org/1999/xhtml" xmlns:example="https://php.net/example">
<div>
<p>Test-Text</p>
<example:p>More test text</example:p>
<test:p>Even more test text</test:p>
</div>
</feed></container>
-- Test document import with attributes --
<?xml version="1.0"?>
<div xmlns:example="https://php.net/somethingelse"><p xmlns="https://php.net/default" xmlns:example="https://php.net/example" example:test="test"/></div>

<?xml version="1.0"?>
<div xmlns:example="https://php.net/somethingelse"><p xmlns="https://php.net/default" xmlns:example="https://php.net/example" example:test="test"><i/></p></div>

-- Test appendChild with shadowing --
<?xml version="1.0"?>
<container xmlns:default="http://php.net/default"><a xmlns:foo="http://php.net/bar"><b xmlns:foo="http://php.net/foo"><default:test foo:bar=""/><foo:test2/></b></a></container>
27 changes: 27 additions & 0 deletions ext/dom/tests/bug47847.phpt
@@ -0,0 +1,27 @@
--TEST--
Bug #47847 (importNode loses the namespace of an XML element)
--EXTENSIONS--
dom
--FILE--
<?php
// Source document: <ns:inner> sets the default namespace to the same URI as the "ns"
// prefix, and its child resets the default namespace to "" while still using "ns".
$fromdom = new DOMDocument();
$fromdom->loadXML(<<<XML
<?xml version="1.0"?>
<ns:container xmlns:ns="http://php.net">
<ns:inner xmlns="http://php.net">
<ns:WATCH-MY-NAMESPACE xmlns=""/>
</ns:inner>
</ns:container>
XML);

// Import <ns:inner> together with its descendants (second argument true) into an empty
// document and print the serialized result.
$aDOM = new DOMDocument();
$imported = $aDOM->importNode($fromdom->documentElement->firstElementChild, true);
$aDOM->appendChild($imported);

echo $aDOM->saveXML();
?>
--EXPECT--
<?xml version="1.0"?>
<ns:inner xmlns="http://php.net" xmlns:ns="http://php.net">
<ns:WATCH-MY-NAMESPACE xmlns=""/>
</ns:inner>
29 changes: 29 additions & 0 deletions ext/dom/tests/bug55294.phpt
@@ -0,0 +1,29 @@
--TEST--
Bug #55294 (DOMDocument::importNode shifts namespaces when "default" namespace exists)
--EXTENSIONS--
dom
--FILE--
<?php

// Source document: <C> overrides the default namespace and additionally declares a
// prefix that is literally named "default", bound to a third URI.
$aDOM = new DOMDocument();
$aDOM->loadXML(<<<EOXML
<A xmlns="http://example.com/A">
<B>
<C xmlns="http://example.com/C" xmlns:default="http://example.com/Z" />
</B>
</A>
EOXML
);

// Import <B> together with its descendants (second argument true) into an empty
// document and print the serialized result.
$bDOM = new DOMDocument();
$node = $bDOM->importNode($aDOM->getElementsByTagNameNS('http://example.com/A', 'B')->item(0), true);
$bDOM->appendChild($node);

echo $bDOM->saveXML(), "\n";

?>
--EXPECT--
<?xml version="1.0"?>
<B xmlns="http://example.com/A">
<C xmlns="http://example.com/C" xmlns:default="http://example.com/Z"/>
</B>

2 comments on commit b30be40

@hwhw
Copy link

@hwhw hwhw commented on b30be40 Jun 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yay! Thanks for your work! (Totally lost track of this, this being #55294 - I'm hilse@web.de)

@nielsdos
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah it was quite an old bug 😅 . Glad I could help, even though it took a long time :)

Please sign in to comment.