From 74aafaf9d36af6eb36808366e6ab480b12524177 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sat, 28 Oct 2023 00:21:16 +0200
Subject: [PATCH] Fix #47531: No way of removing redundant xmlns: declarations
Now it's possible via removeAttribute("xmlns:prefix").
It was not possible to reuse a libxml2 function to reconcile because it
does not align with DOM behaviour.
---
ext/dom/element.c | 133 +++++++++++++-----
ext/dom/tests/DOMElement_toggleAttribute.phpt | 14 +-
ext/dom/tests/bug47531_a.phpt | 66 +++++++++
ext/dom/tests/bug47531_b.phpt | 67 +++++++++
4 files changed, 238 insertions(+), 42 deletions(-)
create mode 100644 ext/dom/tests/bug47531_a.phpt
create mode 100644 ext/dom/tests/bug47531_b.phpt
diff --git a/ext/dom/element.c b/ext/dom/element.c
index b009ca1d771dc..cad154a51b161 100644
--- a/ext/dom/element.c
+++ b/ext/dom/element.c
@@ -418,9 +418,68 @@ PHP_METHOD(DOMElement, setAttribute)
}
/* }}} end dom_element_set_attribute */
-static bool dom_remove_attribute(xmlNodePtr attrp)
+typedef struct {
+ xmlNodePtr current_node;
+ xmlNsPtr defined_ns;
+} dom_deep_ns_redef_item;
+
+/* Reconciliation for a *single* namespace, but reconciles *closest* to the subtree needing it. */
+static void dom_deep_ns_redef(xmlNodePtr node, xmlNsPtr ns_to_redefine)
{
+ size_t worklist_capacity = 128;
+ dom_deep_ns_redef_item *worklist = emalloc(sizeof(dom_deep_ns_redef_item) * worklist_capacity);
+ worklist[0].current_node = node;
+ worklist[0].defined_ns = NULL;
+ size_t worklist_size = 1;
+
+ while (worklist_size > 0) {
+ worklist_size--;
+ dom_deep_ns_redef_item *current_worklist_item = &worklist[worklist_size];
+ ZEND_ASSERT(current_worklist_item->current_node->type == XML_ELEMENT_NODE);
+ xmlNsPtr defined_ns = current_worklist_item->defined_ns;
+
+ if (current_worklist_item->current_node->ns == ns_to_redefine) {
+ if (defined_ns == NULL) {
+ defined_ns = xmlNewNs(current_worklist_item->current_node, ns_to_redefine->href, ns_to_redefine->prefix);
+ }
+ current_worklist_item->current_node->ns = defined_ns;
+ }
+
+ for (xmlAttrPtr attr = current_worklist_item->current_node->properties; attr; attr = attr->next) {
+ if (attr->ns == ns_to_redefine) {
+ if (defined_ns == NULL) {
+ defined_ns = xmlNewNs(current_worklist_item->current_node, ns_to_redefine->href, ns_to_redefine->prefix);
+ }
+ attr->ns = defined_ns;
+ }
+ }
+
+ for (xmlNodePtr child = current_worklist_item->current_node->children; child; child = child->next) {
+ if (child->type != XML_ELEMENT_NODE) {
+ continue;
+ }
+ if (worklist_size == worklist_capacity) {
+ if (UNEXPECTED(worklist_capacity >= SIZE_MAX / 3 * 2 / sizeof(dom_deep_ns_redef_item))) {
+ /* Shouldn't be possible to hit, but checked for safety anyway */
+ return;
+ }
+ worklist_capacity = worklist_capacity * 3 / 2;
+ worklist = erealloc(worklist, sizeof(dom_deep_ns_redef_item) * worklist_capacity);
+ }
+ worklist[worklist_size].current_node = child;
+ worklist[worklist_size].defined_ns = defined_ns;
+ worklist_size++;
+ }
+ }
+
+ efree(worklist);
+}
+
+static bool dom_remove_attribute(xmlNodePtr thisp, xmlNodePtr attrp)
+{
+ ZEND_ASSERT(thisp != NULL);
ZEND_ASSERT(attrp != NULL);
+
switch (attrp->type) {
case XML_ATTRIBUTE_NODE:
if (php_dom_object_get_data(attrp) == NULL) {
@@ -431,8 +490,42 @@ static bool dom_remove_attribute(xmlNodePtr attrp)
xmlUnlinkNode(attrp);
}
break;
- case XML_NAMESPACE_DECL:
- return false;
+ case XML_NAMESPACE_DECL: {
+ /* They will always be removed, but can be re-added.
+ *
+ * If any reference was left to the namespace, the only effect is that
+ * the definition is potentially moved closer to the element using it.
+ * If no reference was left, it is actually removed. */
+ xmlNsPtr ns = (xmlNsPtr) attrp;
+ if (thisp->nsDef == ns) {
+ thisp->nsDef = ns->next;
+ } else if (thisp->nsDef != NULL) {
+ xmlNsPtr prev = thisp->nsDef;
+ xmlNsPtr cur = prev->next;
+ while (cur) {
+ if (cur == ns) {
+ prev->next = cur->next;
+ break;
+ }
+ prev = cur;
+ cur = cur->next;
+ }
+ } else {
+ /* defensive: attrp not defined in thisp ??? */
+#if ZEND_DEBUG
+ ZEND_UNREACHABLE();
+#endif
+ break; /* defensive */
+ }
+
+ ns->next = NULL;
+ php_libxml_set_old_ns(thisp->doc, ns); /* note: can't deallocate as it might be referenced by a "fake namespace node" */
+ /* xmlReconciliateNs() redefines at the top of the tree instead of closest to the child, own reconciliation here.
+ * Similarly, the DOM version has other issues too (see dom_libxml_reconcile_ensure_namespaces_are_declared). */
+ dom_deep_ns_redef(thisp, ns);
+
+ break;
+ }
EMPTY_SWITCH_DEFAULT_CASE();
}
return true;
@@ -461,7 +554,7 @@ PHP_METHOD(DOMElement, removeAttribute)
RETURN_FALSE;
}
- RETURN_BOOL(dom_remove_attribute(attrp));
+ RETURN_BOOL(dom_remove_attribute(nodep, attrp));
}
/* }}} end dom_element_remove_attribute */
@@ -1425,37 +1518,7 @@ PHP_METHOD(DOMElement, toggleAttribute)
/* Step 5 */
if (force_is_null || !force) {
- if (attribute->type == XML_NAMESPACE_DECL) {
- /* The behaviour isn't defined by spec, but by observing browsers I found
- * that you can remove the nodes, but they'll get reconciled.
- * So if any reference was left to the namespace, the only effect is that
- * the definition is potentially moved closer to the element using it.
- * If no reference was left, it is actually removed. */
- xmlNsPtr ns = (xmlNsPtr) attribute;
- if (thisp->nsDef == ns) {
- thisp->nsDef = ns->next;
- } else if (thisp->nsDef != NULL) {
- xmlNsPtr prev = thisp->nsDef;
- xmlNsPtr cur = prev->next;
- while (cur) {
- if (cur == ns) {
- prev->next = cur->next;
- break;
- }
- prev = cur;
- cur = cur->next;
- }
- }
-
- ns->next = NULL;
- php_libxml_set_old_ns(thisp->doc, ns);
- dom_reconcile_ns(thisp->doc, thisp);
- } else {
- /* TODO: in the future when namespace bugs are fixed,
- * the above if-branch should be merged into this called function
- * such that the removal will work properly with all APIs. */
- dom_remove_attribute(attribute);
- }
+ dom_remove_attribute(thisp, attribute);
retval = false;
goto out;
}
diff --git a/ext/dom/tests/DOMElement_toggleAttribute.phpt b/ext/dom/tests/DOMElement_toggleAttribute.phpt
index a5a1ac5dd1e1a..ed29be899dac2 100644
--- a/ext/dom/tests/DOMElement_toggleAttribute.phpt
+++ b/ext/dom/tests/DOMElement_toggleAttribute.phpt
@@ -123,26 +123,26 @@ bool(true)
Toggling namespaces:
bool(false)
-
+
bool(false)
-
+
bool(true)
-
+
bool(false)
-
+
bool(false)
-
+
Toggling namespaced attributes:
bool(true)
bool(true)
bool(true)
bool(false)
-
+
namespace of test:test = NULL
namespace of foo:test = string(8) "some:ns2"
namespace of doesnotexist:test = NULL
@@ -153,6 +153,6 @@ bool(false)
bool(true)
bool(false)
-
+
Checking toggled namespace:
string(0) ""
diff --git a/ext/dom/tests/bug47531_a.phpt b/ext/dom/tests/bug47531_a.phpt
new file mode 100644
index 0000000000000..aa482109f41de
--- /dev/null
+++ b/ext/dom/tests/bug47531_a.phpt
@@ -0,0 +1,66 @@
+--TEST--
+Bug #47531 (No way of removing redundant xmlns: declarations)
+--EXTENSIONS--
+dom
+--FILE--
+loadXML(<<
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+XML);
+$root = $doc->documentElement;
+var_dump($root->removeAttribute("xmlns:foo"));
+echo $doc->saveXML();
+
+?>
+--EXPECT--
+bool(true)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ext/dom/tests/bug47531_b.phpt b/ext/dom/tests/bug47531_b.phpt
new file mode 100644
index 0000000000000..4a9caa8b3018b
--- /dev/null
+++ b/ext/dom/tests/bug47531_b.phpt
@@ -0,0 +1,67 @@
+--TEST--
+Bug #47531 (No way of removing redundant xmlns: declarations)
+--EXTENSIONS--
+dom
+--FILE--
+loadXML(<<
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+XML);
+$root = $doc->documentElement;
+$elem = $root->firstElementChild->nextElementSibling;
+var_dump($elem->removeAttribute("xmlns:foo"));
+echo $doc->saveXML();
+
+?>
+--EXPECT--
+bool(true)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+