From 74aafaf9d36af6eb36808366e6ab480b12524177 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 28 Oct 2023 00:21:16 +0200 Subject: [PATCH] Fix #47531: No way of removing redundant xmlns: declarations Now it's possible via removeAttribute("xmlns:prefix"). It was not possible to reuse a libxml2 function to reconcile because it does not align with DOM behaviour. --- ext/dom/element.c | 133 +++++++++++++----- ext/dom/tests/DOMElement_toggleAttribute.phpt | 14 +- ext/dom/tests/bug47531_a.phpt | 66 +++++++++ ext/dom/tests/bug47531_b.phpt | 67 +++++++++ 4 files changed, 238 insertions(+), 42 deletions(-) create mode 100644 ext/dom/tests/bug47531_a.phpt create mode 100644 ext/dom/tests/bug47531_b.phpt diff --git a/ext/dom/element.c b/ext/dom/element.c index b009ca1d771dc..cad154a51b161 100644 --- a/ext/dom/element.c +++ b/ext/dom/element.c @@ -418,9 +418,68 @@ PHP_METHOD(DOMElement, setAttribute) } /* }}} end dom_element_set_attribute */ -static bool dom_remove_attribute(xmlNodePtr attrp) +typedef struct { + xmlNodePtr current_node; + xmlNsPtr defined_ns; +} dom_deep_ns_redef_item; + +/* Reconciliation for a *single* namespace, but reconciles *closest* to the subtree needing it. */ +static void dom_deep_ns_redef(xmlNodePtr node, xmlNsPtr ns_to_redefine) { + size_t worklist_capacity = 128; + dom_deep_ns_redef_item *worklist = emalloc(sizeof(dom_deep_ns_redef_item) * worklist_capacity); + worklist[0].current_node = node; + worklist[0].defined_ns = NULL; + size_t worklist_size = 1; + + while (worklist_size > 0) { + worklist_size--; + dom_deep_ns_redef_item *current_worklist_item = &worklist[worklist_size]; + ZEND_ASSERT(current_worklist_item->current_node->type == XML_ELEMENT_NODE); + xmlNsPtr defined_ns = current_worklist_item->defined_ns; + + if (current_worklist_item->current_node->ns == ns_to_redefine) { + if (defined_ns == NULL) { + defined_ns = xmlNewNs(current_worklist_item->current_node, ns_to_redefine->href, ns_to_redefine->prefix); + } + current_worklist_item->current_node->ns = defined_ns; + } + + for (xmlAttrPtr attr = current_worklist_item->current_node->properties; attr; attr = attr->next) { + if (attr->ns == ns_to_redefine) { + if (defined_ns == NULL) { + defined_ns = xmlNewNs(current_worklist_item->current_node, ns_to_redefine->href, ns_to_redefine->prefix); + } + attr->ns = defined_ns; + } + } + + for (xmlNodePtr child = current_worklist_item->current_node->children; child; child = child->next) { + if (child->type != XML_ELEMENT_NODE) { + continue; + } + if (worklist_size == worklist_capacity) { + if (UNEXPECTED(worklist_capacity >= SIZE_MAX / 3 * 2 / sizeof(dom_deep_ns_redef_item))) { + /* Shouldn't be possible to hit, but checked for safety anyway */ + return; + } + worklist_capacity = worklist_capacity * 3 / 2; + worklist = erealloc(worklist, sizeof(dom_deep_ns_redef_item) * worklist_capacity); + } + worklist[worklist_size].current_node = child; + worklist[worklist_size].defined_ns = defined_ns; + worklist_size++; + } + } + + efree(worklist); +} + +static bool dom_remove_attribute(xmlNodePtr thisp, xmlNodePtr attrp) +{ + ZEND_ASSERT(thisp != NULL); ZEND_ASSERT(attrp != NULL); + switch (attrp->type) { case XML_ATTRIBUTE_NODE: if (php_dom_object_get_data(attrp) == NULL) { @@ -431,8 +490,42 @@ static bool dom_remove_attribute(xmlNodePtr attrp) xmlUnlinkNode(attrp); } break; - case XML_NAMESPACE_DECL: - return false; + case XML_NAMESPACE_DECL: { + /* They will always be removed, but can be re-added. + * + * If any reference was left to the namespace, the only effect is that + * the definition is potentially moved closer to the element using it. + * If no reference was left, it is actually removed. */ + xmlNsPtr ns = (xmlNsPtr) attrp; + if (thisp->nsDef == ns) { + thisp->nsDef = ns->next; + } else if (thisp->nsDef != NULL) { + xmlNsPtr prev = thisp->nsDef; + xmlNsPtr cur = prev->next; + while (cur) { + if (cur == ns) { + prev->next = cur->next; + break; + } + prev = cur; + cur = cur->next; + } + } else { + /* defensive: attrp not defined in thisp ??? */ +#if ZEND_DEBUG + ZEND_UNREACHABLE(); +#endif + break; /* defensive */ + } + + ns->next = NULL; + php_libxml_set_old_ns(thisp->doc, ns); /* note: can't deallocate as it might be referenced by a "fake namespace node" */ + /* xmlReconciliateNs() redefines at the top of the tree instead of closest to the child, own reconciliation here. + * Similarly, the DOM version has other issues too (see dom_libxml_reconcile_ensure_namespaces_are_declared). */ + dom_deep_ns_redef(thisp, ns); + + break; + } EMPTY_SWITCH_DEFAULT_CASE(); } return true; @@ -461,7 +554,7 @@ PHP_METHOD(DOMElement, removeAttribute) RETURN_FALSE; } - RETURN_BOOL(dom_remove_attribute(attrp)); + RETURN_BOOL(dom_remove_attribute(nodep, attrp)); } /* }}} end dom_element_remove_attribute */ @@ -1425,37 +1518,7 @@ PHP_METHOD(DOMElement, toggleAttribute) /* Step 5 */ if (force_is_null || !force) { - if (attribute->type == XML_NAMESPACE_DECL) { - /* The behaviour isn't defined by spec, but by observing browsers I found - * that you can remove the nodes, but they'll get reconciled. - * So if any reference was left to the namespace, the only effect is that - * the definition is potentially moved closer to the element using it. - * If no reference was left, it is actually removed. */ - xmlNsPtr ns = (xmlNsPtr) attribute; - if (thisp->nsDef == ns) { - thisp->nsDef = ns->next; - } else if (thisp->nsDef != NULL) { - xmlNsPtr prev = thisp->nsDef; - xmlNsPtr cur = prev->next; - while (cur) { - if (cur == ns) { - prev->next = cur->next; - break; - } - prev = cur; - cur = cur->next; - } - } - - ns->next = NULL; - php_libxml_set_old_ns(thisp->doc, ns); - dom_reconcile_ns(thisp->doc, thisp); - } else { - /* TODO: in the future when namespace bugs are fixed, - * the above if-branch should be merged into this called function - * such that the removal will work properly with all APIs. */ - dom_remove_attribute(attribute); - } + dom_remove_attribute(thisp, attribute); retval = false; goto out; } diff --git a/ext/dom/tests/DOMElement_toggleAttribute.phpt b/ext/dom/tests/DOMElement_toggleAttribute.phpt index a5a1ac5dd1e1a..ed29be899dac2 100644 --- a/ext/dom/tests/DOMElement_toggleAttribute.phpt +++ b/ext/dom/tests/DOMElement_toggleAttribute.phpt @@ -123,26 +123,26 @@ bool(true) Toggling namespaces: bool(false) - + bool(false) - + bool(true) - + bool(false) - + bool(false) - + Toggling namespaced attributes: bool(true) bool(true) bool(true) bool(false) - + namespace of test:test = NULL namespace of foo:test = string(8) "some:ns2" namespace of doesnotexist:test = NULL @@ -153,6 +153,6 @@ bool(false) bool(true) bool(false) - + Checking toggled namespace: string(0) "" diff --git a/ext/dom/tests/bug47531_a.phpt b/ext/dom/tests/bug47531_a.phpt new file mode 100644 index 0000000000000..aa482109f41de --- /dev/null +++ b/ext/dom/tests/bug47531_a.phpt @@ -0,0 +1,66 @@ +--TEST-- +Bug #47531 (No way of removing redundant xmlns: declarations) +--EXTENSIONS-- +dom +--FILE-- +loadXML(<< + + + + + + + + +

+ + +

+ + + + +
+
+ + + +
+ +XML); +$root = $doc->documentElement; +var_dump($root->removeAttribute("xmlns:foo")); +echo $doc->saveXML(); + +?> +--EXPECT-- +bool(true) + + + + + + + + + + +

+ + +

+ + + + +
+
+ + + +
+
diff --git a/ext/dom/tests/bug47531_b.phpt b/ext/dom/tests/bug47531_b.phpt new file mode 100644 index 0000000000000..4a9caa8b3018b --- /dev/null +++ b/ext/dom/tests/bug47531_b.phpt @@ -0,0 +1,67 @@ +--TEST-- +Bug #47531 (No way of removing redundant xmlns: declarations) +--EXTENSIONS-- +dom +--FILE-- +loadXML(<< + + + + + + + + +

+ + +

+ + + + +
+
+ + + +
+ +XML); +$root = $doc->documentElement; +$elem = $root->firstElementChild->nextElementSibling; +var_dump($elem->removeAttribute("xmlns:foo")); +echo $doc->saveXML(); + +?> +--EXPECT-- +bool(true) + + + + + + + + + + +

+ + +

+ + + + +
+
+ + + +
+