diff --git a/NEWS b/NEWS
index 28263cc2c0d24..7d0fc8ce6309d 100644
--- a/NEWS
+++ b/NEWS
@@ -25,6 +25,10 @@ PHP NEWS
- Tidy:
. Fixed bug GH-20374 (PHP with tidy and custom-tags). (ndossche)
+- XML:
+ . Fixed bug GH-20439 (xml_set_default_handler() does not properly handle
+ special characters in attributes when passing data to callback). (ndossche)
+
20 Nov 2025, PHP 8.3.28
- Core:
diff --git a/ext/xml/compat.c b/ext/xml/compat.c
index 3de77d0723e5f..25add45f0340a 100644
--- a/ext/xml/compat.c
+++ b/ext/xml/compat.c
@@ -45,6 +45,28 @@ _qualify_namespace(XML_Parser parser, const xmlChar *name, const xmlChar *URI, x
}
}
+static void start_element_emit_default(XML_Parser parser)
+{
+ if (parser->h_default) {
+ /* Grammar does not allow embedded '<' and '>' in elements, so we can seek to the start and end positions.
+ * Since the parser in the current mode mode is non-progressive, it contains the entire input. */
+ const xmlChar *cur = parser->parser->input->cur;
+ const xmlChar *end = cur;
+ for (const xmlChar *base = parser->parser->input->base; cur > base && *cur != '<'; cur--);
+ if (*end == '/') {
+ /* BC: Keep split between start & end element.
+ * TODO: In the future this could be aligned with expat and only emit a start event, or vice versa.
+ * See gh20439_2.phpt */
+ xmlChar *tmp = BAD_CAST estrndup((const char *) cur, end - cur + 1);
+ tmp[end - cur] = '>';
+ parser->h_default(parser->user, tmp, end - cur + 1);
+ efree(tmp);
+ } else {
+ parser->h_default(parser->user, cur, end - cur + 1);
+ }
+ }
+}
+
static void
_start_element_handler(void *user, const xmlChar *name, const xmlChar **attributes)
{
@@ -52,29 +74,7 @@ _start_element_handler(void *user, const xmlChar *name, const xmlChar **attribut
xmlChar *qualified_name = NULL;
if (parser->h_start_element == NULL) {
- if (parser->h_default) {
- int attno = 0;
-
- qualified_name = xmlStrncatNew((xmlChar *)"<", name, xmlStrlen(name));
- if (attributes) {
- while (attributes[attno] != NULL) {
- int att_len;
- char *att_string, *att_name, *att_value;
-
- att_name = (char *)attributes[attno++];
- att_value = (char *)attributes[attno++];
-
- att_len = spprintf(&att_string, 0, " %s=\"%s\"", att_name, att_value);
-
- qualified_name = xmlStrncat(qualified_name, (xmlChar *)att_string, att_len);
- efree(att_string);
- }
-
- }
- qualified_name = xmlStrncat(qualified_name, (xmlChar *)">", 1);
- parser->h_default(parser->user, (const XML_Char *) qualified_name, xmlStrlen(qualified_name));
- xmlFree(qualified_name);
- }
+ start_element_emit_default(parser);
return;
}
@@ -104,65 +104,7 @@ _start_element_handler_ns(void *user, const xmlChar *name, const xmlChar *prefix
}
if (parser->h_start_element == NULL) {
- if (parser->h_default) {
-
- if (prefix) {
- qualified_name = xmlStrncatNew((xmlChar *)"<", prefix, xmlStrlen(prefix));
- qualified_name = xmlStrncat(qualified_name, (xmlChar *)":", 1);
- qualified_name = xmlStrncat(qualified_name, name, xmlStrlen(name));
- } else {
- qualified_name = xmlStrncatNew((xmlChar *)"<", name, xmlStrlen(name));
- }
-
- if (namespaces) {
- int i, j;
- for (i = 0,j = 0;j < nb_namespaces;j++) {
- int ns_len;
- char *ns_string, *ns_prefix, *ns_url;
-
- ns_prefix = (char *) namespaces[i++];
- ns_url = (char *) namespaces[i++];
-
- if (ns_prefix) {
- ns_len = spprintf(&ns_string, 0, " xmlns:%s=\"%s\"", ns_prefix, ns_url);
- } else {
- ns_len = spprintf(&ns_string, 0, " xmlns=\"%s\"", ns_url);
- }
- qualified_name = xmlStrncat(qualified_name, (xmlChar *)ns_string, ns_len);
-
- efree(ns_string);
- }
- }
-
- if (attributes) {
- for (i = 0; i < nb_attributes; i += 1) {
- int att_len;
- char *att_string, *att_name, *att_value, *att_prefix, *att_valueend;
-
- att_name = (char *) attributes[y++];
- att_prefix = (char *)attributes[y++];
- y++;
- att_value = (char *)attributes[y++];
- att_valueend = (char *)attributes[y++];
-
- if (att_prefix) {
- att_len = spprintf(&att_string, 0, " %s:%s=\"", att_prefix, att_name);
- } else {
- att_len = spprintf(&att_string, 0, " %s=\"", att_name);
- }
-
- qualified_name = xmlStrncat(qualified_name, (xmlChar *)att_string, att_len);
- qualified_name = xmlStrncat(qualified_name, (xmlChar *)att_value, att_valueend - att_value);
- qualified_name = xmlStrncat(qualified_name, (xmlChar *)"\"", 1);
-
- efree(att_string);
- }
-
- }
- qualified_name = xmlStrncat(qualified_name, (xmlChar *)">", 1);
- parser->h_default(parser->user, (const XML_Char *) qualified_name, xmlStrlen(qualified_name));
- xmlFree(qualified_name);
- }
+ start_element_emit_default(parser);
return;
}
_qualify_namespace(parser, name, URI, &qualified_name);
diff --git a/ext/xml/tests/gh20439_1.phpt b/ext/xml/tests/gh20439_1.phpt
new file mode 100644
index 0000000000000..cda6803e9d078
--- /dev/null
+++ b/ext/xml/tests/gh20439_1.phpt
@@ -0,0 +1,24 @@
+--TEST--
+GH-20439 (xml_set_default_handler() does not properly handle special characters in attributes when passing data to callback)
+--EXTENSIONS--
+xml
+--FILE--
+";
+$inputs = str_split($input);
+
+// Test chunked parser wrt non-progressive parser
+foreach ($inputs as $input) {
+ xml_parse($x, $input, false);
+}
+xml_parse($x, "", true);
+
+?>
+--EXPECT--
+string(12) ""
+string(71) ""
+string(6) ""
diff --git a/ext/xml/tests/gh20439_2.phpt b/ext/xml/tests/gh20439_2.phpt
new file mode 100644
index 0000000000000..dce4f5976a140
--- /dev/null
+++ b/ext/xml/tests/gh20439_2.phpt
@@ -0,0 +1,22 @@
+--TEST--
+GH-20439 (xml_set_default_handler() does not properly handle special characters in attributes when passing data to callback) - closing solidus variant
+--EXTENSIONS--
+xml
+--SKIPIF--
+
+--FILE--
+";
+xml_parse($x, $input, true);
+
+?>
+--EXPECT--
+string(29) ""
+string(10) ""