From d03e7404e3e3d404dbb26be7db5d12976efde179 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20D=C3=BCsterhus?= Date: Mon, 25 Aug 2025 23:40:10 +0200 Subject: [PATCH] uri: Stop touching the `lxb_url_parser_t.idna` field Looking at the Lexbor implementation, the `lxb_url_parser_t.idna` field is private and must not be touched from the outside. Lexbor expects to be able to manage it by itself when destroying a parser object. Fix the issue by putting the `lxb_unicode_idna_t` into a thread-local variable that we own. This also avoids one level of dynamic allocation. The same is done for the mraw. --- ext/uri/uri_parser_whatwg.c | 67 ++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/ext/uri/uri_parser_whatwg.c b/ext/uri/uri_parser_whatwg.c index 9a7dccdb74146..964787b6bf920 100644 --- a/ext/uri/uri_parser_whatwg.c +++ b/ext/uri/uri_parser_whatwg.c @@ -24,7 +24,9 @@ #include #endif -ZEND_TLS lxb_url_parser_t lexbor_parser; +ZEND_TLS lexbor_mraw_t lexbor_mraw = {0}; +ZEND_TLS lxb_url_parser_t lexbor_parser = {0}; +ZEND_TLS lxb_unicode_idna_t lexbor_idna = {0}; ZEND_TLS unsigned short int parsed_urls; static const unsigned short int maximum_parses_before_cleanup = 500; @@ -333,17 +335,6 @@ static zend_result php_uri_parser_whatwg_password_write(struct uri_internal_t *i return SUCCESS; } -static zend_result init_idna(void) -{ - if (lexbor_parser.idna != NULL) { - return SUCCESS; - } - - lexbor_parser.idna = lxb_unicode_idna_create(); - - return lxb_unicode_idna_init(lexbor_parser.idna) == LXB_STATUS_OK ? SUCCESS : FAILURE; -} - static zend_result php_uri_parser_whatwg_host_read(const struct uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval) { const lxb_url_t *lexbor_uri = internal_uri->uri; @@ -368,11 +359,8 @@ static zend_result php_uri_parser_whatwg_host_read(const struct uri_internal_t * switch (read_mode) { case URI_COMPONENT_READ_NORMALIZED_UNICODE: { smart_str host_str = {0}; - if (init_idna() == FAILURE) { - return FAILURE; - } - lxb_url_serialize_host_unicode(lexbor_parser.idna, &lexbor_uri->host, serialize_to_smart_str_callback, &host_str); - lxb_unicode_idna_clean(lexbor_parser.idna); + lxb_url_serialize_host_unicode(&lexbor_idna, &lexbor_uri->host, serialize_to_smart_str_callback, &host_str); + lxb_unicode_idna_clean(&lexbor_idna); ZVAL_NEW_STR(retval, smart_str_extract(&host_str)); break; @@ -525,29 +513,49 @@ static zend_result php_uri_parser_whatwg_fragment_write(struct uri_internal_t *i PHP_RINIT_FUNCTION(uri_parser_whatwg) { - lexbor_mraw_t *mraw = lexbor_mraw_create(); - lxb_status_t status = lexbor_mraw_init(mraw, lexbor_mraw_byte_size); + lxb_status_t status; + + status = lexbor_mraw_init(&lexbor_mraw, lexbor_mraw_byte_size); if (status != LXB_STATUS_OK) { - lexbor_mraw_destroy(mraw, true); - return FAILURE; + goto fail; } - status = lxb_url_parser_init(&lexbor_parser, mraw); + status = lxb_url_parser_init(&lexbor_parser, &lexbor_mraw); if (status != LXB_STATUS_OK) { - lxb_url_parser_destroy(&lexbor_parser, false); - lexbor_mraw_destroy(mraw, true); - return FAILURE; + goto fail; + } + + status = lxb_unicode_idna_init(&lexbor_idna); + if (status != LXB_STATUS_OK) { + goto fail; } parsed_urls = 0; return SUCCESS; + + fail: + + /* Unconditionally calling the _destroy() functions is + * safe on a zeroed structure. */ + lxb_unicode_idna_destroy(&lexbor_idna, false); + memset(&lexbor_idna, 0, sizeof(lexbor_idna)); + lxb_url_parser_destroy(&lexbor_parser, false); + memset(&lexbor_parser, 0, sizeof(lexbor_parser)); + lexbor_mraw_destroy(&lexbor_mraw, false); + memset(&lexbor_mraw, 0, sizeof(lexbor_mraw)); + + return FAILURE; } PHP_RSHUTDOWN_FUNCTION(uri_parser_whatwg) { - lxb_url_parser_memory_destroy(&lexbor_parser); + lxb_unicode_idna_destroy(&lexbor_idna, false); + memset(&lexbor_idna, 0, sizeof(lexbor_idna)); lxb_url_parser_destroy(&lexbor_parser, false); + memset(&lexbor_parser, 0, sizeof(lexbor_parser)); + lexbor_mraw_destroy(&lexbor_mraw, false); + memset(&lexbor_mraw, 0, sizeof(lexbor_mraw)); parsed_urls = 0; @@ -600,11 +608,8 @@ static zend_string *php_uri_parser_whatwg_to_string(void *uri, uri_recomposition case URI_RECOMPOSITION_RAW_UNICODE: ZEND_FALLTHROUGH; case URI_RECOMPOSITION_NORMALIZED_UNICODE: - if (init_idna() == FAILURE) { - return NULL; - } - lxb_url_serialize_idna(lexbor_parser.idna, lexbor_uri, serialize_to_smart_str_callback, &uri_str, exclude_fragment); - lxb_unicode_idna_clean(lexbor_parser.idna); + lxb_url_serialize_idna(&lexbor_idna, lexbor_uri, serialize_to_smart_str_callback, &uri_str, exclude_fragment); + lxb_unicode_idna_clean(&lexbor_idna); break; case URI_RECOMPOSITION_RAW_ASCII: ZEND_FALLTHROUGH;