From 69283ba0a3fa0858938ca513b4f0a610950b2e14 Mon Sep 17 00:00:00 2001 From: Gina Peter Banyard Date: Sun, 5 Jan 2025 10:38:15 +0000 Subject: [PATCH 01/10] ext/pcre: Refactor php_pcre_replace_func_impl() to not rely on an FCI --- ext/pcre/php_pcre.c | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 19068b90c0d0b..ade9b248c9eab 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -1544,40 +1544,26 @@ static int preg_get_backref(char **str, int *backref) } /* }}} */ -/* {{{ preg_do_repl_func */ -static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) +/* Return NULL if an exception has occurred */ +static zend_string *preg_do_repl_func(zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) { - zend_string *result_str; + zend_string *result_str = NULL; zval retval; /* Function return value */ zval arg; /* Argument to pass to function */ array_init_size(&arg, count + (mark ? 1 : 0)); populate_subpat_array(&arg, subject, offsets, subpat_names, num_subpats, count, mark, flags); - fci->retval = &retval; - fci->param_count = 1; - fci->params = &arg; - - if (zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) { - if (EXPECTED(Z_TYPE(retval) == IS_STRING)) { - result_str = Z_STR(retval); - } else { - result_str = zval_get_string_func(&retval); - zval_ptr_dtor(&retval); - } - } else { - if (!EG(exception)) { - php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function"); - } - - result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0); - } - + zend_call_known_fcc(fcc, &retval, 1, &arg, NULL); zval_ptr_dtor(&arg); + /* No Exception has occurred */ + if (EXPECTED(Z_TYPE(retval) != IS_UNDEF)) { + result_str = zval_try_get_string(&retval); + } + zval_ptr_dtor(&retval); return result_str; } -/* }}} */ /* {{{ php_pcre_replace */ PHPAPI zend_string *php_pcre_replace(zend_string *regex, @@ -1940,10 +1926,12 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin /* Use custom function to get replacement string and its length. */ eval_result = preg_do_repl_func( - fci, fcc, subject, offsets, subpat_names, num_subpats, count, + fcc, subject, offsets, subpat_names, num_subpats, count, pcre2_get_mark(match_data), flags); - ZEND_ASSERT(eval_result); + if (UNEXPECTED(eval_result == NULL)) { + goto error; + } new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD; if (new_len >= alloc_len) { alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD; From 61c0db97a2772d72d369f5d15b9927a799cf4fd2 Mon Sep 17 00:00:00 2001 From: Gina Peter Banyard Date: Sun, 5 Jan 2025 10:55:47 +0000 Subject: [PATCH 02/10] ext/pcre: Refactor populate_subpat_array() to take subject as a HashTable* This makes the assumption the zval is always an array explicit --- ext/pcre/php_pcre.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index ade9b248c9eab..c743856066761 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -1032,13 +1032,12 @@ static inline void add_offset_pair( /* }}} */ static void populate_subpat_array( - zval *subpats, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, + HashTable *subpats_ht, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) { zend_long offset_capture = flags & PREG_OFFSET_CAPTURE; zend_long unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL; zval val; int i; - HashTable *subpats_ht = Z_ARRVAL_P(subpats); if (subpat_names) { if (offset_capture) { for (i = 0; i < count; i++) { @@ -1088,15 +1087,17 @@ static void populate_subpat_array( zend_hash_next_index_insert_new(subpats_ht, &val); } if (unmatched_as_null) { + ZVAL_NULL(&val); for (i = count; i < num_subpats; i++) { - add_next_index_null(subpats); + zend_hash_next_index_insert_new(subpats_ht, &val); } } } } /* Add MARK, if available */ if (mark) { - add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark); + ZVAL_STRING(&val, (char *)mark); + zend_hash_str_add_new(subpats_ht, ZEND_STRL("MARK"), &val); } } @@ -1350,7 +1351,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, mark = pcre2_get_mark(match_data); array_init_size(&result_set, count + (mark ? 1 : 0)); populate_subpat_array( - &result_set, subject, offsets, subpat_names, + Z_ARRVAL(result_set), subject, offsets, subpat_names, num_subpats, count, mark, flags); /* And add it to the output array */ zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &result_set); @@ -1359,7 +1360,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, /* For each subpattern, insert it into the subpatterns array. */ mark = pcre2_get_mark(match_data); populate_subpat_array( - subpats, subject, offsets, subpat_names, num_subpats, count, mark, flags); + Z_ARRVAL_P(subpats), subject, offsets, subpat_names, num_subpats, count, mark, flags); break; } } @@ -1552,7 +1553,7 @@ static zend_string *preg_do_repl_func(zend_fcall_info_cache *fcc, const char *su zval arg; /* Argument to pass to function */ array_init_size(&arg, count + (mark ? 1 : 0)); - populate_subpat_array(&arg, subject, offsets, subpat_names, num_subpats, count, mark, flags); + populate_subpat_array(Z_ARRVAL(arg), subject, offsets, subpat_names, num_subpats, count, mark, flags); zend_call_known_fcc(fcc, &retval, 1, &arg, NULL); zval_ptr_dtor(&arg); From ca2af277ca4597cc83784b0b4789ee06e9f2fb90 Mon Sep 17 00:00:00 2001 From: Gina Peter Banyard Date: Sun, 5 Jan 2025 11:16:59 +0000 Subject: [PATCH 03/10] ext/pcre: Refactor php_pcre_replace_func_impl() We don't need the FCI anymore, and we always have the subject as a zend_string. --- ext/pcre/php_pcre.c | 50 +++++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index c743856066761..ed7f4a631a653 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -1835,14 +1835,12 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su } /* }}} */ -/* {{{ php_pcre_replace_func_impl() */ -static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags) +static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags) { uint32_t options; /* Execution options */ int count; /* Count of matched subpatterns */ zend_string **subpat_names; /* Array for named subpatterns */ uint32_t num_subpats; /* Number of captured subpatterns */ - size_t new_len; /* Length of needed storage */ size_t alloc_len; /* Actual allocated length */ PCRE2_SIZE start_offset; /* Where the new search starts */ size_t last_end_offset; /* Where the last search ended */ @@ -1850,7 +1848,6 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin *piece; /* The current piece of subject */ size_t result_len; /* Length of result */ zend_string *result; /* Result of replacement */ - zend_string *eval_result; /* Result of custom function */ pcre2_match_data *match_data; bool old_mdata_used; @@ -1889,15 +1886,15 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin /* Execute the regular expression. */ #ifdef HAVE_PCRE_JIT_SUPPORT if ((pce->preg_options & PREG_JIT) && options) { - count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset, + count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset, PCRE2_NO_UTF_CHECK, match_data, mctx); } else #endif - count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset, + count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset, options, match_data, mctx); while (1) { - piece = subject + last_end_offset; + piece = ZSTR_VAL(subject_str) + last_end_offset; if (count >= 0 && limit) { /* Check for too many substrings condition. */ @@ -1921,13 +1918,14 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin } /* Set the match location in subject */ - match = subject + offsets[0]; + match = ZSTR_VAL(subject_str) + offsets[0]; - new_len = result_len + offsets[0] - last_end_offset; /* part before the match */ + /* Length of needed storage */ + size_t new_len = result_len + offsets[0] - last_end_offset; /* part before the match */ /* Use custom function to get replacement string and its length. */ - eval_result = preg_do_repl_func( - fcc, subject, offsets, subpat_names, num_subpats, count, + zend_string *eval_result = preg_do_repl_func( + fcc, ZSTR_VAL(subject_str), offsets, subpat_names, num_subpats, count, pcre2_get_mark(match_data), flags); if (UNEXPECTED(eval_result == NULL)) { @@ -1964,10 +1962,10 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin the match again at the same point. If this fails (picked up above) we advance to the next character. */ if (start_offset == offsets[0]) { - count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset, + count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset, PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx); - piece = subject + start_offset; + piece = ZSTR_VAL(subject_str) + start_offset; if (count >= 0 && limit) { goto matched; } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) { @@ -1975,7 +1973,7 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin this is not necessarily the end. We need to advance the start offset, and continue. Fudge the offset values to achieve this, unless we're already at the end of the string. */ - if (start_offset < subject_len) { + if (start_offset < ZSTR_LEN(subject_str)) { size_t unit_len = calculate_unit_length(pce, piece); start_offset += unit_len; } else { @@ -1988,20 +1986,17 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) { not_matched: - if (!result && subject_str) { + if (result == NULL) { result = zend_string_copy(subject_str); break; } /* now we know exactly how long it is */ - alloc_len = result_len + subject_len - last_end_offset; - if (NULL != result) { - result = zend_string_realloc(result, alloc_len, 0); - } else { - result = zend_string_alloc(alloc_len, 0); - } + size_t segment_len = ZSTR_LEN(subject_str) - last_end_offset; + alloc_len = result_len + segment_len; + result = zend_string_realloc(result, alloc_len, 0); /* stick that last bit of string on our output */ - memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset); - result_len += subject_len - last_end_offset; + memcpy(ZSTR_VAL(result) + result_len, piece, segment_len); + result_len += segment_len; ZSTR_VAL(result)[result_len] = '\0'; ZSTR_LEN(result) = result_len; break; @@ -2016,11 +2011,11 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin } #ifdef HAVE_PCRE_JIT_SUPPORT if ((pce->preg_options & PREG_JIT)) { - count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset, + count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset, PCRE2_NO_UTF_CHECK, match_data, mctx); } else #endif - count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset, + count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset, PCRE2_NO_UTF_CHECK, match_data, mctx); } if (match_data != mdata) { @@ -2030,7 +2025,6 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin return result; } -/* }}} */ /* {{{ php_pcre_replace_func */ static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex, @@ -2046,9 +2040,7 @@ static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex, return NULL; } pce->refcount++; - result = php_pcre_replace_func_impl( - pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc, - limit, replace_count, flags); + result = php_pcre_replace_func_impl(pce, subject_str, fcc, limit, replace_count, flags); pce->refcount--; return result; From d7596286519760bbc4117403577b589859a252c2 Mon Sep 17 00:00:00 2001 From: Gina Peter Banyard Date: Sun, 5 Jan 2025 13:21:44 +0000 Subject: [PATCH 04/10] ext/pcre: Refactor php_pcre_replace_func() We don't need the FCI anymore --- ext/pcre/php_pcre.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index ed7f4a631a653..0114ffea7652e 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -2026,10 +2026,9 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin return result; } -/* {{{ php_pcre_replace_func */ static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex, zend_string *subject_str, - zend_fcall_info *fci, zend_fcall_info_cache *fcc, + zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags) { pcre_cache_entry *pce; /* Compiled regular expression */ @@ -2045,7 +2044,6 @@ static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex, return result; } -/* }}} */ /* {{{ php_pcre_replace_array */ static zend_string *php_pcre_replace_array(HashTable *regex, @@ -2152,8 +2150,7 @@ static zend_string *php_replace_in_subject_func(zend_string *regex_str, HashTabl zend_string *result; if (regex_str) { - result = php_pcre_replace_func( - regex_str, subject, fci, fcc, limit, replace_count, flags); + result = php_pcre_replace_func(regex_str, subject, fcc, limit, replace_count, flags); return result; } else { /* If regex is an array */ @@ -2172,7 +2169,7 @@ static zend_string *php_replace_in_subject_func(zend_string *regex_str, HashTabl /* Do the actual replacement and put the result back into subject for further replacements. */ result = php_pcre_replace_func( - regex_entry_str, subject, fci, fcc, limit, replace_count, flags); + regex_entry_str, subject, fcc, limit, replace_count, flags); zend_tmp_string_release(tmp_regex_entry_str); zend_string_release(subject); subject = result; From 0980e458f173d4d11eeb90dbfae8d1be892d678f Mon Sep 17 00:00:00 2001 From: Gina Peter Banyard Date: Sun, 5 Jan 2025 13:22:50 +0000 Subject: [PATCH 05/10] ext/pcre: Refactor php_replace_in_subject_func() We don't need the FCI anymore Make the Hashtable param const Throw exception on non string entries --- ext/pcre/php_pcre.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 0114ffea7652e..6c11a0450dbfc 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -2142,9 +2142,8 @@ static zend_always_inline zend_string *php_replace_in_subject( } /* }}} */ -/* {{{ php_replace_in_subject_func */ -static zend_string *php_replace_in_subject_func(zend_string *regex_str, HashTable *regex_ht, - zend_fcall_info *fci, zend_fcall_info_cache *fcc, +static zend_string *php_replace_in_subject_func(zend_string *regex_str, const HashTable *regex_ht, + zend_fcall_info_cache *fcc, zend_string *subject, size_t limit, size_t *replace_count, zend_long flags) { zend_string *result; @@ -2164,7 +2163,10 @@ static zend_string *php_replace_in_subject_func(zend_string *regex_str, HashTabl ZEND_HASH_FOREACH_VAL(regex_ht, regex_entry) { /* Make sure we're dealing with strings. */ zend_string *tmp_regex_entry_str; - zend_string *regex_entry_str = zval_get_tmp_string(regex_entry, &tmp_regex_entry_str); + zend_string *regex_entry_str = zval_try_get_tmp_string(regex_entry, &tmp_regex_entry_str); + if (UNEXPECTED(regex_entry_str == NULL)) { + break; + } /* Do the actual replacement and put the result back into subject for further replacements. */ @@ -2181,7 +2183,6 @@ static zend_string *php_replace_in_subject_func(zend_string *regex_str, HashTabl return subject; } } -/* }}} */ /* {{{ preg_replace_func_impl */ static size_t preg_replace_func_impl(zval *return_value, @@ -2194,7 +2195,7 @@ static size_t preg_replace_func_impl(zval *return_value, if (subject_str) { result = php_replace_in_subject_func( - regex_str, regex_ht, fci, fcc, subject_str, limit_val, &replace_count, flags); + regex_str, regex_ht, fcc, subject_str, limit_val, &replace_count, flags); if (result != NULL) { RETVAL_STR(result); } else { @@ -2218,7 +2219,7 @@ static size_t preg_replace_func_impl(zval *return_value, zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str); result = php_replace_in_subject_func( - regex_str, regex_ht, fci, fcc, subject_entry_str, limit_val, &replace_count, flags); + regex_str, regex_ht, fcc, subject_entry_str, limit_val, &replace_count, flags); if (result != NULL) { /* Add to return array */ ZVAL_STR(&zv, result); From f3c981a0b0f57e1f169f90688288799791151b8e Mon Sep 17 00:00:00 2001 From: Gina Peter Banyard Date: Sun, 5 Jan 2025 11:36:12 +0000 Subject: [PATCH 06/10] ext/pcre: Refactor preg_replace_func_impl() We don't need the FCI anymore Make the Hashtable params const Rename function to indicate it is a PHP pcre function --- ext/pcre/php_pcre.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 6c11a0450dbfc..a6da10a0be7d9 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -2184,11 +2184,10 @@ static zend_string *php_replace_in_subject_func(zend_string *regex_str, const Ha } } -/* {{{ preg_replace_func_impl */ -static size_t preg_replace_func_impl(zval *return_value, - zend_string *regex_str, HashTable *regex_ht, - zend_fcall_info *fci, zend_fcall_info_cache *fcc, - zend_string *subject_str, HashTable *subject_ht, zend_long limit_val, zend_long flags) +static size_t php_preg_replace_func_impl(zval *return_value, + zend_string *regex_str, const HashTable *regex_ht, + zend_fcall_info_cache *fcc, + zend_string *subject_str, const HashTable *subject_ht, zend_long limit_val, zend_long flags) { zend_string *result; size_t replace_count = 0; @@ -2216,7 +2215,10 @@ static size_t preg_replace_func_impl(zval *return_value, and add the result to the return_value array. */ ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) { zend_string *tmp_subject_entry_str; - zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str); + zend_string *subject_entry_str = zval_try_get_tmp_string(subject_entry, &tmp_subject_entry_str); + if (UNEXPECTED(subject_entry_str == NULL)) { + break; + } result = php_replace_in_subject_func( regex_str, regex_ht, fcc, subject_entry_str, limit_val, &replace_count, flags); @@ -2235,7 +2237,6 @@ static size_t preg_replace_func_impl(zval *return_value, return replace_count; } -/* }}} */ static void _preg_replace_common( zval *return_value, @@ -2393,8 +2394,8 @@ PHP_FUNCTION(preg_replace_callback) Z_PARAM_LONG(flags) ZEND_PARSE_PARAMETERS_END(); - replace_count = preg_replace_func_impl(return_value, regex_str, regex_ht, - &fci, &fcc, + replace_count = php_preg_replace_func_impl(return_value, regex_str, regex_ht, + &fcc, subject_str, subject_ht, limit, flags); if (zcount) { ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count); @@ -2445,7 +2446,7 @@ PHP_FUNCTION(preg_replace_callback_array) ZVAL_COPY_VALUE(&fci.function_name, replace); - replace_count += preg_replace_func_impl(&zv, str_idx_regex, /* regex_ht */ NULL, &fci, &fcc, + replace_count += php_preg_replace_func_impl(&zv, str_idx_regex, /* regex_ht */ NULL, &fcc, subject_str, subject_ht, limit, flags); switch (Z_TYPE(zv)) { case IS_ARRAY: From 8766b4cdad66bc8ec92827dfe2b7d03414fdd7c7 Mon Sep 17 00:00:00 2001 From: Gina Peter Banyard Date: Sun, 5 Jan 2025 13:08:45 +0000 Subject: [PATCH 07/10] ext/pcre: Add trampoline tests for preg_replace_callback(_array)() --- ext/pcre/tests/preg_replace_callback3.phpt | 28 ---- ...reg_replace_callback_array_trampoline.phpt | 111 +++++++++++++++ .../preg_replace_callback_trampoline.phpt | 133 ++++++++++++++++++ 3 files changed, 244 insertions(+), 28 deletions(-) delete mode 100644 ext/pcre/tests/preg_replace_callback3.phpt create mode 100644 ext/pcre/tests/preg_replace_callback_array_trampoline.phpt create mode 100644 ext/pcre/tests/preg_replace_callback_trampoline.phpt diff --git a/ext/pcre/tests/preg_replace_callback3.phpt b/ext/pcre/tests/preg_replace_callback3.phpt deleted file mode 100644 index 9a26555cb5cbf..0000000000000 --- a/ext/pcre/tests/preg_replace_callback3.phpt +++ /dev/null @@ -1,28 +0,0 @@ ---TEST-- -preg_replace_callback() 3 ---FILE-- -getMessage() . \PHP_EOL; -} -try { - var_dump(preg_replace_callback(1,2,3,4)); -} catch (\TypeError $e) { - echo $e->getMessage() . \PHP_EOL; -} - -$a = 5; -try { - var_dump(preg_replace_callback(1,2,3,4,$a)); -} catch (\TypeError $e) { - echo $e->getMessage() . \PHP_EOL; -} - -?> ---EXPECT-- -preg_replace_callback(): Argument #2 ($callback) must be a valid callback, no array or string given -preg_replace_callback(): Argument #2 ($callback) must be a valid callback, no array or string given -preg_replace_callback(): Argument #2 ($callback) must be a valid callback, no array or string given diff --git a/ext/pcre/tests/preg_replace_callback_array_trampoline.phpt b/ext/pcre/tests/preg_replace_callback_array_trampoline.phpt new file mode 100644 index 0000000000000..eb13a803a0d41 --- /dev/null +++ b/ext/pcre/tests/preg_replace_callback_array_trampoline.phpt @@ -0,0 +1,111 @@ +--TEST-- +preg_replace_callback_array() with a trampoline +--FILE-- + $callback, + '~\A.~' => $callback, + ], + [ + '@\b\w{1,2}\b@' => $callback, + '~\A.~' => $trampolineThrow, + ], + [ + '@\b\w{1,2}\b@' => $callback, + '~\A.~' => new stdClass(), + ], +]; + +$subjectsToTest = [ + [ + 'a b3 bcd', + 'v' => 'aksfjk', + 12 => 'aa bb', + ['xyz'], + ], + 'a b3 bcd', + [ + new stdClass(), + ], + new stdClass(), +]; + +foreach ($regexesToTest as $regex) { + foreach ($subjectsToTest as $subject) { + try { + $matches = preg_replace_callback_array($regex, $subject); + var_dump($matches); + } catch (Throwable $e) { + echo $e::class, ': ', $e->getMessage(), PHP_EOL; + } + } +} + +?> +--EXPECTF-- +Warning: Undefined variable $trampolineThrow in %s on line %d +Trampoline for trampoline +Trampoline for trampoline +Trampoline for trampoline +Trampoline for trampoline + +Warning: Array to string conversion in %s on line %d +Trampoline for trampoline +Trampoline for trampoline +Trampoline for trampoline +Trampoline for trampoline +array(4) { + [0]=> + string(14) "'''a' 'b3' bcd" + ["v"]=> + string(8) "'a'ksfjk" + [12]=> + string(11) "'''aa' 'bb'" + [13]=> + string(7) "'A'rray" +} +Trampoline for trampoline +Trampoline for trampoline +Trampoline for trampoline +string(14) "'''a' 'b3' bcd" +Error: Object of class stdClass could not be converted to string +TypeError: preg_replace_callback_array(): Argument #2 ($subject) must be of type array|string, stdClass given +Trampoline for trampoline +Trampoline for trampoline +Trampoline for trampoline +Trampoline for trampoline + +Warning: Array to string conversion in %s on line %d +TypeError: preg_replace_callback_array(): Argument #1 ($pattern) must contain only valid callbacks +Trampoline for trampoline +Trampoline for trampoline +TypeError: preg_replace_callback_array(): Argument #1 ($pattern) must contain only valid callbacks +Error: Object of class stdClass could not be converted to string +TypeError: preg_replace_callback_array(): Argument #2 ($subject) must be of type array|string, stdClass given +Trampoline for trampoline +Trampoline for trampoline +Trampoline for trampoline +Trampoline for trampoline + +Warning: Array to string conversion in %s on line %d +TypeError: preg_replace_callback_array(): Argument #1 ($pattern) must contain only valid callbacks +Trampoline for trampoline +Trampoline for trampoline +TypeError: preg_replace_callback_array(): Argument #1 ($pattern) must contain only valid callbacks +Error: Object of class stdClass could not be converted to string +TypeError: preg_replace_callback_array(): Argument #2 ($subject) must be of type array|string, stdClass given diff --git a/ext/pcre/tests/preg_replace_callback_trampoline.phpt b/ext/pcre/tests/preg_replace_callback_trampoline.phpt new file mode 100644 index 0000000000000..122b782ff9622 --- /dev/null +++ b/ext/pcre/tests/preg_replace_callback_trampoline.phpt @@ -0,0 +1,133 @@ +--TEST-- +preg_replace_callback() with a trampoline +--FILE-- + 'aksfjk', + 12 => 'aa bb', + ['xyz'], + ], + 'a b3 bcd', + [ + new stdClass(), + ], + new stdClass(), +]; + +foreach ([$callback, $callbackThrow] as $fn) { + foreach ($regexesToTest as $regex) { + foreach ($subjectsToTest as $subject) { + try { + $matches = preg_replace_callback($regex, $fn, $subject); + var_dump($matches); + } catch (Throwable $e) { + echo $e::class, ': ', $e->getMessage(), PHP_EOL; + } + } + } +} + +?> +--EXPECTF-- +Trampoline for trampoline +Trampoline for trampoline +Trampoline for trampoline +Trampoline for trampoline +Trampoline for trampoline +Trampoline for trampoline +Trampoline for trampoline + +Warning: Array to string conversion in %s on line %d +Trampoline for trampoline +array(4) { + [0]=> + string(14) "'''a' 'b3' bcd" + ["v"]=> + string(8) "'a'ksfjk" + [12]=> + string(11) "'''aa' 'bb'" + [13]=> + string(7) "'A'rray" +} +Trampoline for trampoline +Trampoline for trampoline +Trampoline for trampoline +string(14) "'''a' 'b3' bcd" +Error: Object of class stdClass could not be converted to string +TypeError: preg_replace_callback(): Argument #3 ($subject) must be of type array|string, stdClass given +Trampoline for trampoline +Trampoline for trampoline +Trampoline for trampoline +Trampoline for trampoline + +Warning: Array to string conversion in %s on line %d +array(4) { + [0]=> + string(12) "'a' 'b3' bcd" + ["v"]=> + string(6) "aksfjk" + [12]=> + string(9) "'aa' 'bb'" + [13]=> + string(5) "Array" +} +Trampoline for trampoline +Trampoline for trampoline +string(12) "'a' 'b3' bcd" +Error: Object of class stdClass could not be converted to string +TypeError: preg_replace_callback(): Argument #3 ($subject) must be of type array|string, stdClass given + +Warning: Array to string conversion in %s on line %d +Error: Object of class stdClass could not be converted to string +Error: Object of class stdClass could not be converted to string +Error: Object of class stdClass could not be converted to string +TypeError: preg_replace_callback(): Argument #3 ($subject) must be of type array|string, stdClass given +Trampoline for trampolineThrow + +Warning: Array to string conversion in %s on line %d +Exception: boo +Trampoline for trampolineThrow +Exception: boo +Error: Object of class stdClass could not be converted to string +TypeError: preg_replace_callback(): Argument #3 ($subject) must be of type array|string, stdClass given +Trampoline for trampolineThrow + +Warning: Array to string conversion in %s on line %d +Exception: boo +Trampoline for trampolineThrow +Exception: boo +Error: Object of class stdClass could not be converted to string +TypeError: preg_replace_callback(): Argument #3 ($subject) must be of type array|string, stdClass given + +Warning: Array to string conversion in %s on line %d +Error: Object of class stdClass could not be converted to string +Error: Object of class stdClass could not be converted to string +Error: Object of class stdClass could not be converted to string +TypeError: preg_replace_callback(): Argument #3 ($subject) must be of type array|string, stdClass given From 10435d280853b150a1f02546c80b7608cc506362 Mon Sep 17 00:00:00 2001 From: Gina Peter Banyard Date: Sun, 5 Jan 2025 13:09:42 +0000 Subject: [PATCH 08/10] ext/pcre: Handle trampolines properly for preg_replace_callback(_array)() --- ext/pcre/php_pcre.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index a6da10a0be7d9..ddc9cc8012ed3 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -2381,18 +2381,18 @@ PHP_FUNCTION(preg_replace_callback) zend_long limit = -1, flags = 0; size_t replace_count; zend_fcall_info fci; - zend_fcall_info_cache fcc; + zend_fcall_info_cache fcc = empty_fcall_info_cache; /* Get function parameters and do error-checking. */ ZEND_PARSE_PARAMETERS_START(3, 6) Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str) - Z_PARAM_FUNC(fci, fcc) + Z_PARAM_FUNC_NO_TRAMPOLINE_FREE(fci, fcc) Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str) Z_PARAM_OPTIONAL Z_PARAM_LONG(limit) Z_PARAM_ZVAL(zcount) Z_PARAM_LONG(flags) - ZEND_PARSE_PARAMETERS_END(); + ZEND_PARSE_PARAMETERS_END_EX(goto free_trampoline); replace_count = php_preg_replace_func_impl(return_value, regex_str, regex_ht, &fcc, @@ -2400,19 +2400,19 @@ PHP_FUNCTION(preg_replace_callback) if (zcount) { ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count); } + free_trampoline: + zend_release_fcall_info_cache(&fcc); } /* }}} */ /* {{{ Perform Perl-style regular expression replacement using replacement callback. */ PHP_FUNCTION(preg_replace_callback_array) { - zval zv, *replace, *zcount = NULL; + zval *replace, *zcount = NULL; HashTable *pattern, *subject_ht; zend_string *subject_str, *str_idx_regex; zend_long limit = -1, flags = 0; size_t replace_count = 0; - zend_fcall_info fci; - zend_fcall_info_cache fcc; /* Get function parameters and do error-checking. */ ZEND_PARSE_PARAMETERS_START(2, 5) @@ -2424,10 +2424,6 @@ PHP_FUNCTION(preg_replace_callback_array) Z_PARAM_LONG(flags) ZEND_PARSE_PARAMETERS_END(); - fci.size = sizeof(fci); - fci.object = NULL; - fci.named_params = NULL; - if (subject_ht) { GC_TRY_ADDREF(subject_ht); } else { @@ -2435,29 +2431,32 @@ PHP_FUNCTION(preg_replace_callback_array) } ZEND_HASH_FOREACH_STR_KEY_VAL(pattern, str_idx_regex, replace) { - if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) { - zend_argument_type_error(1, "must contain only valid callbacks"); - goto error; - } if (!str_idx_regex) { zend_argument_type_error(1, "must contain only string patterns as keys"); goto error; } - ZVAL_COPY_VALUE(&fci.function_name, replace); + zend_fcall_info_cache fcc = empty_fcall_info_cache; + if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) { + zend_argument_type_error(1, "must contain only valid callbacks"); + goto error; + } - replace_count += php_preg_replace_func_impl(&zv, str_idx_regex, /* regex_ht */ NULL, &fcc, + zval retval; + replace_count += php_preg_replace_func_impl(&retval, str_idx_regex, /* regex_ht */ NULL, &fcc, subject_str, subject_ht, limit, flags); - switch (Z_TYPE(zv)) { + zend_release_fcall_info_cache(&fcc); + + switch (Z_TYPE(retval)) { case IS_ARRAY: ZEND_ASSERT(subject_ht); zend_array_release(subject_ht); - subject_ht = Z_ARR(zv); + subject_ht = Z_ARR(retval); break; case IS_STRING: ZEND_ASSERT(subject_str); zend_string_release(subject_str); - subject_str = Z_STR(zv); + subject_str = Z_STR(retval); break; case IS_NULL: RETVAL_NULL(); From 06a72afcf042163fa74b7c239adfffde0ae8c668 Mon Sep 17 00:00:00 2001 From: Gina Peter Banyard Date: Tue, 7 Jan 2025 09:26:35 +0000 Subject: [PATCH 09/10] Fix test --- .../tests/preg_replace_callback_array_trampoline.phpt | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ext/pcre/tests/preg_replace_callback_array_trampoline.phpt b/ext/pcre/tests/preg_replace_callback_array_trampoline.phpt index eb13a803a0d41..753006175b878 100644 --- a/ext/pcre/tests/preg_replace_callback_array_trampoline.phpt +++ b/ext/pcre/tests/preg_replace_callback_array_trampoline.phpt @@ -23,7 +23,7 @@ $regexesToTest = [ ], [ '@\b\w{1,2}\b@' => $callback, - '~\A.~' => $trampolineThrow, + '~\A.~' => $callbackThrow, ], [ '@\b\w{1,2}\b@' => $callback, @@ -58,7 +58,6 @@ foreach ($regexesToTest as $regex) { ?> --EXPECTF-- -Warning: Undefined variable $trampolineThrow in %s on line %d Trampoline for trampoline Trampoline for trampoline Trampoline for trampoline @@ -91,10 +90,12 @@ Trampoline for trampoline Trampoline for trampoline Warning: Array to string conversion in %s on line %d -TypeError: preg_replace_callback_array(): Argument #1 ($pattern) must contain only valid callbacks +Trampoline for trampolineThrow +Exception: boo Trampoline for trampoline Trampoline for trampoline -TypeError: preg_replace_callback_array(): Argument #1 ($pattern) must contain only valid callbacks +Trampoline for trampolineThrow +Exception: boo Error: Object of class stdClass could not be converted to string TypeError: preg_replace_callback_array(): Argument #2 ($subject) must be of type array|string, stdClass given Trampoline for trampoline From e60ebc06c8de7b8d6e2ed26b3140c51a0becead1 Mon Sep 17 00:00:00 2001 From: Gina Peter Banyard Date: Wed, 8 Jan 2025 10:42:07 +0000 Subject: [PATCH 10/10] Revert FCI passing removal --- ext/pcre/php_pcre.c | 48 ++++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index ddc9cc8012ed3..efd071005d25d 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -1546,7 +1546,7 @@ static int preg_get_backref(char **str, int *backref) /* }}} */ /* Return NULL if an exception has occurred */ -static zend_string *preg_do_repl_func(zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) +static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) { zend_string *result_str = NULL; zval retval; /* Function return value */ @@ -1555,7 +1555,10 @@ static zend_string *preg_do_repl_func(zend_fcall_info_cache *fcc, const char *su array_init_size(&arg, count + (mark ? 1 : 0)); populate_subpat_array(Z_ARRVAL(arg), subject, offsets, subpat_names, num_subpats, count, mark, flags); - zend_call_known_fcc(fcc, &retval, 1, &arg, NULL); + fci->retval = &retval; + fci->param_count = 1; + fci->params = &arg; + zend_call_function(fci, fcc); zval_ptr_dtor(&arg); /* No Exception has occurred */ if (EXPECTED(Z_TYPE(retval) != IS_UNDEF)) { @@ -1835,8 +1838,10 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su } /* }}} */ -static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags) -{ +static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, + zend_fcall_info *fci, zend_fcall_info_cache *fcc, + size_t limit, size_t *replace_count, zend_long flags +) { uint32_t options; /* Execution options */ int count; /* Count of matched subpatterns */ zend_string **subpat_names; /* Array for named subpatterns */ @@ -1925,7 +1930,7 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin /* Use custom function to get replacement string and its length. */ zend_string *eval_result = preg_do_repl_func( - fcc, ZSTR_VAL(subject_str), offsets, subpat_names, num_subpats, count, + fci, fcc, ZSTR_VAL(subject_str), offsets, subpat_names, num_subpats, count, pcre2_get_mark(match_data), flags); if (UNEXPECTED(eval_result == NULL)) { @@ -2028,7 +2033,7 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex, zend_string *subject_str, - zend_fcall_info_cache *fcc, + zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags) { pcre_cache_entry *pce; /* Compiled regular expression */ @@ -2039,7 +2044,7 @@ static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex, return NULL; } pce->refcount++; - result = php_pcre_replace_func_impl(pce, subject_str, fcc, limit, replace_count, flags); + result = php_pcre_replace_func_impl(pce, subject_str, fci, fcc, limit, replace_count, flags); pce->refcount--; return result; @@ -2143,13 +2148,13 @@ static zend_always_inline zend_string *php_replace_in_subject( /* }}} */ static zend_string *php_replace_in_subject_func(zend_string *regex_str, const HashTable *regex_ht, - zend_fcall_info_cache *fcc, + zend_fcall_info *fci, zend_fcall_info_cache *fcc, zend_string *subject, size_t limit, size_t *replace_count, zend_long flags) { zend_string *result; if (regex_str) { - result = php_pcre_replace_func(regex_str, subject, fcc, limit, replace_count, flags); + result = php_pcre_replace_func(regex_str, subject, fci, fcc, limit, replace_count, flags); return result; } else { /* If regex is an array */ @@ -2171,7 +2176,7 @@ static zend_string *php_replace_in_subject_func(zend_string *regex_str, const Ha /* Do the actual replacement and put the result back into subject for further replacements. */ result = php_pcre_replace_func( - regex_entry_str, subject, fcc, limit, replace_count, flags); + regex_entry_str, subject, fci, fcc, limit, replace_count, flags); zend_tmp_string_release(tmp_regex_entry_str); zend_string_release(subject); subject = result; @@ -2186,7 +2191,7 @@ static zend_string *php_replace_in_subject_func(zend_string *regex_str, const Ha static size_t php_preg_replace_func_impl(zval *return_value, zend_string *regex_str, const HashTable *regex_ht, - zend_fcall_info_cache *fcc, + zend_fcall_info *fci, zend_fcall_info_cache *fcc, zend_string *subject_str, const HashTable *subject_ht, zend_long limit_val, zend_long flags) { zend_string *result; @@ -2194,7 +2199,7 @@ static size_t php_preg_replace_func_impl(zval *return_value, if (subject_str) { result = php_replace_in_subject_func( - regex_str, regex_ht, fcc, subject_str, limit_val, &replace_count, flags); + regex_str, regex_ht, fci, fcc, subject_str, limit_val, &replace_count, flags); if (result != NULL) { RETVAL_STR(result); } else { @@ -2221,7 +2226,7 @@ static size_t php_preg_replace_func_impl(zval *return_value, } result = php_replace_in_subject_func( - regex_str, regex_ht, fcc, subject_entry_str, limit_val, &replace_count, flags); + regex_str, regex_ht, fci, fcc, subject_entry_str, limit_val, &replace_count, flags); if (result != NULL) { /* Add to return array */ ZVAL_STR(&zv, result); @@ -2380,28 +2385,26 @@ PHP_FUNCTION(preg_replace_callback) HashTable *subject_ht; zend_long limit = -1, flags = 0; size_t replace_count; - zend_fcall_info fci; + zend_fcall_info fci = empty_fcall_info; zend_fcall_info_cache fcc = empty_fcall_info_cache; /* Get function parameters and do error-checking. */ ZEND_PARSE_PARAMETERS_START(3, 6) Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str) - Z_PARAM_FUNC_NO_TRAMPOLINE_FREE(fci, fcc) + Z_PARAM_FUNC(fci, fcc) Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str) Z_PARAM_OPTIONAL Z_PARAM_LONG(limit) Z_PARAM_ZVAL(zcount) Z_PARAM_LONG(flags) - ZEND_PARSE_PARAMETERS_END_EX(goto free_trampoline); + ZEND_PARSE_PARAMETERS_END(); replace_count = php_preg_replace_func_impl(return_value, regex_str, regex_ht, - &fcc, + &fci, &fcc, subject_str, subject_ht, limit, flags); if (zcount) { ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count); } - free_trampoline: - zend_release_fcall_info_cache(&fcc); } /* }}} */ @@ -2437,13 +2440,18 @@ PHP_FUNCTION(preg_replace_callback_array) } zend_fcall_info_cache fcc = empty_fcall_info_cache; + zend_fcall_info fci = empty_fcall_info; + fci.size = sizeof(zend_fcall_info); + /* Copy potential trampoline */ + ZVAL_COPY_VALUE(&fci.function_name, replace); + if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) { zend_argument_type_error(1, "must contain only valid callbacks"); goto error; } zval retval; - replace_count += php_preg_replace_func_impl(&retval, str_idx_regex, /* regex_ht */ NULL, &fcc, + replace_count += php_preg_replace_func_impl(&retval, str_idx_regex, /* regex_ht */ NULL, &fci, &fcc, subject_str, subject_ht, limit, flags); zend_release_fcall_info_cache(&fcc);