From 5bb3e233db60593087bb786d6421abbff215af3b Mon Sep 17 00:00:00 2001 From: tobil4sk Date: Sun, 24 Apr 2022 20:52:53 +0200 Subject: [PATCH] Implement #77726: Allow null character in regex patterns In 8b3c1a3, this was disallowed to fix #55856, which was a security issue caused by the /e modifier. The fix applied at that time was the "Easier fix" described in the original report. With this change, pattern strings are no longer treated as null-terminated, so null characters can be placed inside regex patterns and matched against without security problems; there is no longer a reason to raise the error. Allowing this is consistent with the behaviour of many other languages, including JavaScript, and thanks to PCRE2[0], it does not require manually escaping null characters. Now that we can avoid the error without the cost of escaping characters, there is no longer any need to stray from the conventional behaviour. Currently, null characters are still disallowed before the first delimiter and in the options section at the end of a regex string, but the corresponding error messages have been updated. [0] Since PCRE2, pattern strings no longer have to be null-terminated, and raw null characters match as normal. Closes GH-8114. 
--- NEWS | 3 + UPGRADING | 3 + ext/pcre/php_pcre.c | 38 ++++------ ext/pcre/tests/bug73392.phpt | 2 +- ext/pcre/tests/delimiters.phpt | 6 +- ext/pcre/tests/null_bytes.phpt | 76 ++++++++++++------- ext/pcre/tests/preg_grep_error1.phpt | 2 +- ext/pcre/tests/preg_match_all_error1.phpt | 2 +- ext/pcre/tests/preg_match_error1.phpt | 2 +- .../preg_replace_callback_array_error.phpt | 66 ++++++++++++++++ ...eg_replace_callback_array_fatal_error.phpt | 21 +++++ .../tests/preg_replace_callback_error1.phpt | 2 +- ext/pcre/tests/preg_replace_error1.phpt | 2 +- ext/pcre/tests/preg_split_error1.phpt | 2 +- 14 files changed, 170 insertions(+), 57 deletions(-) create mode 100644 ext/pcre/tests/preg_replace_callback_array_error.phpt create mode 100644 ext/pcre/tests/preg_replace_callback_array_fatal_error.phpt diff --git a/NEWS b/NEWS index 0558f682fabb7..cc16bfee8c82f 100644 --- a/NEWS +++ b/NEWS @@ -24,6 +24,9 @@ GD: - ODBC: . Fixed handling of single-key connection strings. (Calvin Buckley) +- PCRE: + . Implemented FR #77726 (Allow null character in regex patterns). (cmb) + - PDO_ODBC: . Fixed handling of single-key connection strings. (Calvin Buckley) diff --git a/UPGRADING b/UPGRADING index 7ae56581edad4..1f47c862c7af1 100644 --- a/UPGRADING +++ b/UPGRADING @@ -221,6 +221,9 @@ PHP 8.2 UPGRADE NOTES - OCI8: . The minimum Oracle Client library version required is now 11.2. +- PCRE: + . NUL characters (\0) in pattern strings are now supported. + - SQLite3: . sqlite3.defensive is now PHP_INI_USER. 
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 840d696019796..1061441074f9d 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -624,7 +624,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in pcre_cache_entry new_entry; int rc; zend_string *key; - pcre_cache_entry *ret; + pcre_cache_entry *ret; if (locale_aware && BG(ctype_string)) { key = zend_string_concat2( @@ -645,16 +645,16 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in } p = ZSTR_VAL(regex); + const char* end_p = ZSTR_VAL(regex) + ZSTR_LEN(regex); /* Parse through the leading whitespace, and display a warning if we get to the end without encountering a delimiter. */ while (isspace((int)*(unsigned char *)p)) p++; - if (*p == 0) { + if (p >= end_p) { if (key != regex) { zend_string_release_ex(key, 0); } - php_error_docref(NULL, E_WARNING, - p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression"); + php_error_docref(NULL, E_WARNING, "Empty regular expression"); pcre_handle_exec_error(PCRE2_ERROR_INTERNAL); return NULL; } @@ -662,11 +662,11 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in /* Get the delimiter and display a warning if it is alphanumeric or a backslash. 
*/ delimiter = *p++; - if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') { + if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\' || delimiter == '\0') { if (key != regex) { zend_string_release_ex(key, 0); } - php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash"); + php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric, backslash, or NUL"); pcre_handle_exec_error(PCRE2_ERROR_INTERNAL); return NULL; } @@ -682,8 +682,8 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in /* We need to iterate through the pattern, searching for the ending delimiter, but skipping the backslashed delimiters. If the ending delimiter is not found, display a warning. */ - while (*pp != 0) { - if (*pp == '\\' && pp[1] != 0) pp++; + while (pp < end_p) { + if (*pp == '\\' && pp + 1 < end_p) pp++; else if (*pp == delimiter) break; pp++; @@ -695,8 +695,8 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in * reach the end of the pattern without matching, display a warning. 
*/ int brackets = 1; /* brackets nesting level */ - while (*pp != 0) { - if (*pp == '\\' && pp[1] != 0) pp++; + while (pp < end_p) { + if (*pp == '\\' && pp + 1 < end_p) pp++; else if (*pp == end_delimiter && --brackets <= 0) break; else if (*pp == start_delimiter) @@ -705,13 +705,11 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in } } - if (*pp == 0) { + if (pp >= end_p) { if (key != regex) { zend_string_release_ex(key, 0); } - if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) { - php_error_docref(NULL,E_WARNING, "Null byte in regex"); - } else if (start_delimiter == end_delimiter) { + if (start_delimiter == end_delimiter) { php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter); } else { php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter); @@ -729,7 +727,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in /* Parse through the options, setting appropriate flags. Display a warning if we encounter an unknown modifier. 
*/ - while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) { + while (pp < end_p) { switch (*pp++) { /* Perl compatible options */ case 'i': coptions |= PCRE2_CASELESS; break; @@ -764,9 +762,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in default: if (pp[-1]) { - php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]); + php_error_docref(NULL, E_WARNING, "Unknown modifier '%c'", pp[-1]); } else { - php_error_docref(NULL,E_WARNING, "Null byte in regex"); + php_error_docref(NULL, E_WARNING, "NUL is not a valid modifier"); } pcre_handle_exec_error(PCRE2_ERROR_INTERNAL); efree(pattern); @@ -2438,12 +2436,6 @@ PHP_FUNCTION(preg_replace_callback_array) } ZEND_HASH_FOREACH_STR_KEY_VAL(pattern, str_idx_regex, replace) { - if (!str_idx_regex) { - php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash"); - RETVAL_NULL(); - goto error; - } - if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) { zend_argument_type_error(1, "must contain only valid callbacks"); goto error; diff --git a/ext/pcre/tests/bug73392.phpt b/ext/pcre/tests/bug73392.phpt index 7546f5d99fb8d..a1cb3ac8480a0 100644 --- a/ext/pcre/tests/bug73392.phpt +++ b/ext/pcre/tests/bug73392.phpt @@ -21,5 +21,5 @@ var_dump(preg_replace_callback_array( ), 'a')); ?> --EXPECTF-- -Warning: preg_replace_callback_array(): Delimiter must not be alphanumeric or backslash in %sbug73392.php on line %d +Warning: preg_replace_callback_array(): Delimiter must not be alphanumeric, backslash, or NUL in %sbug73392.php on line %d NULL diff --git a/ext/pcre/tests/delimiters.phpt b/ext/pcre/tests/delimiters.phpt index 1826f8730a337..beeebc8cbc2e3 100644 --- a/ext/pcre/tests/delimiters.phpt +++ b/ext/pcre/tests/delimiters.phpt @@ -12,6 +12,7 @@ var_dump(preg_match('~a', '')); var_dump(preg_match('@\@\@@', '@@')); var_dump(preg_match('//z', '@@')); var_dump(preg_match('{', '')); +var_dump(preg_match("\0\0", '')); ?> --EXPECTF-- @@ -22,7 +23,7 @@ Warning: 
preg_match(): Empty regular expression in %sdelimiters.php on line 4 bool(false) int(1) -Warning: preg_match(): Delimiter must not be alphanumeric or backslash in %sdelimiters.php on line 6 +Warning: preg_match(): Delimiter must not be alphanumeric, backslash, or NUL in %sdelimiters.php on line 6 bool(false) int(1) @@ -35,3 +36,6 @@ bool(false) Warning: preg_match(): No ending matching delimiter '}' found in %sdelimiters.php on line 11 bool(false) + +Warning: preg_match(): Delimiter must not be alphanumeric, backslash, or NUL in %sdelimiters.php on line 12 +bool(false) diff --git a/ext/pcre/tests/null_bytes.phpt b/ext/pcre/tests/null_bytes.phpt index 9a3f433ffb1bc..e9db61dae045e 100644 --- a/ext/pcre/tests/null_bytes.phpt +++ b/ext/pcre/tests/null_bytes.phpt @@ -3,40 +3,64 @@ Zero byte test --FILE-- ---EXPECTF-- -Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 3 +var_dump(preg_match("\0[]i", "")); +var_dump(preg_match("[\0]i", "")); +var_dump(preg_match("[\0]i", "\0")); +var_dump(preg_match("[]\0i", "")); +var_dump(preg_match("[]i\0", "")); +var_dump(preg_match("[\\\0]i", "")); +var_dump(preg_match("[\\\0]i", "\\\0")); -Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 4 +var_dump(preg_match("/abc\0def/", "abc")); +var_dump(preg_match("/abc\0def/", "abc\0def")); +var_dump(preg_match("/abc\0def/", "abc\0fed")); -Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 5 +var_dump(preg_match("[abc\0def]", "abc")); +var_dump(preg_match("[abc\0def]", "abc\0def")); +var_dump(preg_match("[abc\0def]", "abc\0fed")); -Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 6 +preg_replace("/foo/e\0/i", "echo('Eek');", ""); -Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 7 +?> +--EXPECTF-- +Warning: preg_match(): Delimiter must not be alphanumeric, backslash, or NUL in %snull_bytes.php on line 3 +bool(false) +int(0) +int(1) -Warning: preg_match(): Null byte in regex in 
%snull_bytes.php on line 9 +Warning: preg_match(): NUL is not a valid modifier in %snull_bytes.php on line 6 +bool(false) -Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 10 +Warning: preg_match(): NUL is not a valid modifier in %snull_bytes.php on line 7 +bool(false) +int(0) +int(1) -Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 11 +Warning: preg_match(): Delimiter must not be alphanumeric, backslash, or NUL in %snull_bytes.php on line 11 +bool(false) +int(0) +int(1) -Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 12 +Warning: preg_match(): NUL is not a valid modifier in %snull_bytes.php on line 14 +bool(false) -Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 13 +Warning: preg_match(): NUL is not a valid modifier in %snull_bytes.php on line 15 +bool(false) +int(0) +int(1) +int(0) +int(1) +int(0) +int(0) +int(1) +int(0) -Warning: preg_replace(): Null byte in regex in %snull_bytes.php on line 15 +Warning: preg_replace(): NUL is not a valid modifier in %snull_bytes.php on line 27 diff --git a/ext/pcre/tests/preg_grep_error1.phpt b/ext/pcre/tests/preg_grep_error1.phpt index 3079c0b4b0249..0042536450756 100644 --- a/ext/pcre/tests/preg_grep_error1.phpt +++ b/ext/pcre/tests/preg_grep_error1.phpt @@ -37,7 +37,7 @@ echo "Done" Arg value is abcdef -Warning: preg_grep(): Delimiter must not be alphanumeric or backslash in %spreg_grep_error1.php on line %d +Warning: preg_grep(): Delimiter must not be alphanumeric, backslash, or NUL in %spreg_grep_error1.php on line %d bool(false) Arg value is /[a-zA-Z] diff --git a/ext/pcre/tests/preg_match_all_error1.phpt b/ext/pcre/tests/preg_match_all_error1.phpt index d25bfe99c6e6d..01e0615ad38f2 100644 --- a/ext/pcre/tests/preg_match_all_error1.phpt +++ b/ext/pcre/tests/preg_match_all_error1.phpt @@ -38,7 +38,7 @@ var_dump($matches); Arg value is abcdef -Warning: preg_match_all(): Delimiter must not be alphanumeric or backslash in 
%spreg_match_all_error1.php on line %d +Warning: preg_match_all(): Delimiter must not be alphanumeric, backslash, or NUL in %spreg_match_all_error1.php on line %d bool(false) NULL diff --git a/ext/pcre/tests/preg_match_error1.phpt b/ext/pcre/tests/preg_match_error1.phpt index 7a7106270f0e8..e97ddb9267512 100644 --- a/ext/pcre/tests/preg_match_error1.phpt +++ b/ext/pcre/tests/preg_match_error1.phpt @@ -34,7 +34,7 @@ try { Arg value is abcdef -Warning: preg_match(): Delimiter must not be alphanumeric or backslash in %spreg_match_error1.php on line %d +Warning: preg_match(): Delimiter must not be alphanumeric, backslash, or NUL in %spreg_match_error1.php on line %d bool(false) Arg value is /[a-zA-Z] diff --git a/ext/pcre/tests/preg_replace_callback_array_error.phpt b/ext/pcre/tests/preg_replace_callback_array_error.phpt new file mode 100644 index 0000000000000..7a73d5b00c211 --- /dev/null +++ b/ext/pcre/tests/preg_replace_callback_array_error.phpt @@ -0,0 +1,66 @@ +--TEST-- +preg_replace_callback_array() errors +--FILE-- + 'b', + "" => function () { return "ok"; }), 'a')); + +var_dump(preg_replace_callback_array( + array( + "/a/" => 'b', + null => function () { return "ok"; }), 'a')); + +// backslashes + +var_dump(preg_replace_callback_array( + array( + "/a/" => 'b', + "\\b\\" => function () { return "ok"; }), 'a')); + +// alphanumeric delimiters + +var_dump(preg_replace_callback_array( + array( + "/a/" => 'b', + "aba" => function () { return "ok"; }), 'a')); + +var_dump(preg_replace_callback_array( + array( + "/a/" => 'b', + "1b1" => function () { return "ok"; }), 'a')); + +// null character delimiter + +var_dump(preg_replace_callback_array( + array( + "/a/" => 'b', + "\0b\0" => function () { return "ok"; }), 'a')); + +?> +--EXPECTF-- +Warning: preg_replace_callback_array(): Empty regular expression in %spreg_replace_callback_array_error.php on line 12 +NULL + +Warning: preg_replace_callback_array(): Empty regular expression in %spreg_replace_callback_array_error.php 
on line 17 +NULL + +Warning: preg_replace_callback_array(): Delimiter must not be alphanumeric, backslash, or NUL in %spreg_replace_callback_array_error.php on line 24 +NULL + +Warning: preg_replace_callback_array(): Delimiter must not be alphanumeric, backslash, or NUL in %spreg_replace_callback_array_error.php on line 31 +NULL + +Warning: preg_replace_callback_array(): Delimiter must not be alphanumeric, backslash, or NUL in %spreg_replace_callback_array_error.php on line 36 +NULL + +Warning: preg_replace_callback_array(): Delimiter must not be alphanumeric, backslash, or NUL in %spreg_replace_callback_array_error.php on line 43 +NULL \ No newline at end of file diff --git a/ext/pcre/tests/preg_replace_callback_array_fatal_error.phpt b/ext/pcre/tests/preg_replace_callback_array_fatal_error.phpt new file mode 100644 index 0000000000000..0b018627de74a --- /dev/null +++ b/ext/pcre/tests/preg_replace_callback_array_fatal_error.phpt @@ -0,0 +1,21 @@ +--TEST-- +preg_replace_callback_array() invalid callable +--FILE-- + 'b', + "/b/" => 'invalid callable'), 'a')); + +--EXPECTF-- +Fatal error: Uncaught TypeError: preg_replace_callback_array(): Argument #1 ($pattern) must contain only valid callbacks in %spreg_replace_callback_array_fatal_error.php:11 +Stack trace: +#0 %spreg_replace_callback_array_fatal_error.php(11): preg_replace_callback_array(Array, 'a') +#1 {main} + thrown in %spreg_replace_callback_array_fatal_error.php on line 11 diff --git a/ext/pcre/tests/preg_replace_callback_error1.phpt b/ext/pcre/tests/preg_replace_callback_error1.phpt index eb6478e506952..bd659bec9e5a8 100644 --- a/ext/pcre/tests/preg_replace_callback_error1.phpt +++ b/ext/pcre/tests/preg_replace_callback_error1.phpt @@ -30,7 +30,7 @@ foreach($regex_array as $regex_value) { Arg value is abcdef -Warning: preg_replace_callback(): Delimiter must not be alphanumeric or backslash in %s on line %d +Warning: preg_replace_callback(): Delimiter must not be alphanumeric, backslash, or NUL in %s on line 
%d NULL Arg value is /[a-zA-Z] diff --git a/ext/pcre/tests/preg_replace_error1.phpt b/ext/pcre/tests/preg_replace_error1.phpt index ccd3554745606..6ea93d1544977 100644 --- a/ext/pcre/tests/preg_replace_error1.phpt +++ b/ext/pcre/tests/preg_replace_error1.phpt @@ -33,7 +33,7 @@ try { Arg value is abcdef -Warning: preg_replace(): Delimiter must not be alphanumeric or backslash in %spreg_replace_error1.php on line %d +Warning: preg_replace(): Delimiter must not be alphanumeric, backslash, or NUL in %spreg_replace_error1.php on line %d NULL Arg value is /[a-zA-Z] diff --git a/ext/pcre/tests/preg_split_error1.phpt b/ext/pcre/tests/preg_split_error1.phpt index 4d0fb9e22a45e..5ffb9f08ac3b1 100644 --- a/ext/pcre/tests/preg_split_error1.phpt +++ b/ext/pcre/tests/preg_split_error1.phpt @@ -36,7 +36,7 @@ try { Arg value is abcdef -Warning: preg_split(): Delimiter must not be alphanumeric or backslash in %spreg_split_error1.php on line %d +Warning: preg_split(): Delimiter must not be alphanumeric, backslash, or NUL in %spreg_split_error1.php on line %d bool(false) Arg value is /[a-zA-Z]