From f66ea6e3120b013f925976aebb90c124c2fb50c2 Mon Sep 17 00:00:00 2001 From: Sjon Hortensius Date: Wed, 17 Jul 2019 09:30:22 +0200 Subject: [PATCH 1/2] Ref #77388 - disallow passing BAD_ESCAPE_IS_LITERAL, esp by default this option is considered dangerous and unwanted --- NEWS | 3 +++ UPGRADING | 5 +++++ ext/pcre/php_pcre.c | 21 --------------------- ext/pcre/tests/pcre_extra.phpt | 7 ++++--- 4 files changed, 12 insertions(+), 24 deletions(-) diff --git a/NEWS b/NEWS index fd429440278d1..f7d1991ae82aa 100644 --- a/NEWS +++ b/NEWS @@ -27,6 +27,9 @@ PHP NEWS - mysqlnd: . Fixed #60594 (mysqlnd exposes 160 lines of stats in phpinfo). (PeeHaa) +- PCRE: + . Remove X modifier and enable it by default. (sjon) + - PDO: . Fixed bug #77849 (Disable cloning of PDO handle/connection objects). (camporter) diff --git a/UPGRADING b/UPGRADING index 4d0b05839c3e9..f987a0d6bfbad 100644 --- a/UPGRADING +++ b/UPGRADING @@ -145,6 +145,11 @@ PHP 8.0 UPGRADE NOTES as a string instead of an ASCII codepoint. The previous behavior may be restored with an explicit call to chr(). +- PCRE: + . When passing invalid escape sequences they are no longer intepreted as + literals. This behaviour previously required the X modifier - which has + been removed as well. + - PDO: . 
The method PDOStatement::setFetchMode() now accepts the following signature: diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index cae276134c38a..e45cb4c763141 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -50,7 +50,6 @@ struct _pcre_cache_entry { uint32_t capture_count; uint32_t name_count; uint32_t compile_options; - uint32_t extra_compile_options; uint32_t refcount; }; @@ -167,7 +166,6 @@ static void php_pcre_free(void *block, void *data) pefree(block, 1); }/*}}}*/ -#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL #define PHP_PCRE_PREALLOC_MDATA_SIZE 32 static void php_pcre_init_pcre2(uint8_t jit) @@ -188,12 +186,6 @@ static void php_pcre_init_pcre2(uint8_t jit) } } - /* XXX The 'X' modifier is the default behavior in PCRE2. This option is - called dangerous in the manual, as typos in patterns can cause - unexpected results. We might want to to switch to the default PCRE2 - behavior, too, thus causing a certain BC break. */ - pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS); - if (!mctx) { mctx = pcre2_match_context_create(gctx); if (!mctx) { @@ -569,7 +561,6 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex) { pcre2_code *re = NULL; uint32_t coptions = 0; - uint32_t extra_coptions = PHP_PCRE_DEFAULT_EXTRA_COPTIONS; PCRE2_UCHAR error[128]; PCRE2_SIZE erroffset; int errnumber; @@ -704,7 +695,6 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex) case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break; case 'S': /* Pass. */ break; case 'U': coptions |= PCRE2_UNGREEDY; break; - case 'X': extra_coptions &= ~PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL; break; case 'u': coptions |= PCRE2_UTF; /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII characters, even in UTF-8 mode. 
However, this can be changed by setting @@ -767,19 +757,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex) pcre2_set_character_tables(cctx, tables); } - /* Set extra options for the compile context. */ - if (PHP_PCRE_DEFAULT_EXTRA_COPTIONS != extra_coptions) { - pcre2_set_compile_extra_options(cctx, extra_coptions); - } - /* Compile pattern and display a warning if compilation failed. */ re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx); - /* Reset the compile context extra options to default. */ - if (PHP_PCRE_DEFAULT_EXTRA_COPTIONS != extra_coptions) { - pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS); - } - if (re == NULL) { if (key != regex) { zend_string_release_ex(key, 0); @@ -823,7 +803,6 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex) new_entry.re = re; new_entry.preg_options = poptions; new_entry.compile_options = coptions; - new_entry.extra_compile_options = extra_coptions; new_entry.refcount = 0; rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count); diff --git a/ext/pcre/tests/pcre_extra.phpt b/ext/pcre/tests/pcre_extra.phpt index 2bee408fbffcb..588316c939be0 100644 --- a/ext/pcre/tests/pcre_extra.phpt +++ b/ext/pcre/tests/pcre_extra.phpt @@ -1,5 +1,5 @@ --TEST-- -X (PCRE_EXTRA) modififer +X (PCRE_EXTRA) modififier is no longer functional --FILE-- --EXPECTF-- -int(1) +Warning: preg_match(): Compilation failed: unrecognized character follows \ at offset 1 in %spcre_extra.php on line 3 +bool(false) -Warning: preg_match(): Compilation failed: unrecognized character follows \ at offset 1 in %spcre_extra.php on line 4 +Warning: preg_match(): Unknown modifier 'X' in %spcre_extra.php on line 4 bool(false) From 5e9ba5125fb6db7b7874cfe65962a2a0c453bce4 Mon Sep 17 00:00:00 2001 From: Sjon Hortensius Date: Wed, 17 Jul 2019 12:08:13 +0200 Subject: [PATCH 2/2] allow for more graceful migration - don't err on now 
ignored X modifier --- UPGRADING | 4 ++-- ext/pcre/php_pcre.c | 1 + ext/pcre/tests/pcre_extra.phpt | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/UPGRADING b/UPGRADING index f987a0d6bfbad..6f6d8a57a5c8c 100644 --- a/UPGRADING +++ b/UPGRADING @@ -147,8 +147,8 @@ PHP 8.0 UPGRADE NOTES - PCRE: . When passing invalid escape sequences they are no longer intepreted as - literals. This behaviour previously required the X modifier - which has - been removed as well. + literals. This behaviour previously required the X modifier - which is + now ignored. - PDO: . The method PDOStatement::setFetchMode() now accepts the following signature: diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index e45cb4c763141..6c265df48507f 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -694,6 +694,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex) case 'A': coptions |= PCRE2_ANCHORED; break; case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break; case 'S': /* Pass. */ break; + case 'X': /* Pass. */ break; case 'U': coptions |= PCRE2_UNGREEDY; break; case 'u': coptions |= PCRE2_UTF; /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII diff --git a/ext/pcre/tests/pcre_extra.phpt b/ext/pcre/tests/pcre_extra.phpt index 588316c939be0..a1102424e85fb 100644 --- a/ext/pcre/tests/pcre_extra.phpt +++ b/ext/pcre/tests/pcre_extra.phpt @@ -1,5 +1,5 @@ --TEST-- -X (PCRE_EXTRA) modififier is no longer functional +X (PCRE_EXTRA) modifier is ignored (no error, no change) --FILE--