Skip to content

Commit

Permalink
mb_scrub does not attempt to scrub known-valid UTF-8 strings
Browse files Browse the repository at this point in the history
  • Loading branch information
alexdowad committed Jan 22, 2023
1 parent f4dd35e commit 6f53dbb
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 5 deletions.
13 changes: 8 additions & 5 deletions ext/mbstring/mbstring.c
Expand Up @@ -5066,12 +5066,10 @@ PHP_FUNCTION(mb_chr)
/* {{{ */
PHP_FUNCTION(mb_scrub)
{
char* str;
size_t str_len;
zend_string *enc_name = NULL;
zend_string *str, *enc_name = NULL;

ZEND_PARSE_PARAMETERS_START(1, 2)
Z_PARAM_STRING(str, str_len)
Z_PARAM_STR(str)
Z_PARAM_OPTIONAL
Z_PARAM_STR_OR_NULL(enc_name)
ZEND_PARSE_PARAMETERS_END();
Expand All @@ -5081,7 +5079,12 @@ PHP_FUNCTION(mb_scrub)
RETURN_THROWS();
}

RETURN_STR(php_mb_convert_encoding_ex(str, str_len, enc, enc));
if (enc == &mbfl_encoding_utf8 && (GC_FLAGS(str) & IS_STR_VALID_UTF8)) {
/* A valid UTF-8 string will not be changed by mb_scrub, so just increment the refcount and return it */
RETURN_STR_COPY(str);
}

RETURN_STR(php_mb_convert_encoding_ex(ZSTR_VAL(str), ZSTR_LEN(str), enc, enc));
}
/* }}} */

Expand Down
8 changes: 8 additions & 0 deletions ext/mbstring/tests/mb_scrub.phpt
Expand Up @@ -8,7 +8,15 @@ var_dump(
"?" === mb_scrub("\x80"),
"?" === mb_scrub("\x80", 'UTF-8')
);

$utf8str = "abc 日本語 Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞";
// Check $utf8str so it is marked as 'valid UTF-8'
// This will enable the optimized implementation of mb_scrub
if (!mb_check_encoding($utf8str, 'UTF-8'))
die("Test string should be valid UTF-8");
var_dump(mb_scrub($utf8str));
?>
--EXPECT--
bool(true)
bool(true)
string(122) "abc 日本語 Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞"

0 comments on commit 6f53dbb

Please sign in to comment.