diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index e498c402b6269..3bfaaa0349937 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -104,6 +104,11 @@ static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *lis static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding); +static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc); + +static inline zend_bool php_mb_is_no_encoding_unicode(enum mbfl_no_encoding no_enc); + +static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc); /* }}} */ /* {{{ php_mb_default_identify_list */ @@ -1992,6 +1997,73 @@ PHP_FUNCTION(mb_detect_order) } /* }}} */ +static inline int php_mb_check_code_point(long cp) +{ + enum mbfl_no_encoding no_enc; + char* buf; + char buf_len; + + no_enc = MBSTRG(current_internal_encoding)->no_encoding; + + if (php_mb_is_no_encoding_utf8(no_enc)) { + + if ((cp > 0 && 0xd800 > cp) || (cp > 0xdfff && 0x110000 > cp)) { + return 1; + } + + return 0; + } else if (php_mb_is_no_encoding_unicode(no_enc)) { + + if (0 > cp || cp > 0x10ffff) { + return 0; + } + + return 1; + + // backward compatibility + } else if (php_mb_is_unsupported_no_encoding(no_enc)) { + return cp < 0xffff && cp > 0x0; + } + + if (cp < 0x100) { + buf_len = 1; + buf = (char *) safe_emalloc(buf_len, 1, 1); + buf[0] = cp; + buf[1] = 0; + } else if (cp < 0x10000) { + buf_len = 2; + buf = (char *) safe_emalloc(buf_len, 1, 1); + buf[0] = cp >> 8; + buf[1] = cp & 0xff; + buf[2] = 0; + } else if (cp < 0x1000000) { + buf_len = 3; + buf = (char *) safe_emalloc(buf_len, 1, 1); + buf[0] = cp >> 16; + buf[1] = (cp >> 8) & 0xff; + buf[2] = cp & 0xff; + buf[3] = 0; + } else { + buf_len = 4; + buf = (char *) safe_emalloc(buf_len, 1, 1); + buf[0] = cp >> 24; + buf[1] = (cp >> 16) & 0xff; + buf[2] = (cp >> 8) & 0xff; + buf[3] = cp & 0xff; + buf[4] = 0; + } + + if (php_mb_check_encoding(buf, buf_len, NULL)) { + efree(buf); + + return 1; + } + + efree(buf); + + return 0; +} + /* {{{ proto mixed mb_substitute_character([mixed substchar]) Sets the current substitute_character or returns the current substitute_character */ PHP_FUNCTION(mb_substitute_character) @@ -2026,7 +2098,7 @@ PHP_FUNCTION(mb_substitute_character) } else { convert_to_long_ex(arg1); - if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) { + if (php_mb_check_code_point(Z_LVAL_P(arg1))) { MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1); } else { @@ -2037,7 +2109,7 @@ PHP_FUNCTION(mb_substitute_character) break; default: convert_to_long_ex(arg1); - if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) { + if (php_mb_check_code_point(Z_LVAL_P(arg1))) { MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1); } else { @@ -3117,7 +3189,7 @@ PHP_FUNCTION(mb_strimwidth) if (from < 0) { from += swidth; } - + if (from < 0 || (size_t)from > str_len) { php_error_docref(NULL, E_WARNING, "Start position is out of range"); RETURN_FALSE; diff --git a/ext/mbstring/tests/bug69079.phpt b/ext/mbstring/tests/bug69079.phpt new file mode 100644 index 0000000000000..67c4d0cc2dbc2 --- /dev/null +++ b/ext/mbstring/tests/bug69079.phpt @@ -0,0 +1,14 @@ +--TEST-- +Bug #69079 (enhancement for mb_substitute_character) +--SKIPIF-- + +--FILE-- + +--EXPECT-- +bool(true) +bool(true) \ No newline at end of file