From cb02838147831688111a8d173645f1be0dbbd0b6 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 12:42:19 +0900 Subject: [PATCH 01/19] added php_mb_check_encoding --- ext/mbstring/mbstring.c | 55 ++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 20bca129ffbef..7c138cbcc24a8 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4513,40 +4513,32 @@ PHP_FUNCTION(mb_get_info) } /* }}} */ -/* {{{ proto bool mb_check_encoding([string var[, string encoding]]) - Check if the string is valid for the specified encoding */ -PHP_FUNCTION(mb_check_encoding) +static inline zend_bool php_mb_check_encoding(const char *input, size_t length, const char *enc) { - char *var = NULL; - size_t var_len; - char *enc = NULL; - size_t enc_len; - mbfl_buffer_converter *convd; const mbfl_encoding *encoding = MBSTRG(current_internal_encoding); + mbfl_buffer_converter *convd; mbfl_string string, result, *ret = NULL; long illegalchars = 0; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) { - return; - } - - if (var == NULL) { - RETURN_BOOL(MBSTRG(illegalchars) == 0); + if (input == NULL) { + return MBSTRG(illegalchars) == 0; } if (enc != NULL) { encoding = mbfl_name2encoding(enc); if (!encoding || encoding == &mbfl_encoding_pass) { php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", enc); - RETURN_FALSE; + return false; } } convd = mbfl_buffer_converter_new2(encoding, encoding, 0); + if (convd == NULL) { php_error_docref(NULL, E_WARNING, "Unable to create converter"); - RETURN_FALSE; + return false; } + mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE); mbfl_buffer_converter_illegal_substchar(convd, 0); @@ -4554,19 +4546,42 @@ PHP_FUNCTION(mb_check_encoding) mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding); mbfl_string_init(&result); - string.val = (unsigned char *)var; - string.len = var_len; + string.val = (unsigned char *) input; + string.len = length; + ret = mbfl_buffer_converter_feed_result(convd, &string, &result); illegalchars = mbfl_buffer_illegalchars(convd); mbfl_buffer_converter_delete(convd); - RETVAL_FALSE; if (ret != NULL) { if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) { - RETVAL_TRUE; + return true; } + mbfl_string_clear(&result); } + + return false; +} + +/* {{{ proto bool mb_check_encoding([string var[, string encoding]]) + Check if the string is valid for the specified encoding */ +PHP_FUNCTION(mb_check_encoding) +{ + char *var = NULL; + size_t var_len; + char *enc = NULL; + size_t enc_len; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) { + return; + } + + RETVAL_FALSE; + + if (php_mb_check_encoding(var, var_len, enc)) { + RETVAL_TRUE; + } } /* }}} */ From c948e44807ea23faf670d3d917e24937bb690502 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 13:18:39 +0900 Subject: [PATCH 02/19] added mb_ord --- ext/mbstring/mbstring.c | 73 +++++++++++++++++++++++++++++++++++++++++ ext/mbstring/mbstring.h | 1 + 2 files changed, 74 insertions(+) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 7c138cbcc24a8..49b2ed5afb4dd 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -437,6 +437,11 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0) ZEND_ARG_INFO(0, encoding) ZEND_END_ARG_INFO() +ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ord, 0, 0, 1) + ZEND_ARG_INFO(0, str) + ZEND_ARG_INFO(0, encoding) +ZEND_END_ARG_INFO() + ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0) ZEND_ARG_INFO(0, encoding) ZEND_END_ARG_INFO() @@ -562,6 +567,7 @@ const zend_function_entry mbstring_functions[] = { PHP_FE(mb_send_mail, arginfo_mb_send_mail) PHP_FE(mb_get_info, arginfo_mb_get_info) PHP_FE(mb_check_encoding, arginfo_mb_check_encoding) + PHP_FE(mb_ord, arginfo_mb_ord) #if HAVE_MBREGEX PHP_MBREGEX_FUNCTION_ENTRIES #endif @@ -4585,6 +4591,73 @@ PHP_FUNCTION(mb_check_encoding) } /* }}} */ +static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) +{ + enum mbfl_no_encoding no_enc; + zend_bool supported = false; + char* ret; + size_t ret_len; + + if (enc == NULL) { + no_enc = MBSTRG(current_internal_encoding)->no_encoding; + } else { + no_enc = mbfl_name2no_encoding(enc); + + if (no_enc == mbfl_no_encoding_invalid) { + php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc); + return -1; + } + } + + if (no_enc == mbfl_no_encoding_utf8 + || no_enc == mbfl_no_encoding_utf8_docomo + || no_enc == mbfl_no_encoding_utf8_kddi_a + || no_enc == mbfl_no_encoding_utf8_kddi_b + || no_enc == mbfl_no_encoding_utf8_sb + ) { + supported = true; + } + + if (!supported) { + php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); + return -1; + } + + ret = php_mb_convert_encoding(str, str_len, "UCS-4BE", enc, &ret_len); + + if (ret == NULL) { + return -1; + } + + return (unsigned char) ret[0] << 24 | + (unsigned char) ret[1] << 16 | + (unsigned char) ret[2] << 8 | + (unsigned char) ret[3]; +} + +/* {{{ proto bool mb_ord([string str[, string encoding]]) */ +PHP_FUNCTION(mb_ord) +{ + char* str; + size_t str_len; + char* enc = NULL; + size_t enc_len; + long cp; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &str, &str_len, &enc, &enc_len) == FAILURE) { + return; + } + + cp = php_mb_ord(str, str_len, enc); + + if (0 > cp) { + RETURN_FALSE; + } + + RETURN_LONG(cp); +} +/* }}} */ + /* {{{ php_mb_populate_current_detect_order_list */ static void php_mb_populate_current_detect_order_list(void) { diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h index 9685c64d7b4a9..8599e46881eae 100644 --- a/ext/mbstring/mbstring.h +++ b/ext/mbstring/mbstring.h @@ -127,6 +127,7 @@ PHP_FUNCTION(mb_decode_numericentity); PHP_FUNCTION(mb_send_mail); PHP_FUNCTION(mb_get_info); PHP_FUNCTION(mb_check_encoding); +PHP_FUNCTION(mb_ord); MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc); From b06595675476cbce391390a35d8ae3ac9ad83c4c Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 14:32:54 +0900 Subject: [PATCH 03/19] added utf32 and ucs4 for available encodings --- ext/mbstring/mbstring.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 49b2ed5afb4dd..b44d4751cd763 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4614,6 +4614,12 @@ static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) || no_enc == mbfl_no_encoding_utf8_kddi_a || no_enc == mbfl_no_encoding_utf8_kddi_b || no_enc == mbfl_no_encoding_utf8_sb + || no_enc == mbfl_no_encoding_ucs4 + || no_enc == mbfl_no_encoding_ucs4be + || no_enc == mbfl_no_encoding_ucs4le + || no_enc == mbfl_no_encoding_utf32 + || no_enc == mbfl_no_encoding_utf32be + || no_enc == mbfl_no_encoding_utf32le ) { supported = true; } From ec4f74b29684933f62230db6341389f9d5388d63 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 14:37:12 +0900 Subject: [PATCH 04/19] added check for forbidden encodings --- ext/mbstring/mbstring.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index b44d4751cd763..9c62b06bfa0b9 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4622,6 +4622,32 @@ static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) || no_enc == mbfl_no_encoding_utf32le ) { supported = true; + } else if (no_enc == mbfl_no_encoding_pass + || no_enc == mbfl_no_encoding_auto + || no_enc == mbfl_no_encoding_wchar + || no_enc == mbfl_no_encoding_byte2be + || no_enc == mbfl_no_encoding_byte2le + || no_enc == mbfl_no_encoding_byte4be + || no_enc == mbfl_no_encoding_byte4le + || no_enc == mbfl_no_encoding_base64 + || no_enc == mbfl_no_encoding_uuencode + || no_enc == mbfl_no_encoding_html_ent + || no_enc == mbfl_no_encoding_qprint + || no_enc == mbfl_no_encoding_utf7 + || no_enc == mbfl_no_encoding_utf7imap + || no_enc == mbfl_no_encoding_2022kr + || no_enc == mbfl_no_encoding_jis + || no_enc == mbfl_no_encoding_2022jp + || no_enc == mbfl_no_encoding_2022jpms + || no_enc == mbfl_no_encoding_jis_ms + || no_enc == mbfl_no_encoding_2022jp_2004 + || no_enc == mbfl_no_encoding_2022jp_kddi + || no_enc == mbfl_no_encoding_cp50220 + || no_enc == mbfl_no_encoding_cp50220raw + || no_enc == mbfl_no_encoding_cp50221 + || no_enc == mbfl_no_encoding_cp50222 + ) { + supported = false; } if (!supported) { From 674b67ce0c1c2f7d22a88db1b5a51eae87c7b4c9 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 14:45:33 +0900 Subject: [PATCH 05/19] added utf16 and ucs2 for supported encodings --- ext/mbstring/mbstring.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 9c62b06bfa0b9..3fdfdcb6f585f 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4620,6 +4620,12 @@ static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) || no_enc == mbfl_no_encoding_utf32 || no_enc == mbfl_no_encoding_utf32be || no_enc == mbfl_no_encoding_utf32le + || no_enc == mbfl_no_encoding_ucs2 + || no_enc == mbfl_no_encoding_ucs2be + || no_enc == mbfl_no_encoding_ucs2le + || no_enc == mbfl_no_encoding_utf16 + || no_enc == mbfl_no_encoding_utf16be + || no_enc == mbfl_no_encoding_utf16le ) { supported = true; } else if (no_enc == mbfl_no_encoding_pass From b9b47c84a0d637c3a9b4d4e828b38361c1ba834e Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 15:03:35 +0900 Subject: [PATCH 06/19] added support for various encodings other than unicode --- ext/mbstring/mbstring.c | 56 +++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 3fdfdcb6f585f..55dc1d693140f 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4594,9 +4594,11 @@ PHP_FUNCTION(mb_check_encoding) static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) { enum mbfl_no_encoding no_enc; - zend_bool supported = false; char* ret; size_t ret_len; + const mbfl_encoding *encoding; + unsigned char char_len; + long cp; if (enc == NULL) { no_enc = MBSTRG(current_internal_encoding)->no_encoding; @@ -4627,7 +4629,18 @@ static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) || no_enc == mbfl_no_encoding_utf16be || no_enc == mbfl_no_encoding_utf16le ) { - supported = true; + + ret = php_mb_convert_encoding(str, str_len, "UCS-4BE", enc, &ret_len); + + if (ret == NULL) { + return -1; + } + + return (unsigned char) ret[0] << 24 | + (unsigned char) ret[1] << 16 | + (unsigned char) ret[2] << 8 | + (unsigned char) ret[3]; + } else if (no_enc == mbfl_no_encoding_pass || no_enc == mbfl_no_encoding_auto || no_enc == mbfl_no_encoding_wchar @@ -4653,24 +4666,41 @@ static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) || no_enc == mbfl_no_encoding_cp50221 || no_enc == mbfl_no_encoding_cp50222 ) { - supported = false; - } - - if (!supported) { php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); return -1; } - ret = php_mb_convert_encoding(str, str_len, "UCS-4BE", enc, &ret_len); + if (!php_mb_check_encoding(str, str_len, enc)) { - if (ret == NULL) { - return -1; + if (no_enc == MBSTRG(current_internal_encoding)->no_encoding) { + cp = MBSTRG(current_filter_illegal_substchar); + } else { + cp = 0x3f; + } + + return cp; + } + + encoding = mbfl_no2encoding(no_enc); + char_len = php_mb_mbchar_bytes_ex(str, encoding); + + if (char_len == 1) { + cp = (unsigned char) str[0]; + } else if (char_len == 2) { + cp = ((unsigned char) str[0] << 8) | + (unsigned char) str[1]; + } else if (char_len == 3) { + cp = ((unsigned char) str[0] << 16) | + ((unsigned char) str[1] << 8) | + (unsigned char) str[2]; + } else { + cp = ((unsigned char) str[0] << 24) | + ((unsigned char) str[1] << 16) | + ((unsigned char) str[2] << 8) | + (unsigned char) str[3]; } - return (unsigned char) ret[0] << 24 | - (unsigned char) ret[1] << 16 | - (unsigned char) ret[2] << 8 | - (unsigned char) ret[3]; + return cp; } /* {{{ proto bool mb_ord([string str[, string encoding]]) */ From a0890c7c59674623fdf4422fd2b6fa74090e9c8a Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 15:13:17 +0900 Subject: [PATCH 07/19] added php_mb_check_forbidden_encoding --- ext/mbstring/mbstring.c | 59 ++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 55dc1d693140f..6e6873156224c 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4591,6 +4591,39 @@ PHP_FUNCTION(mb_check_encoding) } /* }}} */ +static inline zend_bool php_mb_check_forbidden_encoding(enum mbfl_no_encoding no_enc) +{ + if (no_enc == mbfl_no_encoding_pass + || no_enc == mbfl_no_encoding_auto + || no_enc == mbfl_no_encoding_wchar + || no_enc == mbfl_no_encoding_byte2be + || no_enc == mbfl_no_encoding_byte2le + || no_enc == mbfl_no_encoding_byte4be + || no_enc == mbfl_no_encoding_byte4le + || no_enc == mbfl_no_encoding_base64 + || no_enc == mbfl_no_encoding_uuencode + || no_enc == mbfl_no_encoding_html_ent + || no_enc == mbfl_no_encoding_qprint + || no_enc == mbfl_no_encoding_utf7 + || no_enc == mbfl_no_encoding_utf7imap + || no_enc == mbfl_no_encoding_2022kr + || no_enc == mbfl_no_encoding_jis + || no_enc == mbfl_no_encoding_2022jp + || no_enc == mbfl_no_encoding_2022jpms + || no_enc == mbfl_no_encoding_jis_ms + || no_enc == mbfl_no_encoding_2022jp_2004 + || no_enc == mbfl_no_encoding_2022jp_kddi + || no_enc == mbfl_no_encoding_cp50220 + || no_enc == mbfl_no_encoding_cp50220raw + || no_enc == mbfl_no_encoding_cp50221 + || no_enc == mbfl_no_encoding_cp50222 + ) { + return true; + } + + return false; +} + static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) { enum mbfl_no_encoding no_enc; @@ -4641,31 +4674,7 @@ static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) (unsigned char) ret[2] << 8 | (unsigned char) ret[3]; - } else if (no_enc == mbfl_no_encoding_pass - || no_enc == mbfl_no_encoding_auto - || no_enc == mbfl_no_encoding_wchar - || no_enc == mbfl_no_encoding_byte2be - || no_enc == mbfl_no_encoding_byte2le - || no_enc == mbfl_no_encoding_byte4be - || no_enc == mbfl_no_encoding_byte4le - || no_enc == mbfl_no_encoding_base64 - || no_enc == mbfl_no_encoding_uuencode - || no_enc == mbfl_no_encoding_html_ent - || no_enc == mbfl_no_encoding_qprint - || no_enc == mbfl_no_encoding_utf7 - || no_enc == mbfl_no_encoding_utf7imap - || no_enc == mbfl_no_encoding_2022kr - || no_enc == mbfl_no_encoding_jis - || no_enc == mbfl_no_encoding_2022jp - || no_enc == mbfl_no_encoding_2022jpms - || no_enc == mbfl_no_encoding_jis_ms - || no_enc == mbfl_no_encoding_2022jp_2004 - || no_enc == mbfl_no_encoding_2022jp_kddi - || no_enc == mbfl_no_encoding_cp50220 - || no_enc == mbfl_no_encoding_cp50220raw - || no_enc == mbfl_no_encoding_cp50221 - || no_enc == mbfl_no_encoding_cp50222 - ) { + } else if (php_mb_check_forbidden_encoding(no_enc)) { php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); return -1; } From c6e94bfb4ca6f68b8b2b295a4d261181ff855504 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 16:01:47 +0900 Subject: [PATCH 08/19] added mb_chr --- ext/mbstring/mbstring.c | 98 +++++++++++++++++++++++++++++++++++++++++ ext/mbstring/mbstring.h | 1 + 2 files changed, 99 insertions(+) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 6e6873156224c..277ed47645c5e 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -442,6 +442,11 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ord, 0, 0, 1) ZEND_ARG_INFO(0, encoding) ZEND_END_ARG_INFO() +ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_chr, 0, 0, 1) + ZEND_ARG_INFO(0, cp) + ZEND_ARG_INFO(0, encoding) +ZEND_END_ARG_INFO() + ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0) ZEND_ARG_INFO(0, encoding) ZEND_END_ARG_INFO() @@ -568,6 +573,7 @@ const zend_function_entry mbstring_functions[] = { PHP_FE(mb_get_info, arginfo_mb_get_info) PHP_FE(mb_check_encoding, arginfo_mb_check_encoding) PHP_FE(mb_ord, arginfo_mb_ord) + PHP_FE(mb_chr, arginfo_mb_chr) #if HAVE_MBREGEX PHP_MBREGEX_FUNCTION_ENTRIES #endif @@ -4735,6 +4741,98 @@ PHP_FUNCTION(mb_ord) } /* }}} */ +static inline char* php_mb_chr(long cp, const char* enc) +{ + enum mbfl_no_encoding no_enc; + zend_bool supported = false; + zend_string *buf = zend_string_alloc(4, 0); + char* ret; + size_t ret_len; + + if (enc == NULL) { + no_enc = MBSTRG(current_internal_encoding)->no_encoding; + } else { + no_enc = mbfl_name2no_encoding(enc); + if (no_enc == mbfl_no_encoding_invalid) { + php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc); + return NULL; + } + } + + if (no_enc == mbfl_no_encoding_utf8 + || no_enc == mbfl_no_encoding_utf8_docomo + || no_enc == mbfl_no_encoding_utf8_kddi_a + || no_enc == mbfl_no_encoding_utf8_kddi_b + || no_enc == mbfl_no_encoding_utf8_sb + || no_enc == mbfl_no_encoding_ucs4 + || no_enc == mbfl_no_encoding_ucs4be + || no_enc == mbfl_no_encoding_ucs4le + || no_enc == mbfl_no_encoding_utf32 + || no_enc == mbfl_no_encoding_utf32be + || no_enc == mbfl_no_encoding_utf32le + || no_enc == mbfl_no_encoding_ucs2 + || no_enc == mbfl_no_encoding_ucs2be + || no_enc == mbfl_no_encoding_ucs2le + || no_enc == mbfl_no_encoding_utf16 + || no_enc == mbfl_no_encoding_utf16be + || no_enc == mbfl_no_encoding_utf16le + ) { + supported = true; + } + + if (!supported) { + php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); + return NULL; + } + + if (no_enc == mbfl_no_encoding_utf8 + || no_enc == mbfl_no_encoding_utf8_docomo + || no_enc == mbfl_no_encoding_utf8_kddi_a + || no_enc == mbfl_no_encoding_utf8_kddi_b + ) { + + if (0 > cp || (cp > 0xd7ff && 0xe000 > cp) || 0x10ffff < cp) { + if (no_enc == MBSTRG(current_internal_encoding)->no_encoding) { + cp = MBSTRG(current_filter_illegal_substchar); + } else { + cp = 0x3f; + } + } + } + + buf->val[0] = (cp >> 24) & 0xff; + buf->val[1] = (cp >> 16) & 0xff; + buf->val[2] = (cp >> 8) & 0xff; + buf->val[3] = cp & 0xff; + buf->val[4] = 0; + + ret = php_mb_convert_encoding(buf->val, buf->len, enc, "UCS-4BE", &ret_len); + zend_string_release(buf); + + return ret; +} +/* {{{ proto bool mb_ord([int cp[, string encoding]]) */ +PHP_FUNCTION(mb_chr) +{ + long cp; + char* enc = NULL; + long enc_len; + char* ret; + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l|s", &cp, &enc, &enc_len) == FAILURE) { + return; + } + + ret = php_mb_chr(cp, enc); + + if (ret == NULL) { + RETURN_FALSE; + } + + RETURN_STRING(ret); +} +/* }}} */ + /* {{{ php_mb_populate_current_detect_order_list */ static void php_mb_populate_current_detect_order_list(void) { diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h index 8599e46881eae..bf28c1b51d805 100644 --- a/ext/mbstring/mbstring.h +++ b/ext/mbstring/mbstring.h @@ -128,6 +128,7 @@ PHP_FUNCTION(mb_send_mail); PHP_FUNCTION(mb_get_info); PHP_FUNCTION(mb_check_encoding); PHP_FUNCTION(mb_ord); +PHP_FUNCTION(mb_chr); MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc); From a8ef8a25e5ebf03bfb6092ab24fbbe1a0f56e162 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 18:43:55 +0900 Subject: [PATCH 09/19] added check by php_mb_check_forbidden_encoding --- ext/mbstring/mbstring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 277ed47645c5e..78ffd5cd9870c 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4780,7 +4780,7 @@ static inline char* php_mb_chr(long cp, const char* enc) supported = true; } - if (!supported) { + if (!supported || php_mb_check_forbidden_encoding(no_enc)) { php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); return NULL; } From f10a182674475b200c67f651aa8fd0d90891368a Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 18:59:13 +0900 Subject: [PATCH 10/19] added various encoding support other than unicode --- ext/mbstring/mbstring.c | 73 ++++++++++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 22 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 78ffd5cd9870c..03ed229381274 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4744,8 +4744,7 @@ PHP_FUNCTION(mb_ord) static inline char* php_mb_chr(long cp, const char* enc) { enum mbfl_no_encoding no_enc; - zend_bool supported = false; - zend_string *buf = zend_string_alloc(4, 0); + zend_string *buf; char* ret; size_t ret_len; @@ -4759,6 +4758,11 @@ static inline char* php_mb_chr(long cp, const char* enc) } } + if (php_mb_check_forbidden_encoding(no_enc)) { + php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); + return NULL; + } + if (no_enc == mbfl_no_encoding_utf8 || no_enc == mbfl_no_encoding_utf8_docomo || no_enc == mbfl_no_encoding_utf8_kddi_a @@ -4777,19 +4781,6 @@ static inline char* php_mb_chr(long cp, const char* enc) || no_enc == mbfl_no_encoding_utf16be || no_enc == mbfl_no_encoding_utf16le ) { - supported = true; - } - - if (!supported || php_mb_check_forbidden_encoding(no_enc)) { - php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); - return NULL; - } - - if (no_enc == mbfl_no_encoding_utf8 - || no_enc == mbfl_no_encoding_utf8_docomo - || no_enc == mbfl_no_encoding_utf8_kddi_a - || no_enc == mbfl_no_encoding_utf8_kddi_b - ) { if (0 > cp || (cp > 0xd7ff && 0xe000 > cp) || 0x10ffff < cp) { if (no_enc == MBSTRG(current_internal_encoding)->no_encoding) { @@ -4798,18 +4789,56 @@ static inline char* php_mb_chr(long cp, const char* enc) cp = 0x3f; } } + + buf = zend_string_alloc(4, 0); + buf->val[0] = (cp >> 24) & 0xff; + buf->val[1] = (cp >> 16) & 0xff; + buf->val[2] = (cp >> 8) & 0xff; + buf->val[3] = cp & 0xff; + buf->val[4] = 0; + + ret = php_mb_convert_encoding(buf->val, buf->len, enc, "UCS-4BE", &ret_len); + zend_string_release(buf); + + return ret; + } + + if (0 > cp || cp > 0x100000000) { + if (no_enc == MBSTRG(current_internal_encoding)->no_encoding) { + cp = MBSTRG(current_filter_illegal_substchar); + } else { + cp = 0x3f; + } } - buf->val[0] = (cp >> 24) & 0xff; - buf->val[1] = (cp >> 16) & 0xff; - buf->val[2] = (cp >> 8) & 0xff; - buf->val[3] = cp & 0xff; - buf->val[4] = 0; + if (cp < 0x100) { + buf = zend_string_alloc(1, 0); + buf->val[0] = cp; + buf->val[1] = 0; + } else if (cp < 0x10000) { + buf = zend_string_alloc(2, 0); + buf->val[0] = cp >> 8; + buf->val[1] = cp & 0xff; + buf->val[2] = 0; + } else if (cp < 0x1000000) { + buf = zend_string_alloc(3, 0); + buf->val[0] = cp >> 16; + buf->val[1] = (cp >> 8) & 0xff; + buf->val[2] = cp & 0xff; + buf->val[3] = 0; + } else { + buf = zend_string_alloc(4, 0); + buf->val[0] = cp >> 24; + buf->val[1] = (cp >> 16) & 0xff; + buf->val[2] = (cp >> 8) & 0xff; + buf->val[3] = cp & 0xff; + buf->val[4] = 0; + } - ret = php_mb_convert_encoding(buf->val, buf->len, enc, "UCS-4BE", &ret_len); + ret = php_mb_convert_encoding(buf->val, buf->len, enc, enc, &ret_len); zend_string_release(buf); - return ret; + return ret; } /* {{{ proto bool mb_ord([int cp[, string encoding]]) */ PHP_FUNCTION(mb_chr) From 89e9746f746c1424c257792557548f5895f7bbd2 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 19:08:50 +0900 Subject: [PATCH 11/19] use php_mb_convert_encoding instead of php_mb_check_encoding --- ext/mbstring/mbstring.c | 47 +++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 03ed229381274..dbff4eded25ab 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4675,46 +4675,47 @@ static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) return -1; } - return (unsigned char) ret[0] << 24 | - (unsigned char) ret[1] << 16 | - (unsigned char) ret[2] << 8 | - (unsigned char) ret[3]; + cp = (unsigned char) ret[0] << 24 | \ + (unsigned char) ret[1] << 16 | \ + (unsigned char) ret[2] << 8 | \ + (unsigned char) ret[3]; + + efree(ret); + + return cp; } else if (php_mb_check_forbidden_encoding(no_enc)) { php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); return -1; } - if (!php_mb_check_encoding(str, str_len, enc)) { - - if (no_enc == MBSTRG(current_internal_encoding)->no_encoding) { - cp = MBSTRG(current_filter_illegal_substchar); - } else { - cp = 0x3f; - } + ret = php_mb_convert_encoding(str, str_len, enc, enc, &ret_len); - return cp; + if (ret == NULL) { + return -1; } encoding = mbfl_no2encoding(no_enc); - char_len = php_mb_mbchar_bytes_ex(str, encoding); + char_len = php_mb_mbchar_bytes_ex(ret, encoding); if (char_len == 1) { - cp = (unsigned char) str[0]; + cp = (unsigned char) ret[0]; } else if (char_len == 2) { - cp = ((unsigned char) str[0] << 8) | - (unsigned char) str[1]; + cp = ((unsigned char) ret[0] << 8) | \ + (unsigned char) ret[1]; } else if (char_len == 3) { - cp = ((unsigned char) str[0] << 16) | - ((unsigned char) str[1] << 8) | - (unsigned char) str[2]; + cp = ((unsigned char) ret[0] << 16) | \ + ((unsigned char) ret[1] << 8) | \ + (unsigned char) ret[2]; } else { - cp = ((unsigned char) str[0] << 24) | - ((unsigned char) str[1] << 16) | - ((unsigned char) str[2] << 8) | - (unsigned char) str[3]; + cp = ((unsigned char) ret[0] << 24) | \ + ((unsigned char) ret[1] << 16) | \ + ((unsigned char) ret[2] << 8) | \ + (unsigned char) ret[3]; } + efree(ret); + return cp; } From f303f59813b388484764601b6fd05345e90ed450 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 19:16:14 +0900 Subject: [PATCH 12/19] changed the position of calling php_mb_check_forbidden_encoding --- ext/mbstring/mbstring.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index dbff4eded25ab..7029b01e7b556 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4685,7 +4685,7 @@ static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) return cp; } else if (php_mb_check_forbidden_encoding(no_enc)) { - php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); + php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); return -1; } @@ -4759,11 +4759,6 @@ static inline char* php_mb_chr(long cp, const char* enc) } } - if (php_mb_check_forbidden_encoding(no_enc)) { - php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); - return NULL; - } - if (no_enc == mbfl_no_encoding_utf8 || no_enc == mbfl_no_encoding_utf8_docomo || no_enc == mbfl_no_encoding_utf8_kddi_a @@ -4802,6 +4797,9 @@ static inline char* php_mb_chr(long cp, const char* enc) zend_string_release(buf); return ret; + } else if (php_mb_check_forbidden_encoding(no_enc)) { + php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); + return NULL; } if (0 > cp || cp > 0x100000000) { @@ -4839,7 +4837,7 @@ static inline char* php_mb_chr(long cp, const char* enc) ret = php_mb_convert_encoding(buf->val, buf->len, enc, enc, &ret_len); zend_string_release(buf); - return ret; + return ret; } /* {{{ proto bool mb_ord([int cp[, string encoding]]) */ PHP_FUNCTION(mb_chr) From aa7eceb3a554c6bc6f4415b3263feb333b075ea6 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 22:49:13 +0900 Subject: [PATCH 13/19] added php_mb_check_code_point for mb_substitute_character --- ext/mbstring/mbstring.c | 110 ++++++++++++++++++++++++++++++- ext/mbstring/tests/bug69079.phpt | 14 ++++ 2 files changed, 122 insertions(+), 2 deletions(-) create mode 100644 ext/mbstring/tests/bug69079.phpt diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 7029b01e7b556..c5901cc0d7cdb 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -104,6 +104,7 @@ static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *lis static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding); +static inline zend_bool php_mb_check_encoding(const char *input, size_t length, const char *enc); /* }}} */ /* {{{ php_mb_default_identify_list */ @@ -1981,6 +1982,111 @@ PHP_FUNCTION(mb_detect_order) } /* }}} */ +static inline zend_bool php_mb_check_code_point(long cp) +{ + enum mbfl_no_encoding no_enc; + const char* enc; + char* buf; + char buf_len; + + no_enc = MBSTRG(current_internal_encoding)->no_encoding; + + if (no_enc == mbfl_no_encoding_utf8 + || no_enc == mbfl_no_encoding_utf8_docomo + || no_enc == mbfl_no_encoding_utf8_kddi_a + || no_enc == mbfl_no_encoding_utf8_kddi_b + || no_enc == mbfl_no_encoding_utf8_sb + ) { + if ((cp > 0 && 0xd800 > cp) || (cp > 0xdfff && 0x110000 > cp)) { + return true; + } + + return false; + } else if (no_enc == mbfl_no_encoding_ucs4 + || no_enc == mbfl_no_encoding_ucs4be + || no_enc == mbfl_no_encoding_ucs4le + || no_enc == mbfl_no_encoding_utf32 + || no_enc == mbfl_no_encoding_utf32be + || no_enc == mbfl_no_encoding_utf32le + || no_enc == mbfl_no_encoding_ucs2 + || no_enc == mbfl_no_encoding_ucs2be + || no_enc == mbfl_no_encoding_ucs2le + || no_enc == mbfl_no_encoding_utf16 + || no_enc == mbfl_no_encoding_utf16be + || no_enc == mbfl_no_encoding_utf16le + ) { + + if (0 > cp || cp > 0x10ffff) { + return false; + } + + return true; + + } else if (no_enc == mbfl_no_encoding_pass + || no_enc == mbfl_no_encoding_auto + || no_enc == mbfl_no_encoding_wchar + || no_enc == mbfl_no_encoding_byte2be + || no_enc == mbfl_no_encoding_byte2le + || no_enc == mbfl_no_encoding_byte4be + || no_enc == mbfl_no_encoding_byte4le + || no_enc == mbfl_no_encoding_base64 + || no_enc == mbfl_no_encoding_uuencode + || no_enc == mbfl_no_encoding_html_ent + || no_enc == mbfl_no_encoding_qprint + || no_enc == mbfl_no_encoding_utf7 + || no_enc == mbfl_no_encoding_utf7imap + || no_enc == mbfl_no_encoding_2022kr + || no_enc == mbfl_no_encoding_jis + || no_enc == mbfl_no_encoding_2022jp + || no_enc == mbfl_no_encoding_2022jpms + || no_enc == mbfl_no_encoding_jis_ms + || no_enc == mbfl_no_encoding_2022jp_2004 + || no_enc == mbfl_no_encoding_2022jp_kddi + || no_enc == mbfl_no_encoding_cp50220 + || no_enc == mbfl_no_encoding_cp50220raw + || no_enc == mbfl_no_encoding_cp50221 + || no_enc == mbfl_no_encoding_cp50222 + ) { + return cp < 0xffff && cp > 0x0; + } + + if (cp < 0x100) { + buf_len = 1; + buf = emalloc(buf_len); + buf[0] = cp; + buf[1] = 0; + } else if (cp < 0x10000) { + buf_len = 2; + buf = emalloc(buf_len); + buf[0] = cp >> 8; + buf[1] = cp & 0xff; + buf[2] = 0; + } else if (cp < 0x1000000) { + buf_len = 3; + buf = emalloc(buf_len); + buf[0] = cp >> 16; + buf[1] = (cp >> 8) & 0xff; + buf[2] = cp & 0xff; + buf[3] = 0; + } else { + buf_len = 4; + buf = emalloc(buf_len); + buf[0] = cp >> 24; + buf[1] = (cp >> 16) & 0xff; + buf[2] = (cp >> 8) & 0xff; + buf[3] = cp & 0xff; + buf[4] = 0; + } + + enc = MBSTRG(current_internal_encoding)->name; + + if (php_mb_check_encoding(buf, buf_len, enc)) { + return true; + } + + return false; +} + /* {{{ proto mixed mb_substitute_character([mixed substchar]) Sets the current substitute_character or returns the current substitute_character */ PHP_FUNCTION(mb_substitute_character) @@ -2015,7 +2121,7 @@ PHP_FUNCTION(mb_substitute_character) } else { convert_to_long_ex(arg1); - if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) { + if (php_mb_check_code_point(Z_LVAL_P(arg1))) { MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1); } else { @@ -2026,7 +2132,7 @@ PHP_FUNCTION(mb_substitute_character) break; default: convert_to_long_ex(arg1); - if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) { + if (php_mb_check_code_point(Z_LVAL_P(arg1))) { MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1); } else { diff --git a/ext/mbstring/tests/bug69079.phpt b/ext/mbstring/tests/bug69079.phpt new file mode 100644 index 0000000000000..67c4d0cc2dbc2 --- /dev/null +++ b/ext/mbstring/tests/bug69079.phpt @@ -0,0 +1,14 @@ +--TEST-- +Bug #69079 (enhancement for mb_substitute_character) +--SKIPIF-- + +--FILE-- + +--EXPECT-- +bool(true) +bool(true) \ No newline at end of file From 99d90f13586a43d347e385b9e15091a11be2611b Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Sun, 22 Feb 2015 16:13:32 +0900 Subject: [PATCH 14/19] fix memory leak --- ext/mbstring/mbstring.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index c5901cc0d7cdb..956cfb3c9cfb6 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -1982,10 +1982,9 @@ PHP_FUNCTION(mb_detect_order) } /* }}} */ -static inline zend_bool php_mb_check_code_point(long cp) +static inline int php_mb_check_code_point(long cp) { enum mbfl_no_encoding no_enc; - const char* enc; char* buf; char buf_len; @@ -1998,10 +1997,10 @@ static inline zend_bool php_mb_check_code_point(long cp) || no_enc == mbfl_no_encoding_utf8_sb ) { if ((cp > 0 && 0xd800 > cp) || (cp > 0xdfff && 0x110000 > cp)) { - return true; + return 1; } - return false; + return 0; } else if (no_enc == mbfl_no_encoding_ucs4 || no_enc == mbfl_no_encoding_ucs4be || no_enc == mbfl_no_encoding_ucs4le @@ -2017,10 +2016,10 @@ static inline zend_bool php_mb_check_code_point(long cp) ) { if (0 > cp || cp > 0x10ffff) { - return false; + return 0; } - return true; + return 1; } else if (no_enc == mbfl_no_encoding_pass || no_enc == mbfl_no_encoding_auto @@ -2078,13 +2077,15 @@ static inline zend_bool php_mb_check_code_point(long cp) buf[4] = 0; } - enc = MBSTRG(current_internal_encoding)->name; + if (php_mb_check_encoding(buf, buf_len, NULL)) { + efree(buf); - if (php_mb_check_encoding(buf, buf_len, enc)) { - return true; + return 1; } - return false; + efree(buf); + + return 0; } /* {{{ proto mixed mb_substitute_character([mixed substchar]) @@ -4673,6 +4674,7 @@ static inline zend_bool php_mb_check_encoding(const char *input, size_t length, if (ret != NULL) { if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) { + mbfl_string_clear(&result); return true; } From 6be0f8dc9a9210d15a83519b11ed15d9db5dca05 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Sun, 22 Feb 2015 16:58:22 +0900 Subject: [PATCH 15/19] fix return type --- ext/mbstring/mbstring.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 956cfb3c9cfb6..644835522afae 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -104,7 +104,7 @@ static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *lis static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding); -static inline zend_bool php_mb_check_encoding(const char *input, size_t length, const char *enc); +static inline int php_mb_check_encoding(const char *input, size_t length, const char *enc); /* }}} */ /* {{{ php_mb_default_identify_list */ @@ -4632,7 +4632,7 @@ PHP_FUNCTION(mb_get_info) } /* }}} */ -static inline zend_bool php_mb_check_encoding(const char *input, size_t length, const char *enc) +static inline int php_mb_check_encoding(const char *input, size_t length, const char *enc) { const mbfl_encoding *encoding = MBSTRG(current_internal_encoding); mbfl_buffer_converter *convd; @@ -4647,7 +4647,7 @@ static inline zend_bool php_mb_check_encoding(const char *input, size_t length, encoding = mbfl_name2encoding(enc); if (!encoding || encoding == &mbfl_encoding_pass) { php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", enc); - return false; + return 0; } } @@ -4655,7 +4655,7 @@ static inline zend_bool php_mb_check_encoding(const char *input, size_t length, if (convd == NULL) { php_error_docref(NULL, E_WARNING, "Unable to create converter"); - return false; + return 0; } mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE); @@ -4675,13 +4675,13 @@ static inline zend_bool php_mb_check_encoding(const char *input, size_t length, if (ret != NULL) { if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) { mbfl_string_clear(&result); - return true; + return 1; } mbfl_string_clear(&result); } - return false; + return 0; } /* {{{ proto bool mb_check_encoding([string var[, string encoding]]) From 32de1cf57a78d518466c9dca13e9bfedb14f2de3 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Sun, 22 Feb 2015 19:55:13 +0900 Subject: [PATCH 16/19] delete unnecessary functions --- ext/mbstring/mbstring.c | 396 ++++++++-------------------------------- ext/mbstring/mbstring.h | 2 - 2 files changed, 76 insertions(+), 322 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 644835522afae..25802187fdf50 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -438,16 +438,6 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0) ZEND_ARG_INFO(0, encoding) ZEND_END_ARG_INFO() -ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ord, 0, 0, 1) - ZEND_ARG_INFO(0, str) - ZEND_ARG_INFO(0, encoding) -ZEND_END_ARG_INFO() - -ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_chr, 0, 0, 1) - ZEND_ARG_INFO(0, cp) - ZEND_ARG_INFO(0, encoding) -ZEND_END_ARG_INFO() - ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0) ZEND_ARG_INFO(0, encoding) ZEND_END_ARG_INFO() @@ -573,8 +563,6 @@ const zend_function_entry mbstring_functions[] = { PHP_FE(mb_send_mail, arginfo_mb_send_mail) PHP_FE(mb_get_info, arginfo_mb_get_info) PHP_FE(mb_check_encoding, arginfo_mb_check_encoding) - PHP_FE(mb_ord, arginfo_mb_ord) - PHP_FE(mb_chr, arginfo_mb_chr) #if HAVE_MBREGEX PHP_MBREGEX_FUNCTION_ENTRIES #endif @@ -1982,6 +1970,77 @@ PHP_FUNCTION(mb_detect_order) } /* }}} */ +static inline int php_mb_check_unicode_encoding(enum mbfl_no_encoding no_enc) +{ + if (no_enc == mbfl_no_encoding_utf8 + || no_enc == mbfl_no_encoding_utf8_docomo + || no_enc == mbfl_no_encoding_utf8_kddi_a + || no_enc == mbfl_no_encoding_utf8_kddi_b + || no_enc == mbfl_no_encoding_utf8_sb + || no_enc == mbfl_no_encoding_ucs4 + || no_enc == mbfl_no_encoding_ucs4be + || no_enc == mbfl_no_encoding_ucs4le + || no_enc == mbfl_no_encoding_utf32 + || no_enc == mbfl_no_encoding_utf32be + || no_enc == mbfl_no_encoding_utf32le + || no_enc == mbfl_no_encoding_ucs2 + || no_enc == mbfl_no_encoding_ucs2be + || no_enc == mbfl_no_encoding_ucs2le + || no_enc == mbfl_no_encoding_utf16 + || no_enc == mbfl_no_encoding_utf16be + || no_enc == mbfl_no_encoding_utf16le + ) { + return 1; + } + + return 0; +} + +static inline int php_mb_check_utf8_encoding(enum mbfl_no_encoding no_enc) +{ + if (no_enc == mbfl_no_encoding_utf8 + || no_enc == mbfl_no_encoding_utf8_docomo + || no_enc == mbfl_no_encoding_utf8_kddi_a + || no_enc == mbfl_no_encoding_utf8_kddi_b + || no_enc == mbfl_no_encoding_utf8_sb + ) { + return 1; + } + + return 0; +} +static inline int php_mb_check_unsupported_encoding(enum mbfl_no_encoding no_enc) +{ + if (no_enc == mbfl_no_encoding_pass + || no_enc == mbfl_no_encoding_auto + || no_enc == mbfl_no_encoding_wchar + || no_enc == mbfl_no_encoding_byte2be + || no_enc == mbfl_no_encoding_byte2le + || no_enc == mbfl_no_encoding_byte4be + || no_enc == mbfl_no_encoding_byte4le + || no_enc == mbfl_no_encoding_base64 + || no_enc == mbfl_no_encoding_uuencode + || no_enc == mbfl_no_encoding_html_ent + || no_enc == mbfl_no_encoding_qprint + || no_enc == mbfl_no_encoding_utf7 + || no_enc == mbfl_no_encoding_utf7imap + || no_enc == mbfl_no_encoding_2022kr + || no_enc == mbfl_no_encoding_jis + || no_enc == mbfl_no_encoding_2022jp + || no_enc == mbfl_no_encoding_2022jpms + || no_enc == mbfl_no_encoding_jis_ms + || no_enc == mbfl_no_encoding_2022jp_2004 + || no_enc == mbfl_no_encoding_2022jp_kddi + || no_enc == mbfl_no_encoding_cp50220 + || no_enc == mbfl_no_encoding_cp50220raw + || no_enc == mbfl_no_encoding_cp50221 + || no_enc == mbfl_no_encoding_cp50222 + ) { + return 1; + } + + return 0; +} static inline int php_mb_check_code_point(long cp) { enum mbfl_no_encoding no_enc; @@ -1990,30 +2049,14 @@ static inline int php_mb_check_code_point(long cp) no_enc = MBSTRG(current_internal_encoding)->no_encoding; - if (no_enc == mbfl_no_encoding_utf8 - || no_enc == mbfl_no_encoding_utf8_docomo - || no_enc == mbfl_no_encoding_utf8_kddi_a - || no_enc == mbfl_no_encoding_utf8_kddi_b - || no_enc == mbfl_no_encoding_utf8_sb - ) { + if (php_mb_check_utf8_encoding(no_enc)) { + if ((cp > 0 && 0xd800 > cp) || (cp > 0xdfff && 0x110000 > cp)) { return 1; } return 0; - } else if (no_enc == mbfl_no_encoding_ucs4 - || no_enc == mbfl_no_encoding_ucs4be - || no_enc == mbfl_no_encoding_ucs4le - || no_enc == mbfl_no_encoding_utf32 - || no_enc == mbfl_no_encoding_utf32be - || no_enc == mbfl_no_encoding_utf32le - || no_enc == mbfl_no_encoding_ucs2 - || no_enc == mbfl_no_encoding_ucs2be - || no_enc == mbfl_no_encoding_ucs2le - || no_enc == mbfl_no_encoding_utf16 - || no_enc == mbfl_no_encoding_utf16be - || no_enc == mbfl_no_encoding_utf16le - ) { + } else if (php_mb_check_unicode_encoding(no_enc)) { if (0 > cp || cp > 0x10ffff) { return 0; @@ -2021,31 +2064,8 @@ static inline int php_mb_check_code_point(long cp) return 1; - } else if (no_enc == mbfl_no_encoding_pass - || no_enc == mbfl_no_encoding_auto - || no_enc == mbfl_no_encoding_wchar - || no_enc == mbfl_no_encoding_byte2be - || no_enc == mbfl_no_encoding_byte2le - || no_enc == mbfl_no_encoding_byte4be - || no_enc == mbfl_no_encoding_byte4le - || no_enc == mbfl_no_encoding_base64 - || no_enc == mbfl_no_encoding_uuencode - || no_enc == mbfl_no_encoding_html_ent - || no_enc == mbfl_no_encoding_qprint - || no_enc == mbfl_no_encoding_utf7 - || no_enc == mbfl_no_encoding_utf7imap - || no_enc == mbfl_no_encoding_2022kr - || no_enc == mbfl_no_encoding_jis - || no_enc == mbfl_no_encoding_2022jp - || no_enc == mbfl_no_encoding_2022jpms - || no_enc == mbfl_no_encoding_jis_ms - || no_enc == mbfl_no_encoding_2022jp_2004 - || no_enc == mbfl_no_encoding_2022jp_kddi - || no_enc == mbfl_no_encoding_cp50220 - || no_enc == mbfl_no_encoding_cp50220raw - || no_enc == mbfl_no_encoding_cp50221 - || no_enc == mbfl_no_encoding_cp50222 - ) { + // backward compatibility + } else if (php_mb_check_unsupported_encoding(no_enc)) { return cp < 0xffff && cp > 0x0; } @@ -4705,270 +4725,6 @@ PHP_FUNCTION(mb_check_encoding) } /* }}} */ -static inline zend_bool php_mb_check_forbidden_encoding(enum mbfl_no_encoding no_enc) -{ - if (no_enc == mbfl_no_encoding_pass - || no_enc == mbfl_no_encoding_auto - || no_enc == mbfl_no_encoding_wchar - || no_enc == mbfl_no_encoding_byte2be - || no_enc == mbfl_no_encoding_byte2le - || no_enc == mbfl_no_encoding_byte4be - || no_enc == mbfl_no_encoding_byte4le - || no_enc == mbfl_no_encoding_base64 - || no_enc == mbfl_no_encoding_uuencode - || no_enc == mbfl_no_encoding_html_ent - || no_enc == mbfl_no_encoding_qprint - || no_enc == mbfl_no_encoding_utf7 - || no_enc == mbfl_no_encoding_utf7imap - || no_enc == mbfl_no_encoding_2022kr - || no_enc == mbfl_no_encoding_jis - || no_enc == mbfl_no_encoding_2022jp - || no_enc == mbfl_no_encoding_2022jpms - || no_enc == mbfl_no_encoding_jis_ms - || no_enc == mbfl_no_encoding_2022jp_2004 - || no_enc == mbfl_no_encoding_2022jp_kddi - || no_enc == mbfl_no_encoding_cp50220 - || no_enc == mbfl_no_encoding_cp50220raw - || no_enc == mbfl_no_encoding_cp50221 - || no_enc == mbfl_no_encoding_cp50222 - ) { - return true; - } - - return false; -} - -static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) -{ - enum mbfl_no_encoding no_enc; - char* ret; - size_t ret_len; - const mbfl_encoding *encoding; - unsigned char char_len; - long cp; - - if (enc == NULL) { - no_enc = MBSTRG(current_internal_encoding)->no_encoding; - } else { - no_enc = mbfl_name2no_encoding(enc); - - if (no_enc == mbfl_no_encoding_invalid) { - php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc); - return -1; - } - } - - if (no_enc == mbfl_no_encoding_utf8 - || no_enc == mbfl_no_encoding_utf8_docomo - || no_enc == mbfl_no_encoding_utf8_kddi_a - || no_enc == mbfl_no_encoding_utf8_kddi_b - || no_enc == mbfl_no_encoding_utf8_sb - || no_enc == mbfl_no_encoding_ucs4 - || no_enc == mbfl_no_encoding_ucs4be - || no_enc == mbfl_no_encoding_ucs4le - || no_enc == mbfl_no_encoding_utf32 - || no_enc == mbfl_no_encoding_utf32be - || no_enc == mbfl_no_encoding_utf32le - || no_enc == mbfl_no_encoding_ucs2 - || no_enc == mbfl_no_encoding_ucs2be - || no_enc == mbfl_no_encoding_ucs2le - || no_enc == mbfl_no_encoding_utf16 - || no_enc == mbfl_no_encoding_utf16be - || no_enc == mbfl_no_encoding_utf16le - ) { - - ret = php_mb_convert_encoding(str, str_len, "UCS-4BE", enc, &ret_len); - - if (ret == NULL) { - return -1; - } - - cp = (unsigned char) ret[0] << 24 | \ - (unsigned char) ret[1] << 16 | \ - (unsigned char) ret[2] << 8 | \ - (unsigned char) ret[3]; - - efree(ret); - - return cp; - - } else if (php_mb_check_forbidden_encoding(no_enc)) { - php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); - return -1; - } - - ret = php_mb_convert_encoding(str, str_len, enc, enc, &ret_len); - - if (ret == NULL) { - return -1; - } - - encoding = mbfl_no2encoding(no_enc); - char_len = php_mb_mbchar_bytes_ex(ret, encoding); - - if (char_len == 1) { - cp = (unsigned char) ret[0]; - } else if (char_len == 2) { - cp = ((unsigned char) ret[0] << 8) | \ - (unsigned char) ret[1]; - } else if (char_len == 3) { - cp = ((unsigned char) ret[0] << 16) | \ - ((unsigned char) ret[1] << 8) | \ - (unsigned char) ret[2]; - } else { - cp = ((unsigned char) ret[0] << 24) | \ - ((unsigned char) ret[1] << 16) | \ - ((unsigned char) ret[2] << 8) | \ - (unsigned char) ret[3]; - } - - efree(ret); - - return cp; -} - -/* {{{ proto bool mb_ord([string str[, string encoding]]) */ -PHP_FUNCTION(mb_ord) -{ - char* str; - size_t str_len; - char* enc = NULL; - size_t enc_len; - long cp; - - if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &str, &str_len, &enc, &enc_len) == FAILURE) { - return; - } - - cp = php_mb_ord(str, str_len, enc); - - if (0 > cp) { - RETURN_FALSE; - } - - RETURN_LONG(cp); -} -/* }}} */ - -static inline char* php_mb_chr(long cp, const char* enc) -{ - enum mbfl_no_encoding no_enc; - zend_string *buf; - char* ret; - size_t ret_len; - - if (enc == NULL) { - no_enc = MBSTRG(current_internal_encoding)->no_encoding; - } else { - no_enc = mbfl_name2no_encoding(enc); - if (no_enc == mbfl_no_encoding_invalid) { - php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc); - return NULL; - } - } - - if (no_enc == mbfl_no_encoding_utf8 - || no_enc == mbfl_no_encoding_utf8_docomo - || no_enc == mbfl_no_encoding_utf8_kddi_a - || no_enc == mbfl_no_encoding_utf8_kddi_b - || no_enc == mbfl_no_encoding_utf8_sb - || no_enc == mbfl_no_encoding_ucs4 - || no_enc == mbfl_no_encoding_ucs4be - || no_enc == mbfl_no_encoding_ucs4le - || no_enc == mbfl_no_encoding_utf32 - || no_enc == mbfl_no_encoding_utf32be - || no_enc == mbfl_no_encoding_utf32le - || no_enc == mbfl_no_encoding_ucs2 - || no_enc == mbfl_no_encoding_ucs2be - || no_enc == mbfl_no_encoding_ucs2le - || no_enc == mbfl_no_encoding_utf16 - || no_enc == mbfl_no_encoding_utf16be - || no_enc == mbfl_no_encoding_utf16le - ) { - - if (0 > cp || (cp > 0xd7ff && 0xe000 > cp) || 0x10ffff < cp) { - if (no_enc == MBSTRG(current_internal_encoding)->no_encoding) { - cp = MBSTRG(current_filter_illegal_substchar); - } else { - cp = 0x3f; - } - } - - buf = zend_string_alloc(4, 0); - buf->val[0] = (cp >> 24) & 0xff; - buf->val[1] = (cp >> 16) & 0xff; - buf->val[2] = (cp >> 8) & 0xff; - buf->val[3] = cp & 0xff; - buf->val[4] = 0; - - ret = php_mb_convert_encoding(buf->val, buf->len, enc, "UCS-4BE", &ret_len); - zend_string_release(buf); - - return ret; - } else if (php_mb_check_forbidden_encoding(no_enc)) { - php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); - return NULL; - } - - if (0 > cp || cp > 0x100000000) { - if (no_enc == MBSTRG(current_internal_encoding)->no_encoding) { - cp = MBSTRG(current_filter_illegal_substchar); - } else { - cp = 0x3f; - } - } - - if (cp < 0x100) { - buf = zend_string_alloc(1, 0); - buf->val[0] = cp; - buf->val[1] = 0; - } else if (cp < 0x10000) { - buf = zend_string_alloc(2, 0); - buf->val[0] = cp >> 8; - buf->val[1] = cp & 0xff; - buf->val[2] = 0; - } else if (cp < 0x1000000) { - buf = zend_string_alloc(3, 0); - buf->val[0] = cp >> 16; - buf->val[1] = (cp >> 8) & 0xff; - buf->val[2] = cp & 0xff; - buf->val[3] = 0; - } else { - buf = zend_string_alloc(4, 0); - buf->val[0] = cp >> 24; - buf->val[1] = (cp >> 16) & 0xff; - buf->val[2] = (cp >> 8) & 0xff; - buf->val[3] = cp & 0xff; - buf->val[4] = 0; - } - - ret = php_mb_convert_encoding(buf->val, buf->len, enc, enc, &ret_len); - zend_string_release(buf); - - return ret; -} -/* {{{ proto bool mb_ord([int cp[, string encoding]]) */ -PHP_FUNCTION(mb_chr) -{ - long cp; - char* enc = NULL; - long enc_len; - char* ret; - - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l|s", &cp, &enc, &enc_len) == FAILURE) { - return; - } - - ret = php_mb_chr(cp, enc); - - if (ret == NULL) { - RETURN_FALSE; - } - - RETURN_STRING(ret); -} -/* }}} */ - /* {{{ php_mb_populate_current_detect_order_list */ static void php_mb_populate_current_detect_order_list(void) { diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h index bf28c1b51d805..9685c64d7b4a9 100644 --- a/ext/mbstring/mbstring.h +++ b/ext/mbstring/mbstring.h @@ -127,8 +127,6 @@ PHP_FUNCTION(mb_decode_numericentity); PHP_FUNCTION(mb_send_mail); PHP_FUNCTION(mb_get_info); PHP_FUNCTION(mb_check_encoding); -PHP_FUNCTION(mb_ord); -PHP_FUNCTION(mb_chr); MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc); From 45a8a9c6858a49118330002b59dbd88b37e31e9d Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Fri, 27 Feb 2015 19:31:34 +0900 Subject: [PATCH 17/19] update the functions for checking the names of encodings --- ext/mbstring/mbstring.c | 148 ++++++++++++++++++++++++---------------- 1 file changed, 90 insertions(+), 58 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 25802187fdf50..86f69693572ae 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -1970,77 +1970,109 @@ PHP_FUNCTION(mb_detect_order) } /* }}} */ -static inline int php_mb_check_unicode_encoding(enum mbfl_no_encoding no_enc) +static const enum mbfl_no_encoding php_mb_unsupported_no_encoding_list[] = { + mbfl_no_encoding_pass, + mbfl_no_encoding_auto, + mbfl_no_encoding_wchar, + mbfl_no_encoding_byte2be, + mbfl_no_encoding_byte2le, + mbfl_no_encoding_byte4be, + mbfl_no_encoding_byte4le, + mbfl_no_encoding_base64, + mbfl_no_encoding_uuencode, + mbfl_no_encoding_html_ent, + mbfl_no_encoding_qprint, + mbfl_no_encoding_utf7, + mbfl_no_encoding_utf7imap, + mbfl_no_encoding_2022kr, + mbfl_no_encoding_jis, + mbfl_no_encoding_2022jp, + mbfl_no_encoding_2022jpms, + mbfl_no_encoding_jis_ms, + mbfl_no_encoding_2022jp_2004, + mbfl_no_encoding_2022jp_kddi, + mbfl_no_encoding_cp50220, + mbfl_no_encoding_cp50220raw, + mbfl_no_encoding_cp50221, + mbfl_no_encoding_cp50222 +}; + +static inline int php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc) { - if (no_enc == mbfl_no_encoding_utf8 - || no_enc == mbfl_no_encoding_utf8_docomo - || no_enc == mbfl_no_encoding_utf8_kddi_a - || no_enc == mbfl_no_encoding_utf8_kddi_b - || no_enc == mbfl_no_encoding_utf8_sb - || no_enc == mbfl_no_encoding_ucs4 - || no_enc == mbfl_no_encoding_ucs4be - || no_enc == mbfl_no_encoding_ucs4le - || no_enc == mbfl_no_encoding_utf32 - || no_enc == mbfl_no_encoding_utf32be - || no_enc == mbfl_no_encoding_utf32le - || no_enc == mbfl_no_encoding_ucs2 - || no_enc == mbfl_no_encoding_ucs2be - || no_enc == mbfl_no_encoding_ucs2le - || no_enc == mbfl_no_encoding_utf16 - || no_enc == mbfl_no_encoding_utf16be - || no_enc == mbfl_no_encoding_utf16le - ) { - return 1; + int i; + int size = sizeof(php_mb_unsupported_no_encoding_list)/sizeof(php_mb_unsupported_no_encoding_list[0]); + + for (i = 0; i < size; i++) { + + if (no_enc == php_mb_unsupported_no_encoding_list[i]) { + return 1; + } + } return 0; } -static inline int php_mb_check_utf8_encoding(enum mbfl_no_encoding no_enc) +static const enum mbfl_no_encoding php_mb_no_encoding_unicode_list[] = { + mbfl_no_encoding_utf8, + mbfl_no_encoding_utf8_docomo, + mbfl_no_encoding_utf8_kddi_a, + mbfl_no_encoding_utf8_kddi_b, + mbfl_no_encoding_utf8_sb, + mbfl_no_encoding_ucs4, + mbfl_no_encoding_ucs4be, + mbfl_no_encoding_ucs4le, + mbfl_no_encoding_utf32, + mbfl_no_encoding_utf32be, + mbfl_no_encoding_utf32le, + mbfl_no_encoding_ucs2, + mbfl_no_encoding_ucs2be, + mbfl_no_encoding_ucs2le, + mbfl_no_encoding_utf16, + mbfl_no_encoding_utf16be, + mbfl_no_encoding_utf16le +}; + +static inline int php_mb_is_no_encoding_unicode(enum mbfl_no_encoding no_enc) { - if (no_enc == mbfl_no_encoding_utf8 - || no_enc == mbfl_no_encoding_utf8_docomo - || no_enc == mbfl_no_encoding_utf8_kddi_a - || no_enc == mbfl_no_encoding_utf8_kddi_b - || no_enc == mbfl_no_encoding_utf8_sb - ) { - return 1; + int i; + int size = sizeof(php_mb_no_encoding_unicode_list)/sizeof(php_mb_no_encoding_unicode_list[0]); + + for (i = 0; i < size; i++) { + + if (no_enc == php_mb_no_encoding_unicode_list[i]) { + return 1; + } + } return 0; } -static inline int php_mb_check_unsupported_encoding(enum mbfl_no_encoding no_enc) + +static const enum mbfl_no_encoding php_mb_no_encoding_utf8_list[] = { + mbfl_no_encoding_utf8, + mbfl_no_encoding_utf8_docomo, + mbfl_no_encoding_utf8_kddi_a, + mbfl_no_encoding_utf8_kddi_b, + mbfl_no_encoding_utf8_sb +}; + +static inline int php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc) { - if (no_enc == mbfl_no_encoding_pass - || no_enc == mbfl_no_encoding_auto - || no_enc == mbfl_no_encoding_wchar - || no_enc == mbfl_no_encoding_byte2be - || no_enc == mbfl_no_encoding_byte2le - || no_enc == mbfl_no_encoding_byte4be - || no_enc == mbfl_no_encoding_byte4le - || no_enc == mbfl_no_encoding_base64 - || no_enc == mbfl_no_encoding_uuencode - || no_enc == mbfl_no_encoding_html_ent - || no_enc == mbfl_no_encoding_qprint - || no_enc == mbfl_no_encoding_utf7 - || no_enc == mbfl_no_encoding_utf7imap - || no_enc == mbfl_no_encoding_2022kr - || no_enc == mbfl_no_encoding_jis - || no_enc == mbfl_no_encoding_2022jp - || no_enc == mbfl_no_encoding_2022jpms - || no_enc == mbfl_no_encoding_jis_ms - || no_enc == mbfl_no_encoding_2022jp_2004 - || no_enc == mbfl_no_encoding_2022jp_kddi - || no_enc == mbfl_no_encoding_cp50220 - || no_enc == mbfl_no_encoding_cp50220raw - || no_enc == mbfl_no_encoding_cp50221 - || no_enc == mbfl_no_encoding_cp50222 - ) { - return 1; + int i; + int size = sizeof(php_mb_no_encoding_utf8_list)/sizeof(php_mb_no_encoding_utf8_list[0]); + + for (i = 0; i < size; i++) { + + if (no_enc == php_mb_no_encoding_utf8_list[i]) { + return 1; + } + } return 0; } + static inline int php_mb_check_code_point(long cp) { enum mbfl_no_encoding no_enc; @@ -2049,14 +2081,14 @@ static inline int php_mb_check_code_point(long cp) no_enc = MBSTRG(current_internal_encoding)->no_encoding; - if (php_mb_check_utf8_encoding(no_enc)) { + if (php_mb_is_no_encoding_utf8(no_enc)) { if ((cp > 0 && 0xd800 > cp) || (cp > 0xdfff && 0x110000 > cp)) { return 1; } return 0; - } else if (php_mb_check_unicode_encoding(no_enc)) { + } else if (php_mb_is_no_encoding_unicode(no_enc)) { if (0 > cp || cp > 0x10ffff) { return 0; @@ -2065,7 +2097,7 @@ static inline int php_mb_check_code_point(long cp) return 1; // backward compatibility - } else if (php_mb_check_unsupported_encoding(no_enc)) { + } else if (php_mb_is_unsupported_no_encoding(no_enc)) { return cp < 0xffff && cp > 0x0; } From a22d27df4cdabe6cab516c1f967d56cc5a8fb891 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Sun, 1 Mar 2015 16:07:18 +0900 Subject: [PATCH 18/19] replace emalloc with safe_emalloc --- ext/mbstring/mbstring.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 86f69693572ae..a646d158e5025 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -2103,25 +2103,25 @@ static inline int php_mb_check_code_point(long cp) if (cp < 0x100) { buf_len = 1; - buf = emalloc(buf_len); + buf = (char *) safe_emalloc(buf_len, 1, 1); buf[0] = cp; buf[1] = 0; } else if (cp < 0x10000) { buf_len = 2; - buf = emalloc(buf_len); + buf = (char *) safe_emalloc(buf_len, 1, 1); buf[0] = cp >> 8; buf[1] = cp & 0xff; buf[2] = 0; } else if (cp < 0x1000000) { buf_len = 3; - buf = emalloc(buf_len); + buf = (char *) safe_emalloc(buf_len, 1, 1); buf[0] = cp >> 16; buf[1] = (cp >> 8) & 0xff; buf[2] = cp & 0xff; buf[3] = 0; } else { buf_len = 4; - buf = emalloc(buf_len); + buf = (char *) safe_emalloc(buf_len, 1, 1); buf[0] = cp >> 24; buf[1] = (cp >> 16) & 0xff; buf[2] = (cp >> 8) & 0xff; From c28a6f44bb74bb58bbc8e5a6f3177917517802b8 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Sun, 14 Aug 2016 08:09:14 +0900 Subject: [PATCH 19/19] add declaration of functions --- ext/mbstring/mbstring.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index a0a1eee0a10c6..3bfaaa0349937 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -104,6 +104,11 @@ static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *lis static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding); +static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc); + +static inline zend_bool php_mb_is_no_encoding_unicode(enum mbfl_no_encoding no_enc); + +static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc); /* }}} */ /* {{{ php_mb_default_identify_list */