From ac87e3388bcdffe1e3d0fdbc146f551d445f1dad Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Fri, 20 Feb 2015 17:10:23 +0900 Subject: [PATCH 1/4] added mb_scrub --- ext/mbstring/mbstring.c | 45 ++++++++++++++++++++++++++++++++ ext/mbstring/mbstring.h | 1 + ext/mbstring/tests/mb_scrub.phpt | 14 ++++++++++ 3 files changed, 60 insertions(+) create mode 100644 ext/mbstring/tests/mb_scrub.phpt diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 20bca129ffbef..dc0dac0cc9bf4 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -437,6 +437,11 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0) ZEND_ARG_INFO(0, encoding) ZEND_END_ARG_INFO() +ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_scrub, 0, 0, 1) + ZEND_ARG_INFO(0, str) + ZEND_ARG_INFO(0, encoding) +ZEND_END_ARG_INFO() + ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0) ZEND_ARG_INFO(0, encoding) ZEND_END_ARG_INFO() @@ -562,6 +567,7 @@ const zend_function_entry mbstring_functions[] = { PHP_FE(mb_send_mail, arginfo_mb_send_mail) PHP_FE(mb_get_info, arginfo_mb_get_info) PHP_FE(mb_check_encoding, arginfo_mb_check_encoding) + PHP_FE(mb_scrub, arginfo_mb_scrub) #if HAVE_MBREGEX PHP_MBREGEX_FUNCTION_ENTRIES #endif @@ -4570,6 +4576,45 @@ PHP_FUNCTION(mb_check_encoding) } /* }}} */ +static inline char* php_mb_scrub(const char* str, size_t str_len, const char* enc) +{ + char *ret; + size_t ret_len; + + return php_mb_convert_encoding(str, str_len, enc, enc, &ret_len); +} + +/* {{{ proto bool mb_scrub([string str[, string encoding]]) */ +PHP_FUNCTION(mb_scrub) +{ + char* str; + long str_len; + const char *enc = NULL; + long enc_len; + + char *ret; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &str, &str_len, &enc, &enc_len) == FAILURE) { + return; + } + + if (enc == NULL) { + enc = MBSTRG(current_internal_encoding)->name; + } else if (!mbfl_is_support_encoding(enc)) { + php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc); + RETURN_FALSE; + } + + ret = php_mb_scrub(str, str_len, enc); + + if (ret == NULL) { + RETURN_FALSE; + } + + RETURN_STRING(ret); +} +/* }}} */ + /* {{{ php_mb_populate_current_detect_order_list */ static void php_mb_populate_current_detect_order_list(void) { diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h index 9685c64d7b4a9..021324dae5634 100644 --- a/ext/mbstring/mbstring.h +++ b/ext/mbstring/mbstring.h @@ -127,6 +127,7 @@ PHP_FUNCTION(mb_decode_numericentity); PHP_FUNCTION(mb_send_mail); PHP_FUNCTION(mb_get_info); PHP_FUNCTION(mb_check_encoding); +PHP_FUNCTION(mb_scrub); MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc); diff --git a/ext/mbstring/tests/mb_scrub.phpt b/ext/mbstring/tests/mb_scrub.phpt new file mode 100644 index 0000000000000..131e13d5367ea --- /dev/null +++ b/ext/mbstring/tests/mb_scrub.phpt @@ -0,0 +1,14 @@ +--TEST-- +mb_scrub() +--SKIPIF-- + +--FILE-- + +--EXPECT-- +bool(true) +bool(true) From 787b5a03a5dd5c6907b7b6291717c8599e8e06cb Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Fri, 20 Feb 2015 17:19:12 +0900 Subject: [PATCH 2/4] replace space with tab --- ext/mbstring/mbstring.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index dc0dac0cc9bf4..bf9110e43fb73 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4578,10 +4578,10 @@ PHP_FUNCTION(mb_check_encoding) static inline char* php_mb_scrub(const char* str, size_t str_len, const char* enc) { - char *ret; - size_t ret_len; + char *ret; + size_t ret_len; - return php_mb_convert_encoding(str, str_len, enc, enc, &ret_len); + return php_mb_convert_encoding(str, str_len, enc, enc, &ret_len); } /* {{{ proto bool mb_scrub([string str[, string encoding]]) */ @@ -4594,24 +4594,24 @@ PHP_FUNCTION(mb_scrub) char *ret; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &str, &str_len, &enc, &enc_len) == FAILURE) { - return; - } + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &str, &str_len, &enc, &enc_len) == FAILURE) { + return; + } - if (enc == NULL) { - enc = MBSTRG(current_internal_encoding)->name; - } else if (!mbfl_is_support_encoding(enc)) { - php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc); - RETURN_FALSE; - } + if (enc == NULL) { + enc = MBSTRG(current_internal_encoding)->name; + } else if (!mbfl_is_support_encoding(enc)) { + php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc); + RETURN_FALSE; + } - ret = php_mb_scrub(str, str_len, enc); + ret = php_mb_scrub(str, str_len, enc); - if (ret == NULL) { - RETURN_FALSE; - } + if (ret == NULL) { + RETURN_FALSE; + } - RETURN_STRING(ret); + RETURN_STRING(ret); } /* }}} */ From 68e44b6859688bfe5441a34288181bff77656948 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Sun, 22 Feb 2015 00:33:20 +0900 Subject: [PATCH 3/4] fix memory leak --- ext/mbstring/mbstring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index bf9110e43fb73..5c2317958c76d 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4578,7 +4578,6 @@ PHP_FUNCTION(mb_check_encoding) static inline char* php_mb_scrub(const char* str, size_t str_len, const char* enc) { - char *ret; size_t ret_len; return php_mb_convert_encoding(str, str_len, enc, enc, &ret_len); @@ -4611,7 +4610,8 @@ PHP_FUNCTION(mb_scrub) RETURN_FALSE; } - RETURN_STRING(ret); + RETVAL_STRING(ret); + efree(ret); } /* }}} */ From 6dd922efad29c0d95c83eec58c4ae27ecbf75201 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Mon, 2 Mar 2015 20:30:40 +0900 Subject: [PATCH 4/4] introduce fast zpp --- ext/mbstring/mbstring.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 5c2317958c76d..e16f89611f092 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4587,18 +4587,26 @@ static inline char* php_mb_scrub(const char* str, size_t str_len, const char* en PHP_FUNCTION(mb_scrub) { char* str; - long str_len; - const char *enc = NULL; - long enc_len; + size_t str_len; + char *enc = NULL; + size_t enc_len; char *ret; +#ifndef FAST_ZPP if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &str, &str_len, &enc, &enc_len) == FAILURE) { return; } +#else +ZEND_PARSE_PARAMETERS_START(1, 2) + Z_PARAM_STRING(str, str_len) + Z_PARAM_OPTIONAL + Z_PARAM_STRING(enc, enc_len) +ZEND_PARSE_PARAMETERS_END(); +#endif if (enc == NULL) { - enc = MBSTRG(current_internal_encoding)->name; + enc = (char *) MBSTRG(current_internal_encoding)->name; } else if (!mbfl_is_support_encoding(enc)) { php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc); RETURN_FALSE;