Permalink
Browse files

is_numeric_string() optimization

# Original Patch by Matt Wilmas


git-svn-id: http://svn.php.net/repository/php/php-src/php/php-src/branches/PHP_5_2@225847 c90b9560-bf6c-de11-be94-00142212c4b1
  • Loading branch information...
1 parent 442af26 commit 0accb4b0094b8fdda905e0a374843f0c775f4537 iliaa committed Dec 26, 2006
Showing with 207 additions and 98 deletions.
  1. +1 −0 Zend/bench.php
  2. +1 −1 Zend/zend.c
  3. +1 −1 Zend/zend_exceptions.c
  4. +29 −23 Zend/zend_language_scanner.l
  5. +21 −23 Zend/zend_operators.c
  6. +120 −50 Zend/zend_operators.h
  7. +33 −0 Zend/zend_strtod.c
  8. +1 −0 Zend/zend_strtod.h
View
@@ -1,4 +1,5 @@
<?php
+date_default_timezone_set("UTC");
function simple() {
$a = 0;
View
@@ -214,7 +214,7 @@ ZEND_API void zend_make_printable_zval(zval *expr, zval *expr_copy, int *use_cop
}
break;
case IS_RESOURCE:
- expr_copy->value.str.val = (char *) emalloc(sizeof("Resource id #")-1 + MAX_LENGTH_OF_LONG);
+ expr_copy->value.str.val = (char *) emalloc(sizeof("Resource id #") + MAX_LENGTH_OF_LONG);
expr_copy->value.str.len = sprintf(expr_copy->value.str.val, "Resource id #%ld", expr->value.lval);
break;
case IS_ARRAY:
@@ -404,7 +404,7 @@ static int _build_trace_string(zval **frame, int num_args, va_list args, zend_ha
} else {
line = 0;
}
- s_tmp = emalloc(Z_STRLEN_PP(file) + MAX_LENGTH_OF_LONG + 2 + 1);
+ s_tmp = emalloc(Z_STRLEN_PP(file) + MAX_LENGTH_OF_LONG + 4 + 1);
sprintf(s_tmp, "%s(%ld): ", Z_STRVAL_PP(file), line);
TRACE_APPEND_STRL(s_tmp, strlen(s_tmp));
efree(s_tmp);
@@ -1236,38 +1236,44 @@ NEWLINE ("\r"|"\n"|"\r\n")
<ST_IN_SCRIPTING>{LNUM} {
- errno = 0;
- zendlval->value.lval = strtol(yytext, NULL, 0);
- if (errno == ERANGE) { /* overflow */
- zendlval->value.dval = zend_strtod(yytext, NULL);
- zendlval->type = IS_DOUBLE;
- return T_DNUMBER;
+ if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
+ zendlval->value.lval = strtol(yytext, NULL, 0);
} else {
- zendlval->type = IS_LONG;
- return T_LNUMBER;
- }
-}
-
-<ST_IN_SCRIPTING>{HNUM} {
- errno = 0;
- zendlval->value.lval = strtoul(yytext, NULL, 16);
- if (errno == ERANGE) { /* overflow */
- /* not trying strtod - it returns trash on 0x-es */
- zendlval->value.lval = LONG_MAX; /* maximal long */
- zend_error(E_NOTICE,"Hex number is too big: %s", yytext);
- } else {
- if (zendlval->value.lval < 0) {
- /* maintain consistency with the old way */
- zendlval->value.dval = (unsigned long) zendlval->value.lval;
+ errno = 0;
+ zendlval->value.lval = strtol(yytext, NULL, 0);
+ if (errno == ERANGE) { /* Overflow */
+ zendlval->value.dval = zend_strtod(yytext, NULL);
zendlval->type = IS_DOUBLE;
return T_DNUMBER;
}
- zendlval->type = IS_LONG;
}
+
zendlval->type = IS_LONG;
return T_LNUMBER;
}
+<ST_IN_SCRIPTING>{HNUM} {
+ /* Skip "0x" */
+ yytext += 2;
+ yyleng -= 2;
+
+ /* Skip any leading 0s */
+ while (*yytext == '0') {
+ yytext++;
+ yyleng--;
+ }
+
+ if (yyleng < SIZEOF_LONG * 2 || (yyleng == SIZEOF_LONG * 2 && *yytext <= '7')) {
+ zendlval->value.lval = strtol(yytext, NULL, 16);
+ zendlval->type = IS_LONG;
+ return T_LNUMBER;
+ } else {
+ zendlval->value.dval = zend_hex_strtod(yytext, NULL);
+ zendlval->type = IS_DOUBLE;
+ return T_DNUMBER;
+ }
+}
+
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{LNUM}|{HNUM} { /* treat numbers (almost) as strings inside encapsulated strings */
zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
zendlval->value.str.len = yyleng;
View
@@ -123,14 +123,9 @@ ZEND_API void convert_scalar_to_number(zval *op TSRMLS_DC)
char *strval;
strval = op->value.str.val;
- switch ((op->type=is_numeric_string(strval, op->value.str.len, &op->value.lval, &op->value.dval, 1))) {
- case IS_DOUBLE:
- case IS_LONG:
- break;
- default:
- op->value.lval = strtol(op->value.str.val, NULL, 10);
- op->type = IS_LONG;
- break;
+ if ((op->type=is_numeric_string(strval, op->value.str.len, &op->value.lval, &op->value.dval, 1)) == 0) {
+ op->value.lval = 0;
+ op->type = IS_LONG;
}
STR_FREE(strval);
break;
@@ -161,14 +156,9 @@ ZEND_API void convert_scalar_to_number(zval *op TSRMLS_DC)
switch ((op)->type) { \
case IS_STRING: \
{ \
- switch (((holder).type=is_numeric_string((op)->value.str.val, (op)->value.str.len, &(holder).value.lval, &(holder).value.dval, 1))) { \
- case IS_DOUBLE: \
- case IS_LONG: \
- break; \
- default: \
- (holder).value.lval = strtol((op)->value.str.val, NULL, 10); \
- (holder).type = IS_LONG; \
- break; \
+ if (((holder).type=is_numeric_string((op)->value.str.val, (op)->value.str.len, &(holder).value.lval, &(holder).value.dval, 1)) == 0) { \
+ (holder).value.lval = 0; \
+ (holder).type = IS_LONG; \
} \
(op) = &(holder); \
break; \
@@ -560,7 +550,7 @@ ZEND_API void _convert_to_string(zval *op ZEND_FILE_LINE_DC)
TSRMLS_FETCH();
zend_list_delete(op->value.lval);
- op->value.str.val = (char *) emalloc(sizeof("Resource id #")-1 + MAX_LENGTH_OF_LONG);
+ op->value.str.val = (char *) emalloc(sizeof("Resource id #") + MAX_LENGTH_OF_LONG);
op->value.str.len = sprintf(op->value.str.val, "Resource id #%ld", tmp);
break;
}
@@ -1227,10 +1217,14 @@ ZEND_API int concat_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
ZEND_API int string_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
{
zval op1_copy, op2_copy;
- int use_copy1, use_copy2;
+ int use_copy1 = 0, use_copy2 = 0;
- zend_make_printable_zval(op1, &op1_copy, &use_copy1);
- zend_make_printable_zval(op2, &op2_copy, &use_copy2);
+ if (op1->type != IS_STRING) {
+ zend_make_printable_zval(op1, &op1_copy, &use_copy1);
+ }
+ if (op2->type != IS_STRING) {
+ zend_make_printable_zval(op2, &op2_copy, &use_copy2);
+ }
if (use_copy1) {
op1 = &op1_copy;
@@ -1255,10 +1249,14 @@ ZEND_API int string_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_D
ZEND_API int string_locale_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
{
zval op1_copy, op2_copy;
- int use_copy1, use_copy2;
+ int use_copy1 = 0, use_copy2 = 0;
- zend_make_printable_zval(op1, &op1_copy, &use_copy1);
- zend_make_printable_zval(op2, &op2_copy, &use_copy2);
+ if (op1->type != IS_STRING) {
+ zend_make_printable_zval(op1, &op1_copy, &use_copy1);
+ }
+ if (op2->type != IS_STRING) {
+ zend_make_printable_zval(op2, &op2_copy, &use_copy2);
+ }
if (use_copy1) {
op1 = &op1_copy;
View
@@ -36,7 +36,16 @@
#include "ext/bcmath/libbcmath/src/bcmath.h"
#endif
+#if SIZEOF_LONG == 4
+#define MAX_LENGTH_OF_LONG 11
+static const char long_min_digits[] = "2147483648";
+#elif SIZEOF_LONG == 8
#define MAX_LENGTH_OF_LONG 20
+static const char long_min_digits[] = "9223372036854775808";
+#else
+#error "Unknown SIZEOF_LONG"
+#endif
+
#define MAX_LENGTH_OF_DOUBLE 32
BEGIN_EXTERN_C()
@@ -66,82 +75,143 @@ ZEND_API zend_bool instanceof_function_ex(zend_class_entry *instance_ce, zend_cl
ZEND_API zend_bool instanceof_function(zend_class_entry *instance_ce, zend_class_entry *ce TSRMLS_DC);
END_EXTERN_C()
+#define ZEND_IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
+#define ZEND_IS_XDIGIT(c) (((c) >= 'A' && (c) <= 'F') || ((c) >= 'a' && (c) <= 'f'))
+
/**
- * Checks whether the string "str" with the length "length" is a numeric string.
+ * Checks whether the string "str" with length "length" is numeric. The value
+ * of allow_errors determines whether it's required to be entirely numeric, or
+ * just its prefix. Leading whitespace is allowed.
*
- * The function returns 0 if the string did not contain a string; IS_LONG if
- * the string contained a number that fits in the integer range and IS_DOUBLE
- * in case it did not. The long value is returned into the pointer *lval if
- * that pointer was not NULL or into the pointer *dval if that pointer was not
- * NULL.
+ * The function returns 0 if the string did not contain a valid number; IS_LONG
+ * if it contained a number that fits within the range of a long; or IS_DOUBLE
+ * if the number was out of long range or contained a decimal point/exponent.
+ * The number's value is returned into the respective pointer, *lval or *dval,
+ * if that pointer is not NULL.
*/
-static inline zend_bool is_numeric_string(char *str, int length, long *lval, double *dval, int allow_errors)
+
+static inline zend_uchar is_numeric_string(const char *str, int length, long *lval, double *dval, int allow_errors)
{
- long local_lval;
+ const char *ptr;
+ int base = 10, digits = 0, dp_or_e = 0;
double local_dval;
- char *end_ptr_long, *end_ptr_double;
- int conv_base=10;
+ zend_uchar type;
if (!length) {
return 0;
}
- /* handle hex numbers */
- if (length>=2 && str[0]=='0' && (str[1]=='x' || str[1]=='X')) {
- conv_base=16;
- }
- errno=0;
- local_lval = strtol(str, &end_ptr_long, conv_base);
- if (errno!=ERANGE) {
- if (end_ptr_long == str+length) { /* integer string */
- if (lval) {
- *lval = local_lval;
- }
- return IS_LONG;
- } else if (end_ptr_long == str && *end_ptr_long != '\0' && *str != '.' && *str != '-') { /* ignore partial string matches */
- return 0;
- }
- } else {
- end_ptr_long=NULL;
+ /* Skip any whitespace
+ * This is much faster than the isspace() function */
+ while (*str == ' ' || *str == '\t' || *str == '\n' || *str == '\r' || *str == '\v' || *str == '\f') {
+ str++;
+ length--;
}
+ ptr = str;
- if (conv_base==16) { /* hex string, under UNIX strtod() messes it up */
- return 0;
+ if (*ptr == '-' || *ptr == '+') {
+ ptr++;
}
- errno=0;
- local_dval = zend_strtod(str, &end_ptr_double);
- if (errno != ERANGE) {
- if (end_ptr_double == str+length) { /* floating point string */
- if (!zend_finite(local_dval)) {
- /* "inf","nan" and maybe other weird ones */
- return 0;
+ if (ZEND_IS_DIGIT(*ptr)) {
+ /* Handle hex numbers
+ * str is used instead of ptr to disallow signs and keep old behavior */
+ if (length > 2 && *str == '0' && (str[1] == 'x' || str[1] == 'X')) {
+ base = 16;
+ ptr += 2;
+ }
+
+ /* Skip any leading 0s */
+ while (*ptr == '0') {
+ ptr++;
+ }
+
+ /* Count the number of digits. If a decimal point/exponent is found,
+ * it's a double. Otherwise, if there's a dval or no need to check for
+ * a full match, stop when there are too many digits for a long */
+ for (type = IS_LONG; !(digits >= MAX_LENGTH_OF_LONG && (dval || allow_errors == 1)); digits++, ptr++) {
+check_digits:
+ if (ZEND_IS_DIGIT(*ptr) || (base == 16 && ZEND_IS_XDIGIT(*ptr))) {
+ continue;
+ } else if (base == 10) {
+ if (*ptr == '.' && dp_or_e < 1) {
+ goto process_double;
+ } else if ((*ptr == 'e' || *ptr == 'E') && dp_or_e < 2) {
+ const char *e = ptr + 1;
+
+ if (*e == '-' || *e == '+') {
+ ptr = e++;
+ }
+ if (ZEND_IS_DIGIT(*e)) {
+ goto process_double;
+ }
+ }
}
+ break;
+ }
+
+ if (base == 10) {
+ if (digits >= MAX_LENGTH_OF_LONG) {
+ dp_or_e = -1;
+ goto process_double;
+ }
+ } else if (!(digits < SIZEOF_LONG * 2 || (digits == SIZEOF_LONG * 2 && ptr[-digits] <= '7'))) {
if (dval) {
- *dval = local_dval;
+ local_dval = zend_hex_strtod(str, (char **)&ptr);
}
- return IS_DOUBLE;
+ type = IS_DOUBLE;
+ }
+ } else if (*ptr == '.' && ZEND_IS_DIGIT(ptr[1])) {
+process_double:
+ type = IS_DOUBLE;
+
+ /* If there's a dval, do the conversion; else continue checking
+ * the digits if we need to check for a full match */
+ if (dval) {
+ local_dval = zend_strtod(str, (char **)&ptr);
+ } else if (allow_errors != 1 && dp_or_e != -1) {
+ dp_or_e = (*ptr++ == '.') ? 1 : 2;
+ goto check_digits;
}
} else {
- end_ptr_double=NULL;
- }
-
- if (!allow_errors) {
return 0;
}
- if (allow_errors == -1) {
- zend_error(E_NOTICE, "A non well formed numeric value encountered");
+
+ if (ptr != str + length) {
+ if (!allow_errors) {
+ return 0;
+ }
+ if (allow_errors == -1) {
+ zend_error(E_NOTICE, "A non well formed numeric value encountered");
+ }
}
- if (end_ptr_double>end_ptr_long && dval) {
- *dval = local_dval;
- return IS_DOUBLE;
- } else if (end_ptr_long && lval) {
- *lval = local_lval;
+ if (type == IS_LONG) {
+ if (digits == MAX_LENGTH_OF_LONG - 1) {
+ int cmp = strcmp(&ptr[-digits], long_min_digits);
+
+ if (!(cmp < 0 || (cmp == 0 && *str == '-'))) {
+ if (dval) {
+ *dval = zend_strtod(str, NULL);
+ }
+
+ return IS_DOUBLE;
+ }
+ }
+
+ if (lval) {
+ *lval = strtol(str, NULL, base);
+ }
+
return IS_LONG;
+ } else {
+ if (dval) {
+ *dval = local_dval;
+ }
+
+ return IS_DOUBLE;
}
- return 0;
}
static inline char *
Oops, something went wrong.

0 comments on commit 0accb4b

Please sign in to comment.