Skip to content

Commit

Permalink
is_numeric_string() optimization
Browse files Browse the repository at this point in the history
# Original Patch by Matt Wilmas


git-svn-id: http://svn.php.net/repository/php/php-src/php/php-src/branches/PHP_5_2@225847 c90b9560-bf6c-de11-be94-00142212c4b1
  • Loading branch information
iliaa committed Dec 26, 2006
1 parent 442af26 commit 0accb4b
Show file tree
Hide file tree
Showing 8 changed files with 185 additions and 76 deletions.
1 change: 1 addition & 0 deletions Zend/bench.php
@@ -1,4 +1,5 @@
<?php
date_default_timezone_set("UTC");

function simple() {
$a = 0;
Expand Down
2 changes: 1 addition & 1 deletion Zend/zend.c
Expand Up @@ -214,7 +214,7 @@ ZEND_API void zend_make_printable_zval(zval *expr, zval *expr_copy, int *use_cop
}
break;
case IS_RESOURCE:
expr_copy->value.str.val = (char *) emalloc(sizeof("Resource id #")-1 + MAX_LENGTH_OF_LONG);
expr_copy->value.str.val = (char *) emalloc(sizeof("Resource id #") + MAX_LENGTH_OF_LONG);
expr_copy->value.str.len = sprintf(expr_copy->value.str.val, "Resource id #%ld", expr->value.lval);
break;
case IS_ARRAY:
Expand Down
2 changes: 1 addition & 1 deletion Zend/zend_exceptions.c
Expand Up @@ -404,7 +404,7 @@ static int _build_trace_string(zval **frame, int num_args, va_list args, zend_ha
} else {
line = 0;
}
s_tmp = emalloc(Z_STRLEN_PP(file) + MAX_LENGTH_OF_LONG + 2 + 1);
s_tmp = emalloc(Z_STRLEN_PP(file) + MAX_LENGTH_OF_LONG + 4 + 1);
sprintf(s_tmp, "%s(%ld): ", Z_STRVAL_PP(file), line);
TRACE_APPEND_STRL(s_tmp, strlen(s_tmp));
efree(s_tmp);
Expand Down
38 changes: 22 additions & 16 deletions Zend/zend_language_scanner.l
Expand Up @@ -1236,36 +1236,42 @@ NEWLINE ("\r"|"\n"|"\r\n")


<ST_IN_SCRIPTING>{LNUM} {
if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
zendlval->value.lval = strtol(yytext, NULL, 0);
} else {
errno = 0;
zendlval->value.lval = strtol(yytext, NULL, 0);
if (errno == ERANGE) { /* overflow */
if (errno == ERANGE) { /* Overflow */
zendlval->value.dval = zend_strtod(yytext, NULL);
zendlval->type = IS_DOUBLE;
return T_DNUMBER;
} else {
}
}

zendlval->type = IS_LONG;
return T_LNUMBER;
}
}

<ST_IN_SCRIPTING>{HNUM} {
errno = 0;
zendlval->value.lval = strtoul(yytext, NULL, 16);
if (errno == ERANGE) { /* overflow */
/* not trying strtod - it returns trash on 0x-es */
zendlval->value.lval = LONG_MAX; /* maximal long */
zend_error(E_NOTICE,"Hex number is too big: %s", yytext);
/* Skip "0x" */
yytext += 2;
yyleng -= 2;

/* Skip any leading 0s */
while (*yytext == '0') {
yytext++;
yyleng--;
}

if (yyleng < SIZEOF_LONG * 2 || (yyleng == SIZEOF_LONG * 2 && *yytext <= '7')) {
zendlval->value.lval = strtol(yytext, NULL, 16);
zendlval->type = IS_LONG;
return T_LNUMBER;
} else {
if (zendlval->value.lval < 0) {
/* maintain consistency with the old way */
zendlval->value.dval = (unsigned long) zendlval->value.lval;
zendlval->value.dval = zend_hex_strtod(yytext, NULL);
zendlval->type = IS_DOUBLE;
return T_DNUMBER;
}
zendlval->type = IS_LONG;
}
zendlval->type = IS_LONG;
return T_LNUMBER;
}

<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{LNUM}|{HNUM} { /* treat numbers (almost) as strings inside encapsulated strings */
Expand Down
32 changes: 15 additions & 17 deletions Zend/zend_operators.c
Expand Up @@ -123,14 +123,9 @@ ZEND_API void convert_scalar_to_number(zval *op TSRMLS_DC)
char *strval;

strval = op->value.str.val;
switch ((op->type=is_numeric_string(strval, op->value.str.len, &op->value.lval, &op->value.dval, 1))) {
case IS_DOUBLE:
case IS_LONG:
break;
default:
op->value.lval = strtol(op->value.str.val, NULL, 10);
if ((op->type=is_numeric_string(strval, op->value.str.len, &op->value.lval, &op->value.dval, 1)) == 0) {
op->value.lval = 0;
op->type = IS_LONG;
break;
}
STR_FREE(strval);
break;
Expand Down Expand Up @@ -161,14 +156,9 @@ ZEND_API void convert_scalar_to_number(zval *op TSRMLS_DC)
switch ((op)->type) { \
case IS_STRING: \
{ \
switch (((holder).type=is_numeric_string((op)->value.str.val, (op)->value.str.len, &(holder).value.lval, &(holder).value.dval, 1))) { \
case IS_DOUBLE: \
case IS_LONG: \
break; \
default: \
(holder).value.lval = strtol((op)->value.str.val, NULL, 10); \
if (((holder).type=is_numeric_string((op)->value.str.val, (op)->value.str.len, &(holder).value.lval, &(holder).value.dval, 1)) == 0) { \
(holder).value.lval = 0; \
(holder).type = IS_LONG; \
break; \
} \
(op) = &(holder); \
break; \
Expand Down Expand Up @@ -560,7 +550,7 @@ ZEND_API void _convert_to_string(zval *op ZEND_FILE_LINE_DC)
TSRMLS_FETCH();

zend_list_delete(op->value.lval);
op->value.str.val = (char *) emalloc(sizeof("Resource id #")-1 + MAX_LENGTH_OF_LONG);
op->value.str.val = (char *) emalloc(sizeof("Resource id #") + MAX_LENGTH_OF_LONG);
op->value.str.len = sprintf(op->value.str.val, "Resource id #%ld", tmp);
break;
}
Expand Down Expand Up @@ -1227,10 +1217,14 @@ ZEND_API int concat_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
ZEND_API int string_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
{
zval op1_copy, op2_copy;
int use_copy1, use_copy2;
int use_copy1 = 0, use_copy2 = 0;

if (op1->type != IS_STRING) {
zend_make_printable_zval(op1, &op1_copy, &use_copy1);
}
if (op2->type != IS_STRING) {
zend_make_printable_zval(op2, &op2_copy, &use_copy2);
}

if (use_copy1) {
op1 = &op1_copy;
Expand All @@ -1255,10 +1249,14 @@ ZEND_API int string_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_D
ZEND_API int string_locale_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
{
zval op1_copy, op2_copy;
int use_copy1, use_copy2;
int use_copy1 = 0, use_copy2 = 0;

if (op1->type != IS_STRING) {
zend_make_printable_zval(op1, &op1_copy, &use_copy1);
}
if (op2->type != IS_STRING) {
zend_make_printable_zval(op2, &op2_copy, &use_copy2);
}

if (use_copy1) {
op1 = &op1_copy;
Expand Down
152 changes: 111 additions & 41 deletions Zend/zend_operators.h
Expand Up @@ -36,7 +36,16 @@
#include "ext/bcmath/libbcmath/src/bcmath.h"
#endif

/*
 * MAX_LENGTH_OF_LONG is the number of characters in the decimal spelling of
 * the most negative long, sign included ("-2147483648" is 11 characters,
 * "-9223372036854775808" is 20). A terminating NUL is not counted.
 *
 * long_min_digits holds the digits of that same value without the sign, i.e.
 * a string of MAX_LENGTH_OF_LONG - 1 digits.
 *
 * Any SIZEOF_LONG other than 4 or 8 is rejected at compile time.
 */
#if SIZEOF_LONG == 4
#define MAX_LENGTH_OF_LONG 11
static const char long_min_digits[] = "2147483648";
#elif SIZEOF_LONG == 8
#define MAX_LENGTH_OF_LONG 20
static const char long_min_digits[] = "9223372036854775808";
#else
#error "Unknown SIZEOF_LONG"
#endif

#define MAX_LENGTH_OF_DOUBLE 32

BEGIN_EXTERN_C()
Expand Down Expand Up @@ -66,82 +75,143 @@ ZEND_API zend_bool instanceof_function_ex(zend_class_entry *instance_ce, zend_cl
ZEND_API zend_bool instanceof_function(zend_class_entry *instance_ce, zend_class_entry *ce TSRMLS_DC);
END_EXTERN_C()

/* Non-zero when c lies in the range '0'..'9'. The argument is evaluated
 * twice, so it must not have side effects. */
#define ZEND_IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
/* Non-zero when c is a hex LETTER ('A'..'F' or 'a'..'f'). Decimal digits are
 * not matched here; combine with ZEND_IS_DIGIT for a full hex-digit test.
 * The argument is evaluated more than once, so it must not have side effects. */
#define ZEND_IS_XDIGIT(c) (((c) >= 'A' && (c) <= 'F') || ((c) >= 'a' && (c) <= 'f'))

/**
* Checks whether the string "str" with the length "length" is a numeric string.
* Checks whether the string "str" with length "length" is numeric. The value
* of allow_errors determines whether it's required to be entirely numeric, or
* just its prefix. Leading whitespace is allowed.
*
* The function returns 0 if the string did not contain a string; IS_LONG if
* the string contained a number that fits in the integer range and IS_DOUBLE
* in case it did not. The long value is returned into the pointer *lval if
* that pointer was not NULL or into the pointer *dval if that pointer was not
* NULL.
* The function returns 0 if the string did not contain a valid number; IS_LONG
* if it contained a number that fits within the range of a long; or IS_DOUBLE
* if the number was out of long range or contained a decimal point/exponent.
* The number's value is returned into the respective pointer, *lval or *dval,
* if that pointer is not NULL.
*/
static inline zend_bool is_numeric_string(char *str, int length, long *lval, double *dval, int allow_errors)

static inline zend_uchar is_numeric_string(const char *str, int length, long *lval, double *dval, int allow_errors)
{
long local_lval;
const char *ptr;
int base = 10, digits = 0, dp_or_e = 0;
double local_dval;
char *end_ptr_long, *end_ptr_double;
int conv_base=10;
zend_uchar type;

if (!length) {
return 0;
}

/* handle hex numbers */
if (length>=2 && str[0]=='0' && (str[1]=='x' || str[1]=='X')) {
conv_base=16;
/* Skip any whitespace
* This is much faster than the isspace() function */
while (*str == ' ' || *str == '\t' || *str == '\n' || *str == '\r' || *str == '\v' || *str == '\f') {
str++;
length--;
}
errno=0;
local_lval = strtol(str, &end_ptr_long, conv_base);
if (errno!=ERANGE) {
if (end_ptr_long == str+length) { /* integer string */
if (lval) {
*lval = local_lval;
ptr = str;

if (*ptr == '-' || *ptr == '+') {
ptr++;
}
return IS_LONG;
} else if (end_ptr_long == str && *end_ptr_long != '\0' && *str != '.' && *str != '-') { /* ignore partial string matches */
return 0;

if (ZEND_IS_DIGIT(*ptr)) {
/* Handle hex numbers
* str is used instead of ptr to disallow signs and keep old behavior */
if (length > 2 && *str == '0' && (str[1] == 'x' || str[1] == 'X')) {
base = 16;
ptr += 2;
}
} else {
end_ptr_long=NULL;

/* Skip any leading 0s */
while (*ptr == '0') {
ptr++;
}

if (conv_base==16) { /* hex string, under UNIX strtod() messes it up */
return 0;
/* Count the number of digits. If a decimal point/exponent is found,
* it's a double. Otherwise, if there's a dval or no need to check for
* a full match, stop when there are too many digits for a long */
for (type = IS_LONG; !(digits >= MAX_LENGTH_OF_LONG && (dval || allow_errors == 1)); digits++, ptr++) {
check_digits:
if (ZEND_IS_DIGIT(*ptr) || (base == 16 && ZEND_IS_XDIGIT(*ptr))) {
continue;
} else if (base == 10) {
if (*ptr == '.' && dp_or_e < 1) {
goto process_double;
} else if ((*ptr == 'e' || *ptr == 'E') && dp_or_e < 2) {
const char *e = ptr + 1;

if (*e == '-' || *e == '+') {
ptr = e++;
}
if (ZEND_IS_DIGIT(*e)) {
goto process_double;
}
}
}

errno=0;
local_dval = zend_strtod(str, &end_ptr_double);
if (errno != ERANGE) {
if (end_ptr_double == str+length) { /* floating point string */
if (!zend_finite(local_dval)) {
/* "inf","nan" and maybe other weird ones */
return 0;
break;
}

if (base == 10) {
if (digits >= MAX_LENGTH_OF_LONG) {
dp_or_e = -1;
goto process_double;
}
} else if (!(digits < SIZEOF_LONG * 2 || (digits == SIZEOF_LONG * 2 && ptr[-digits] <= '7'))) {
if (dval) {
*dval = local_dval;
local_dval = zend_hex_strtod(str, (char **)&ptr);
}
return IS_DOUBLE;
type = IS_DOUBLE;
}
} else if (*ptr == '.' && ZEND_IS_DIGIT(ptr[1])) {
process_double:
type = IS_DOUBLE;

/* If there's a dval, do the conversion; else continue checking
* the digits if we need to check for a full match */
if (dval) {
local_dval = zend_strtod(str, (char **)&ptr);
} else if (allow_errors != 1 && dp_or_e != -1) {
dp_or_e = (*ptr++ == '.') ? 1 : 2;
goto check_digits;
}
} else {
end_ptr_double=NULL;
return 0;
}

if (ptr != str + length) {
if (!allow_errors) {
return 0;
}
if (allow_errors == -1) {
zend_error(E_NOTICE, "A non well formed numeric value encountered");
}
}

if (type == IS_LONG) {
if (digits == MAX_LENGTH_OF_LONG - 1) {
int cmp = strcmp(&ptr[-digits], long_min_digits);

if (!(cmp < 0 || (cmp == 0 && *str == '-'))) {
if (dval) {
*dval = zend_strtod(str, NULL);
}

if (end_ptr_double>end_ptr_long && dval) {
*dval = local_dval;
return IS_DOUBLE;
} else if (end_ptr_long && lval) {
*lval = local_lval;
}
}

if (lval) {
*lval = strtol(str, NULL, base);
}

return IS_LONG;
} else {
if (dval) {
*dval = local_dval;
}

return IS_DOUBLE;
}
return 0;
}

static inline char *
Expand Down
33 changes: 33 additions & 0 deletions Zend/zend_strtod.c
Expand Up @@ -2557,6 +2557,39 @@ ZEND_API double zend_strtod (CONST char *s00, char **se)
return result;
}

/**
 * Converts a run of hexadecimal digits to a double.
 *
 * An optional leading "0x" / "0X" is stepped over first. Digits are then
 * consumed until the first character that is not 0-9, A-F or a-f (the
 * terminating NUL included). No sign, whitespace, fraction or exponent is
 * recognised, and no range checking is done: the value is simply accumulated
 * in a double.
 *
 * @param str    NUL-terminated input string.
 * @param endptr If not NULL, receives the address of the first character that
 *               was not consumed, or str itself when no hex digit was found
 *               (this includes a bare "0x" prefix).
 * @return The accumulated value; 0 when no digit was consumed.
 */
ZEND_API double zend_hex_strtod(const char *str, char **endptr)
{
	const char *cursor = str;
	double result = 0;
	int seen_digit = 0;

	/* Step over an optional "0x" / "0X" prefix */
	if (cursor[0] == '0' && (cursor[1] == 'x' || cursor[1] == 'X')) {
		cursor += 2;
	}

	/* cursor is left pointing at the character that ended the scan */
	for (;; cursor++) {
		const char ch = *cursor;
		int nibble;

		if (ch >= '0' && ch <= '9') {
			nibble = ch - '0';
		} else if (ch >= 'A' && ch <= 'F') {
			nibble = ch - 'A' + 10;
		} else if (ch >= 'a' && ch <= 'f') {
			nibble = ch - 'a' + 10;
		} else {
			break;
		}

		seen_digit = 1;
		result = result * 16 + nibble;
	}

	if (endptr != NULL) {
		*endptr = (char *)(seen_digit ? cursor : str);
	}

	return result;
}

/*
* Local variables:
* tab-width: 4
Expand Down

0 comments on commit 0accb4b

Please sign in to comment.