From a1ec76eeaf03ef806ba6a2907bc4f1af44571e1b Mon Sep 17 00:00:00 2001
From: Theodore Brown
Date: Thu, 2 May 2019 12:10:19 -0500
Subject: [PATCH] Implement numeric literal separators

RFC: https://wiki.php.net/rfc/numeric_literal_separator
---
 Zend/tests/numeric_literal_separator_001.phpt |  19 +++
 Zend/tests/numeric_literal_separator_002.phpt |   7 +
 Zend/tests/numeric_literal_separator_003.phpt |   7 +
 Zend/tests/numeric_literal_separator_004.phpt |   7 +
 Zend/tests/numeric_literal_separator_005.phpt |   7 +
 Zend/tests/numeric_literal_separator_006.phpt |   7 +
 Zend/tests/numeric_literal_separator_007.phpt |   7 +
 Zend/tests/numeric_literal_separator_008.phpt |   7 +
 Zend/tests/numeric_literal_separator_009.phpt |   7 +
 Zend/zend_language_scanner.l                  | 143 ++++++++++++++----
 10 files changed, 190 insertions(+), 28 deletions(-)
 create mode 100644 Zend/tests/numeric_literal_separator_001.phpt
 create mode 100644 Zend/tests/numeric_literal_separator_002.phpt
 create mode 100644 Zend/tests/numeric_literal_separator_003.phpt
 create mode 100644 Zend/tests/numeric_literal_separator_004.phpt
 create mode 100644 Zend/tests/numeric_literal_separator_005.phpt
 create mode 100644 Zend/tests/numeric_literal_separator_006.phpt
 create mode 100644 Zend/tests/numeric_literal_separator_007.phpt
 create mode 100644 Zend/tests/numeric_literal_separator_008.phpt
 create mode 100644 Zend/tests/numeric_literal_separator_009.phpt

diff --git a/Zend/tests/numeric_literal_separator_001.phpt b/Zend/tests/numeric_literal_separator_001.phpt
new file mode 100644
index 0000000000000..9d52c1d78ddae
--- /dev/null
+++ b/Zend/tests/numeric_literal_separator_001.phpt
@@ -0,0 +1,19 @@
+--TEST--
+Valid use of numeric literal separator
+--FILE--
+{BNUM} {
-	char *bin = yytext + 2; /* Skip "0b" */
-	int len = yyleng - 2;
-	char *end;
+	/* The +/- 2 skips "0b" */
+	int len = yyleng - 2, contains_underscores, i;
+	char *end, *bin = yytext + 2;
 
 	/* Skip any leading 0s */
-	while (*bin == '0') {
+	while (*bin == '0' || *bin == '_') {
 		++bin;
 		--len;
 	}
 
+	for (i = 0; i < len && bin[i] != '_'; ++i);
+
+	contains_underscores = i != len;
+
+	if (contains_underscores) {
+		bin = estrndup(bin, len);
+		strip_underscores(bin, &len);
+	}
+
 	if (len < SIZEOF_ZEND_LONG * 8) {
 		if (len == 0) {
 			ZVAL_LONG(zendlval, 0);
 		} else {
 			errno = 0;
 			ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
-			ZEND_ASSERT(!errno && end == yytext + yyleng);
+			ZEND_ASSERT(!errno && end == bin + len);
+		}
+		if (contains_underscores) {
+			efree(bin);
 		}
 		RETURN_TOKEN_WITH_VAL(T_LNUMBER);
 	} else {
 		ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
 		/* errno isn't checked since we allow HUGE_VAL/INF overflow */
-		ZEND_ASSERT(end == yytext + yyleng);
+		ZEND_ASSERT(end == bin + len);
+		if (contains_underscores) {
+			efree(bin);
+		}
 		RETURN_TOKEN_WITH_VAL(T_DNUMBER);
 	}
 }
 
 {LNUM} {
-	char *end;
-	if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
+	int len = yyleng, contains_underscores, i;
+	char *end, *lnum = yytext;
+
+	for (i = 0; i < len && lnum[i] != '_'; ++i);
+
+	contains_underscores = i != len;
+
+	if (contains_underscores) {
+		lnum = estrndup(lnum, len);
+		strip_underscores(lnum, &len);
+	}
+
+	if (len < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
 		errno = 0;
 		/* base must be passed explicitly for correct parse error on Windows */
-		ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, yytext[0] == '0' ? 8 : 10));
+		ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, lnum[0] == '0' ? 8 : 10));
 		/* This isn't an assert, we need to ensure 019 isn't valid octal
 		 * Because the lexing itself doesn't do that for us
 		 */
-		if (end != yytext + yyleng) {
+		if (end != lnum + len) {
 			zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
 			ZVAL_UNDEF(zendlval);
+			if (contains_underscores) {
+				efree(lnum);
+			}
 			if (PARSER_MODE()) {
 				RETURN_TOKEN(T_ERROR);
 			}
@@ -1806,29 +1850,40 @@ NEWLINE ("\r"|"\n"|"\r\n")
 		}
 	} else {
 		errno = 0;
-		ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
+		ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, 0));
 		if (errno == ERANGE) { /* Overflow */
 			errno = 0;
-			if (yytext[0] == '0') { /* octal overflow */
-				ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, (const char **)&end));
+			if (lnum[0] == '0') { /* octal overflow */
+				ZVAL_DOUBLE(zendlval, zend_oct_strtod(lnum, (const char **)&end));
 			} else {
-				ZVAL_DOUBLE(zendlval, zend_strtod(yytext, (const char **)&end));
+				ZVAL_DOUBLE(zendlval, zend_strtod(lnum, (const char **)&end));
 			}
 			/* Also not an assert for the same reason */
-			if (end != yytext + yyleng) {
+			if (end != lnum + len) {
 				zend_throw_exception(zend_ce_parse_error,
 					"Invalid numeric literal", 0);
 				ZVAL_UNDEF(zendlval);
+				if (contains_underscores) {
+					efree(lnum);
+				}
 				if (PARSER_MODE()) {
 					RETURN_TOKEN(T_ERROR);
 				}
+				/* lnum is freed above: return now to avoid a second efree() below */
+				RETURN_TOKEN_WITH_VAL(T_DNUMBER);
 			}
+			if (contains_underscores) {
+				efree(lnum);
+			}
 			RETURN_TOKEN_WITH_VAL(T_DNUMBER);
 		}
 		/* Also not an assert for the same reason */
-		if (end != yytext + yyleng) {
+		if (end != lnum + len) {
 			zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
 			ZVAL_UNDEF(zendlval);
+			if (contains_underscores) {
+				efree(lnum);
+			}
 			if (PARSER_MODE()) {
 				RETURN_TOKEN(T_ERROR);
 			}
@@ -1836,18 +1891,30 @@ NEWLINE ("\r"|"\n"|"\r\n")
 		}
 	}
 	ZEND_ASSERT(!errno);
+	if (contains_underscores) {
+		efree(lnum);
+	}
 	RETURN_TOKEN_WITH_VAL(T_LNUMBER);
 }
 
 {HNUM} {
-	char *hex = yytext + 2; /* Skip "0x" */
-	int len = yyleng - 2;
-	char *end;
+	/* The +/- 2 skips "0x" */
+	int len = yyleng - 2, contains_underscores, i;
+	char *end, *hex = yytext + 2;
 
 	/* Skip any leading 0s */
-	while (*hex == '0') {
-		hex++;
-		len--;
+	while (*hex == '0' || *hex == '_') {
+		++hex;
+		--len;
+	}
+
+	for (i = 0; i < len && hex[i] != '_'; ++i);
+
+	contains_underscores = i != len;
+
+	if (contains_underscores) {
+		hex = estrndup(hex, len);
+		strip_underscores(hex, &len);
 	}
 
 	if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
@@ -1858,11 +1925,17 @@ NEWLINE ("\r"|"\n"|"\r\n")
 			ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
 			ZEND_ASSERT(!errno && end == hex + len);
 		}
+		if (contains_underscores) {
+			efree(hex);
+		}
 		RETURN_TOKEN_WITH_VAL(T_LNUMBER);
 	} else {
 		ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
 		/* errno isn't checked since we allow HUGE_VAL/INF overflow */
 		ZEND_ASSERT(end == hex + len);
+		if (contains_underscores) {
+			efree(hex);
+		}
 		RETURN_TOKEN_WITH_VAL(T_DNUMBER);
 	}
 }
@@ -1894,10 +1967,24 @@ string:
 
 {DNUM}|{EXPONENT_DNUM} {
 	const char *end;
+	int len = yyleng, contains_underscores, i;
+	char *dnum = yytext;
+
+	for (i = 0; i < len && dnum[i] != '_'; ++i);
 
-	ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end));
+	contains_underscores = i != len;
+
+	if (contains_underscores) {
+		dnum = estrndup(dnum, len);
+		strip_underscores(dnum, &len);
+	}
+
+	ZVAL_DOUBLE(zendlval, zend_strtod(dnum, &end));
 	/* errno isn't checked since we allow HUGE_VAL/INF overflow */
-	ZEND_ASSERT(end == yytext + yyleng);
+	ZEND_ASSERT(end == dnum + len);
+	if (contains_underscores) {
+		efree(dnum);
+	}
 	RETURN_TOKEN_WITH_VAL(T_DNUMBER);
 }
 