Skip to content

Commit

Permalink
Implement numeric literal separators
Browse files Browse the repository at this point in the history
  • Loading branch information
theodorejb committed May 26, 2019
1 parent aa9433e commit a1ec76e
Show file tree
Hide file tree
Showing 10 changed files with 188 additions and 28 deletions.
19 changes: 19 additions & 0 deletions Zend/tests/numeric_literal_separator_001.phpt
@@ -0,0 +1,19 @@
--TEST--
Valid use of numeric literal separator
--FILE--
<?php
var_dump(299_792_458 === 299792458);
var_dump(135_00 === 13500);
var_dump(96_485.332_12 === 96485.33212);
var_dump(6.674_083e-11 === 6.674083e-11);
var_dump(0xCAFE_F00D === 0xCAFEF00D);
var_dump(0b0101_1111 === 0b01011111);
var_dump(024_044_120 === 024044120);
--EXPECT--
bool(true)
bool(true)
bool(true)
bool(true)
bool(true)
bool(true)
bool(true)
7 changes: 7 additions & 0 deletions Zend/tests/numeric_literal_separator_002.phpt
@@ -0,0 +1,7 @@
--TEST--
Invalid use: trailing underscore
--FILE--
<?php
100_;
--EXPECTF--
Parse error: syntax error, unexpected '_' (T_STRING) in %s on line %d
7 changes: 7 additions & 0 deletions Zend/tests/numeric_literal_separator_003.phpt
@@ -0,0 +1,7 @@
--TEST--
Invalid use: adjacent underscores
--FILE--
<?php
10__0;
--EXPECTF--
Parse error: syntax error, unexpected '__0' (T_STRING) in %s on line %d
7 changes: 7 additions & 0 deletions Zend/tests/numeric_literal_separator_004.phpt
@@ -0,0 +1,7 @@
--TEST--
Invalid use: underscore left of period
--FILE--
<?php
100_.0;
--EXPECTF--
Parse error: syntax error, unexpected '_' (T_STRING) in %s on line %d
7 changes: 7 additions & 0 deletions Zend/tests/numeric_literal_separator_005.phpt
@@ -0,0 +1,7 @@
--TEST--
Invalid use: underscore right of period
--FILE--
<?php
100._0;
--EXPECTF--
Parse error: syntax error, unexpected '_0' (T_STRING) in %s on line %d
7 changes: 7 additions & 0 deletions Zend/tests/numeric_literal_separator_006.phpt
@@ -0,0 +1,7 @@
--TEST--
Invalid use: underscore next to 0x
--FILE--
<?php
0x_0123;
--EXPECTF--
Parse error: syntax error, unexpected 'x_0123' (T_STRING) in %s on line %d
7 changes: 7 additions & 0 deletions Zend/tests/numeric_literal_separator_007.phpt
@@ -0,0 +1,7 @@
--TEST--
Invalid use: underscore next to 0b
--FILE--
<?php
0b_0101;
--EXPECTF--
Parse error: syntax error, unexpected 'b_0101' (T_STRING) in %s on line %d
7 changes: 7 additions & 0 deletions Zend/tests/numeric_literal_separator_008.phpt
@@ -0,0 +1,7 @@
--TEST--
Invalid use: underscore left of e
--FILE--
<?php
1_e2;
--EXPECTF--
Parse error: syntax error, unexpected '_e2' (T_STRING) in %s on line %d
7 changes: 7 additions & 0 deletions Zend/tests/numeric_literal_separator_009.phpt
@@ -0,0 +1,7 @@
--TEST--
Invalid use: underscore right of e
--FILE--
<?php
1e_2;
--EXPECTF--
Parse error: syntax error, unexpected 'e_2' (T_STRING) in %s on line %d
141 changes: 113 additions & 28 deletions Zend/zend_language_scanner.l
Expand Up @@ -120,6 +120,21 @@ do { \

BEGIN_EXTERN_C()

/* Remove every '_' from the NUL-terminated string `str`, compacting the
 * remaining characters in place and re-terminating the result.
 *
 * str: writable, NUL-terminated buffer; modified in place.
 * len: in/out length counter; decremented once for each '_' removed.
 */
static void strip_underscores(char *str, int *len)
{
	char *out = str;
	const char *in;

	for (in = str; *in != '\0'; in++) {
		if (*in == '_') {
			/* Dropped character: only the caller's length shrinks. */
			(*len)--;
			continue;
		}
		*out++ = *in;
	}
	*out = '\0';
}

static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
{
const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
Expand Down Expand Up @@ -1245,11 +1260,11 @@ restart:

/*!re2c
re2c:yyfill:check = 0;
LNUM [0-9]+
DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
LNUM [0-9]+(_[0-9]+)*
DNUM (([0-9]+(_[0-9]+)*)*"."([0-9]+(_[0-9]+)*)+)|(([0-9]+(_[0-9]+)*)+"."([0-9]+(_[0-9]+)*)*)
EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
HNUM "0x"[0-9a-fA-F]+
BNUM "0b"[01]+
HNUM "0x"[0-9a-fA-F]+(_[0-9a-fA-F]+)*
BNUM "0b"[01]+(_[01]+)*
LABEL [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
WHITESPACE [ \n\r\t]+
TABS_AND_SPACES [ \t]*
Expand Down Expand Up @@ -1760,94 +1775,144 @@ NEWLINE ("\r"|"\n"|"\r\n")
}

<ST_IN_SCRIPTING>{BNUM} {
/* Binary literal: "0b" followed by binary digits with optional '_' separators.
 * Leading zeros/separators are skipped and any remaining '_' are stripped from
 * a private copy. Fewer than SIZEOF_ZEND_LONG * 8 remaining digits produce
 * T_LNUMBER via ZEND_STRTOL; otherwise T_DNUMBER via zend_bin_strtod. */
char *bin = yytext + 2; /* Skip "0b" */
int len = yyleng - 2;
char *end;
/* The +/- 2 skips "0b" */
int len = yyleng - 2, contains_underscores, i;
char *end, *bin = yytext + 2;

/* Skip any leading 0s */
while (*bin == '0') {
/* The skip must be bounded by len. For input such as "0b0_;" the matched
 * token is only "0b0", and an unbounded loop would also consume the '_' that
 * follows the token, leaving len == -1 to be handed to estrndup() below. */
while (len > 0 && (*bin == '0' || *bin == '_')) {
++bin;
--len;
}

/* Find the first '_' (if any) in the remaining digits. */
for (i = 0; i < len && bin[i] != '_'; ++i);

contains_underscores = i != len;

if (contains_underscores) {
/* Work on a copy so the separators can be removed; freed before returning. */
bin = estrndup(bin, len);
strip_underscores(bin, &len);
}

if (len < SIZEOF_ZEND_LONG * 8) {
if (len == 0) {
/* Every digit was a leading zero. */
ZVAL_LONG(zendlval, 0);
} else {
errno = 0;
ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
ZEND_ASSERT(!errno && end == yytext + yyleng);
ZEND_ASSERT(!errno && end == bin + len);
}
if (contains_underscores) {
efree(bin);
}
RETURN_TOKEN_WITH_VAL(T_LNUMBER);
} else {
ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
ZEND_ASSERT(end == yytext + yyleng);
ZEND_ASSERT(end == bin + len);
if (contains_underscores) {
efree(bin);
}
RETURN_TOKEN_WITH_VAL(T_DNUMBER);
}
}

<ST_IN_SCRIPTING>{LNUM} {
/* Decimal or octal integer literal with optional '_' separators.
 * Separators are stripped from a private copy (lnum) before conversion.
 * Short literals are converted with ZEND_STRTOL directly; longer ones fall
 * back to a double (T_DNUMBER) when ZEND_STRTOL reports ERANGE. If the
 * conversion does not consume the whole literal, a ParseError
 * "Invalid numeric literal" is thrown.
 * Every exit path must free the copy exactly once. */
char *end;
if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
int len = yyleng, contains_underscores, i;
char *end, *lnum = yytext;

/* Find the first '_' (if any) in the token. */
for (i = 0; i < len && lnum[i] != '_'; ++i);

contains_underscores = i != len;

if (contains_underscores) {
lnum = estrndup(lnum, len);
strip_underscores(lnum, &len);
}

if (len < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
errno = 0;
/* base must be passed explicitly for correct parse error on Windows */
ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, yytext[0] == '0' ? 8 : 10));
ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, lnum[0] == '0' ? 8 : 10));
/* This isn't an assert, we need to ensure 019 isn't valid octal
* Because the lexing itself doesn't do that for us
*/
if (end != yytext + yyleng) {
if (end != lnum + len) {
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
ZVAL_UNDEF(zendlval);
if (contains_underscores) {
efree(lnum);
}
if (PARSER_MODE()) {
RETURN_TOKEN(T_ERROR);
}
RETURN_TOKEN_WITH_VAL(T_LNUMBER);
}
} else {
errno = 0;
ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, 0));
if (errno == ERANGE) { /* Overflow */
errno = 0;
if (yytext[0] == '0') { /* octal overflow */
ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, (const char **)&end));
if (lnum[0] == '0') { /* octal overflow */
ZVAL_DOUBLE(zendlval, zend_oct_strtod(lnum, (const char **)&end));
} else {
ZVAL_DOUBLE(zendlval, zend_strtod(yytext, (const char **)&end));
ZVAL_DOUBLE(zendlval, zend_strtod(lnum, (const char **)&end));
}
/* Also not an assert for the same reason */
if (end != yytext + yyleng) {
if (end != lnum + len) {
zend_throw_exception(zend_ce_parse_error,
"Invalid numeric literal", 0);
ZVAL_UNDEF(zendlval);
if (PARSER_MODE()) {
/* Free only on this early return: outside parser mode control falls
 * through to the shared efree() below, so freeing before the
 * PARSER_MODE() check would release lnum twice. */
if (contains_underscores) {
efree(lnum);
}
RETURN_TOKEN(T_ERROR);
}
}
if (contains_underscores) {
efree(lnum);
}
RETURN_TOKEN_WITH_VAL(T_DNUMBER);
}
/* Also not an assert for the same reason */
if (end != yytext + yyleng) {
if (end != lnum + len) {
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
ZVAL_UNDEF(zendlval);
/* Both branches below return, so a single free here is sufficient. */
if (contains_underscores) {
efree(lnum);
}
if (PARSER_MODE()) {
RETURN_TOKEN(T_ERROR);
}
RETURN_TOKEN_WITH_VAL(T_DNUMBER);
}
}
ZEND_ASSERT(!errno);
if (contains_underscores) {
efree(lnum);
}
RETURN_TOKEN_WITH_VAL(T_LNUMBER);
}

<ST_IN_SCRIPTING>{HNUM} {
/* Hexadecimal literal: "0x" followed by hex digits with optional '_'
 * separators. Leading zeros/separators are skipped and any remaining '_' are
 * stripped from a private copy. Literals short enough for a zend_long produce
 * T_LNUMBER via ZEND_STRTOL; otherwise T_DNUMBER via zend_hex_strtod. */
char *hex = yytext + 2; /* Skip "0x" */
int len = yyleng - 2;
char *end;
/* The +/- 2 skips "0x" */
int len = yyleng - 2, contains_underscores, i;
char *end, *hex = yytext + 2;

/* Skip any leading 0s */
while (*hex == '0') {
hex++;
len--;
/* The skip must be bounded by len. For input such as "0x0_;" the matched
 * token is only "0x0", and an unbounded loop would also consume the '_' that
 * follows the token, leaving len == -1 to be handed to estrndup() below. */
while (len > 0 && (*hex == '0' || *hex == '_')) {
++hex;
--len;
}

/* Find the first '_' (if any) in the remaining digits. */
for (i = 0; i < len && hex[i] != '_'; ++i);

contains_underscores = i != len;

if (contains_underscores) {
/* Work on a copy so the separators can be removed; freed before returning. */
hex = estrndup(hex, len);
strip_underscores(hex, &len);
}

/* At exactly SIZEOF_ZEND_LONG * 2 digits, only a first digit <= '7' is taken
 * down the integer path. */
if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
Expand All @@ -1858,11 +1923,17 @@ NEWLINE ("\r"|"\n"|"\r\n")
ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
ZEND_ASSERT(!errno && end == hex + len);
}
if (contains_underscores) {
efree(hex);
}
RETURN_TOKEN_WITH_VAL(T_LNUMBER);
} else {
ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
ZEND_ASSERT(end == hex + len);
if (contains_underscores) {
efree(hex);
}
RETURN_TOKEN_WITH_VAL(T_DNUMBER);
}
}
Expand Down Expand Up @@ -1894,10 +1965,24 @@ string:

<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
/* Floating-point literal (with or without exponent), optionally containing
 * '_' separators. Converts the text with zend_strtod and returns T_DNUMBER. */
const char *end;
int len = yyleng, contains_underscores, i;
char *dnum = yytext;

/* Find the first '_' (if any); i == len means the token has none. */
for (i = 0; i < len && dnum[i] != '_'; ++i);

ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end));
contains_underscores = i != len;

if (contains_underscores) {
/* Replace dnum with a copy so the separators can be removed in place;
 * strip_underscores also shrinks len to the stripped length. */
dnum = estrndup(dnum, len);
strip_underscores(dnum, &len);
}

ZVAL_DOUBLE(zendlval, zend_strtod(dnum, &end));
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
ZEND_ASSERT(end == yytext + yyleng);
/* The conversion is expected to consume the whole (stripped) literal. */
ZEND_ASSERT(end == dnum + len);
if (contains_underscores) {
/* Release the copy made above; yytext itself is never freed here. */
efree(dnum);
}
RETURN_TOKEN_WITH_VAL(T_DNUMBER);
}

Expand Down

0 comments on commit a1ec76e

Please sign in to comment.