Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions Zend/tests/bigint/add.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
--TEST--
Bigint addition tests
--SKIPIF--
<?php if (!extension_loaded('gmp')) { die('skip requires gmp'); } ?>
--FILE--
<?php
echo 1_2_3n+1n, "\n";
var_dump(0700n + 0004n);
echo 2n ** 100, "\n";
function sum_cube(GMP $a, GMP $b): GMP {
return $a ** 3 + $b ** 3;
}
echo sum_cube(3n, 9000000000000n), "\n";

?>
--EXPECTF--
124
object(GMP)#1 (1) {
["num"]=>
string(3) "452"
}
1267650600228229401496703205376
729000000000000000000000000000000000027
16 changes: 16 additions & 0 deletions Zend/tests/bigint/ast.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
--TEST--
Bigint AST representation in assert()
--SKIPIF--
<?php if (!extension_loaded('gmp')) { die('skip requires gmp'); } ?>
--FILE--
<?php
// The result is a GMP arbitrary precision number object, not an int
try {
assert(is_int(1+0n+01_23n));
} catch (AssertionError $e) {
echo "Caught " . $e->getMessage() . "\n";
}

?>
--EXPECTF--
Caught assert(is_int(1 + 0n + 01_23n))
25 changes: 25 additions & 0 deletions Zend/tests/bigint/parse.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
--TEST--
Bigint parsing tests
--SKIPIF--
<?php if (!extension_loaded('tokenizer')) { die('skip requires tokenizer'); } ?>
--FILE--
<?php
foreach (token_get_all('<?php return 1111111111111111111111111111111111111111111111111239n+012_345n+1n;?>') as $token) {
if (is_string($token)) {
printf("%s\n", $token);
continue;
}
printf("%s: '%s'\n", token_name($token[0]), $token[1]);
}
?>
--EXPECT--
T_OPEN_TAG: '<?php '
T_RETURN: 'return'
T_WHITESPACE: ' '
T_BIGINT: '1111111111111111111111111111111111111111111111111239n'
+
T_BIGINT: '012_345n'
+
T_BIGINT: '1n'
;
T_CLOSE_TAG: '?>'
9 changes: 9 additions & 0 deletions Zend/zend_ast.c
Original file line number Diff line number Diff line change
Expand Up @@ -1714,6 +1714,15 @@ static ZEND_COLD void zend_ast_export_ex(smart_str *str, zend_ast *ast, int prio
}
smart_str_appendc(str, '`');
break;
case ZEND_AST_BIGINT:
{
zval *zv;
ZEND_ASSERT(ast->child[0]->kind == ZEND_AST_ZVAL);
zv = zend_ast_get_zval(ast->child[0]);
ZEND_ASSERT(Z_TYPE_P(zv) == IS_STRING);
smart_str_append(str, Z_STR_P(zv));
}
break;
case ZEND_AST_CLONE:
PREFIX_OP("clone ", 270, 271);
case ZEND_AST_EXIT:
Expand Down
1 change: 1 addition & 0 deletions Zend/zend_ast.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ enum _zend_ast_kind {
ZEND_AST_POST_DEC,
ZEND_AST_YIELD_FROM,
ZEND_AST_CLASS_NAME,
ZEND_AST_BIGINT,

ZEND_AST_GLOBAL,
ZEND_AST_UNSET,
Expand Down
54 changes: 54 additions & 0 deletions Zend/zend_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -8770,6 +8770,57 @@ void zend_compile_shell_exec(znode *result, zend_ast *ast) /* {{{ */
}
/* }}} */

static void strip_underscores(char *str, size_t *len)
{
char *src = str, *dest = str;
while (*src != '\0') {
if (*src != '_') {
*dest = *src;
dest++;
} else {
--(*len);
}
src++;
}
*dest = '\0';
}


void zend_compile_bigint(znode *result, zend_ast *ast) /* {{{ */
{
zend_ast *expr_ast = ast->child[0];
zend_ast *new_expr_ast;
const zval *expr_zv = zend_ast_get_zval(expr_ast);
char* new_string = Z_STRVAL_P(expr_zv);
zval new_expr_zv;
size_t len = Z_STRLEN_P(expr_zv) - 1; /* Remove 'n' suffix */
zend_bool contains_underscores = (memchr(new_string, '_', len) != NULL);

zval fn_name;
zend_ast *name_ast, *args_ast, *call_ast;
ZEND_ASSERT(Z_TYPE_P(expr_zv) == IS_STRING);
if (contains_underscores) {
new_string = estrndup(new_string, len);
strip_underscores(new_string, &len);
}
ZVAL_STRINGL(&new_expr_zv, new_string, len);
if (contains_underscores) {
efree(new_string);
}

ZVAL_STRING(&fn_name, "gmp_init");
new_expr_ast = zend_ast_create_zval(&new_expr_zv);
name_ast = zend_ast_create_zval(&fn_name);
args_ast = zend_ast_create_list(1, ZEND_AST_ARG_LIST, new_expr_ast);
call_ast = zend_ast_create(ZEND_AST_CALL, name_ast, args_ast);

zend_compile_expr(result, call_ast);

zval_ptr_dtor(&fn_name);
zval_ptr_dtor(&new_expr_zv);
}
/* }}} */

void zend_compile_array(znode *result, zend_ast *ast) /* {{{ */
{
zend_ast_list *list = zend_ast_get_list(ast);
Expand Down Expand Up @@ -9558,6 +9609,9 @@ static void zend_compile_expr_inner(znode *result, zend_ast *ast) /* {{{ */
case ZEND_AST_SHELL_EXEC:
zend_compile_shell_exec(result, ast);
return;
case ZEND_AST_BIGINT:
zend_compile_bigint(result, ast);
return;
case ZEND_AST_ARRAY:
zend_compile_array(result, ast);
return;
Expand Down
2 changes: 2 additions & 0 deletions Zend/zend_language_parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ static YYSIZE_T zend_yytnamerr(char*, const char*);

%token <ast> T_LNUMBER "integer"
%token <ast> T_DNUMBER "floating-point number"
%token <ast> T_BIGINT "arbitrary precision integer"
%token <ast> T_STRING "identifier"
%token <ast> T_NAME_FULLY_QUALIFIED "fully qualified name"
%token <ast> T_NAME_RELATIVE "namespace-relative name"
Expand Down Expand Up @@ -1116,6 +1117,7 @@ expr:
| T_EXIT exit_expr { $$ = zend_ast_create(ZEND_AST_EXIT, $2); }
| '@' expr { $$ = zend_ast_create(ZEND_AST_SILENCE, $2); }
| scalar { $$ = $1; }
| T_BIGINT { $$ = zend_ast_create(ZEND_AST_BIGINT, $1); }
| '`' backticks_expr '`' { $$ = zend_ast_create(ZEND_AST_SHELL_EXEC, $2); }
| T_PRINT expr { $$ = zend_ast_create(ZEND_AST_PRINT, $2); }
| T_YIELD { $$ = zend_ast_create(ZEND_AST_YIELD, NULL, NULL); CG(extra_fn_flags) |= ZEND_ACC_GENERATOR; }
Expand Down
27 changes: 27 additions & 0 deletions Zend/zend_language_scanner.l
Original file line number Diff line number Diff line change
Expand Up @@ -1971,6 +1971,33 @@ NEWLINE ("\r"|"\n"|"\r\n")
}
}

/* Bigint support */
<ST_IN_SCRIPTING>{LNUM}"n" {
/* Length without the "n" suffix */
size_t len = yyleng - 1;
char *lnum = yytext;
zend_bool is_octal = lnum[0] == '0';

/* Digits 8 and 9 are illegal in octal literals. */
if (is_octal) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should throw instead

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This does throw (this is C, not C++), and is based on the adjacent ordinary LNUM parsing. zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);

For token_get_all, the choice to not throw without the TOKEN_PARSE flag is deliberate - tools such as php-parser couldn't work if token_get_all threw instead of returning the T_ERROR token.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean to not allow octals in bigints at all - reject any input starting with 0 except 0n

Copy link
Contributor Author

@TysonAndre TysonAndre Aug 6, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And the token_get_all() implementation in ext/tokenizer/tokenizer.c clears this exception - I was wondering how it worked for ordinary numbers

		/* Normal token_get_all() should not throw. */
		zend_clear_exception();

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean to not allow octals in bigints at all - reject any input starting with 0 except 0n

That makes no sense, the GMP extension accepts Hex, Octal and Binary numbers just fine

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But usecases vs. possible mistakes? :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd find it more consistent to allow any integer token to have a bigint equivalent by adding the suffix n to it.

Possibly use cases would be cryptography with known hex values, arbitrary-precision math based on octal/hex/binary reference material, etc - e.g. https://en.wikipedia.org/wiki/Mersenne_prime

size_t i;
for (i = 0; i < len; i++) {
if (lnum[i] == '8' || lnum[i] == '9') {
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
if (PARSER_MODE()) {
ZVAL_UNDEF(zendlval);
RETURN_TOKEN(T_ERROR);
}

/* Continue in order to determine if this is T_LNUMBER or T_DNUMBER. */
len = i;
break;
}
}
}
RETURN_TOKEN_WITH_STR(T_BIGINT, 0);
}

<ST_IN_SCRIPTING>{LNUM} {
size_t len = yyleng;
char *end, *lnum = yytext;
Expand Down
2 changes: 2 additions & 0 deletions ext/tokenizer/tokenizer_data.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ void tokenizer_register_constants(INIT_FUNC_ARGS) {
REGISTER_LONG_CONSTANT("T_ELSE", T_ELSE, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("T_LNUMBER", T_LNUMBER, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("T_DNUMBER", T_DNUMBER, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("T_BIGINT", T_BIGINT, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("T_STRING", T_STRING, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("T_NAME_FULLY_QUALIFIED", T_NAME_FULLY_QUALIFIED, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("T_NAME_RELATIVE", T_NAME_RELATIVE, CONST_CS | CONST_PERSISTENT);
Expand Down Expand Up @@ -225,6 +226,7 @@ char *get_token_type_name(int token_type)
case T_ELSE: return "T_ELSE";
case T_LNUMBER: return "T_LNUMBER";
case T_DNUMBER: return "T_DNUMBER";
case T_BIGINT: return "T_BIGINT";
case T_STRING: return "T_STRING";
case T_NAME_FULLY_QUALIFIED: return "T_NAME_FULLY_QUALIFIED";
case T_NAME_RELATIVE: return "T_NAME_RELATIVE";
Expand Down