Navigation Menu

Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Proposal] Bigint shorthand (123n) for GMP objects #5930

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
23 changes: 23 additions & 0 deletions Zend/tests/bigint/add.phpt
@@ -0,0 +1,23 @@
--TEST--
Bigint addition tests
--SKIPIF--
<?php if (!extension_loaded('gmp')) { die('skip requires gmp'); } ?>
--FILE--
<?php
echo 1_2_3n+1n, "\n";
var_dump(0700n + 0004n);
echo 2n ** 100, "\n";
function sum_cube(GMP $a, GMP $b): GMP {
return $a ** 3 + $b ** 3;
}
echo sum_cube(3n, 9000000000000n), "\n";

?>
--EXPECTF--
124
object(GMP)#1 (1) {
["num"]=>
string(3) "452"
}
1267650600228229401496703205376
729000000000000000000000000000000000027
16 changes: 16 additions & 0 deletions Zend/tests/bigint/ast.phpt
@@ -0,0 +1,16 @@
--TEST--
Bigint AST representation in assert()
--SKIPIF--
<?php if (!extension_loaded('gmp')) { die('skip requires gmp'); } ?>
--FILE--
<?php
// The result is a GMP arbitrary precision number object, not an int
try {
assert(is_int(1+0n+01_23n));
} catch (AssertionError $e) {
echo "Caught " . $e->getMessage() . "\n";
}

?>
--EXPECTF--
Caught assert(is_int(1 + 0n + 01_23n))
25 changes: 25 additions & 0 deletions Zend/tests/bigint/parse.phpt
@@ -0,0 +1,25 @@
--TEST--
Bigint parsing tests
--SKIPIF--
<?php if (!extension_loaded('tokenizer')) { die('skip requires tokenizer'); } ?>
--FILE--
<?php
foreach (token_get_all('<?php return 1111111111111111111111111111111111111111111111111239n+012_345n+1n;?>') as $token) {
if (is_string($token)) {
printf("%s\n", $token);
continue;
}
printf("%s: '%s'\n", token_name($token[0]), $token[1]);
}
?>
--EXPECT--
T_OPEN_TAG: '<?php '
T_RETURN: 'return'
T_WHITESPACE: ' '
T_BIGINT: '1111111111111111111111111111111111111111111111111239n'
+
T_BIGINT: '012_345n'
+
T_BIGINT: '1n'
;
T_CLOSE_TAG: '?>'
9 changes: 9 additions & 0 deletions Zend/zend_ast.c
Expand Up @@ -1714,6 +1714,15 @@ static ZEND_COLD void zend_ast_export_ex(smart_str *str, zend_ast *ast, int prio
}
smart_str_appendc(str, '`');
break;
case ZEND_AST_BIGINT:
{
zval *zv;
ZEND_ASSERT(ast->child[0]->kind == ZEND_AST_ZVAL);
zv = zend_ast_get_zval(ast->child[0]);
ZEND_ASSERT(Z_TYPE_P(zv) == IS_STRING);
smart_str_append(str, Z_STR_P(zv));
}
break;
case ZEND_AST_CLONE:
PREFIX_OP("clone ", 270, 271);
case ZEND_AST_EXIT:
Expand Down
1 change: 1 addition & 0 deletions Zend/zend_ast.h
Expand Up @@ -92,6 +92,7 @@ enum _zend_ast_kind {
ZEND_AST_POST_DEC,
ZEND_AST_YIELD_FROM,
ZEND_AST_CLASS_NAME,
ZEND_AST_BIGINT,

ZEND_AST_GLOBAL,
ZEND_AST_UNSET,
Expand Down
54 changes: 54 additions & 0 deletions Zend/zend_compile.c
Expand Up @@ -8770,6 +8770,57 @@ void zend_compile_shell_exec(znode *result, zend_ast *ast) /* {{{ */
}
/* }}} */

static void strip_underscores(char *str, size_t *len)
{
char *src = str, *dest = str;
while (*src != '\0') {
if (*src != '_') {
*dest = *src;
dest++;
} else {
--(*len);
}
src++;
}
*dest = '\0';
}


void zend_compile_bigint(znode *result, zend_ast *ast) /* {{{ */
{
zend_ast *expr_ast = ast->child[0];
zend_ast *new_expr_ast;
const zval *expr_zv = zend_ast_get_zval(expr_ast);
char* new_string = Z_STRVAL_P(expr_zv);
zval new_expr_zv;
size_t len = Z_STRLEN_P(expr_zv) - 1; /* Remove 'n' suffix */
zend_bool contains_underscores = (memchr(new_string, '_', len) != NULL);

zval fn_name;
zend_ast *name_ast, *args_ast, *call_ast;
ZEND_ASSERT(Z_TYPE_P(expr_zv) == IS_STRING);
if (contains_underscores) {
new_string = estrndup(new_string, len);
strip_underscores(new_string, &len);
}
ZVAL_STRINGL(&new_expr_zv, new_string, len);
if (contains_underscores) {
efree(new_string);
}

ZVAL_STRING(&fn_name, "gmp_init");
new_expr_ast = zend_ast_create_zval(&new_expr_zv);
name_ast = zend_ast_create_zval(&fn_name);
args_ast = zend_ast_create_list(1, ZEND_AST_ARG_LIST, new_expr_ast);
call_ast = zend_ast_create(ZEND_AST_CALL, name_ast, args_ast);

zend_compile_expr(result, call_ast);

zval_ptr_dtor(&fn_name);
zval_ptr_dtor(&new_expr_zv);
}
/* }}} */

void zend_compile_array(znode *result, zend_ast *ast) /* {{{ */
{
zend_ast_list *list = zend_ast_get_list(ast);
Expand Down Expand Up @@ -9558,6 +9609,9 @@ static void zend_compile_expr_inner(znode *result, zend_ast *ast) /* {{{ */
case ZEND_AST_SHELL_EXEC:
zend_compile_shell_exec(result, ast);
return;
case ZEND_AST_BIGINT:
zend_compile_bigint(result, ast);
return;
case ZEND_AST_ARRAY:
zend_compile_array(result, ast);
return;
Expand Down
2 changes: 2 additions & 0 deletions Zend/zend_language_parser.y
Expand Up @@ -88,6 +88,7 @@ static YYSIZE_T zend_yytnamerr(char*, const char*);

%token <ast> T_LNUMBER "integer"
%token <ast> T_DNUMBER "floating-point number"
%token <ast> T_BIGINT "arbitrary precision integer"
%token <ast> T_STRING "identifier"
%token <ast> T_NAME_FULLY_QUALIFIED "fully qualified name"
%token <ast> T_NAME_RELATIVE "namespace-relative name"
Expand Down Expand Up @@ -1116,6 +1117,7 @@ expr:
| T_EXIT exit_expr { $$ = zend_ast_create(ZEND_AST_EXIT, $2); }
| '@' expr { $$ = zend_ast_create(ZEND_AST_SILENCE, $2); }
| scalar { $$ = $1; }
| T_BIGINT { $$ = zend_ast_create(ZEND_AST_BIGINT, $1); }
| '`' backticks_expr '`' { $$ = zend_ast_create(ZEND_AST_SHELL_EXEC, $2); }
| T_PRINT expr { $$ = zend_ast_create(ZEND_AST_PRINT, $2); }
| T_YIELD { $$ = zend_ast_create(ZEND_AST_YIELD, NULL, NULL); CG(extra_fn_flags) |= ZEND_ACC_GENERATOR; }
Expand Down
27 changes: 27 additions & 0 deletions Zend/zend_language_scanner.l
Expand Up @@ -1971,6 +1971,33 @@ NEWLINE ("\r"|"\n"|"\r\n")
}
}

/* Bigint support */
<ST_IN_SCRIPTING>{LNUM}"n" {
/* Length without the "n" suffix */
size_t len = yyleng - 1;
char *lnum = yytext;
zend_bool is_octal = lnum[0] == '0';

/* Digits 8 and 9 are illegal in octal literals. */
if (is_octal) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should throw instead

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This does throw (this is C, not C++), and is based on the adjacent ordinary LNUM parsing. zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);

For token_get_all, the choice to not throw without the TOKEN_PARSE flag is deliberate - tools such as php-parser couldn't work if token_get_all threw instead of returning the T_ERROR token.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean to not allow octals in bigints at all - reject any input starting with 0 except 0n

Copy link
Contributor Author

@TysonAndre TysonAndre Aug 6, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And the token_get_all() implementation in ext/tokenizer/tokenizer.c clears this exception - I was wondering how it worked for ordinary numbers

		/* Normal token_get_all() should not throw. */
		zend_clear_exception();

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean to not allow octals in bigints at all - reject any input starting with 0 except 0n

That makes no sense, the GMP extension accepts Hex, Octal and Binary numbers just fine

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But usecases vs. possible mistakes? :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd find it more consistent to allow any integer token to have a bigint equivalent by adding the suffix n to it.

Possibly use cases would be cryptography with known hex values, arbitrary-precision math based on octal/hex/binary reference material, etc - e.g. https://en.wikipedia.org/wiki/Mersenne_prime

size_t i;
for (i = 0; i < len; i++) {
if (lnum[i] == '8' || lnum[i] == '9') {
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
if (PARSER_MODE()) {
ZVAL_UNDEF(zendlval);
RETURN_TOKEN(T_ERROR);
}

/* Continue in order to determine if this is T_LNUMBER or T_DNUMBER. */
len = i;
break;
}
}
}
RETURN_TOKEN_WITH_STR(T_BIGINT, 0);
}

<ST_IN_SCRIPTING>{LNUM} {
size_t len = yyleng;
char *end, *lnum = yytext;
Expand Down
2 changes: 2 additions & 0 deletions ext/tokenizer/tokenizer_data.c
Expand Up @@ -76,6 +76,7 @@ void tokenizer_register_constants(INIT_FUNC_ARGS) {
REGISTER_LONG_CONSTANT("T_ELSE", T_ELSE, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("T_LNUMBER", T_LNUMBER, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("T_DNUMBER", T_DNUMBER, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("T_BIGINT", T_BIGINT, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("T_STRING", T_STRING, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("T_NAME_FULLY_QUALIFIED", T_NAME_FULLY_QUALIFIED, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("T_NAME_RELATIVE", T_NAME_RELATIVE, CONST_CS | CONST_PERSISTENT);
Expand Down Expand Up @@ -225,6 +226,7 @@ char *get_token_type_name(int token_type)
case T_ELSE: return "T_ELSE";
case T_LNUMBER: return "T_LNUMBER";
case T_DNUMBER: return "T_DNUMBER";
case T_BIGINT: return "T_BIGINT";
case T_STRING: return "T_STRING";
case T_NAME_FULLY_QUALIFIED: return "T_NAME_FULLY_QUALIFIED";
case T_NAME_RELATIVE: return "T_NAME_RELATIVE";
Expand Down