From 56e5ef203b01ac8dfb1cb46143f6f7c53237b79d Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Sat, 22 Feb 2014 16:39:45 +0200 Subject: [PATCH] parse: Refactor parse node encoding to support full range of small ints. Based on suggestion by @dpgeorge at https://github.com/micropython/micropython/pull/313 --- py/compile.c | 37 +++++++++++++++----------- py/emitinlinethumb.c | 2 +- py/parse.c | 11 +++++--- py/parse.h | 56 ++++++++++++++++++++------------------- tests/basics/int-small.py | 24 +++++++++++++++++ 5 files changed, 84 insertions(+), 46 deletions(-) diff --git a/py/compile.c b/py/compile.c index ef0130463dac..9be90a601f29 100644 --- a/py/compile.c +++ b/py/compile.c @@ -86,8 +86,8 @@ mp_parse_node_t fold_constants(mp_parse_node_t pn) { switch (MP_PARSE_NODE_STRUCT_KIND(pns)) { case PN_shift_expr: if (n == 3 && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) { - int arg0 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[0]); - int arg1 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[2]); + int arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[0]); + int arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[2]); if (MP_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], MP_TOKEN_OP_DBL_LESS)) { #if MICROPY_EMIT_CPYTHON // can overflow; enabled only to compare with CPython @@ -105,8 +105,8 @@ mp_parse_node_t fold_constants(mp_parse_node_t pn) { case PN_arith_expr: // overflow checking here relies on SMALL_INT being strictly smaller than machine_int_t if (n == 3 && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) { - machine_int_t arg0 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[0]); - machine_int_t arg1 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[2]); + machine_int_t arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[0]); + machine_int_t arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[2]); machine_int_t res; if (MP_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], MP_TOKEN_OP_PLUS)) { res = arg0 + arg1; @@ -125,8 +125,8 @@ mp_parse_node_t 
fold_constants(mp_parse_node_t pn) { case PN_term: if (n == 3 && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) { - int arg0 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[0]); - int arg1 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[2]); + int arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[0]); + int arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[2]); if (MP_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], MP_TOKEN_OP_STAR)) { #if MICROPY_EMIT_CPYTHON // can overflow; enabled only to compare with CPython @@ -149,7 +149,7 @@ mp_parse_node_t fold_constants(mp_parse_node_t pn) { case PN_factor_2: if (MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[1])) { - machine_int_t arg = MP_PARSE_NODE_LEAF_ARG(pns->nodes[1]); + machine_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[1]); if (MP_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], MP_TOKEN_OP_PLUS)) { pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, arg); } else if (MP_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], MP_TOKEN_OP_MINUS)) { @@ -169,10 +169,10 @@ mp_parse_node_t fold_constants(mp_parse_node_t pn) { if (MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && MP_PARSE_NODE_IS_NULL(pns->nodes[1]) && !MP_PARSE_NODE_IS_NULL(pns->nodes[2])) { mp_parse_node_struct_t* pns2 = (mp_parse_node_struct_t*)pns->nodes[2]; if (MP_PARSE_NODE_IS_SMALL_INT(pns2->nodes[0])) { - int power = MP_PARSE_NODE_LEAF_ARG(pns2->nodes[0]); + int power = MP_PARSE_NODE_LEAF_SMALL_INT(pns2->nodes[0]); if (power >= 0) { int ans = 1; - int base = MP_PARSE_NODE_LEAF_ARG(pns->nodes[0]); + int base = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[0]); for (; power > 0; power--) { ans *= base; } @@ -320,10 +320,14 @@ STATIC void cpython_c_print_quoted_str(vstr_t *vstr, qstr qstr, bool bytes) { STATIC void cpython_c_tuple_emit_const(compiler_t *comp, mp_parse_node_t pn, vstr_t *vstr) { assert(MP_PARSE_NODE_IS_LEAF(pn)); + if (MP_PARSE_NODE_IS_SMALL_INT(pn)) { + vstr_printf(vstr, INT_FMT, MP_PARSE_NODE_LEAF_SMALL_INT(pn)); + return; + } + int arg = 
MP_PARSE_NODE_LEAF_ARG(pn); switch (MP_PARSE_NODE_LEAF_KIND(pn)) { case MP_PARSE_NODE_ID: assert(0); - case MP_PARSE_NODE_SMALL_INT: vstr_printf(vstr, "%d", arg); break; case MP_PARSE_NODE_INTEGER: vstr_printf(vstr, "%s", qstr_str(arg)); break; case MP_PARSE_NODE_DECIMAL: vstr_printf(vstr, "%s", qstr_str(arg)); break; case MP_PARSE_NODE_STRING: cpython_c_print_quoted_str(vstr, arg, false); break; @@ -421,11 +425,11 @@ void compile_generic_tuple(compiler_t *comp, mp_parse_node_struct_t *pns) { STATIC bool node_is_const_false(mp_parse_node_t pn) { return MP_PARSE_NODE_IS_TOKEN_KIND(pn, MP_TOKEN_KW_FALSE); - // untested: || (MP_PARSE_NODE_IS_SMALL_INT(pn) && MP_PARSE_NODE_LEAF_ARG(pn) == 1); + // untested: || (MP_PARSE_NODE_IS_SMALL_INT(pn) && MP_PARSE_NODE_LEAF_SMALL_INT(pn) == 0); } STATIC bool node_is_const_true(mp_parse_node_t pn) { - return MP_PARSE_NODE_IS_TOKEN_KIND(pn, MP_TOKEN_KW_TRUE) || (MP_PARSE_NODE_IS_SMALL_INT(pn) && MP_PARSE_NODE_LEAF_ARG(pn) == 1); + return MP_PARSE_NODE_IS_TOKEN_KIND(pn, MP_TOKEN_KW_TRUE) || (MP_PARSE_NODE_IS_SMALL_INT(pn) && MP_PARSE_NODE_LEAF_SMALL_INT(pn) == 1); } #if MICROPY_EMIT_CPYTHON @@ -1464,7 +1468,8 @@ void compile_for_stmt_optimised_range(compiler_t *comp, mp_parse_node_t pn_var, // compile: if var end: goto top compile_node(comp, pn_var); compile_node(comp, pn_end); - if (MP_PARSE_NODE_LEAF_ARG(pn_step) >= 0) { + assert(MP_PARSE_NODE_IS_SMALL_INT(pn_step)); + if (MP_PARSE_NODE_LEAF_SMALL_INT(pn_step) >= 0) { EMIT_ARG(binary_op, RT_BINARY_OP_LESS); } else { EMIT_ARG(binary_op, RT_BINARY_OP_MORE); @@ -2514,11 +2519,13 @@ STATIC compile_function_t compile_function[] = { void compile_node(compiler_t *comp, mp_parse_node_t pn) { if (MP_PARSE_NODE_IS_NULL(pn)) { // pass + } else if (MP_PARSE_NODE_IS_SMALL_INT(pn)) { + machine_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pn); + EMIT_ARG(load_const_small_int, arg); } else if (MP_PARSE_NODE_IS_LEAF(pn)) { - machine_int_t arg = MP_PARSE_NODE_LEAF_ARG(pn); + machine_uint_t arg = 
MP_PARSE_NODE_LEAF_ARG(pn); switch (MP_PARSE_NODE_LEAF_KIND(pn)) { case MP_PARSE_NODE_ID: EMIT_ARG(load_id, arg); break; - case MP_PARSE_NODE_SMALL_INT: EMIT_ARG(load_const_small_int, arg); break; case MP_PARSE_NODE_INTEGER: EMIT_ARG(load_const_int, arg); break; case MP_PARSE_NODE_DECIMAL: EMIT_ARG(load_const_dec, arg); break; case MP_PARSE_NODE_STRING: EMIT_ARG(load_const_str, arg, false); break; diff --git a/py/emitinlinethumb.c b/py/emitinlinethumb.c index 8699b48bd759..675ed1efc893 100644 --- a/py/emitinlinethumb.c +++ b/py/emitinlinethumb.c @@ -110,7 +110,7 @@ STATIC int get_arg_i(qstr op, mp_parse_node_t *pn_args, int wanted_arg_num, int printf("SyntaxError: '%s' expects an integer in position %d\n", qstr_str(op), wanted_arg_num); return 0; } - int i = MP_PARSE_NODE_LEAF_ARG(pn_args[wanted_arg_num]); + int i = MP_PARSE_NODE_LEAF_SMALL_INT(pn_args[wanted_arg_num]); if ((i & (~fit_mask)) != 0) { printf("SyntaxError: '%s' integer 0x%x does not fit in mask 0x%x\n", qstr_str(op), i, fit_mask); return 0; diff --git a/py/parse.c b/py/parse.c index 57d78a05b1f6..e70456e81482 100644 --- a/py/parse.c +++ b/py/parse.c @@ -125,7 +125,10 @@ STATIC void pop_rule(parser_t *parser, const rule_t **rule, uint *arg_i, uint *s } mp_parse_node_t mp_parse_node_new_leaf(machine_int_t kind, machine_int_t arg) { - return (mp_parse_node_t)(kind | (arg << 4)); + if (kind == MP_PARSE_NODE_SMALL_INT) { + return (mp_parse_node_t)(kind | (arg << 1)); + } + return (mp_parse_node_t)(kind | (arg << 5)); } //int num_parse_nodes_allocated = 0; @@ -171,11 +174,13 @@ void mp_parse_node_print(mp_parse_node_t pn, int indent) { } if (MP_PARSE_NODE_IS_NULL(pn)) { printf("NULL\n"); + } else if (MP_PARSE_NODE_IS_SMALL_INT(pn)) { + machine_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pn); + printf("int(" INT_FMT ")\n", arg); } else if (MP_PARSE_NODE_IS_LEAF(pn)) { - machine_int_t arg = MP_PARSE_NODE_LEAF_ARG(pn); + machine_uint_t arg = MP_PARSE_NODE_LEAF_ARG(pn); switch (MP_PARSE_NODE_LEAF_KIND(pn)) { case 
MP_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); break; - case MP_PARSE_NODE_SMALL_INT: printf("int(" INT_FMT ")\n", arg); break; case MP_PARSE_NODE_INTEGER: printf("int(%s)\n", qstr_str(arg)); break; case MP_PARSE_NODE_DECIMAL: printf("dec(%s)\n", qstr_str(arg)); break; case MP_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break; diff --git a/py/parse.h b/py/parse.h index 6e299ef69e11..6492f4d9e053 100644 --- a/py/parse.h +++ b/py/parse.h @@ -2,29 +2,30 @@ struct _mp_lexer_t; // a mp_parse_node_t is: // - 0000...0000: no node -// - xxxx...0001: an identifier; bits 4 and above are the qstr -// - xxxx...0011: a small integer; bits 4 and above are the signed value, 2's complement -// - xxxx...0101: an integer; bits 4 and above are the qstr holding the value -// - xxxx...0111: a decimal; bits 4 and above are the qstr holding the value -// - xxxx...1001: a string; bits 4 and above are the qstr holding the value -// - xxxx...1011: a string with triple quotes; bits 4 and above are the qstr holding the value -// - xxxx...1101: a token; bits 4 and above are mp_token_kind_t -// - xxxx...xxx0: pointer to mp_parse_node_struct_t +// - xxxx...xxx1: a small integer; bits 1 and above are the signed value, 2's complement +// - xxxx...xx00: pointer to mp_parse_node_struct_t +// - xx...x00010: an identifier; bits 5 and above are the qstr +// - xx...x00110: an integer; bits 5 and above are the qstr holding the value +// - xx...x01010: a decimal; bits 5 and above are the qstr holding the value +// - xx...x01110: a string; bits 5 and above are the qstr holding the value +// - xx...x10010: a string with triple quotes; bits 5 and above are the qstr holding the value +// - xx...x10110: a token; bits 5 and above are mp_token_kind_t -// makes sure the top 5 bits of x are all cleared (positive number) or all set (negavite number) +// TODO: these can now be unified with MP_OBJ_FITS_SMALL_INT(x) +// makes sure the top 2 bits of x are all cleared (positive number) or all set 
(negative number) // these macros can probably go somewhere else because they are used more than just in the parser -#define MP_UINT_HIGH_5_BITS (~((~((machine_uint_t)0)) >> 5)) +#define MP_UINT_HIGH_2_BITS (~((~((machine_uint_t)0)) >> 2)) // parser's small ints are different from VM small int -#define MP_PARSE_FITS_SMALL_INT(x) (((((machine_uint_t)(x)) & MP_UINT_HIGH_5_BITS) == 0) || ((((machine_uint_t)(x)) & MP_UINT_HIGH_5_BITS) == MP_UINT_HIGH_5_BITS)) +#define MP_PARSE_FITS_SMALL_INT(x) (((((machine_uint_t)(x)) & MP_UINT_HIGH_2_BITS) == 0) || ((((machine_uint_t)(x)) & MP_UINT_HIGH_2_BITS) == MP_UINT_HIGH_2_BITS)) #define MP_PARSE_NODE_NULL (0) -#define MP_PARSE_NODE_ID (0x1) -#define MP_PARSE_NODE_SMALL_INT (0x3) -#define MP_PARSE_NODE_INTEGER (0x5) -#define MP_PARSE_NODE_DECIMAL (0x7) -#define MP_PARSE_NODE_STRING (0x9) -#define MP_PARSE_NODE_BYTES (0xb) -#define MP_PARSE_NODE_TOKEN (0xd) +#define MP_PARSE_NODE_SMALL_INT (0x1) +#define MP_PARSE_NODE_ID (0x02) +#define MP_PARSE_NODE_INTEGER (0x06) +#define MP_PARSE_NODE_DECIMAL (0x0a) +#define MP_PARSE_NODE_STRING (0x0e) +#define MP_PARSE_NODE_BYTES (0x12) +#define MP_PARSE_NODE_TOKEN (0x16) typedef machine_uint_t mp_parse_node_t; // must be pointer size @@ -38,18 +39,19 @@ typedef struct _mp_parse_node_struct_t { // some of these evaluate their argument more than once #define MP_PARSE_NODE_IS_NULL(pn) ((pn) == MP_PARSE_NODE_NULL) -#define MP_PARSE_NODE_IS_LEAF(pn) ((pn) & 1) -#define MP_PARSE_NODE_IS_STRUCT(pn) ((pn) != MP_PARSE_NODE_NULL && ((pn) & 1) == 0) -#define MP_PARSE_NODE_IS_STRUCT_KIND(pn, k) ((pn) != MP_PARSE_NODE_NULL && ((pn) & 1) == 0 && MP_PARSE_NODE_STRUCT_KIND((mp_parse_node_struct_t*)(pn)) == (k)) +#define MP_PARSE_NODE_IS_LEAF(pn) ((pn) & 3) +#define MP_PARSE_NODE_IS_STRUCT(pn) ((pn) != MP_PARSE_NODE_NULL && ((pn) & 3) == 0) +#define MP_PARSE_NODE_IS_STRUCT_KIND(pn, k) ((pn) != MP_PARSE_NODE_NULL && ((pn) & 3) == 0 && MP_PARSE_NODE_STRUCT_KIND((mp_parse_node_struct_t*)(pn)) == (k)) -#define 
MP_PARSE_NODE_IS_ID(pn) (((pn) & 0xf) == MP_PARSE_NODE_ID) -#define MP_PARSE_NODE_IS_SMALL_INT(pn) (((pn) & 0xf) == MP_PARSE_NODE_SMALL_INT) -#define MP_PARSE_NODE_IS_TOKEN(pn) (((pn) & 0xf) == MP_PARSE_NODE_TOKEN) -#define MP_PARSE_NODE_IS_TOKEN_KIND(pn, k) ((pn) == (MP_PARSE_NODE_TOKEN | (k << 4))) +#define MP_PARSE_NODE_IS_SMALL_INT(pn) (((pn) & 0x1) == MP_PARSE_NODE_SMALL_INT) +#define MP_PARSE_NODE_IS_ID(pn) (((pn) & 0x1f) == MP_PARSE_NODE_ID) +#define MP_PARSE_NODE_IS_TOKEN(pn) (((pn) & 0x1f) == MP_PARSE_NODE_TOKEN) +#define MP_PARSE_NODE_IS_TOKEN_KIND(pn, k) ((pn) == (MP_PARSE_NODE_TOKEN | ((k) << 5))) -#define MP_PARSE_NODE_LEAF_KIND(pn) ((pn) & 0xf) +#define MP_PARSE_NODE_LEAF_KIND(pn) ((pn) & 0x1f) // TODO should probably have int and uint versions of this macro -#define MP_PARSE_NODE_LEAF_ARG(pn) (((machine_int_t)(pn)) >> 4) +#define MP_PARSE_NODE_LEAF_ARG(pn) (((machine_uint_t)(pn)) >> 5) +#define MP_PARSE_NODE_LEAF_SMALL_INT(pn) (((machine_int_t)(pn)) >> 1) #define MP_PARSE_NODE_STRUCT_KIND(pns) ((pns)->kind_num_nodes & 0xff) #define MP_PARSE_NODE_STRUCT_NUM_NODES(pns) ((pns)->kind_num_nodes >> 8) diff --git a/tests/basics/int-small.py b/tests/basics/int-small.py index 53902c7e39b0..102dac8ae7b5 100644 --- a/tests/basics/int-small.py +++ b/tests/basics/int-small.py @@ -1,5 +1,29 @@ # This tests small int range for 32-bit machine +# Small ints are variable-length encoded in MicroPython, so first +# test that encoding works as expected. + +print(0) +print(1) +print(-1) +# Value is split in 7-bit "subwords", and taking into account that all +# ints in Python are signed, there're 6 bits of magnitude. So, around 2^6 +# there's "turning point" +print(63) +print(64) +print(65) +print(-63) +print(-64) +print(-65) +# Maximum values of small ints on 32-bit platform +print(1073741823) +# Per python semantics, lexical integer is without a sign (i.e. positive) +# and '-' is unary minus operation applied to it. 
That's why -1073741824 +# (min two's-complement negative value) is not allowed. +print(-1073741823) + +# Operations tests + a = 0x3fffff print(a) a *= 0x10