Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

parse: Refactor parse node encoding to support full range of small ints. #314

Merged
merged 1 commit into from
Feb 22, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
37 changes: 22 additions & 15 deletions py/compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ mp_parse_node_t fold_constants(mp_parse_node_t pn) {
switch (MP_PARSE_NODE_STRUCT_KIND(pns)) {
case PN_shift_expr:
if (n == 3 && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) {
int arg0 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
int arg1 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[2]);
int arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[0]);
int arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[2]);
if (MP_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], MP_TOKEN_OP_DBL_LESS)) {
#if MICROPY_EMIT_CPYTHON
// can overflow; enabled only to compare with CPython
Expand All @@ -105,8 +105,8 @@ mp_parse_node_t fold_constants(mp_parse_node_t pn) {
case PN_arith_expr:
// overflow checking here relies on SMALL_INT being strictly smaller than machine_int_t
if (n == 3 && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) {
machine_int_t arg0 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
machine_int_t arg1 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[2]);
machine_int_t arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[0]);
machine_int_t arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[2]);
machine_int_t res;
if (MP_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], MP_TOKEN_OP_PLUS)) {
res = arg0 + arg1;
Expand All @@ -125,8 +125,8 @@ mp_parse_node_t fold_constants(mp_parse_node_t pn) {

case PN_term:
if (n == 3 && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) {
int arg0 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
int arg1 = MP_PARSE_NODE_LEAF_ARG(pns->nodes[2]);
int arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[0]);
int arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[2]);
if (MP_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], MP_TOKEN_OP_STAR)) {
#if MICROPY_EMIT_CPYTHON
// can overflow; enabled only to compare with CPython
Expand All @@ -149,7 +149,7 @@ mp_parse_node_t fold_constants(mp_parse_node_t pn) {

case PN_factor_2:
if (MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[1])) {
machine_int_t arg = MP_PARSE_NODE_LEAF_ARG(pns->nodes[1]);
machine_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[1]);
if (MP_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], MP_TOKEN_OP_PLUS)) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, arg);
} else if (MP_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], MP_TOKEN_OP_MINUS)) {
Expand All @@ -169,10 +169,10 @@ mp_parse_node_t fold_constants(mp_parse_node_t pn) {
if (MP_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && MP_PARSE_NODE_IS_NULL(pns->nodes[1]) && !MP_PARSE_NODE_IS_NULL(pns->nodes[2])) {
mp_parse_node_struct_t* pns2 = (mp_parse_node_struct_t*)pns->nodes[2];
if (MP_PARSE_NODE_IS_SMALL_INT(pns2->nodes[0])) {
int power = MP_PARSE_NODE_LEAF_ARG(pns2->nodes[0]);
int power = MP_PARSE_NODE_LEAF_SMALL_INT(pns2->nodes[0]);
if (power >= 0) {
int ans = 1;
int base = MP_PARSE_NODE_LEAF_ARG(pns->nodes[0]);
int base = MP_PARSE_NODE_LEAF_SMALL_INT(pns->nodes[0]);
for (; power > 0; power--) {
ans *= base;
}
Expand Down Expand Up @@ -320,10 +320,14 @@ STATIC void cpython_c_print_quoted_str(vstr_t *vstr, qstr qstr, bool bytes) {

STATIC void cpython_c_tuple_emit_const(compiler_t *comp, mp_parse_node_t pn, vstr_t *vstr) {
assert(MP_PARSE_NODE_IS_LEAF(pn));
if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
vstr_printf(vstr, INT_FMT, MP_PARSE_NODE_LEAF_SMALL_INT(pn));
return;
}

int arg = MP_PARSE_NODE_LEAF_ARG(pn);
switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
case MP_PARSE_NODE_ID: assert(0);
case MP_PARSE_NODE_SMALL_INT: vstr_printf(vstr, "%d", arg); break;
case MP_PARSE_NODE_INTEGER: vstr_printf(vstr, "%s", qstr_str(arg)); break;
case MP_PARSE_NODE_DECIMAL: vstr_printf(vstr, "%s", qstr_str(arg)); break;
case MP_PARSE_NODE_STRING: cpython_c_print_quoted_str(vstr, arg, false); break;
Expand Down Expand Up @@ -421,11 +425,11 @@ void compile_generic_tuple(compiler_t *comp, mp_parse_node_struct_t *pns) {

STATIC bool node_is_const_false(mp_parse_node_t pn) {
return MP_PARSE_NODE_IS_TOKEN_KIND(pn, MP_TOKEN_KW_FALSE);
// untested: || (MP_PARSE_NODE_IS_SMALL_INT(pn) && MP_PARSE_NODE_LEAF_ARG(pn) == 1);
// untested: || (MP_PARSE_NODE_IS_SMALL_INT(pn) && MP_PARSE_NODE_LEAF_SMALL_INT(pn) == 0);
}

STATIC bool node_is_const_true(mp_parse_node_t pn) {
return MP_PARSE_NODE_IS_TOKEN_KIND(pn, MP_TOKEN_KW_TRUE) || (MP_PARSE_NODE_IS_SMALL_INT(pn) && MP_PARSE_NODE_LEAF_ARG(pn) == 1);
return MP_PARSE_NODE_IS_TOKEN_KIND(pn, MP_TOKEN_KW_TRUE) || (MP_PARSE_NODE_IS_SMALL_INT(pn) && MP_PARSE_NODE_LEAF_SMALL_INT(pn) == 1);
}

#if MICROPY_EMIT_CPYTHON
Expand Down Expand Up @@ -1464,7 +1468,8 @@ void compile_for_stmt_optimised_range(compiler_t *comp, mp_parse_node_t pn_var,
// compile: if var <cond> end: goto top
compile_node(comp, pn_var);
compile_node(comp, pn_end);
if (MP_PARSE_NODE_LEAF_ARG(pn_step) >= 0) {
assert(MP_PARSE_NODE_IS_SMALL_INT(pn_step));
if (MP_PARSE_NODE_LEAF_SMALL_INT(pn_step) >= 0) {
EMIT_ARG(binary_op, RT_BINARY_OP_LESS);
} else {
EMIT_ARG(binary_op, RT_BINARY_OP_MORE);
Expand Down Expand Up @@ -2514,11 +2519,13 @@ STATIC compile_function_t compile_function[] = {
void compile_node(compiler_t *comp, mp_parse_node_t pn) {
if (MP_PARSE_NODE_IS_NULL(pn)) {
// pass
} else if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
machine_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
EMIT_ARG(load_const_small_int, arg);
} else if (MP_PARSE_NODE_IS_LEAF(pn)) {
machine_int_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
machine_uint_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
case MP_PARSE_NODE_ID: EMIT_ARG(load_id, arg); break;
case MP_PARSE_NODE_SMALL_INT: EMIT_ARG(load_const_small_int, arg); break;
case MP_PARSE_NODE_INTEGER: EMIT_ARG(load_const_int, arg); break;
case MP_PARSE_NODE_DECIMAL: EMIT_ARG(load_const_dec, arg); break;
case MP_PARSE_NODE_STRING: EMIT_ARG(load_const_str, arg, false); break;
Expand Down
2 changes: 1 addition & 1 deletion py/emitinlinethumb.c
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ STATIC int get_arg_i(qstr op, mp_parse_node_t *pn_args, int wanted_arg_num, int
printf("SyntaxError: '%s' expects an integer in position %d\n", qstr_str(op), wanted_arg_num);
return 0;
}
int i = MP_PARSE_NODE_LEAF_ARG(pn_args[wanted_arg_num]);
int i = MP_PARSE_NODE_LEAF_SMALL_INT(pn_args[wanted_arg_num]);
if ((i & (~fit_mask)) != 0) {
printf("SyntaxError: '%s' integer 0x%x does not fit in mask 0x%x\n", qstr_str(op), i, fit_mask);
return 0;
Expand Down
11 changes: 8 additions & 3 deletions py/parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,10 @@ STATIC void pop_rule(parser_t *parser, const rule_t **rule, uint *arg_i, uint *s
}

mp_parse_node_t mp_parse_node_new_leaf(machine_int_t kind, machine_int_t arg) {
return (mp_parse_node_t)(kind | (arg << 4));
if (kind == MP_PARSE_NODE_SMALL_INT) {
return (mp_parse_node_t)(kind | (arg << 1));
}
return (mp_parse_node_t)(kind | (arg << 5));
}

//int num_parse_nodes_allocated = 0;
Expand Down Expand Up @@ -171,11 +174,13 @@ void mp_parse_node_print(mp_parse_node_t pn, int indent) {
}
if (MP_PARSE_NODE_IS_NULL(pn)) {
printf("NULL\n");
} else if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
machine_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
printf("int(" INT_FMT ")\n", arg);
} else if (MP_PARSE_NODE_IS_LEAF(pn)) {
machine_int_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
machine_uint_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
case MP_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); break;
case MP_PARSE_NODE_SMALL_INT: printf("int(" INT_FMT ")\n", arg); break;
case MP_PARSE_NODE_INTEGER: printf("int(%s)\n", qstr_str(arg)); break;
case MP_PARSE_NODE_DECIMAL: printf("dec(%s)\n", qstr_str(arg)); break;
case MP_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break;
Expand Down
56 changes: 29 additions & 27 deletions py/parse.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,30 @@ struct _mp_lexer_t;

// a mp_parse_node_t is:
// - 0000...0000: no node
// - xxxx...0001: an identifier; bits 4 and above are the qstr
// - xxxx...0011: a small integer; bits 4 and above are the signed value, 2's complement
// - xxxx...0101: an integer; bits 4 and above are the qstr holding the value
// - xxxx...0111: a decimal; bits 4 and above are the qstr holding the value
// - xxxx...1001: a string; bits 4 and above are the qstr holding the value
// - xxxx...1011: a string with triple quotes; bits 4 and above are the qstr holding the value
// - xxxx...1101: a token; bits 4 and above are mp_token_kind_t
// - xxxx...xxx0: pointer to mp_parse_node_struct_t
// - xxxx...xxx1: a small integer; bits 1 and above are the signed value, 2's complement
// - xxxx...xx00: pointer to mp_parse_node_struct_t
// - xx...x00010: an identifier; bits 5 and above are the qstr
// - xx...x00110: an integer; bits 5 and above are the qstr holding the value
// - xx...x01010: a decimal; bits 5 and above are the qstr holding the value
// - xx...x01110: a string; bits 5 and above are the qstr holding the value
// - xx...x10010: a string with triple quotes; bits 5 and above are the qstr holding the value
// - xx...x10110: a token; bits 5 and above are mp_token_kind_t

// makes sure the top 5 bits of x are all cleared (positive number) or all set (negative number)
// TODO: these can now be unified with MP_OBJ_FITS_SMALL_INT(x)
// makes sure the top 2 bits of x are all cleared (positive number) or all set (negative number)
// these macros can probably go somewhere else because they are used more than just in the parser
#define MP_UINT_HIGH_5_BITS (~((~((machine_uint_t)0)) >> 5))
#define MP_UINT_HIGH_2_BITS (~((~((machine_uint_t)0)) >> 2))
// parser's small ints are different from VM small int
#define MP_PARSE_FITS_SMALL_INT(x) (((((machine_uint_t)(x)) & MP_UINT_HIGH_5_BITS) == 0) || ((((machine_uint_t)(x)) & MP_UINT_HIGH_5_BITS) == MP_UINT_HIGH_5_BITS))
#define MP_PARSE_FITS_SMALL_INT(x) (((((machine_uint_t)(x)) & MP_UINT_HIGH_2_BITS) == 0) || ((((machine_uint_t)(x)) & MP_UINT_HIGH_2_BITS) == MP_UINT_HIGH_2_BITS))

#define MP_PARSE_NODE_NULL (0)
#define MP_PARSE_NODE_ID (0x1)
#define MP_PARSE_NODE_SMALL_INT (0x3)
#define MP_PARSE_NODE_INTEGER (0x5)
#define MP_PARSE_NODE_DECIMAL (0x7)
#define MP_PARSE_NODE_STRING (0x9)
#define MP_PARSE_NODE_BYTES (0xb)
#define MP_PARSE_NODE_TOKEN (0xd)
#define MP_PARSE_NODE_SMALL_INT (0x1)
#define MP_PARSE_NODE_ID (0x02)
#define MP_PARSE_NODE_INTEGER (0x06)
#define MP_PARSE_NODE_DECIMAL (0x0a)
#define MP_PARSE_NODE_STRING (0x0e)
#define MP_PARSE_NODE_BYTES (0x12)
#define MP_PARSE_NODE_TOKEN (0x16)

typedef machine_uint_t mp_parse_node_t; // must be pointer size

Expand All @@ -38,18 +39,19 @@ typedef struct _mp_parse_node_struct_t {
// some of these evaluate their argument more than once

#define MP_PARSE_NODE_IS_NULL(pn) ((pn) == MP_PARSE_NODE_NULL)
#define MP_PARSE_NODE_IS_LEAF(pn) ((pn) & 1)
#define MP_PARSE_NODE_IS_STRUCT(pn) ((pn) != MP_PARSE_NODE_NULL && ((pn) & 1) == 0)
#define MP_PARSE_NODE_IS_STRUCT_KIND(pn, k) ((pn) != MP_PARSE_NODE_NULL && ((pn) & 1) == 0 && MP_PARSE_NODE_STRUCT_KIND((mp_parse_node_struct_t*)(pn)) == (k))
#define MP_PARSE_NODE_IS_LEAF(pn) ((pn) & 3)
#define MP_PARSE_NODE_IS_STRUCT(pn) ((pn) != MP_PARSE_NODE_NULL && ((pn) & 3) == 0)
#define MP_PARSE_NODE_IS_STRUCT_KIND(pn, k) ((pn) != MP_PARSE_NODE_NULL && ((pn) & 3) == 0 && MP_PARSE_NODE_STRUCT_KIND((mp_parse_node_struct_t*)(pn)) == (k))

#define MP_PARSE_NODE_IS_ID(pn) (((pn) & 0xf) == MP_PARSE_NODE_ID)
#define MP_PARSE_NODE_IS_SMALL_INT(pn) (((pn) & 0xf) == MP_PARSE_NODE_SMALL_INT)
#define MP_PARSE_NODE_IS_TOKEN(pn) (((pn) & 0xf) == MP_PARSE_NODE_TOKEN)
#define MP_PARSE_NODE_IS_TOKEN_KIND(pn, k) ((pn) == (MP_PARSE_NODE_TOKEN | (k << 4)))
#define MP_PARSE_NODE_IS_SMALL_INT(pn) (((pn) & 0x1) == MP_PARSE_NODE_SMALL_INT)
#define MP_PARSE_NODE_IS_ID(pn) (((pn) & 0x1f) == MP_PARSE_NODE_ID)
#define MP_PARSE_NODE_IS_TOKEN(pn) (((pn) & 0x1f) == MP_PARSE_NODE_TOKEN)
#define MP_PARSE_NODE_IS_TOKEN_KIND(pn, k) ((pn) == (MP_PARSE_NODE_TOKEN | ((k) << 5)))

#define MP_PARSE_NODE_LEAF_KIND(pn) ((pn) & 0xf)
#define MP_PARSE_NODE_LEAF_KIND(pn) ((pn) & 0x1f)
// TODO should probably have int and uint versions of this macro
#define MP_PARSE_NODE_LEAF_ARG(pn) (((machine_int_t)(pn)) >> 4)
#define MP_PARSE_NODE_LEAF_ARG(pn) (((machine_uint_t)(pn)) >> 5)
#define MP_PARSE_NODE_LEAF_SMALL_INT(pn) (((machine_int_t)(pn)) >> 1)
#define MP_PARSE_NODE_STRUCT_KIND(pns) ((pns)->kind_num_nodes & 0xff)
#define MP_PARSE_NODE_STRUCT_NUM_NODES(pns) ((pns)->kind_num_nodes >> 8)

Expand Down
24 changes: 24 additions & 0 deletions tests/basics/int-small.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,29 @@
# This tests the small int range for a 32-bit machine

# Small ints are variable-length encoded in MicroPython, so first
# test that encoding works as expected.

print(0)
print(1)
print(-1)
# The value is split into 7-bit "subwords", and since all ints in Python
# are signed, there are 6 bits of magnitude. So there is a "turning point"
# around 2^6.
print(63)
print(64)
print(65)
print(-63)
print(-64)
print(-65)
# Maximum value of a small int on a 32-bit platform
print(1073741823)
# Per Python semantics, an integer literal has no sign (i.e. it is positive)
# and '-' is a unary minus operation applied to it. That's why -1073741824
# (the minimum two's-complement negative value) is not allowed.
print(-1073741823)

# Operations tests

a = 0x3fffff
print(a)
a *= 0x10
Expand Down