Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Persistent bytecode v2 #1577

Closed
wants to merge 12 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
104 changes: 60 additions & 44 deletions py/bc.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

#include "py/nlr.h"
#include "py/objfun.h"
#include "py/runtime0.h"
#include "py/bc.h"

#if 0 // print debugging info
Expand Down Expand Up @@ -84,21 +85,31 @@ STATIC void dump_args(const mp_obj_t *a, mp_uint_t sz) {

// On entry code_state should be allocated somewhere (stack/heap) and
// contain the following valid entries:
// - code_state->code_info should be the offset in bytes from the start of
// the bytecode chunk to the start of the code-info within the bytecode
// - code_state->ip should contain the offset in bytes from the start of
// the bytecode chunk to the start of the prelude within the bytecode
// the bytecode chunk to just after n_state and n_exc_stack
// - code_state->n_state should be set to the state size (locals plus stack)
void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args) {
// This function is pretty complicated. Its main aim is to be efficient in speed and RAM
// usage for the common case of positional only args.
mp_obj_fun_bc_t *self = self_in;
mp_uint_t n_state = code_state->n_state;

// ip comes in as an offset into bytecode, so turn it into a true pointer
code_state->ip = self->bytecode + (mp_uint_t)code_state->ip;

// store pointer to constant table
code_state->const_table = self->const_table;

#if MICROPY_STACKLESS
code_state->prev = NULL;
#endif
code_state->code_info = self->bytecode + (mp_uint_t)code_state->code_info;

// get params
mp_uint_t scope_flags = *code_state->ip++;
mp_uint_t n_pos_args = *code_state->ip++;
mp_uint_t n_kwonly_args = *code_state->ip++;
mp_uint_t n_def_pos_args = *code_state->ip++;

code_state->sp = &code_state->state[0] - 1;
code_state->exc_sp = (mp_exc_stack_t*)(code_state->state + n_state) - 1;

Expand All @@ -108,33 +119,33 @@ void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t
const mp_obj_t *kwargs = args + n_args;

// var_pos_kw_args points to the stack where the var-args tuple, and var-kw dict, should go (if they are needed)
mp_obj_t *var_pos_kw_args = &code_state->state[n_state - 1 - self->n_pos_args - self->n_kwonly_args];
mp_obj_t *var_pos_kw_args = &code_state->state[n_state - 1 - n_pos_args - n_kwonly_args];

// check positional arguments

if (n_args > self->n_pos_args) {
if (n_args > n_pos_args) {
// given more than enough arguments
if (!self->takes_var_args) {
fun_pos_args_mismatch(self, self->n_pos_args, n_args);
if ((scope_flags & MP_SCOPE_FLAG_VARARGS) == 0) {
fun_pos_args_mismatch(self, n_pos_args, n_args);
}
// put extra arguments in varargs tuple
*var_pos_kw_args-- = mp_obj_new_tuple(n_args - self->n_pos_args, args + self->n_pos_args);
n_args = self->n_pos_args;
*var_pos_kw_args-- = mp_obj_new_tuple(n_args - n_pos_args, args + n_pos_args);
n_args = n_pos_args;
} else {
if (self->takes_var_args) {
if ((scope_flags & MP_SCOPE_FLAG_VARARGS) != 0) {
DEBUG_printf("passing empty tuple as *args\n");
*var_pos_kw_args-- = mp_const_empty_tuple;
}
// Apply processing and check below only if we don't have kwargs,
// otherwise, the kw handling code below has its own extensive checks.
if (n_kw == 0 && !self->has_def_kw_args) {
if (n_args >= (mp_uint_t)(self->n_pos_args - self->n_def_args)) {
if (n_kw == 0 && (scope_flags & MP_SCOPE_FLAG_DEFKWARGS) == 0) {
if (n_args >= (mp_uint_t)(n_pos_args - n_def_pos_args)) {
// given enough arguments, but may need to use some default arguments
for (mp_uint_t i = n_args; i < self->n_pos_args; i++) {
code_state->state[n_state - 1 - i] = self->extra_args[i - (self->n_pos_args - self->n_def_args)];
for (mp_uint_t i = n_args; i < n_pos_args; i++) {
code_state->state[n_state - 1 - i] = self->extra_args[i - (n_pos_args - n_def_pos_args)];
}
} else {
fun_pos_args_mismatch(self, self->n_pos_args - self->n_def_args, n_args);
fun_pos_args_mismatch(self, n_pos_args - n_def_pos_args, n_args);
}
}
}
Expand All @@ -146,27 +157,22 @@ void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t

// check keyword arguments

if (n_kw != 0 || self->has_def_kw_args) {
if (n_kw != 0 || (scope_flags & MP_SCOPE_FLAG_DEFKWARGS) != 0) {
DEBUG_printf("Initial args: ");
dump_args(code_state->state + n_state - self->n_pos_args - self->n_kwonly_args, self->n_pos_args + self->n_kwonly_args);
dump_args(code_state->state + n_state - n_pos_args - n_kwonly_args, n_pos_args + n_kwonly_args);

mp_obj_t dict = MP_OBJ_NULL;
if (self->takes_kw_args) {
if ((scope_flags & MP_SCOPE_FLAG_VARKEYWORDS) != 0) {
dict = mp_obj_new_dict(n_kw); // TODO: better go conservative with 0?
*var_pos_kw_args = dict;
}

// get pointer to arg_names array at start of bytecode prelude
const mp_obj_t *arg_names;
{
const byte *code_info = code_state->code_info;
mp_uint_t code_info_size = mp_decode_uint(&code_info);
arg_names = (const mp_obj_t*)(code_state->code_info + code_info_size);
}
// get pointer to arg_names array
const mp_obj_t *arg_names = (const mp_obj_t*)code_state->const_table;

for (mp_uint_t i = 0; i < n_kw; i++) {
mp_obj_t wanted_arg_name = kwargs[2 * i];
for (mp_uint_t j = 0; j < self->n_pos_args + self->n_kwonly_args; j++) {
for (mp_uint_t j = 0; j < n_pos_args + n_kwonly_args; j++) {
if (wanted_arg_name == arg_names[j]) {
if (code_state->state[n_state - 1 - j] != MP_OBJ_NULL) {
nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
Expand All @@ -177,27 +183,27 @@ void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t
}
}
// Didn't find name match with positional args
if (!self->takes_kw_args) {
if ((scope_flags & MP_SCOPE_FLAG_VARKEYWORDS) == 0) {
nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "function does not take keyword arguments"));
}
mp_obj_dict_store(dict, kwargs[2 * i], kwargs[2 * i + 1]);
continue2:;
}

DEBUG_printf("Args with kws flattened: ");
dump_args(code_state->state + n_state - self->n_pos_args - self->n_kwonly_args, self->n_pos_args + self->n_kwonly_args);
dump_args(code_state->state + n_state - n_pos_args - n_kwonly_args, n_pos_args + n_kwonly_args);

// fill in defaults for positional args
mp_obj_t *d = &code_state->state[n_state - self->n_pos_args];
mp_obj_t *s = &self->extra_args[self->n_def_args - 1];
for (mp_uint_t i = self->n_def_args; i > 0; i--, d++, s--) {
mp_obj_t *d = &code_state->state[n_state - n_pos_args];
mp_obj_t *s = &self->extra_args[n_def_pos_args - 1];
for (mp_uint_t i = n_def_pos_args; i > 0; i--, d++, s--) {
if (*d == MP_OBJ_NULL) {
*d = *s;
}
}

DEBUG_printf("Args after filling default positional: ");
dump_args(code_state->state + n_state - self->n_pos_args - self->n_kwonly_args, self->n_pos_args + self->n_kwonly_args);
dump_args(code_state->state + n_state - n_pos_args - n_kwonly_args, n_pos_args + n_kwonly_args);

// Check that all mandatory positional args are specified
while (d < &code_state->state[n_state]) {
Expand All @@ -209,34 +215,44 @@ continue2:;

// Check that all mandatory keyword args are specified
// Fill in default kw args if we have them
for (mp_uint_t i = 0; i < self->n_kwonly_args; i++) {
if (code_state->state[n_state - 1 - self->n_pos_args - i] == MP_OBJ_NULL) {
for (mp_uint_t i = 0; i < n_kwonly_args; i++) {
if (code_state->state[n_state - 1 - n_pos_args - i] == MP_OBJ_NULL) {
mp_map_elem_t *elem = NULL;
if (self->has_def_kw_args) {
elem = mp_map_lookup(&((mp_obj_dict_t*)self->extra_args[self->n_def_args])->map, arg_names[self->n_pos_args + i], MP_MAP_LOOKUP);
if ((scope_flags & MP_SCOPE_FLAG_DEFKWARGS) != 0) {
elem = mp_map_lookup(&((mp_obj_dict_t*)self->extra_args[n_def_pos_args])->map, arg_names[n_pos_args + i], MP_MAP_LOOKUP);
}
if (elem != NULL) {
code_state->state[n_state - 1 - self->n_pos_args - i] = elem->value;
code_state->state[n_state - 1 - n_pos_args - i] = elem->value;
} else {
nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
"function missing required keyword argument '%q'", MP_OBJ_QSTR_VALUE(arg_names[self->n_pos_args + i])));
"function missing required keyword argument '%q'", MP_OBJ_QSTR_VALUE(arg_names[n_pos_args + i])));
}
}
}

} else {
// no keyword arguments given
if (self->n_kwonly_args != 0) {
if (n_kwonly_args != 0) {
nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError,
"function missing keyword-only argument"));
}
if (self->takes_kw_args) {
if ((scope_flags & MP_SCOPE_FLAG_VARKEYWORDS) != 0) {
*var_pos_kw_args = mp_obj_new_dict(0);
}
}

// get the ip and skip argument names
const byte *ip = code_state->ip;

// store pointer to code_info and jump over it
{
code_state->code_info = ip;
const byte *ip2 = ip;
mp_uint_t code_info_size = mp_decode_uint(&ip2);
ip += code_info_size;
}

// bytecode prelude: initialise closed over variables
const byte *ip = self->bytecode + (mp_uint_t)code_state->ip;
mp_uint_t local_num;
while ((local_num = *ip++) != 255) {
code_state->state[n_state - 1 - local_num] =
Expand All @@ -246,7 +262,7 @@ continue2:;
// now that we skipped over the prelude, set the ip for the VM
code_state->ip = ip;

DEBUG_printf("Calling: n_pos_args=%d, n_kwonly_args=%d\n", self->n_pos_args, self->n_kwonly_args);
dump_args(code_state->state + n_state - self->n_pos_args - self->n_kwonly_args, self->n_pos_args + self->n_kwonly_args);
DEBUG_printf("Calling: n_pos_args=%d, n_kwonly_args=%d\n", n_pos_args, n_kwonly_args);
dump_args(code_state->state + n_state - n_pos_args - n_kwonly_args, n_pos_args + n_kwonly_args);
dump_args(code_state->state, n_state);
}
33 changes: 32 additions & 1 deletion py/bc.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,36 @@
#include "py/runtime.h"
#include "py/obj.h"

// bytecode layout:
//
// n_state : var uint
// n_exc_stack : var uint
// scope_flags : byte
// n_pos_args : byte number of arguments this function takes
// n_kwonly_args : byte number of keyword-only arguments this function takes
// n_def_pos_args : byte number of default positional arguments
//
// code_info_size : var uint | code_info_size counts bytes in this chunk
// simple_name : var qstr |
// source_file : var qstr |
// <line number info> |
// <word alignment padding> | only needed if bytecode contains pointers
//
// local_num0 : byte |
// ... : byte |
// local_numN : byte | N = num_cells
// 255 : byte | end of list sentinel
// <bytecode> |
//
//
// constant table layout:
//
// argname0 : obj (qstr)
// ... : obj (qstr)
// argnameN : obj (qstr) N = num_pos_args + num_kwonly_args
// const0 : obj
// constN : obj

// Exception stack entry
typedef struct _mp_exc_stack {
const byte *handler;
Expand All @@ -42,6 +72,7 @@ typedef struct _mp_exc_stack {
typedef struct _mp_code_state {
const byte *code_info;
const byte *ip;
const mp_uint_t *const_table;
mp_obj_t *sp;
// bit 0 is saved currently_in_except_block value
mp_exc_stack_t *exc_sp;
Expand All @@ -61,7 +92,7 @@ mp_uint_t mp_decode_uint(const byte **ptr);
mp_vm_return_kind_t mp_execute_bytecode(mp_code_state *code_state, volatile mp_obj_t inject_exc);
mp_code_state *mp_obj_fun_bc_prepare_codestate(mp_obj_t func, mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args);
void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args);
void mp_bytecode_print(const void *descr, mp_uint_t n_total_args, const byte *code, mp_uint_t len);
void mp_bytecode_print(const void *descr, const byte *code, mp_uint_t len, const mp_uint_t *const_table);
void mp_bytecode_print2(const byte *code, mp_uint_t len);
const byte *mp_bytecode_print_str(const byte *ip);
#define mp_bytecode_print_inst(code) mp_bytecode_print2(code, 1)
Expand Down
59 changes: 57 additions & 2 deletions py/builtinimport.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,16 @@ STATIC mp_import_stat_t stat_dir_or_file(vstr_t *path) {
if (stat == MP_IMPORT_STAT_DIR) {
return stat;
}

#if MICROPY_PORTABLE_CODE
vstr_add_str(path, ".mpc");
stat = mp_import_stat(vstr_null_terminated_str(path));
if (stat == MP_IMPORT_STAT_FILE) {
return stat;
}
vstr_cut_tail_bytes(path, 4);
#endif

vstr_add_str(path, ".py");
stat = mp_import_stat(vstr_null_terminated_str(path));
if (stat == MP_IMPORT_STAT_FILE) {
Expand Down Expand Up @@ -132,11 +142,56 @@ STATIC void do_load_from_lexer(mp_obj_t module_obj, mp_lexer_t *lex, const char
mp_parse_compile_execute(lex, MP_PARSE_FILE_INPUT, mod_globals, mod_globals);
}

#if MICROPY_PORTABLE_CODE
// Run the contents of a precompiled module file as the body of module_obj.
//
// module_obj: module whose globals dict is installed as both the globals and
//             the locals while the loaded code runs
// fname:      file name handed to mp_raw_code_load_file
//
// The globals/locals that were active on entry are reinstated before this
// function returns. If loading or running the code raises, they are reinstated
// first and then the same exception value is raised again.
STATIC void do_load_from_mpc(mp_obj_t module_obj, const char *fname) {
    #if MICROPY_PY___FILE__
    // TODO: store __file__ on the module. There is no lexer on this path, so
    // the lex->source_name used in the lines below is not available here.
    //qstr source_name = lex->source_name;
    //mp_store_attr(module_obj, MP_QSTR___file__, MP_OBJ_NEW_QSTR(source_name));
    #endif

    // the module's own dict is the execution context for its code
    mp_obj_dict_t *module_dict = mp_obj_module_get_globals(module_obj);

    // remember the current context; volatile because the values are read
    // again on the exception path after nlr_push
    mp_obj_dict_t *volatile saved_globals = mp_globals_get();
    mp_obj_dict_t *volatile saved_locals = mp_locals_get();

    // switch to the module's context
    mp_globals_set(module_dict);
    mp_locals_set(module_dict);

    nlr_buf_t nlr;
    if (nlr_push(&nlr) != 0) {
        // an exception arrived: put the old context back, then re-raise it
        mp_globals_set(saved_globals);
        mp_locals_set(saved_locals);
        nlr_raise(nlr.ret_val);
    } else {
        // load the raw code, wrap it in a function object and call it
        mp_raw_code_t *loaded_code = mp_raw_code_load_file(fname);
        mp_obj_t module_entry = mp_make_function_from_raw_code(loaded_code, MP_OBJ_NULL, MP_OBJ_NULL);
        mp_call_function_0(module_entry);

        // normal completion: close the nlr block and put the old context back
        nlr_pop();
        mp_globals_set(saved_globals);
        mp_locals_set(saved_locals);
    }
}
#endif

// Load the code for module_obj from the file whose name is held in `file`.
//
// module_obj: module object passed through to the loader that is chosen
// file:       vstr holding the file name; its null-terminated form is used
//
// NOTE(review): this text comes from a rendered diff. The two statements right
// after file_str (create a lexer, call do_load_from_lexer) look like the removed
// pre-change lines shown next to their replacement further down -- confirm
// against the real py/builtinimport.c before treating this as final code.
STATIC void do_load(mp_obj_t module_obj, vstr_t *file) {
// create the lexer
char *file_str = vstr_null_terminated_str(file);
mp_lexer_t *lex = mp_lexer_new_from_file(file_str);
do_load_from_lexer(module_obj, lex, file_str);
#if MICROPY_PORTABLE_CODE
// a file name whose last character is 'c' is passed to the .mpc loader
if (file_str[file->len - 1] == 'c') {
do_load_from_mpc(module_obj, file_str);
} else
#endif
{
// any other name: open it with the lexer and load it as source
mp_lexer_t *lex = mp_lexer_new_from_file(file_str);
do_load_from_lexer(module_obj, lex, file_str);
}
}

STATIC void chop_component(const char *start, const char **end) {
Expand Down
17 changes: 17 additions & 0 deletions py/compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,12 @@ STATIC void close_over_variables_etc(compiler_t *comp, scope_t *this_scope, int
assert(n_pos_defaults >= 0);
assert(n_kw_defaults >= 0);

// set flags
if (n_kw_defaults > 0) {
this_scope->scope_flags |= MP_SCOPE_FLAG_DEFKWARGS;
}
this_scope->num_def_pos_args = n_pos_defaults;

// make closed over variables, if any
// ensure they are closed over in the order defined in the outer scope (mainly to agree with CPython)
int nfree = 0;
Expand Down Expand Up @@ -3259,6 +3265,17 @@ mp_obj_t mp_compile(mp_parse_tree_t *parse_tree, qstr source_file, uint emit_opt
nlr_raise(comp->compile_error);
} else {
// return function that executes the outer module
#if MICROPY_PORTABLE_CODE_SAVE
if (!is_repl) {
vstr_t vstr;
vstr_init(&vstr, 16);
vstr_add_str(&vstr, qstr_str(source_file));
vstr_cut_tail_bytes(&vstr, 2);
vstr_add_str(&vstr, "mpc");
mp_raw_code_save_file(outer_raw_code, vstr_null_terminated_str(&vstr));
vstr_clear(&vstr);
}
#endif
return mp_make_function_from_raw_code(outer_raw_code, MP_OBJ_NULL, MP_OBJ_NULL);
}
}