Skip to content

Commit f441b6f

Browse files
committed
WIP - Introduce contextually parsing programs vs evals
This is more or less the code I used in my JRuby proof of concept to parse evals. Evals depend on the variable scopes of their parents, so they can produce a different syntax tree than the same source parsed as a standalone program. Questions: 1. How does MRI currently compile evals? I cannot find anything. 2. This passes in a char * of data. It does not encode the variables we pass in, because the system calling this already knows them. Is this adequate, though? 3. Can I get guidance on how best to test this?
1 parent 094b2b7 commit f441b6f

File tree

3 files changed

+43
-13
lines changed

3 files changed

+43
-13
lines changed

ext/yarp/extension.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ dump_input(input_t *input, const char *filepath) {
194194
yp_parser_t parser;
195195
yp_parser_init(&parser, input->source, input->size, filepath);
196196

197-
yp_node_t *node = yp_parse(&parser);
197+
yp_node_t *node = yp_parse(&parser, false);
198198
yp_serialize(&parser, node, &buffer);
199199

200200
VALUE result = rb_str_new(buffer.value, buffer.length);
@@ -378,7 +378,7 @@ lex_input(input_t *input, const char *filepath) {
378378
};
379379

380380
parser.lex_callback = &lex_callback;
381-
yp_node_t *node = yp_parse(&parser);
381+
yp_node_t *node = yp_parse(&parser, false);
382382

383383
// Here we need to update the source range to have the correct newline
384384
// offsets. We do it here because we've already created the object and given
@@ -439,7 +439,7 @@ parse_input(input_t *input, const char *filepath) {
439439
yp_parser_t parser;
440440
yp_parser_init(&parser, input->source, input->size, filepath);
441441

442-
yp_node_t *node = yp_parse(&parser);
442+
yp_node_t *node = yp_parse(&parser, false);
443443
rb_encoding *encoding = rb_enc_find(parser.encoding.name);
444444

445445
VALUE source = yp_source_new(&parser);
@@ -582,7 +582,7 @@ memsize(VALUE self, VALUE string) {
582582
size_t length = RSTRING_LEN(string);
583583
yp_parser_init(&parser, RSTRING_PTR(string), length, NULL);
584584

585-
yp_node_t *node = yp_parse(&parser);
585+
yp_node_t *node = yp_parse(&parser, false);
586586
yp_memsize_t memsize;
587587
yp_node_memsize(node, &memsize);
588588

@@ -608,7 +608,7 @@ profile_file(VALUE self, VALUE filepath) {
608608
yp_parser_t parser;
609609
yp_parser_init(&parser, input.source, input.size, checked);
610610

611-
yp_node_t *node = yp_parse(&parser);
611+
yp_node_t *node = yp_parse(&parser, false);
612612
yp_node_destroy(&parser, node);
613613
yp_parser_free(&parser);
614614

include/yarp.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ YP_EXPORTED_FUNCTION void yp_parser_register_encoding_decode_callback(yp_parser_
5151
YP_EXPORTED_FUNCTION void yp_parser_free(yp_parser_t *parser);
5252

5353
// Parse the Ruby source associated with the given parser and return the tree.
54-
YP_EXPORTED_FUNCTION yp_node_t * yp_parse(yp_parser_t *parser);
54+
YP_EXPORTED_FUNCTION yp_node_t * yp_parse(yp_parser_t *parser, bool eval);
5555

5656
// Pretty-prints the AST represented by the given node to the given buffer.
5757
YP_EXPORTED_FUNCTION void yp_prettyprint(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
@@ -61,7 +61,7 @@ YP_EXPORTED_FUNCTION void yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_
6161

6262
// Parse and serialize the AST represented by the given source to the given
6363
// buffer.
64-
YP_EXPORTED_FUNCTION void yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer);
64+
YP_EXPORTED_FUNCTION void yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *parent_scopes);
6565

6666
// Returns a string representation of the given token type.
6767
YP_EXPORTED_FUNCTION const char * yp_token_type_to_str(yp_token_type_t token_type);

src/yarp.c

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12773,8 +12773,8 @@ parse_expression(yp_parser_t *parser, yp_binding_power_t binding_power, const ch
1277312773
}
1277412774

1277512775
static yp_node_t *
12776-
parse_program(yp_parser_t *parser) {
12777-
yp_parser_scope_push(parser, true);
12776+
parse_program(yp_parser_t *parser, bool eval) {
12777+
yp_parser_scope_push(parser, !eval);
1277812778
parser_lex(parser);
1277912779

1278012780
yp_statements_node_t *statements = parse_statements(parser, YP_CONTEXT_MAIN);
@@ -12794,6 +12794,34 @@ parse_program(yp_parser_t *parser) {
1279412794
return (yp_node_t *) yp_program_node_create(parser, &locals, statements);
1279512795
}
1279612796

12797+
// Assumes the data is always a valid string, since it comes from a trusted source (Ruby implementation internals).
12798+
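// Format: [num_scopes, (num_vars, (var_char*, 0)*)*] -- a one-byte scope count, then for each scope a one-byte variable count followed by that many NUL-terminated variable names.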
12799+
static void
12800+
yp_populate_eval_scopes(yp_parser_t *parser, const char *data) {
12801+
const char *p = data;
12802+
size_t number_of_scopes = (size_t) *p;
12803+
12804+
p++;
12805+
for (size_t scope_index = 0; scope_index < number_of_scopes; scope_index++) {
12806+
size_t number_of_variables = (size_t) *p++;
12807+
12808+
yp_parser_scope_push(parser, scope_index == 0);
12809+
12810+
for (size_t variable_index = 0; variable_index < number_of_variables; variable_index++) {
12811+
char *eos = strchr(p, 0);
12812+
12813+
yp_token_t lvar = (yp_token_t) {
12814+
.type = YP_TOKEN_IDENTIFIER,
12815+
.start = p,
12816+
.end = eos
12817+
};
12818+
yp_parser_local_add_token(parser, &lvar);
12819+
12820+
p = ++eos;
12821+
}
12822+
}
12823+
}
12824+
1279712825
/******************************************************************************/
1279812826
/* External functions */
1279912827
/******************************************************************************/
@@ -12930,8 +12958,8 @@ yp_parser_free(yp_parser_t *parser) {
1293012958

1293112959
// Parse the Ruby source associated with the given parser and return the tree.
1293212960
YP_EXPORTED_FUNCTION yp_node_t *
12933-
yp_parse(yp_parser_t *parser) {
12934-
return parse_program(parser);
12961+
yp_parse(yp_parser_t *parser, bool eval) {
12962+
return parse_program(parser, eval);
1293512963
}
1293612964

1293712965
YP_EXPORTED_FUNCTION void
@@ -12948,11 +12976,13 @@ yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
1294812976
// Parse and serialize the AST represented by the given source to the given
1294912977
// buffer.
1295012978
YP_EXPORTED_FUNCTION void
12951-
yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer) {
12979+
yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *parent_scopes) {
12980+
bool eval = parent_scopes != NULL;
1295212981
yp_parser_t parser;
1295312982
yp_parser_init(&parser, source, size, NULL);
12983+
if (eval) yp_populate_eval_scopes(&parser, parent_scopes);
1295412984

12955-
yp_node_t *node = yp_parse(&parser);
12985+
yp_node_t *node = yp_parse(&parser, eval);
1295612986
yp_serialize(&parser, node, buffer);
1295712987

1295812988
yp_node_destroy(&parser, node);

0 commit comments

Comments
 (0)