Skip to content

Commit

Permalink
[Universal Parser] Reduce dependence on RArray in parse.y
Browse files Browse the repository at this point in the history
- Introduce `rb_parser_ary_t` structure to partly eliminate RArray from parse.y
  - In this patch, `parser_params->tokens` and `parser_params->ast->node_buffer->tokens` are now `rb_parser_ary_t *`
  - Instead, `ast_node_all_tokens()` internally creates a Ruby Array object from the `rb_parser_ary_t`
  - Also, delete `rb_ast_tokens()` and `rb_ast_set_tokens()` in node.c

- Implement `rb_parser_str_escape()`
  - This is a port of the `rb_str_escape()` function in string.c
  - `rb_parser_str_escape()` does not depend on `VALUE` (RString)
  - Instead, it uses `rb_parser_string_t *`
  - This function is used when the `--dump=y` option is passed

- Because the universal parser is still a work in progress, similar functions such as `rb_parser_tokens_free()` exist in both node.c and parse.y. They may need to be refactored in some way in the future

- Although we considered redesigning the structure: `ast->node_buffer->tokens` into `ast->tokens`, we leave it as it is because `rb_ast_t` is an imemo. (We will address it in the future)
  • Loading branch information
hasumikin authored and yui-knk committed Mar 12, 2024
1 parent f42164e commit 9a19cfd
Show file tree
Hide file tree
Showing 7 changed files with 429 additions and 215 deletions.
27 changes: 26 additions & 1 deletion ast.c
Expand Up @@ -774,10 +774,35 @@ ast_node_last_column(rb_execution_context_t *ec, VALUE self)
/*
 * Convert the parser-side token list of the AST wrapped by `self` into a
 * Ruby Array.
 *
 * The tokens are read from `data->ast->node_buffer->tokens`, which is an
 * `rb_parser_ary_t *` rather than a Ruby Array, so a fresh Array is built
 * on every call.  Each element is a 4-element Array:
 *
 *   [id, type (Symbol), text (String), [beg_line, beg_col, end_line, end_col]]
 *
 * Returns Qnil when the node buffer has no token list; otherwise returns the
 * new Array, frozen.
 */
static VALUE
ast_node_all_tokens(rb_execution_context_t *ec, VALUE self)
{
    long i;
    struct ASTNodeData *data;
    rb_parser_ary_t *parser_tokens;
    rb_parser_ast_token_t *parser_token;
    VALUE str, loc, token, all_tokens;

    TypedData_Get_Struct(self, struct ASTNodeData, &rb_node_type, data);

    /* No token list is attached to this AST's node buffer. */
    parser_tokens = data->ast->node_buffer->tokens;
    if (parser_tokens == NULL) {
        return Qnil;
    }

    /* Pre-size the result: exactly one entry is pushed per parser token. */
    all_tokens = rb_ary_new2(parser_tokens->len);
    for (i = 0; i < parser_tokens->len; i++) {
        parser_token = parser_tokens->data[i];
        str = rb_str_new(parser_token->str->ptr, parser_token->str->len);
        loc = rb_ary_new_from_args(4,
            INT2FIX(parser_token->loc.beg_pos.lineno),
            INT2FIX(parser_token->loc.beg_pos.column),
            INT2FIX(parser_token->loc.end_pos.lineno),
            INT2FIX(parser_token->loc.end_pos.column)
        );
        token = rb_ary_new_from_args(4, INT2FIX(parser_token->id), ID2SYM(rb_intern(parser_token->type_name)), str, loc);
        rb_ary_push(all_tokens, token);
    }
    rb_obj_freeze(all_tokens);

    return all_tokens;
}

static VALUE
Expand Down
37 changes: 22 additions & 15 deletions node.c
Expand Up @@ -69,7 +69,7 @@ rb_node_buffer_new(void)
init_node_buffer_list(&nb->unmarkable, (node_buffer_elem_t*)&nb[1], ruby_xmalloc);
init_node_buffer_list(&nb->markable, (node_buffer_elem_t*)((size_t)nb->unmarkable.head + bucket_size), ruby_xmalloc);
nb->local_tables = 0;
nb->tokens = Qnil;
nb->tokens = 0;
#ifdef UNIVERSAL_PARSER
nb->config = config;
#endif
Expand Down Expand Up @@ -176,6 +176,24 @@ parser_string_free(rb_ast_t *ast, rb_parser_string_t *str)
xfree(str);
}

/*
 * Release a single parser token: first the string it holds, then the token
 * structure itself.  A NULL token is ignored.
 */
static void
parser_ast_token_free(rb_ast_t *ast, rb_parser_ast_token_t *token)
{
    if (token) {
        parser_string_free(ast, token->str);
        xfree(token);
    }
}

/*
 * Release a parser token array: every token stored in it, then the element
 * storage, then the array structure itself.  `tokens` must not be NULL.
 */
static void
parser_tokens_free(rb_ast_t *ast, rb_parser_ary_t *tokens)
{
    long idx = 0;

    while (idx < tokens->len) {
        parser_ast_token_free(ast, tokens->data[idx]);
        idx++;
    }

    xfree(tokens->data);
    xfree(tokens);
}

static void
free_ast_value(rb_ast_t *ast, void *ctx, NODE *node)
{
Expand Down Expand Up @@ -228,6 +246,9 @@ free_ast_value(rb_ast_t *ast, void *ctx, NODE *node)
static void
rb_node_buffer_free(rb_ast_t *ast, node_buffer_t *nb)
{
if (ast->node_buffer && ast->node_buffer->tokens) {
parser_tokens_free(ast, ast->node_buffer->tokens);
}
iterate_node_values(ast, &nb->unmarkable, free_ast_value, NULL);
node_buffer_list_free(ast, &nb->unmarkable);
node_buffer_list_free(ast, &nb->markable);
Expand Down Expand Up @@ -388,8 +409,6 @@ void
rb_ast_mark_and_move(rb_ast_t *ast, bool reference_updating)
{
if (ast->node_buffer) {
rb_gc_mark_and_move(&ast->node_buffer->tokens);

node_buffer_t *nb = ast->node_buffer;
iterate_node_values(ast, &nb->markable, mark_and_move_ast_value, NULL);

Expand Down Expand Up @@ -438,18 +457,6 @@ rb_ast_dispose(rb_ast_t *ast)
rb_ast_free(ast);
}

/* Return the token list stored in the node buffer of `ast`. */
VALUE
rb_ast_tokens(rb_ast_t *ast)
{
    node_buffer_t *nb = ast->node_buffer;

    return nb->tokens;
}

/*
 * Store `tokens` in the node buffer of `ast`.  The assignment goes through
 * RB_OBJ_WRITE with `ast` as the owning object.
 */
void
rb_ast_set_tokens(rb_ast_t *ast, VALUE tokens)
{
    node_buffer_t *nb = ast->node_buffer;

    RB_OBJ_WRITE(ast, &nb->tokens, tokens);
}

VALUE
rb_node_set_type(NODE *n, enum node_type t)
{
Expand Down
4 changes: 1 addition & 3 deletions node.h
Expand Up @@ -40,7 +40,7 @@ struct node_buffer_struct {
// - text of token
// - location info
// Array, whose entry is array
VALUE tokens;
rb_parser_ary_t *tokens;
#ifdef UNIVERSAL_PARSER
const rb_parser_config_t *config;
#endif
Expand All @@ -55,7 +55,6 @@ rb_ast_t *rb_ast_new(void);
#endif
size_t rb_ast_memsize(const rb_ast_t*);
void rb_ast_dispose(rb_ast_t*);
VALUE rb_ast_tokens(rb_ast_t *ast);
#if RUBY_DEBUG
void rb_ast_node_type_change(NODE *n, enum node_type type);
#endif
Expand All @@ -65,7 +64,6 @@ void rb_node_init(NODE *n, enum node_type type);
void rb_ast_mark_and_move(rb_ast_t *ast, bool reference_updating);
void rb_ast_update_references(rb_ast_t*);
void rb_ast_free(rb_ast_t*);
void rb_ast_set_tokens(rb_ast_t*, VALUE);
NODE *rb_ast_newnode(rb_ast_t*, enum node_type type, size_t size, size_t alignment);
void rb_ast_delete_node(rb_ast_t*, NODE *n);
rb_ast_id_table_t *rb_ast_new_local_table(rb_ast_t*, int);
Expand Down

0 comments on commit 9a19cfd

Please sign in to comment.