[Universal Parser] Reduce dependence on RArray in parse.y

- Introduce `rb_parser_ary_t` structure to partly eliminate RArray from parse.y
  - In this patch, `parser_params->tokens` and `parser_params->ast->node_buffer->tokens` are now `rb_parser_ary_t *`
  - Instead, `ast_node_all_tokens()` internally creates a Ruby Array object from the `rb_parser_ary_t`
  - Also, delete `rb_ast_tokens()` and `rb_ast_set_tokens()` in node.c
- Implement `rb_parser_str_escape()`
  - This is a port of the `rb_str_escape()` function in string.c
  - `rb_parser_str_escape()` does not depend on `VALUE` (RString)
  - Instead, it uses `rb_parser_string_t *`
  - This function is used when the `--dump=y` option is passed
- Because the universal parser is still a work in progress, similar functions like `rb_parser_tokens_free()` exist in both node.c and parse.y. They may need to be refactored in some way in the future
- Although we considered redesigning the structure: `ast->node_buffer->tokens` into `ast->tokens`, we leave it as it is because `rb_ast_t` is an imemo. (We will address it in the future)
ruby · Mar 12, 2024 · 9a19cfd · 9a19cfd
1 parent f42164e
commit 9a19cfd
Show file tree

Hide file tree

Showing 7 changed files with 429 additions and 215 deletions.
diff --git a/ast.c b/ast.c
@@ -774,10 +774,35 @@ ast_node_last_column(rb_execution_context_t *ec, VALUE self)
 static VALUE
 ast_node_all_tokens(rb_execution_context_t *ec, VALUE self)
 {
+    long i;
     struct ASTNodeData *data;
+    rb_parser_ary_t *parser_tokens;
+    rb_parser_ast_token_t *parser_token;
+    VALUE str, loc, token, all_tokens;
+
     TypedData_Get_Struct(self, struct ASTNodeData, &rb_node_type, data);
 
-    return rb_ast_tokens(data->ast);
+    parser_tokens = data->ast->node_buffer->tokens; /* token list kept on the node buffer; converted to a Ruby Array below */
+    if (parser_tokens == NULL) {
+        return Qnil; /* no token list on this AST: return nil rather than an empty Array */
+    }
+
+    all_tokens = rb_ary_new2(parser_tokens->len); /* result Array, sized for one entry per token */
+    for (i = 0; i < parser_tokens->len; i++) {
+        parser_token = parser_tokens->data[i];
+        str = rb_str_new(parser_token->str->ptr, parser_token->str->len); /* token text as a new Ruby String built from ptr/len */
+        loc = rb_ary_new_from_args(4, /* loc = [begin lineno, begin column, end lineno, end column] */
+            INT2FIX(parser_token->loc.beg_pos.lineno),
+            INT2FIX(parser_token->loc.beg_pos.column),
+            INT2FIX(parser_token->loc.end_pos.lineno),
+            INT2FIX(parser_token->loc.end_pos.column)
+        );
+        token = rb_ary_new_from_args(4, INT2FIX(parser_token->id), ID2SYM(rb_intern(parser_token->type_name)), str, loc); /* entry = [id, type name as Symbol, text, loc] */
+        rb_ary_push(all_tokens, token);
+    }
+    rb_obj_freeze(all_tokens); /* only the outer Array is frozen here; the per-token entries are not */
+
+    return all_tokens;
 }
 
 static VALUE

diff --git a/node.c b/node.c
@@ -69,7 +69,7 @@ rb_node_buffer_new(void)
     init_node_buffer_list(&nb->unmarkable, (node_buffer_elem_t*)&nb[1], ruby_xmalloc);
     init_node_buffer_list(&nb->markable, (node_buffer_elem_t*)((size_t)nb->unmarkable.head + bucket_size), ruby_xmalloc);
     nb->local_tables = 0;
-    nb->tokens = Qnil;
+    nb->tokens = 0;
 #ifdef UNIVERSAL_PARSER
     nb->config = config;
 #endif
@@ -176,6 +176,24 @@ parser_string_free(rb_ast_t *ast, rb_parser_string_t *str)
     xfree(str);
 }
 
+static void
+parser_ast_token_free(rb_ast_t *ast, rb_parser_ast_token_t *token) /* Free one token together with the string it references. */
+{
+    if (!token) return; /* NULL token: nothing to release */
+    parser_string_free(ast, token->str); /* release the token's string before the token struct itself */
+    xfree(token);
+}
+
+static void
+parser_tokens_free(rb_ast_t *ast, rb_parser_ary_t *tokens) /* Free every token in the list, then its data array, then the list itself. */
+{
+    for (long i = 0; i < tokens->len; i++) { /* tokens is dereferenced without a NULL check, so it must be non-NULL */
+        parser_ast_token_free(ast, tokens->data[i]); /* individual entries may be NULL; the callee returns early for those */
+    }
+    xfree(tokens->data); /* the element storage */
+    xfree(tokens); /* the container struct */
+}
+
 static void
 free_ast_value(rb_ast_t *ast, void *ctx, NODE *node)
 {
@@ -228,6 +246,9 @@ free_ast_value(rb_ast_t *ast, void *ctx, NODE *node)
 static void
 rb_node_buffer_free(rb_ast_t *ast, node_buffer_t *nb)
 {
+    if (ast->node_buffer && ast->node_buffer->tokens) {
+        parser_tokens_free(ast, ast->node_buffer->tokens);
+    }
     iterate_node_values(ast, &nb->unmarkable, free_ast_value, NULL);
     node_buffer_list_free(ast, &nb->unmarkable);
     node_buffer_list_free(ast, &nb->markable);
@@ -388,8 +409,6 @@ void
 rb_ast_mark_and_move(rb_ast_t *ast, bool reference_updating)
 {
     if (ast->node_buffer) {
-        rb_gc_mark_and_move(&ast->node_buffer->tokens);
-
         node_buffer_t *nb = ast->node_buffer;
         iterate_node_values(ast, &nb->markable, mark_and_move_ast_value, NULL);
 
@@ -438,18 +457,6 @@ rb_ast_dispose(rb_ast_t *ast)
     rb_ast_free(ast);
 }
 
-VALUE
-rb_ast_tokens(rb_ast_t *ast)
-{
-    return ast->node_buffer->tokens;
-}
-
-void
-rb_ast_set_tokens(rb_ast_t *ast, VALUE tokens)
-{
-    RB_OBJ_WRITE(ast, &ast->node_buffer->tokens, tokens);
-}
-
 VALUE
 rb_node_set_type(NODE *n, enum node_type t)
 {

diff --git a/node.h b/node.h
@@ -40,7 +40,7 @@ struct node_buffer_struct {
     // - text of token
     // - location info
     // Array, whose entry is array
-    VALUE tokens;
+    rb_parser_ary_t *tokens;
 #ifdef UNIVERSAL_PARSER
     const rb_parser_config_t *config;
 #endif
@@ -55,7 +55,6 @@ rb_ast_t *rb_ast_new(void);
 #endif
 size_t rb_ast_memsize(const rb_ast_t*);
 void rb_ast_dispose(rb_ast_t*);
-VALUE rb_ast_tokens(rb_ast_t *ast);
 #if RUBY_DEBUG
 void rb_ast_node_type_change(NODE *n, enum node_type type);
 #endif
@@ -65,7 +64,6 @@ void rb_node_init(NODE *n, enum node_type type);
 void rb_ast_mark_and_move(rb_ast_t *ast, bool reference_updating);
 void rb_ast_update_references(rb_ast_t*);
 void rb_ast_free(rb_ast_t*);
-void rb_ast_set_tokens(rb_ast_t*, VALUE);
 NODE *rb_ast_newnode(rb_ast_t*, enum node_type type, size_t size, size_t alignment);
 void rb_ast_delete_node(rb_ast_t*, NODE *n);
 rb_ast_id_table_t *rb_ast_new_local_table(rb_ast_t*, int);