From 2af82e23165180f20ca2af374aedb7a45dedcc20 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 29 Nov 2023 11:46:33 +0100 Subject: [PATCH] [ruby/prism] Convert start line to signed integers Ruby allows for 0 or negative line start, this is often used with `eval` calls to get a correct offset when prefixing a snippet. e.g. ```ruby caller = caller_locations(1, 1).first class_eval <<~RUBY, caller.path, caller.line - 2 # frozen_string_literal: true def some_method #{caller_provided_code_snippet} end RUBY ``` https://github.com/ruby/prism/commit/0d14ed1452 --- prism/extension.c | 3 ++- prism/options.c | 23 ++++++++++++++++++++-- prism/options.h | 4 ++-- prism/parser.h | 2 +- prism/prism.c | 10 ++++------ prism/templates/ext/prism/api_node.c.erb | 2 +- prism/templates/lib/prism/serialize.rb.erb | 7 ++++++- prism/templates/src/serialize.c.erb | 8 ++++---- prism/util/pm_buffer.c | 9 +++++++++ prism/util/pm_buffer.h | 8 ++++++++ test/prism/parse_test.rb | 16 +++++++++++++++ 11 files changed, 74 insertions(+), 18 deletions(-) diff --git a/prism/extension.c b/prism/extension.c index 3637cc1617e5ba..c3ee58d15e2ba5 100644 --- a/prism/extension.c +++ b/prism/extension.c @@ -126,7 +126,7 @@ build_options_i(VALUE key, VALUE value, VALUE argument) { } else if (key_id == rb_option_id_encoding) { if (!NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value))); } else if (key_id == rb_option_id_line) { - if (!NIL_P(value)) pm_options_line_set(options, NUM2UINT(value)); + if (!NIL_P(value)) pm_options_line_set(options, NUM2INT(value)); } else if (key_id == rb_option_id_frozen_string_literal) { if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, value == Qtrue); } else if (key_id == rb_option_id_verbose) { @@ -166,6 +166,7 @@ build_options(VALUE argument) { */ static void extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) { + options->line = 1; // default if (!NIL_P(keywords)) { struct build_options_data data = { .options = 
/**
 * Read a 32-bit signed integer from a pointer. This function is used to read
 * the options that are passed into the parser from the Ruby implementation.
 *
 * The bytes are always copied out with memcpy instead of being loaded through
 * a casted `int32_t *`. That keeps the read well-defined for every alignment
 * of `data`, avoids casting away `const`, and avoids accessing a byte buffer
 * through an incompatible lvalue type. The bytes are interpreted in the host's
 * native byte order.
 *
 * @param data A pointer to at least sizeof(int32_t) readable bytes.
 * @return The 32-bit signed integer stored at `data`.
 */
static int32_t
pm_options_read_s32(const char *data) {
    int32_t value;
    memcpy(&value, data, sizeof(int32_t));
    return value;
}
*/ - uint32_t line; + int32_t line; /** * The name of the encoding that the source file is in. Note that this must @@ -80,7 +80,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, cons * @param options The options struct to set the line on. * @param line The line to set. */ -PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, uint32_t line); +PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t line); /** * Set the encoding option on the given options struct. diff --git a/prism/parser.h b/prism/parser.h index c1f9e0f663af05..dfc15e19b7b7b7 100644 --- a/prism/parser.h +++ b/prism/parser.h @@ -661,7 +661,7 @@ struct pm_parser { * The line number at the start of the parse. This will be used to offset * the line numbers of all of the locations. */ - uint32_t start_line; + int32_t start_line; /** Whether or not we're at the beginning of a command. */ bool command_start; diff --git a/prism/prism.c b/prism/prism.c index aee9fc7b886737..1a4d31c268a944 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -17067,9 +17067,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm parser->filepath_string = options->filepath; // line option - if (options->line > 0) { - parser->start_line = options->line; - } + parser->start_line = options->line; // encoding option size_t encoding_length = pm_string_length(&options->encoding); @@ -17238,7 +17236,7 @@ pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { PRISM_EXPORTED_FUNCTION void pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) { pm_options_t options = { 0 }; - if (data != NULL) pm_options_read(&options, data); + pm_options_read(&options, data); pm_parser_t parser; pm_parser_init(&parser, source, size, &options); @@ -17260,7 +17258,7 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons PRISM_EXPORTED_FUNCTION void 
# Read a variable-length unsigned integer and undo its zigzag encoding:
# the lowest bit carries the sign and the remaining bits carry the
# magnitude, so an odd input decodes to the bitwise complement of its
# upper bits and an even input decodes to the upper bits unchanged.
def load_varsint
  zigzag = load_varuint
  half = zigzag >> 1
  zigzag.odd? ? ~half : half
end
100644 --- a/prism/templates/src/serialize.c.erb +++ b/prism/templates/src/serialize.c.erb @@ -219,7 +219,7 @@ pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) { void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { pm_serialize_encoding(&parser->encoding, buffer); - pm_buffer_append_varuint(buffer, parser->start_line); + pm_buffer_append_varsint(buffer, parser->start_line); <%- unless Prism::SERIALIZE_ONLY_SEMANTICS_FIELDS -%> pm_serialize_comment_list(parser, &parser->comment_list, buffer); <%- end -%> @@ -301,7 +301,7 @@ serialize_token(void *data, pm_parser_t *parser, pm_token_t *token) { PRISM_EXPORTED_FUNCTION void pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) { pm_options_t options = { 0 }; - if (data != NULL) pm_options_read(&options, data); + pm_options_read(&options, data); pm_parser_t parser; pm_parser_init(&parser, source, size, &options); @@ -318,7 +318,7 @@ pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const pm_buffer_append_byte(buffer, 0); pm_serialize_encoding(&parser.encoding, buffer); - pm_buffer_append_varuint(buffer, parser.start_line); + pm_buffer_append_varsint(buffer, parser.start_line); pm_serialize_comment_list(&parser, &parser.comment_list, buffer); pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer); pm_serialize_data_loc(&parser, buffer); @@ -337,7 +337,7 @@ pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const PRISM_EXPORTED_FUNCTION void pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) { pm_options_t options = { 0 }; - if (data != NULL) pm_options_read(&options, data); + pm_options_read(&options, data); pm_parser_t parser; pm_parser_init(&parser, source, size, &options); diff --git a/prism/util/pm_buffer.c b/prism/util/pm_buffer.c index dcdf1770bb48f8..307b55d030b9ce 100644 --- a/prism/util/pm_buffer.c +++ 
b/prism/util/pm_buffer.c @@ -151,6 +151,15 @@ pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value) { } } +/** + * Append a 32-bit signed integer to the buffer as a variable-length integer. + */ +void +pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value) { + uint32_t unsigned_int = ((uint32_t)(value) << 1) ^ ((uint32_t)(value >> 31)); + pm_buffer_append_varuint(buffer, unsigned_int); +} + /** * Concatenate one buffer onto another. */ diff --git a/prism/util/pm_buffer.h b/prism/util/pm_buffer.h index a8596be476beb5..ec11d05e9bd42b 100644 --- a/prism/util/pm_buffer.h +++ b/prism/util/pm_buffer.h @@ -120,6 +120,14 @@ void pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value); */ void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value); +/** + * Append a 32-bit signed integer to the buffer as a variable-length integer. + * + * @param buffer The buffer to append to. + * @param value The integer to append. + */ +void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value); + /** * Concatenate one buffer onto another. * diff --git a/test/prism/parse_test.rb b/test/prism/parse_test.rb index 6bd7a5d2a1f07d..2feb15b48bab01 100644 --- a/test/prism/parse_test.rb +++ b/test/prism/parse_test.rb @@ -46,6 +46,22 @@ def test_parse_takes_file_path assert_equal filepath, find_source_file_node(result.value).filepath end + def test_parse_takes_line + line = 4 + result = Prism.parse("def foo\n __FILE__\nend", line: line) + + assert_equal line, result.value.location.start_line + assert_equal line + 1, find_source_file_node(result.value).location.start_line + end + + def test_parse_takes_negative_lines + line = -2 + result = Prism.parse("def foo\n __FILE__\nend", line: line) + + assert_equal line, result.value.location.start_line + assert_equal line + 1, find_source_file_node(result.value).location.start_line + end + def test_parse_lex node, tokens = Prism.parse_lex("def foo; end").value