From e1b18cb58275ea45adf6072e50389b2f0b5b8632 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Fri, 27 Mar 2026 12:30:05 +0100 Subject: [PATCH] Implement various string start/end events for ripper `tstring_beg` in particular is needed by `yard`. Before: > 1980 examples, 606 failures, 15 pending After: > 1980 examples, 582 failures, 15 pending Thought it would be more, but oh well. It needs `on_sp` which I guess is why there are not many new passes --- lib/prism/translation/ripper.rb | 243 ++++++++++++++++++-------------- test/prism/ruby/ripper_test.rb | 3 +- 2 files changed, 136 insertions(+), 110 deletions(-) diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb index e1dec0b47e..97abf74083 100644 --- a/lib/prism/translation/ripper.rb +++ b/lib/prism/translation/ripper.rb @@ -35,9 +35,6 @@ module Translation # - on_rparen # - on_semicolon # - on_sp - # - on_symbeg - # - on_tstring_beg - # - on_tstring_end # class Ripper < Compiler # Parses the given Ruby program read from +src+. @@ -2235,61 +2232,67 @@ def visit_interpolated_regular_expression_node(node) # "foo #{bar}" # ^^^^^^^^^^^^ def visit_interpolated_string_node(node) - if node.opening&.start_with?("<<~") - heredoc = visit_heredoc_string_node(node) + with_string_bounds(node) do + if node.opening&.start_with?("<<~") + heredoc = visit_heredoc_string_node(node) - bounds(node.location) - on_string_literal(heredoc) - elsif !node.heredoc? && node.parts.length > 1 && node.parts.any? { |part| (part.is_a?(StringNode) || part.is_a?(InterpolatedStringNode)) && !part.opening_loc.nil? } - first, *rest = node.parts - rest.inject(visit(first)) do |content, part| - concat = visit(part) - - bounds(part.location) - on_string_concat(content, concat) - end - else - bounds(node.parts.first.location) - parts = - node.parts.inject(on_string_content) do |content, part| - on_string_add(content, visit_string_content(part)) + bounds(node.location) + on_string_literal(heredoc) + elsif !node.heredoc? && node.parts.length > 1 && node.parts.any? { |part| (part.is_a?(StringNode) || part.is_a?(InterpolatedStringNode)) && !part.opening_loc.nil? } + first, *rest = node.parts + rest.inject(visit(first)) do |content, part| + concat = visit(part) + + bounds(part.location) + on_string_concat(content, concat) end + else + bounds(node.parts.first.location) + parts = + node.parts.inject(on_string_content) do |content, part| + on_string_add(content, visit_string_content(part)) + end - bounds(node.location) - on_string_literal(parts) + bounds(node.location) + on_string_literal(parts) + end end end # :"foo #{bar}" # ^^^^^^^^^^^^^ def visit_interpolated_symbol_node(node) - bounds(node.parts.first.location) - parts = - node.parts.inject(on_string_content) do |content, part| - on_string_add(content, visit_string_content(part)) - end + with_string_bounds(node) do + bounds(node.parts.first.location) + parts = + node.parts.inject(on_string_content) do |content, part| + on_string_add(content, visit_string_content(part)) + end - bounds(node.location) - on_dyna_symbol(parts) + bounds(node.location) + on_dyna_symbol(parts) + end end # `foo #{bar}` # ^^^^^^^^^^^^ def visit_interpolated_x_string_node(node) - if node.opening.start_with?("<<~") - heredoc = visit_heredoc_x_string_node(node) + with_string_bounds(node) do + if node.opening.start_with?("<<~") + heredoc = visit_heredoc_x_string_node(node) - bounds(node.location) - on_xstring_literal(heredoc) - else - bounds(node.parts.first.location) - parts = - node.parts.inject(on_xstring_new) do |content, part| - on_xstring_add(content, visit_string_content(part)) - end + bounds(node.location) + on_xstring_literal(heredoc) + else + bounds(node.parts.first.location) + parts = + node.parts.inject(on_xstring_new) do |content, part| + on_xstring_add(content, visit_string_content(part)) + end - bounds(node.location) - on_xstring_literal(parts) + bounds(node.location) + on_xstring_literal(parts) + end end end @@ -3022,24 +3025,60 @@ def visit_statements_node(node) # "foo" # ^^^^^ def visit_string_node(node) - if (content = node.content).empty? - bounds(node.location) - on_string_literal(on_string_content) - elsif (opening = node.opening) == "?" - bounds(node.location) - on_CHAR("?#{node.content}") - elsif opening.start_with?("<<~") - heredoc = visit_heredoc_string_node(node.to_interpolated) + with_string_bounds(node) do + if (content = node.content).empty? + bounds(node.location) + on_string_literal(on_string_content) + elsif (opening = node.opening) == "?" + bounds(node.location) + on_CHAR("?#{node.content}") + elsif opening.start_with?("<<~") + heredoc = visit_heredoc_string_node(node.to_interpolated) - bounds(node.location) - on_string_literal(heredoc) - else - bounds(node.content_loc) - tstring_content = on_tstring_content(content) + bounds(node.location) + on_string_literal(heredoc) + else + bounds(node.content_loc) + tstring_content = on_tstring_content(content) - bounds(node.location) - on_string_literal(on_string_add(on_string_content, tstring_content)) + bounds(node.location) + on_string_literal(on_string_add(on_string_content, tstring_content)) + end + end + end + + # Responsible for emitting the various string-like begin/end events + private def with_string_bounds(node) + # `foo "bar": baz` doesn't emit the closing location + assoc = !(opening = node.opening)&.include?(":") && node.closing&.end_with?(":") + + is_heredoc = opening&.start_with?("<<") + if is_heredoc + bounds(node.opening_loc) + on_heredoc_beg(node.opening) + elsif opening&.start_with?(":", "%s") + bounds(node.opening_loc) + on_symbeg(node.opening) + elsif opening&.start_with?("`", "%x") + bounds(node.opening_loc) + on_backtick(node.opening) + elsif opening && !opening.start_with?("?") + bounds(node.opening_loc) + on_tstring_beg(opening) end + + result = yield + return result if assoc + + if is_heredoc + bounds(node.closing_loc) + on_heredoc_end(node.closing) + elsif node.closing_loc + bounds(node.closing_loc) + on_tstring_end(node.closing) + end + + result end # Ripper gives back the escaped string content but strips out the common @@ -3119,36 +3158,18 @@ def visit_string_node(node) # Visit a heredoc node that is representing a string. private def visit_heredoc_string_node(node) - bounds(node.opening_loc) - on_heredoc_beg(node.opening) - bounds(node.location) - result = - visit_heredoc_node(node.parts, on_string_content) do |parts, part| - on_string_add(parts, part) - end - - bounds(node.closing_loc) - on_heredoc_end(node.closing) - - result + visit_heredoc_node(node.parts, on_string_content) do |parts, part| + on_string_add(parts, part) + end end # Visit a heredoc node that is representing an xstring. private def visit_heredoc_x_string_node(node) - bounds(node.opening_loc) - on_heredoc_beg(node.opening) - bounds(node.location) - result = - visit_heredoc_node(node.parts, on_xstring_new) do |parts, part| - on_xstring_add(parts, part) - end - - bounds(node.closing_loc) - on_heredoc_end(node.closing) - - result + visit_heredoc_node(node.parts, on_xstring_new) do |parts, part| + on_xstring_add(parts, part) + end end # super(foo) @@ -3175,23 +3196,25 @@ def visit_super_node(node) # :foo # ^^^^ def visit_symbol_node(node) - if node.value_loc.nil? - bounds(node.location) - on_dyna_symbol(on_string_content) - elsif (opening = node.opening)&.match?(/^%s|['"]:?$/) - bounds(node.value_loc) - content = on_string_add(on_string_content, on_tstring_content(node.value)) - bounds(node.location) - on_dyna_symbol(content) - elsif (closing = node.closing) == ":" - bounds(node.location) - on_label("#{node.value}:") - elsif opening.nil? && node.closing_loc.nil? - bounds(node.value_loc) - on_symbol_literal(visit_token(node.value)) - else - bounds(node.value_loc) - on_symbol_literal(on_symbol(visit_token(node.value))) + with_string_bounds(node) do + if node.value_loc.nil? + bounds(node.location) + on_dyna_symbol(on_string_content) + elsif (opening = node.opening)&.match?(/^%s|['"]:?$/) + bounds(node.value_loc) + content = on_string_add(on_string_content, on_tstring_content(node.value)) + bounds(node.location) + on_dyna_symbol(content) + elsif (closing = node.closing) == ":" + bounds(node.location) + on_label("#{node.value}:") + elsif opening.nil? && node.closing_loc.nil? + bounds(node.value_loc) + on_symbol_literal(visit_token(node.value)) + else + bounds(node.value_loc) + on_symbol_literal(on_symbol(visit_token(node.value))) + end end end @@ -3314,20 +3337,22 @@ def visit_while_node(node) # `foo` # ^^^^^ def visit_x_string_node(node) - if node.unescaped.empty? - bounds(node.location) - on_xstring_literal(on_xstring_new) - elsif node.opening.start_with?("<<~") - heredoc = visit_heredoc_x_string_node(node.to_interpolated) + with_string_bounds(node) do + if node.unescaped.empty? + bounds(node.location) + on_xstring_literal(on_xstring_new) + elsif node.opening.start_with?("<<~") + heredoc = visit_heredoc_x_string_node(node.to_interpolated) - bounds(node.location) - on_xstring_literal(heredoc) - else - bounds(node.content_loc) - content = on_tstring_content(node.content) + bounds(node.location) + on_xstring_literal(heredoc) + else + bounds(node.content_loc) + content = on_tstring_content(node.content) - bounds(node.location) - on_xstring_literal(on_xstring_add(on_xstring_new, content)) + bounds(node.location) + on_xstring_literal(on_xstring_add(on_xstring_new, content)) + end end end diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb index 8c80b9f886..85a52e2bb9 100644 --- a/test/prism/ruby/ripper_test.rb +++ b/test/prism/ruby/ripper_test.rb @@ -106,6 +106,7 @@ class RipperTest < TestCase "variables.txt", "whitequark/dedenting_heredoc.txt", "whitequark/masgn_nested.txt", + "whitequark/newline_in_hash_argument.txt", "whitequark/numparam_ruby_bug_19025.txt", "whitequark/op_asgn_cmd.txt", "whitequark/parser_drops_truncated_parts_of_squiggly_heredoc.txt", @@ -135,7 +136,7 @@ def test_lex_ignored_missing_heredoc_end end end - UNSUPPORTED_EVENTS = %i[backtick comma heredoc_beg heredoc_end ignored_nl kw label_end lbrace lbracket lparen nl op rbrace rbracket rparen semicolon sp symbeg tstring_beg tstring_end words_sep ignored_sp] + UNSUPPORTED_EVENTS = %i[comma ignored_nl kw label_end lbrace lbracket lparen nl op rbrace rbracket rparen semicolon sp words_sep ignored_sp] SUPPORTED_EVENTS = Translation::Ripper::EVENTS - UNSUPPORTED_EVENTS module Events