Skip to content

Commit c1652a9

Browse files
committed
Fix an AST and token incompatibility for Prism::Translation::Parser
This PR fixes an AST and token incompatibility between the Parser gem and `Prism::Translation::Parser` for a dstring literal: ```ruby "foo #{bar}" ``` ## Parser gem (Expected) ```console $ bundle exec ruby -Ilib -rparser/ruby33 -ve \ 'buf = Parser::Source::Buffer.new("example.rb"); buf.source = File.read("example.rb"); p Parser::Ruby33.new.tokenize(buf)' ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [x86_64-darwin22] [s(:dstr, s(:str, "foo\n"), s(:str, " "), s(:begin, s(:send, nil, :bar))), [], [[:tSTRING_BEG, ["\"", #<Parser::Source::Range example.rb 0...1>]], [:tSTRING_CONTENT, ["foo\n", #<Parser::Source::Range example.rb 1...5>]], [:tSTRING_CONTENT, [" ", #<Parser::Source::Range example.rb 5...7>]], [:tSTRING_DBEG, ["\#{", #<Parser::Source::Range example.rb 7...9>]], [:tIDENTIFIER, ["bar", #<Parser::Source::Range example.rb 9...12>]], [:tSTRING_DEND, ["}", #<Parser::Source::Range example.rb 12...13>]], [:tSTRING_END, ["\"", #<Parser::Source::Range example.rb 13...14>]], [:tNL, [nil, #<Parser::Source::Range example.rb 14...15>]]]] ``` ## `Prism::Translation::Parser` (Actual) Previously, the AST and tokens returned by `Prism::Translation::Parser` were different from those returned by the Parser gem. 
In this case, the `dstr` node should not be nested: ```console $ bundle exec ruby -Ilib -rprism -rprism/translation/parser33 -ve \ 'buf = Parser::Source::Buffer.new("example.rb"); buf.source = File.read("example.rb"); p Prism::Translation::Parser33.new.tokenize(buf)' ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [x86_64-darwin22] [s(:dstr, s(:dstr, s(:str, "foo\n"), s(:str, " ")), s(:begin, s(:send, nil, :bar))), [], [[:tSTRING_BEG, ["\"", #<Parser::Source::Range example.rb 0...1>]], [:tSTRING_CONTENT, ["foo\n", #<Parser::Source::Range example.rb 1...5>]], [:tSTRING_CONTENT, [" ", #<Parser::Source::Range example.rb 5...7>]], [:tSTRING_DBEG, ["\#{", #<Parser::Source::Range example.rb 7...9>]], [:tIDENTIFIER, ["bar", #<Parser::Source::Range example.rb 9...12>]], [:tSTRING_DEND, ["}", #<Parser::Source::Range example.rb 12...13>]], [:tSTRING_END, ["\"", #<Parser::Source::Range example.rb 13...14>]], [:tNL, [nil, #<Parser::Source::Range example.rb 14...15>]]]] ``` After this correction, the AST and tokens returned by `Prism::Translation::Parser` are the same as those returned by the Parser gem: ```console $ bundle exec ruby -Ilib -rprism -rprism/translation/parser33 -ve \ 'buf = Parser::Source::Buffer.new("example.rb"); buf.source = File.read("example.rb"); p Prism::Translation::Parser33.new.tokenize(buf)' ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [x86_64-darwin22] [s(:dstr, s(:str, "foo\n"), s(:str, " "), s(:begin, s(:send, nil, :bar))), [], [[:tSTRING_BEG, ["\"", #<Parser::Source::Range example.rb 0...1>]], [:tSTRING_CONTENT, ["foo\n", #<Parser::Source::Range example.rb 1...5>]], [:tSTRING_CONTENT, [" ", #<Parser::Source::Range example.rb 5...7>]], [:tSTRING_DBEG, ["\#{", #<Parser::Source::Range example.rb 7...9>]], [:tIDENTIFIER, ["bar", #<Parser::Source::Range example.rb 9...12>]], [:tSTRING_DEND, ["}", #<Parser::Source::Range example.rb 12...13>]], [:tSTRING_END, ["\"", #<Parser::Source::Range example.rb 13...14>]], [:tNL, [nil, #<Parser::Source::Range example.rb 14...15>]]]] ```
1 parent 20e768d commit c1652a9

File tree

4 files changed

+156
-20
lines changed

4 files changed

+156
-20
lines changed

lib/prism/translation/parser/compiler.rb

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -953,14 +953,35 @@ def visit_interpolated_regular_expression_node(node)
953953
def visit_interpolated_string_node(node)
954954
if node.heredoc?
955955
children, closing = visit_heredoc(node)
956-
builder.string_compose(token(node.opening_loc), children, closing)
956+
957+
return builder.string_compose(token(node.opening_loc), children, closing)
958+
end
959+
960+
parts = if node.parts.one? { |part| part.type == :string_node }
961+
node.parts.flat_map do |node|
962+
if node.type == :string_node && node.unescaped.lines.count >= 2
963+
start_offset = node.content_loc.start_offset
964+
965+
node.unescaped.lines.map do |line|
966+
end_offset = start_offset + line.length
967+
offsets = srange_offsets(start_offset, end_offset)
968+
start_offset = end_offset
969+
970+
builder.string_internal([line, offsets])
971+
end
972+
else
973+
visit(node)
974+
end
975+
end
957976
else
958-
builder.string_compose(
959-
token(node.opening_loc),
960-
visit_all(node.parts),
961-
token(node.closing_loc)
962-
)
977+
visit_all(node.parts)
963978
end
979+
980+
builder.string_compose(
981+
token(node.opening_loc),
982+
parts,
983+
token(node.closing_loc)
984+
)
964985
end
965986

966987
# :"foo #{bar}"
@@ -1487,17 +1508,17 @@ def visit_string_node(node)
14871508
elsif node.opening == "?"
14881509
builder.character([node.unescaped, srange(node.location)])
14891510
else
1490-
parts = if node.unescaped.lines.count <= 1
1511+
parts = if node.content.lines.count <= 1 || node.unescaped.lines.count <= 1
14911512
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
14921513
else
14931514
start_offset = node.content_loc.start_offset
14941515

1495-
node.unescaped.lines.map do |line|
1496-
end_offset = start_offset + line.length
1516+
[node.content.lines, node.unescaped.lines].transpose.map do |content_line, unescaped_line|
1517+
end_offset = start_offset + content_line.length
14971518
offsets = srange_offsets(start_offset, end_offset)
14981519
start_offset = end_offset
14991520

1500-
builder.string_internal([line, offsets])
1521+
builder.string_internal([unescaped_line, offsets])
15011522
end
15021523
end
15031524

lib/prism/translation/parser/lexer.rb

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -295,8 +295,24 @@ def to_a
295295
unless (lines = token.value.lines).one?
296296
start_offset = offset_cache[token.location.start_offset]
297297
lines.map do |line|
298-
end_offset = start_offset + line.length
299-
tokens << [:tSTRING_CONTENT, [line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]]
298+
newline = line.end_with?("\r\n") ? "\r\n" : "\n"
299+
chomped_line = line.chomp
300+
if match = chomped_line.match(/(?<backslashes>\\+)\z/)
301+
adjustment = match[:backslashes].size / 2
302+
adjusted_line = chomped_line.delete_suffix("\\" * adjustment)
303+
if match[:backslashes].size.odd?
304+
adjusted_line.delete_suffix!("\\")
305+
adjustment += 2
306+
else
307+
adjusted_line << newline
308+
end
309+
else
310+
adjusted_line = line
311+
adjustment = 0
312+
end
313+
314+
end_offset = start_offset + adjusted_line.length + adjustment
315+
tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]]
300316
start_offset = end_offset
301317
end
302318
next

test/prism/fixtures/dstring.txt

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,29 @@
11
"foo
22
bar"
3+
4+
"foo
5+
#{bar}"
6+
7+
"fo
8+
o" "ba
9+
r"
10+
11+
"
12+
foo\
13+
"
14+
15+
"
16+
foo\\
17+
"
18+
19+
"
20+
foo\\\
21+
"
22+
23+
"
24+
foo\\\\
25+
"
26+
27+
"
28+
foo\\\\\
29+
"

test/prism/snapshots/dstring.txt

Lines changed: 80 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)