Skip to content

Commit de9e55d

Browse files
committed
Tilde heredocs for ripper translation
1 parent 4d08736 commit de9e55d

File tree

2 files changed

+146
-33
lines changed

2 files changed

+146
-33
lines changed

lib/prism/translation/ripper.rb

Lines changed: 146 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1647,14 +1647,21 @@ def visit_interpolated_regular_expression_node(node)
16471647
# "foo #{bar}"
16481648
# ^^^^^^^^^^^^
16491649
def visit_interpolated_string_node(node)
  # Dispatches an interpolated string either to the squiggly-heredoc visitor
  # (which strips common indentation) or to the regular part-by-part path.
  #
  # node.opening can be nil when the node has no opening location of its own
  # (for example adjacent literals such as "foo" "bar"), so &. is used to
  # keep those on the regular path instead of raising NoMethodError.
  if node.opening&.start_with?("<<~")
    heredoc = visit_string_heredoc_node(node.parts)

    bounds(node.location)
    on_string_literal(heredoc)
  else
    bounds(node.parts.first.location)
    parts =
      node.parts.inject(on_string_content) do |content, part|
        on_string_add(content, visit_string_content(part))
      end

    bounds(node.location)
    on_string_literal(parts)
  end
end
16591666

16601667
# :"foo #{bar}"
@@ -1673,14 +1680,21 @@ def visit_interpolated_symbol_node(node)
16731680
# `foo #{bar}`
16741681
# ^^^^^^^^^^^^
16751682
def visit_interpolated_x_string_node(node)
  # Build the xstring body first: squiggly heredocs go through the dedicated
  # heredoc visitor, everything else is accumulated part by part.
  xstring =
    if node.opening.start_with?("<<~")
      visit_x_string_heredoc_node(node.parts)
    else
      bounds(node.parts.first.location)
      node.parts.reduce(on_xstring_new) do |accumulated, part|
        on_xstring_add(accumulated, visit_string_content(part))
      end
    end

  # The literal event itself is always reported against the whole node.
  bounds(node.location)
  on_xstring_literal(xstring)
end
16851699

16861700
# Visit an individual part of a string-like node.
@@ -1846,12 +1860,12 @@ def visit_local_variable_target_node(node)
18461860
# ^^^^^
18471861
def visit_match_last_line_node(node)
  # The raw source text of the pattern (node.content) is what gets reported,
  # positioned at the content location.
  bounds(node.content_loc)
  source = on_tstring_content(node.content)

  bounds(node.closing_loc)
  terminator = on_regexp_end(node.closing)

  # Assemble the regexp only after the closing event, so the builder events
  # fire with the bounds left at the closing location.
  regexp = on_regexp_add(on_regexp_new, source)
  on_regexp_literal(regexp, terminator)
end
18561870

18571871
# foo in bar
@@ -2140,12 +2154,12 @@ def visit_redo_node(node)
21402154
# ^^^^^
21412155
def visit_regular_expression_node(node)
  # The raw source text of the pattern (node.content) is what gets reported,
  # positioned at the content location.
  bounds(node.content_loc)
  source = on_tstring_content(node.content)

  bounds(node.closing_loc)
  terminator = on_regexp_end(node.closing)

  # Assemble the regexp only after the closing event, so the builder events
  # fire with the bounds left at the closing location.
  regexp = on_regexp_add(on_regexp_new, source)
  on_regexp_literal(regexp, terminator)
end
21502164

21512165
# def foo(bar:); end
@@ -2320,18 +2334,113 @@ def visit_statements_node(node)
23202334
# "foo"
23212335
# ^^^^^
23222336
def visit_string_node(node)
  # Emits the events for a plain (non-interpolated) string. Four shapes are
  # distinguished: empty content, a ?x character literal, a squiggly heredoc
  # (which needs its common indentation stripped), and everything else.
  if (content = node.content).empty?
    bounds(node.location)
    on_string_literal(on_string_content)
  elsif (opening = node.opening) == "?"
    bounds(node.location)
    on_CHAR("?#{content}")
  elsif opening&.start_with?("<<~")
    # opening may be nil for nodes without an opening location, hence &.
    # rather than a bare call that would raise NoMethodError.
    heredoc = visit_string_heredoc_node([node])

    bounds(node.location)
    on_string_literal(heredoc)
  else
    bounds(node.content_loc)
    tstring_content = on_tstring_content(content)

    bounds(node.location)
    on_string_literal(on_string_add(on_string_content, tstring_content))
  end
end
2356+
2357+
# Ripper gives back the escaped string content but strips out the common
# leading whitespace. Prism gives back the unescaped string content and a
# location for the escaped string content. Unfortunately these don't work
# well together, so we need to re-derive the common leading whitespace.
#
# parts is the list of heredoc parts; only StringNode parts that begin a
# line are measured (a StringNode directly after a non-string part is the
# continuation of a line and is skipped). Blank lines, meaning empty content
# or only spaces/tabs followed by a line terminator, are ignored, because
# they do not contribute to a squiggly heredoc's indentation.
#
# Returns the smallest indentation width found, where a tab advances to the
# next multiple of 8, or 0 when no line could be measured.
private def heredoc_common_whitespace(parts)
  common_whitespace = nil
  dedent_next = true

  parts.each do |part|
    if part.is_a?(StringNode)
      content = part.content
      blank_line = content.empty? || content.match?(/\A[ \t]*\R\z/)

      if dedent_next && !blank_line
        width =
          content[/\A[ \t]*/].each_char.inject(0) do |part_whitespace, char|
            char == "\t" ? ((part_whitespace / 8 + 1) * 8) : (part_whitespace + 1)
          end

        common_whitespace = width if common_whitespace.nil? || width < common_whitespace
      end

      dedent_next = true
    else
      dedent_next = false
    end
  end

  # Callers compare this against integers, so never hand back nil.
  common_whitespace || 0
end
2384+
2385+
# Take the content of a string and return the index of the first character
# that is not trimmed out by eliminating common whitespace.
#
# Only spaces and tabs count as indentation; a newline or any other
# character stops the scan, so a blank line keeps its line terminator. A tab
# advances to the next multiple of 8 and is left in place when that would
# overshoot common_whitespace. A nil common_whitespace is treated as 0.
private def heredoc_trimmed_whitespace(content, common_whitespace)
  limit = common_whitespace || 0
  trimmed_whitespace = 0
  index = 0

  while index < content.length && trimmed_whitespace < limit
    case content[index]
    when " "
      trimmed_whitespace += 1
    when "\t"
      next_stop = (trimmed_whitespace / 8 + 1) * 8
      break if next_stop > limit

      trimmed_whitespace = next_stop
    else
      break
    end

    index += 1
  end

  index
end
2404+
2405+
# Visit a string that is expressed using a <<~ heredoc.
#
# parts is the list of nodes making up the heredoc body. StringNode parts
# that begin a line have the common leading whitespace removed before being
# reported; a StringNode that directly follows a non-string part continues
# the current line and is reported untouched. Any other part is visited
# normally. Returns the accumulated string content.
private def visit_string_heredoc_node(parts)
  # Fall back to 0 so a heredoc with nothing measurable is simply not trimmed.
  common_whitespace = heredoc_common_whitespace(parts) || 0
  at_line_start = true

  bounds(parts.first.location)
  parts.inject(on_string_content) do |string_content, part|
    piece =
      if part.is_a?(StringNode)
        content = part.content
        trimmed_whitespace =
          at_line_start ? heredoc_trimmed_whitespace(content, common_whitespace) : 0
        at_line_start = true

        # Shift the reported start past the indentation that was stripped.
        content_loc = part.content_loc
        bounds(content_loc.copy(start_offset: content_loc.start_offset + trimmed_whitespace))
        on_tstring_content(content[trimmed_whitespace..])
      else
        at_line_start = false
        visit(part)
      end

    on_string_add(string_content, piece)
  end
end
2425+
2426+
# Visit an xstring that is expressed using a <<~ heredoc.
#
# parts is the list of nodes making up the heredoc body. StringNode parts
# that begin a line have the common leading whitespace removed before being
# reported; a StringNode that directly follows a non-string part continues
# the current line and is reported untouched. Any other part is visited
# normally. Returns the accumulated xstring content.
private def visit_x_string_heredoc_node(parts)
  # Fall back to 0 so a heredoc with nothing measurable is simply not trimmed.
  common_whitespace = heredoc_common_whitespace(parts) || 0
  at_line_start = true

  bounds(parts.first.location)
  parts.inject(on_xstring_new) do |xstring, part|
    piece =
      if part.is_a?(StringNode)
        content = part.content
        trimmed_whitespace =
          at_line_start ? heredoc_trimmed_whitespace(content, common_whitespace) : 0
        at_line_start = true

        # Shift the reported start past the indentation that was stripped.
        content_loc = part.content_loc
        bounds(content_loc.copy(start_offset: content_loc.start_offset + trimmed_whitespace))
        on_tstring_content(content[trimmed_whitespace..])
      else
        at_line_start = false
        visit(part)
      end

    on_xstring_add(xstring, piece)
  end
end
23372446

@@ -2514,11 +2623,21 @@ def visit_while_node(node)
25142623
# `foo`
25152624
# ^^^^^
25162625
def visit_x_string_node(node)
  # Emits the events for a plain (non-interpolated) xstring. Emptiness is
  # decided on the raw source text (node.content), the same value that is
  # reported below and the same test visit_string_node uses, so source that
  # is non-empty but unescapes to "" is still reported.
  if (content = node.content).empty?
    bounds(node.location)
    on_xstring_literal(on_xstring_new)
  elsif node.opening.start_with?("<<~")
    heredoc = visit_x_string_heredoc_node([node])

    bounds(node.location)
    on_xstring_literal(heredoc)
  else
    bounds(node.content_loc)
    tstring_content = on_tstring_content(content)

    bounds(node.location)
    on_xstring_literal(on_xstring_add(on_xstring_new, tstring_content))
  end
end
25232642

25242643
# yield

test/prism/ripper_test.rb

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ class RipperTest < TestCase
4646
seattlerb/block_next.txt
4747
seattlerb/block_paren_splat.txt
4848
seattlerb/block_return.txt
49-
seattlerb/bug190.txt
5049
seattlerb/bug_hash_args_trailing_comma.txt
5150
seattlerb/bug_hash_interp_array.txt
5251
seattlerb/call_args_assoc_quoted.txt
@@ -120,8 +119,6 @@ class RipperTest < TestCase
120119
seattlerb/parse_pattern_076.txt
121120
seattlerb/quoted_symbol_hash_arg.txt
122121
seattlerb/quoted_symbol_keys.txt
123-
seattlerb/regexp_esc_C_slash.txt
124-
seattlerb/regexp_escape_extended.txt
125122
seattlerb/rescue_do_end_ensure_result.txt
126123
seattlerb/rescue_do_end_no_raise.txt
127124
seattlerb/rescue_do_end_raised.txt
@@ -138,7 +135,6 @@ class RipperTest < TestCase
138135
symbols.txt
139136
ternary_operator.txt
140137
tilde_heredocs.txt
141-
unescaping.txt
142138
unless.txt
143139
unparser/corpus/literal/assignment.txt
144140
unparser/corpus/literal/block.txt
@@ -216,7 +212,6 @@ class RipperTest < TestCase
216212
whitequark/ruby_bug_11107.txt
217213
whitequark/ruby_bug_11873.txt
218214
whitequark/ruby_bug_11873_a.txt
219-
whitequark/ruby_bug_11989.txt
220215
whitequark/ruby_bug_11990.txt
221216
whitequark/ruby_bug_15789.txt
222217
whitequark/send_block_chain_cmd.txt
@@ -225,7 +220,6 @@ class RipperTest < TestCase
225220
whitequark/slash_newline_in_heredocs.txt
226221
whitequark/string_concat.txt
227222
whitequark/trailing_forward_arg.txt
228-
xstring.txt
229223
]
230224

231225
relatives.each do |relative|

0 commit comments

Comments
 (0)