Skip to content

Commit bd3dd2b

Browse files
committed
Fix parser translator tokens for %-arrays with whitespace escapes
Also fixes a token incompatibility for the word separator: the parser gem only considers whitespace until the first newline.
1 parent a0571d9 commit bd3dd2b

File tree

4 files changed

+254
-138
lines changed

4 files changed

+254
-138
lines changed

lib/prism/translation/parser/lexer.rb

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,7 @@ def to_a
339339
when :tRATIONAL
340340
value = parse_rational(value)
341341
when :tSPACE
342+
location = range(token.location.start_offset, token.location.start_offset + percent_array_leading_whitespace(value))
342343
value = nil
343344
when :tSTRING_BEG
344345
next_token = lexed[index][0]
@@ -395,12 +396,16 @@ def to_a
395396
quote_stack.push(value)
396397
end
397398
when :tSTRING_CONTENT
399+
is_percent_array = percent_array?(quote_stack.last)
400+
398401
if (lines = token.value.lines).one?
399402
# Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
400403
is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line
401404
# The parser gem only removes indentation when the heredoc is not nested
402405
not_nested = heredoc_stack.size == 1
403-
if is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0
406+
if is_percent_array
407+
value = percent_array_unescape(value)
408+
elsif is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0
404409
value = trim_heredoc_whitespace(value, current_heredoc)
405410
end
406411

@@ -417,12 +422,10 @@ def to_a
417422
chomped_line = line.chomp
418423
backslash_count = chomped_line[/\\{1,}\z/]&.length || 0
419424
is_interpolation = interpolation?(quote_stack.last)
420-
is_percent_array = percent_array?(quote_stack.last)
421425

422426
if backslash_count.odd? && (is_interpolation || is_percent_array)
423427
if is_percent_array
424-
# Remove the last backslash, keep potential newlines
425-
current_line << line.sub(/(\\)(\r?\n)\z/, '\2')
428+
current_line << percent_array_unescape(line)
426429
adjustment += 1
427430
else
428431
chomped_line.delete_suffix!("\\")
@@ -701,6 +704,27 @@ def unescape_string(string, quote)
701704
end
702705
end
703706

707+
# In a percent array, certain whitespace can be preceded by a backslash,
708+
# causing the following characters to be part of the previous element.
709+
# Returns a copy of +string+ in which every whitespace character that is
# preceded by an odd-length run of backslashes has lost one backslash from
# that run. Runs of even length are left untouched.
def percent_array_unescape(string)
  # Capture the whole backslash run. With the quantifier outside the group,
  # `(\\)+` keeps only its final repetition, so the captured length would
  # always be 1 and the parity check below could never be false.
  string.gsub(/(\\+)[ \f\n\r\t\v]/) do |full_match|
    Regexp.last_match(1).length.odd? ? full_match.delete_prefix("\\") : full_match
  end
end
715+
716+
# For whitespace in %-arrays, the parser gem only considers whitespace before the first newline.
717+
# Returns the number of characters of +string+ that come before its first
# newline, or 1 when +string+ itself begins with a newline. When there is no
# newline at all, the full length of +string+ is returned.
def percent_array_leading_whitespace(string)
  # A string that opens with a newline is counted as that single newline.
  return 1 if string.start_with?("\n")

  # The index of the first newline equals the number of characters before it.
  string.index("\n") || string.length
end
727+
704728
# Determine if characters preceded by a backslash should be escaped or not
705729
def interpolation?(quote)
706730
quote != "'" && !quote.start_with?("%q", "%w", "%i")

test/prism/fixtures/strings.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,19 @@ b\nar
6969

7070
%w[foo\ bar baz]
7171

72+
%w[foo\ bar\\ baz\\\
73+
bat]
74+
75+
%w[foo bar]
76+
77+
%w[
78+
a
79+
b c
80+
d
81+
]
82+
83+
%W[f\u{006f 006f}]
84+
7285
%W[a b#{c}d e]
7386

7487
%W[a b c]

test/prism/ruby/parser_test.rb

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,6 @@ class ParserTest < TestCase
9797
"embdoc_no_newline_at_end.txt",
9898
"heredocs_with_ignored_newlines.txt",
9999
"methods.txt",
100-
"strings.txt",
101100
"seattlerb/bug169.txt",
102101
"seattlerb/case_in.txt",
103102
"seattlerb/difficult4__leading_dots2.txt",

0 commit comments

Comments
 (0)