Skip to content

Commit

Permalink
Fix up tilde heredoc line continuations
Browse files Browse the repository at this point in the history
  • Loading branch information
kddnewton committed Mar 7, 2024
1 parent f372c6f commit 15e74b2
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 21 deletions.
3 changes: 3 additions & 0 deletions include/prism/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,9 @@ typedef struct pm_lex_mode {
* a tilde heredoc.
*/
size_t common_whitespace;

/** True if the previous token ended with a line continuation. */
bool line_continuation;
} heredoc;
} as;

Expand Down
38 changes: 30 additions & 8 deletions src/prism.c
Original file line number Diff line number Diff line change
Expand Up @@ -9450,7 +9450,8 @@ parser_lex(pm_parser_t *parser) {
.next_start = parser->current.end,
.quote = quote,
.indent = indent,
.common_whitespace = (size_t) -1
.common_whitespace = (size_t) -1,
.line_continuation = false
}
});

Expand Down Expand Up @@ -10719,6 +10720,9 @@ parser_lex(pm_parser_t *parser) {
// current lex mode.
pm_lex_mode_t *lex_mode = parser->lex_modes.current;

bool line_continuation = lex_mode->as.heredoc.line_continuation;
lex_mode->as.heredoc.line_continuation = false;

// We'll check if we're at the end of the file. If we are, then we
// will add an error (because we weren't able to find the
// terminator) but still continue parsing so that content after the
Expand All @@ -10736,7 +10740,7 @@ parser_lex(pm_parser_t *parser) {

// If we are immediately following a newline and we have hit the
// terminator, then we need to return the ending of the heredoc.
if (current_token_starts_line(parser)) {
if (!line_continuation && current_token_starts_line(parser)) {
const uint8_t *start = parser->current.start;
if (start + ident_length <= parser->end) {
const uint8_t *newline = next_newline(start, parser->end - start);
Expand Down Expand Up @@ -10808,7 +10812,7 @@ parser_lex(pm_parser_t *parser) {

const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
pm_token_buffer_t token_buffer = { { 0 }, 0 };
bool was_escaped_newline = false;
bool was_line_continuation = false;

while (breakpoint != NULL) {
switch (*breakpoint) {
Expand All @@ -10831,7 +10835,7 @@ parser_lex(pm_parser_t *parser) {
// some leading whitespace.
const uint8_t *start = breakpoint + 1;

if (!was_escaped_newline && (start + ident_length <= parser->end)) {
if (!was_line_continuation && (start + ident_length <= parser->end)) {
// We want to match the terminator starting from the end of the line in case
// there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
const uint8_t *newline = next_newline(start, parser->end - start);
Expand Down Expand Up @@ -10873,15 +10877,14 @@ parser_lex(pm_parser_t *parser) {
// heredoc here as string content. Then, the next time a
// token is lexed, it will match again and return the
// end of the heredoc.

if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
if ((lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
lex_mode->as.heredoc.common_whitespace = whitespace;
}

parser->current.end = breakpoint + 1;

if (!was_escaped_newline) {
if (!was_line_continuation) {
pm_token_buffer_flush(parser, &token_buffer);
LEX(PM_TOKEN_STRING_CONTENT);
}
Expand Down Expand Up @@ -10943,7 +10946,26 @@ parser_lex(pm_parser_t *parser) {
}
/* fallthrough */
case '\n':
was_escaped_newline = true;
// If we are in a tilde heredoc here, we
// should break out of the loop and return
// the string content.
if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
const uint8_t *end = parser->current.end;
pm_newline_list_append(&parser->newline_list, end);

// Here we want the buffer to only
// include up to the backslash.
parser->current.end = breakpoint;
pm_token_buffer_flush(parser, &token_buffer);

// Now we can advance the end of the
// token past the newline.
parser->current.end = end + 1;
lex_mode->as.heredoc.line_continuation = true;
LEX(PM_TOKEN_STRING_CONTENT);
}

was_line_continuation = true;
token_buffer.cursor = parser->current.end + 1;
breakpoint = parser->current.end;
continue;
Expand Down Expand Up @@ -10980,7 +11002,7 @@ parser_lex(pm_parser_t *parser) {
assert(false && "unreachable");
}

was_escaped_newline = false;
was_line_continuation = false;
}

if (parser->current.end > parser->current.start) {
Expand Down
2 changes: 2 additions & 0 deletions test/prism/ruby_parser_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ class RubyParserTest < TestCase
# https://github.com/seattlerb/ruby_parser/issues/344
failures = crlf | %w[
alias.txt
heredocs_with_ignored_newlines.txt
method_calls.txt
methods.txt
multi_write.txt
Expand All @@ -94,6 +95,7 @@ class RubyParserTest < TestCase
whitequark/lvar_injecting_match.txt
whitequark/not.txt
whitequark/op_asgn_cmd.txt
whitequark/parser_bug_640.txt
whitequark/parser_slash_slash_n_escaping_in_literals.txt
whitequark/pattern_matching_single_line_allowed_omission_of_parentheses.txt
whitequark/pattern_matching_single_line.txt
Expand Down
14 changes: 10 additions & 4 deletions test/prism/snapshots/heredocs_with_ignored_newlines.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 15 additions & 5 deletions test/prism/snapshots/whitequark/parser_bug_640.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 10 additions & 4 deletions test/prism/snapshots/whitequark/slash_newline_in_heredocs.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions test/prism/unescape_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,8 @@ def assert_unescape(context, escape)
else
assert_equal expected.bytes, actual.bytes, message
end
rescue Exception
binding.irb
end
end
end

0 comments on commit 15e74b2

Please sign in to comment.