
[ruby/prism] Improve handling of unterminated strings
Fix ruby/prism#1946

This sets the error position for an unterminated string to its opening
delimiter. Previously, the error position was the empty range at the end
of the opening delimiter.

The same fix applies to the other string-like literals (regular expressions, heredocs, %-lists, xstrings, and dynamic symbols).

Additionally, this fixes ruby/prism#1946; that is, the last part of the
string is now kept in the parse result even when the string literal does not terminate.
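
For illustration only (not part of the commit), a minimal sketch of the intended effect, assuming the Prism Ruby API shape (Prism.parse, ParseResult#errors, Location#start_offset/#end_offset):

require "prism"

result = Prism.parse("%i[")   # unterminated %i list
error = result.errors.first

# With this change the error should cover the opening delimiter "%i["
# (offsets 0..3) instead of the empty range 3..3 after it.
puts error.message
puts "#{error.location.start_offset}..#{error.location.end_offset}"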

ruby/prism@c1240baafd
makenowjust authored and matzbot committed Dec 1, 2023
1 parent 0e59933 commit 417d700
Showing 2 changed files with 102 additions and 35 deletions.
121 changes: 94 additions & 27 deletions prism/prism.c
@@ -9314,7 +9314,9 @@ parser_lex(pm_parser_t *parser) {

// If we were unable to find a breakpoint, then this token hits the
// end of the file.
- LEX(PM_TOKEN_EOF);
+ parser->current.end = parser->end;
+ pm_token_buffer_flush(parser, &token_buffer);
+ LEX(PM_TOKEN_STRING_CONTENT);
}
case PM_LEX_REGEXP: {
// First, we'll set to start of this token to be the current end.
@@ -9503,7 +9505,9 @@ parser_lex(pm_parser_t *parser) {

// If we were unable to find a breakpoint, then this token hits the
// end of the file.
- LEX(PM_TOKEN_EOF);
+ parser->current.end = parser->end;
+ pm_token_buffer_flush(parser, &token_buffer);
+ LEX(PM_TOKEN_STRING_CONTENT);
}
case PM_LEX_STRING: {
// First, we'll set to start of this token to be the current end.
@@ -9707,8 +9711,10 @@ parser_lex(pm_parser_t *parser) {
}

// If we've hit the end of the string, then this is an unterminated
- // string. In that case we'll return the EOF token.
- LEX(PM_TOKEN_EOF);
+ // string. In that case we'll return a string content token.
+ parser->current.end = parser->end;
+ pm_token_buffer_flush(parser, &token_buffer);
+ LEX(PM_TOKEN_STRING_CONTENT);
}
case PM_LEX_HEREDOC: {
// First, we'll set to start of this token.
@@ -9955,8 +9961,10 @@ parser_lex(pm_parser_t *parser) {
}

// If we've hit the end of the string, then this is an unterminated
- // heredoc. In that case we'll return the EOF token.
- LEX(PM_TOKEN_EOF);
+ // heredoc. In that case we'll return a string content token.
+ parser->current.end = parser->end;
+ pm_token_buffer_flush(parser, &token_buffer);
+ LEX(PM_TOKEN_STRING_CONTENT);
}
}
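
In each of the lexer changes above, hitting the end of input inside an unterminated literal now sets parser->current.end to the end of the source, flushes the token buffer, and emits PM_TOKEN_STRING_CONTENT instead of PM_TOKEN_EOF, so the trailing content survives into the parse tree. A rough sketch of the observable effect (not part of the commit), assuming the Prism Ruby API:

require "prism"

result = Prism.parse('"hello')   # string literal with no closing quote
node = result.value.statements.body.first

# The node should still carry the trailing content even though the
# literal never terminates, and the error should point at the opening
# quote.
puts node.unescaped              # expected: hello
puts result.errors.map(&:message)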

@@ -12345,7 +12353,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
}

if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
+ if (match1(parser, PM_TOKEN_EOF)) {
+ pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
+ } else {
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
+ }

return (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &node_list, &parser->previous);
}
@@ -12366,7 +12378,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
lex_state_set(parser, next_state);
}

- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
+ if (match1(parser, PM_TOKEN_EOF)) {
+ pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
+ } else {
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
+ }
return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
}

@@ -13401,15 +13417,16 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
// If we don't accept interpolation then we expect the string to
// start with a single string content node.
pm_string_t unescaped;
+ pm_token_t content;
if (match1(parser, PM_TOKEN_EOF)) {
unescaped = PM_STRING_EMPTY;
+ content = not_provided(parser);
} else {
unescaped = parser->current_string;
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
+ content = parser->previous;
}

- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
- pm_token_t content = parser->previous;
-
// It is unfortunately possible to have multiple string content
// nodes in a row in the case that there's heredoc content in
// the middle of the string, like this cursed example:
@@ -13438,6 +13455,9 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
+ } else if (match1(parser, PM_TOKEN_EOF)) {
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_TERM);
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
} else {
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
@@ -13474,6 +13494,9 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {

if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
+ } else if (match1(parser, PM_TOKEN_EOF)) {
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
} else {
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
@@ -13494,6 +13517,9 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {

if (accept1(parser, PM_TOKEN_LABEL_END)) {
node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
+ } else if (match1(parser, PM_TOKEN_EOF)) {
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
+ node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
} else {
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
@@ -15097,7 +15123,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
}
case PM_TOKEN_PERCENT_LOWER_I: {
parser_lex(parser);
- pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
+ pm_token_t opening = parser->previous;
+ pm_array_node_t *array = pm_array_node_create(parser, &opening);

while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
accept1(parser, PM_TOKEN_WORDS_SEP);
@@ -15112,14 +15139,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
}

- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
- pm_array_node_close_set(array, &parser->previous);
+ pm_token_t closing = parser->current;
+ if (match1(parser, PM_TOKEN_EOF)) {
+ pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
+ closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+ } else {
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
+ }
+ pm_array_node_close_set(array, &closing);

return (pm_node_t *) array;
}
case PM_TOKEN_PERCENT_UPPER_I: {
parser_lex(parser);
- pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
+ pm_token_t opening = parser->previous;
+ pm_array_node_t *array = pm_array_node_create(parser, &opening);

// This is the current node that we are parsing that will be added to the
// list of elements.
@@ -15259,14 +15293,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
pm_array_node_elements_append(array, current);
}

- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
- pm_array_node_close_set(array, &parser->previous);
+ pm_token_t closing = parser->current;
+ if (match1(parser, PM_TOKEN_EOF)) {
+ pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
+ closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+ } else {
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
+ }
+ pm_array_node_close_set(array, &closing);

return (pm_node_t *) array;
}
case PM_TOKEN_PERCENT_LOWER_W: {
parser_lex(parser);
- pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
+ pm_token_t opening = parser->previous;
+ pm_array_node_t *array = pm_array_node_create(parser, &opening);

// skip all leading whitespaces
accept1(parser, PM_TOKEN_WORDS_SEP);
@@ -15286,14 +15327,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
}

- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
- pm_array_node_close_set(array, &parser->previous);
+ pm_token_t closing = parser->current;
+ if (match1(parser, PM_TOKEN_EOF)) {
+ pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
+ closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+ } else {
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
+ }
+ pm_array_node_close_set(array, &closing);

return (pm_node_t *) array;
}
case PM_TOKEN_PERCENT_UPPER_W: {
parser_lex(parser);
- pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
+ pm_token_t opening = parser->previous;
+ pm_array_node_t *array = pm_array_node_create(parser, &opening);

// This is the current node that we are parsing that will be added to the
// list of elements.
@@ -15413,8 +15461,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
pm_array_node_elements_append(array, current);
}

- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
- pm_array_node_close_set(array, &parser->previous);
+ pm_token_t closing = parser->current;
+ if (match1(parser, PM_TOKEN_EOF)) {
+ pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
+ closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+ } else {
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
+ }
+ pm_array_node_close_set(array, &closing);

return (pm_node_t *) array;
}
@@ -15478,8 +15532,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
}
}

- expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
- pm_interpolated_regular_expression_node_closing_set(node, &parser->previous);
+ pm_token_t closing = parser->current;
+ if (match1(parser, PM_TOKEN_EOF)) {
+ pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
+ closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+ } else {
+ expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
+ }
+ pm_interpolated_regular_expression_node_closing_set(node, &closing);

return (pm_node_t *) node;
}
@@ -15544,8 +15604,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
}
}

- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
- pm_interpolated_xstring_node_closing_set(node, &parser->previous);
+ pm_token_t closing = parser->current;
+ if (match1(parser, PM_TOKEN_EOF)) {
+ pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
+ closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+ } else {
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
+ }
+ pm_interpolated_xstring_node_closing_set(node, &closing);
+
return (pm_node_t *) node;
}
case PM_TOKEN_USTAR: {
16 changes: 8 additions & 8 deletions test/prism/errors_test.rb
@@ -110,45 +110,45 @@ def test_unterminated_embdoc

def test_unterminated_i_list
assert_errors expression("%i["), "%i[", [
- ["expected a closing delimiter for the `%i` list", 3..3]
+ ["expected a closing delimiter for the `%i` list", 0..3]
]
end

def test_unterminated_w_list
assert_errors expression("%w["), "%w[", [
- ["expected a closing delimiter for the `%w` list", 3..3]
+ ["expected a closing delimiter for the `%w` list", 0..3]
]
end

def test_unterminated_W_list
assert_errors expression("%W["), "%W[", [
- ["expected a closing delimiter for the `%W` list", 3..3]
+ ["expected a closing delimiter for the `%W` list", 0..3]
]
end

def test_unterminated_regular_expression
assert_errors expression("/hello"), "/hello", [
- ["expected a closing delimiter for the regular expression", 1..1]
+ ["expected a closing delimiter for the regular expression", 0..1]
]
end

def test_unterminated_regular_expression_with_heredoc
source = "<<-END + /b\nEND\n"

assert_errors expression(source), source, [
- ["expected a closing delimiter for the regular expression", 16..16]
+ ["expected a closing delimiter for the regular expression", 9..10]
]
end

def test_unterminated_xstring
assert_errors expression("`hello"), "`hello", [
- ["expected a closing delimiter for the `%x` or backtick string", 1..1]
+ ["expected a closing delimiter for the `%x` or backtick string", 0..1]
]
end

def test_unterminated_string
assert_errors expression('"hello'), '"hello', [
- ["expected a closing delimiter for the interpolated string", 1..1]
+ ["expected a closing delimiter for the interpolated string", 0..1]
]
end

@@ -161,7 +161,7 @@ def test_incomplete_instance_var_string

def test_unterminated_s_symbol
assert_errors expression("%s[abc"), "%s[abc", [
- ["expected a closing delimiter for the dynamic symbol", 3..3]
+ ["expected a closing delimiter for the dynamic symbol", 0..3]
]
end

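The updated expectations all move the reported range from the empty position at the end of the opening delimiter onto the delimiter itself. As a quick, hypothetical sanity check (not part of the test suite), treating each pair as start/end offsets and slicing with an exclusive range should recover the opening delimiter:

{
  "%i["    => 0...3,
  "/hello" => 0...1,
  "`hello" => 0...1,
  '"hello' => 0...1,
  "%s[abc" => 0...3
}.each do |source, range|
  puts source[range]   # prints %i[ then / then ` then " then %s[
end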
