@@ -12016,139 +12016,155 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
12016
12016
return (yp_node_t *) node;
12017
12017
}
12018
12018
case YP_TOKEN_STRING_BEGIN: {
12019
- assert(parser->lex_modes.current->mode == YP_LEX_STRING);
12020
- bool lex_interpolation = parser->lex_modes.current->as.string.interpolation;
12019
+ yp_node_t *result = NULL;
12021
12020
12022
- yp_token_t opening = parser->current;
12023
- parser_lex(parser);
12024
-
12025
- yp_node_t *node;
12021
+ while (match_type_p(parser, YP_TOKEN_STRING_BEGIN)) {
12022
+ assert(parser->lex_modes.current->mode == YP_LEX_STRING);
12023
+ bool lex_interpolation = parser->lex_modes.current->as.string.interpolation;
12026
12024
12027
- if (accept(parser, YP_TOKEN_STRING_END)) {
12028
- // If we get here, then we have an end immediately after a start. In
12029
- // that case we'll create an empty content token and return an
12030
- // uninterpolated string.
12031
- yp_token_t content = (yp_token_t) {
12032
- .type = YP_TOKEN_STRING_CONTENT,
12033
- .start = parser->previous.start,
12034
- .end = parser->previous.start
12035
- };
12025
+ yp_node_t *node = NULL;
12026
+ yp_token_t opening = parser->current;
12027
+ parser_lex(parser);
12036
12028
12037
- node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_NONE);
12038
- } else if (accept(parser, YP_TOKEN_LABEL_END)) {
12039
- // If we get here, then we have an end of a label immediately after a
12040
- // start. In that case we'll create an empty symbol node.
12041
- yp_token_t opening = not_provided(parser);
12042
- yp_token_t content = (yp_token_t) {
12043
- .type = YP_TOKEN_STRING_CONTENT,
12044
- .start = parser->previous.start,
12045
- .end = parser->previous.start
12046
- };
12029
+ if (accept(parser, YP_TOKEN_STRING_END)) {
12030
+ // If we get here, then we have an end immediately after a
12031
+ // start. In that case we'll create an empty content token
12032
+ // and return an uninterpolated string.
12033
+ yp_token_t content = (yp_token_t) {
12034
+ .type = YP_TOKEN_STRING_CONTENT,
12035
+ .start = parser->previous.start,
12036
+ .end = parser->previous.start
12037
+ };
12038
+
12039
+ node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_NONE);
12040
+ } else if (accept(parser, YP_TOKEN_LABEL_END)) {
12041
+ // If we get here, then we have an end of a label
12042
+ // immediately after a start. In that case we'll create an
12043
+ // empty symbol node.
12044
+ yp_token_t opening = not_provided(parser);
12045
+ yp_token_t content = (yp_token_t) {
12046
+ .type = YP_TOKEN_STRING_CONTENT,
12047
+ .start = parser->previous.start,
12048
+ .end = parser->previous.start
12049
+ };
12050
+
12051
+ node = (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous);
12052
+ } else if (!lex_interpolation) {
12053
+ // If we don't accept interpolation then we expect the
12054
+ // string to start with a single string content node.
12055
+ expect(parser, YP_TOKEN_STRING_CONTENT, "Expected string content after opening delimiter.");
12056
+ yp_token_t content = parser->previous;
12057
+
12058
+ // It is unfortunately possible to have multiple string
12059
+ // content nodes in a row in the case that there's heredoc
12060
+ // content in the middle of the string, like this cursed
12061
+ // example:
12062
+ //
12063
+ // <<-END+'b
12064
+ // a
12065
+ // END
12066
+ // c'+'d'
12067
+ //
12068
+ // In that case we need to switch to an interpolated string
12069
+ // to be able to contain all of the parts.
12070
+ if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
12071
+ yp_node_list_t parts = YP_EMPTY_NODE_LIST;
12047
12072
12048
- return (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous);
12049
- } else if (!lex_interpolation) {
12050
- // If we don't accept interpolation then we expect the string to start
12051
- // with a single string content node.
12052
- expect(parser, YP_TOKEN_STRING_CONTENT, "Expected string content after opening delimiter.");
12053
- yp_token_t content = parser->previous;
12073
+ yp_token_t delimiters = not_provided(parser);
12074
+ yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &content, &delimiters, YP_UNESCAPE_MINIMAL);
12075
+ yp_node_list_append(&parts, part);
12054
12076
12055
- // It is unfortunately possible to have multiple string content nodes in
12056
- // a row in the case that there's heredoc content in the middle of the
12057
- // string, like this cursed example:
12058
- //
12059
- // <<-END+'b
12060
- // a
12061
- // END
12062
- // c'+'d'
12063
- //
12064
- // In that case we need to switch to an interpolated string to be able
12065
- // to contain all of the parts.
12066
- if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
12067
- yp_node_list_t parts = YP_EMPTY_NODE_LIST;
12077
+ while (accept(parser, YP_TOKEN_STRING_CONTENT)) {
12078
+ part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &parser->previous, &delimiters, YP_UNESCAPE_MINIMAL);
12079
+ yp_node_list_append(&parts, part);
12080
+ }
12068
12081
12069
- yp_token_t delimiters = not_provided(parser);
12070
- yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &content, &delimiters, YP_UNESCAPE_MINIMAL);
12071
- yp_node_list_append(&parts, part);
12082
+ expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
12083
+ node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
12084
+ } else if (accept(parser, YP_TOKEN_LABEL_END)) {
12085
+ node = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
12086
+ } else {
12087
+ expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
12088
+ node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_MINIMAL);
12089
+ }
12090
+ } else if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
12091
+ // In this case we've hit string content so we know the string at
12092
+ // least has something in it. We'll need to check if the following
12093
+ // token is the end (in which case we can return a plain string) or if
12094
+ // it's not then it has interpolation.
12095
+ yp_token_t content = parser->current;
12096
+ parser_lex(parser);
12072
12097
12073
- while (accept(parser, YP_TOKEN_STRING_CONTENT)) {
12074
- part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &parser->previous, &delimiters, YP_UNESCAPE_MINIMAL);
12098
+ if (accept(parser, YP_TOKEN_STRING_END)) {
12099
+ node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
12100
+ } else if (accept(parser, YP_TOKEN_LABEL_END)) {
12101
+ node = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
12102
+ } else {
12103
+ // If we get here, then we have interpolation so we'll need to create
12104
+ // a string or symbol node with interpolation.
12105
+ yp_node_list_t parts = YP_EMPTY_NODE_LIST;
12106
+ yp_token_t string_opening = not_provided(parser);
12107
+ yp_token_t string_closing = not_provided(parser);
12108
+ yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &string_opening, &parser->previous, &string_closing, YP_UNESCAPE_ALL);
12075
12109
yp_node_list_append(&parts, part);
12076
- }
12077
-
12078
- expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
12079
- return (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
12080
- }
12081
12110
12082
- if (accept(parser, YP_TOKEN_LABEL_END)) {
12083
- return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
12084
- }
12085
-
12086
- expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
12087
- node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_MINIMAL);
12088
- } else if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
12089
- // In this case we've hit string content so we know the string at
12090
- // least has something in it. We'll need to check if the following
12091
- // token is the end (in which case we can return a plain string) or if
12092
- // it's not then it has interpolation.
12093
- yp_token_t content = parser->current;
12094
- parser_lex(parser);
12111
+ while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) {
12112
+ yp_node_t *part = parse_string_part(parser);
12113
+ if (part != NULL) yp_node_list_append(&parts, part);
12114
+ }
12095
12115
12096
- if (accept(parser, YP_TOKEN_STRING_END)) {
12097
- node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
12098
- } else if (accept(parser, YP_TOKEN_LABEL_END)) {
12099
- return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
12116
+ if (accept(parser, YP_TOKEN_LABEL_END)) {
12117
+ node = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
12118
+ } else {
12119
+ expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string.");
12120
+ node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
12121
+ }
12122
+ }
12100
12123
} else {
12101
- // If we get here, then we have interpolation so we'll need to create
12102
- // a string or symbol node with interpolation.
12124
+ // If we get here, then the first part of the string is not plain string
12125
+ // content, in which case we need to parse the string as an interpolated
12126
+ // string.
12103
12127
yp_node_list_t parts = YP_EMPTY_NODE_LIST;
12104
- yp_token_t string_opening = not_provided(parser);
12105
- yp_token_t string_closing = not_provided(parser);
12106
- yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &string_opening, &parser->previous, &string_closing, YP_UNESCAPE_ALL);
12107
- yp_node_list_append(&parts, part);
12108
12128
12109
12129
while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) {
12110
12130
yp_node_t *part = parse_string_part(parser);
12111
12131
if (part != NULL) yp_node_list_append(&parts, part);
12112
12132
}
12113
12133
12114
12134
if (accept(parser, YP_TOKEN_LABEL_END)) {
12115
- return (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
12135
+ node = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
12136
+ } else {
12137
+ expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string.");
12138
+ node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
12116
12139
}
12117
-
12118
- expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string.");
12119
- node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
12120
12140
}
12121
- } else {
12122
- // If we get here, then the first part of the string is not plain string
12123
- // content, in which case we need to parse the string as an interpolated
12124
- // string.
12125
- yp_node_list_t parts = YP_EMPTY_NODE_LIST;
12126
12141
12127
- while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) {
12128
- yp_node_t *part = parse_string_part(parser);
12129
- if (part != NULL) yp_node_list_append(&parts, part);
12130
- }
12142
+ if (result == NULL) {
12143
+ // If the node we just parsed is a symbol node, then we
12144
+ // can't concatenate it with anything else, so we can now
12145
+ // return that node.
12146
+ if (YP_NODE_TYPE_P(node, YP_NODE_SYMBOL_NODE) || YP_NODE_TYPE_P(node, YP_NODE_INTERPOLATED_SYMBOL_NODE)) {
12147
+ return node;
12148
+ }
12131
12149
12132
- if (accept(parser, YP_TOKEN_LABEL_END)) {
12133
- return (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
12134
- }
12150
+ // If we don't already have a node, then it's fine and we
12151
+ // can just set the result to be the node we just parsed.
12152
+ result = node;
12153
+ } else {
12154
+ // Otherwise we need to check the type of the node we just
12155
+ // parsed. If it cannot be concatenated with the previous
12156
+ // node, then we'll need to add a syntax error.
12157
+ if (!YP_NODE_TYPE_P(node, YP_NODE_STRING_NODE) && !YP_NODE_TYPE_P(node, YP_NODE_INTERPOLATED_STRING_NODE)) {
12158
+ yp_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, "Unexpected string concatenation.");
12159
+ }
12135
12160
12136
- expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string.");
12137
- node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
12161
+ // Either way we will create a concat node to hold the
12162
+ // strings together.
12163
+ result = (yp_node_t *) yp_string_concat_node_create(parser, result, node);
12164
+ }
12138
12165
}
12139
12166
12140
- // If there's a string immediately following this string, then it's a
12141
- // concatenatation. In this case we'll parse the next string and create a
12142
- // node in the tree that concatenates the two strings.
12143
- if (parser->current.type == YP_TOKEN_STRING_BEGIN) {
12144
- return (yp_node_t *) yp_string_concat_node_create(
12145
- parser,
12146
- node,
12147
- parse_expression(parser, YP_BINDING_POWER_CALL, "Expected string on the right side of concatenation.")
12148
- );
12149
- } else {
12150
- return node;
12151
- }
12167
+ return result;
12152
12168
}
12153
12169
case YP_TOKEN_SYMBOL_BEGIN: {
12154
12170
yp_lex_mode_t lex_mode = *parser->lex_modes.current;
0 commit comments