Skip to content

Commit be1d8ae

Browse files
committed
Track current_string to pass forward for character literals
1 parent 23b2336 commit be1d8ae

File tree

2 files changed

+19
-20
lines changed

2 files changed

+19
-20
lines changed

include/prism/parser.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "prism/util/pm_list.h"
99
#include "prism/util/pm_newline_list.h"
1010
#include "prism/util/pm_state_stack.h"
11+
#include "prism/util/pm_string.h"
1112

1213
#include <stdbool.h>
1314

@@ -393,6 +394,10 @@ struct pm_parser {
393394
// when we find tokens that we need it for.
394395
pm_node_flags_t integer_base;
395396

397+
// This string is used to pass information from the lexer to the parser. It
398+
// is particularly necessary because of escape sequences.
399+
pm_string_t current_string;
400+
396401
// Whether or not we're at the beginning of a command
397402
bool command_start;
398403

src/prism.c

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4860,8 +4860,6 @@ pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_lo
48604860
return node;
48614861
}
48624862

4863-
4864-
#undef PM_EMPTY_STRING
48654863
#undef PM_ALLOC_NODE
48664864

48674865
/******************************************************************************/
@@ -6099,6 +6097,7 @@ lex_question_mark(pm_parser_t *parser) {
60996097

61006098
if (parser->current.end >= parser->end) {
61016099
pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
6100+
pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
61026101
return PM_TOKEN_CHARACTER_LITERAL;
61036102
}
61046103

@@ -6112,6 +6111,8 @@ lex_question_mark(pm_parser_t *parser) {
61126111
if (parser->current.start[1] == '\\') {
61136112
lex_state_set(parser, PM_LEX_STATE_END);
61146113
parser->current.end += pm_unescape_calculate_difference(parser, parser->current.start + 1, PM_UNESCAPE_ALL, true);
6114+
pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
6115+
pm_unescape_manipulate_char_literal(parser, &parser->current_string, PM_UNESCAPE_ALL);
61156116
return PM_TOKEN_CHARACTER_LITERAL;
61166117
} else {
61176118
size_t encoding_width = parser->encoding.char_width(parser->current.end, parser->end - parser->current.end);
@@ -6128,6 +6129,7 @@ lex_question_mark(pm_parser_t *parser) {
61286129
) {
61296130
lex_state_set(parser, PM_LEX_STATE_END);
61306131
parser->current.end += encoding_width;
6132+
pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
61316133
return PM_TOKEN_CHARACTER_LITERAL;
61326134
}
61336135
}
@@ -8077,17 +8079,6 @@ pm_symbol_node_create_and_unescape(pm_parser_t *parser, const pm_token_t *openin
80778079
return node;
80788080
}
80798081

8080-
static pm_string_node_t *
8081-
pm_char_literal_node_create_and_unescape(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, pm_unescape_type_t unescape_type) {
8082-
pm_string_node_t *node = pm_string_node_create(parser, opening, content, closing);
8083-
8084-
assert((content->end - content->start) >= 0);
8085-
pm_string_shared_init(&node->unescaped, content->start, content->end);
8086-
8087-
pm_unescape_manipulate_char_literal(parser, &node->unescaped, unescape_type);
8088-
return node;
8089-
}
8090-
80918082
static pm_string_node_t *
80928083
pm_string_node_create_and_unescape(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, pm_unescape_type_t unescape_type) {
80938084
pm_string_node_t *node = pm_string_node_create(parser, opening, content, closing);
@@ -11763,16 +11754,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
1176311754
content.start = content.start + 1;
1176411755

1176511756
pm_token_t closing = not_provided(parser);
11766-
pm_node_t *node = (pm_node_t *) pm_char_literal_node_create_and_unescape(parser, &opening, &content, &closing, PM_UNESCAPE_ALL);
11757+
pm_string_node_t *node = (pm_string_node_t *) pm_string_node_create(parser, &opening, &content, &closing);
11758+
node->unescaped = parser->current_string;
1176711759

1176811760
// Characters can be followed by strings in which case they are
1176911761
// automatically concatenated.
1177011762
if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
1177111763
pm_node_t *concat = parse_strings(parser);
11772-
return (pm_node_t *) pm_string_concat_node_create(parser, node, concat);
11764+
return (pm_node_t *) pm_string_concat_node_create(parser, (pm_node_t *) node, concat);
1177311765
}
1177411766

11775-
return node;
11767+
return (pm_node_t *) node;
1177611768
}
1177711769
case PM_TOKEN_CLASS_VARIABLE: {
1177811770
parser_lex(parser);
@@ -14538,6 +14530,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const ch
1453814530
.constant_pool = PM_CONSTANT_POOL_EMPTY,
1453914531
.newline_list = PM_NEWLINE_LIST_EMPTY,
1454014532
.integer_base = 0,
14533+
.current_string = PM_EMPTY_STRING,
1454114534
.command_start = true,
1454214535
.recovering = false,
1454314536
.encoding_changed = false,
@@ -14675,10 +14668,11 @@ pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, cons
1467514668
pm_parser_free(&parser);
1467614669
}
1467714670

14678-
#undef PM_LOCATION_NULL_VALUE
14679-
#undef PM_LOCATION_TOKEN_VALUE
14680-
#undef PM_LOCATION_NODE_VALUE
14681-
#undef PM_LOCATION_NODE_BASE_VALUE
1468214671
#undef PM_CASE_KEYWORD
1468314672
#undef PM_CASE_OPERATOR
1468414673
#undef PM_CASE_WRITABLE
14674+
#undef PM_EMPTY_STRING
14675+
#undef PM_LOCATION_NODE_BASE_VALUE
14676+
#undef PM_LOCATION_NODE_VALUE
14677+
#undef PM_LOCATION_NULL_VALUE
14678+
#undef PM_LOCATION_TOKEN_VALUE

0 commit comments

Comments
 (0)