Skip to content

Commit 60315d0

Browse files
committed
Rework the list lexer to check terminators properly
1 parent 6d74fd4 commit 60315d0

File tree

1 file changed

+67
-78
lines changed

1 file changed

+67
-78
lines changed

src/yarp.c

Lines changed: 67 additions & 78 deletions
Original file line number | Diff line number | Diff line change
@@ -6657,99 +6657,88 @@ parser_lex(yp_parser_t *parser) {
66576657

66586658
// Here we'll get a list of the places where strpbrk should break,
66596659
// and then find the first one.
6660-
const char *breakpoints = parser->lex_modes.current->as.list.breakpoints;
6660+
yp_lex_mode_t *lex_mode = parser->lex_modes.current;
6661+
const char *breakpoints = lex_mode->as.list.breakpoints;
66616662
const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
66626663

66636664
while (breakpoint != NULL) {
6664-
switch (*breakpoint) {
6665-
case '\0':
6666-
// If we hit a null byte, skip directly past it.
6667-
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
6668-
break;
6669-
case '\\': {
6670-
// If we hit escapes, then we need to treat the next token
6671-
// literally. In this case we'll skip past the next character and
6672-
// find the next breakpoint.
6673-
6674-
yp_unescape_type_t unescape_type;
6675-
if (parser->lex_modes.current->as.list.interpolation) {
6676-
unescape_type = YP_UNESCAPE_ALL;
6677-
} else {
6678-
unescape_type = YP_UNESCAPE_MINIMAL;
6679-
}
6680-
size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, unescape_type, false, &parser->error_list);
6665+
// If we hit a null byte, skip directly past it.
6666+
if (*breakpoint == '\0') {
6667+
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
6668+
continue;
6669+
}
66816670

6682-
// If the result is an escaped newline, then we need to
6683-
// track that newline.
6684-
if (breakpoint[difference - 1] == '\n') {
6685-
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
6686-
}
6671+
// If we hit whitespace, then we must have received content by
6672+
// now, so we can return an element of the list.
6673+
if (yp_char_is_whitespace(*breakpoint)) {
6674+
parser->current.end = breakpoint;
6675+
LEX(YP_TOKEN_STRING_CONTENT);
6676+
}
66876677

6688-
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
6689-
break;
6678+
// If we hit the terminator, we need to check which token to
6679+
// return.
6680+
if (*breakpoint == lex_mode->as.list.terminator) {
6681+
// If this terminator doesn't actually close the list, then
6682+
// we need to continue on past it.
6683+
if (lex_mode->as.list.nesting > 0) {
6684+
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
6685+
lex_mode->as.list.nesting--;
6686+
continue;
66906687
}
6691-
case ' ':
6692-
case '\t':
6693-
case '\f':
6694-
case '\r':
6695-
case '\v':
6696-
case '\n':
6697-
// If we've hit whitespace, then we must have received content by
6698-
// now, so we can return an element of the list.
6688+
6689+
// If we've hit the terminator and we've already skipped
6690+
// past content, then we can return a list node.
6691+
if (breakpoint > parser->current.start) {
66996692
parser->current.end = breakpoint;
67006693
LEX(YP_TOKEN_STRING_CONTENT);
6701-
case '#': {
6702-
// if # is the terminator, we need to fall into the default case
6703-
if (parser->lex_modes.current->as.list.terminator != '#') {
6704-
yp_token_type_t type = lex_interpolation(parser, breakpoint);
6705-
if (type != YP_TOKEN_NOT_PROVIDED) {
6706-
LEX(type);
6707-
}
6708-
6709-
// If we haven't returned at this point then we had something
6710-
// that looked like an interpolated class or instance variable
6711-
// like "#@" but wasn't actually. In this case we'll just skip
6712-
// to the next breakpoint.
6713-
breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
6714-
break;
6715-
}
67166694
}
6717-
/* fallthrough */
6718-
default:
6719-
if (*breakpoint == parser->lex_modes.current->as.list.incrementor) {
6720-
// If we've hit the incrementor, then we need to skip past it and
6721-
// find the next breakpoint.
6722-
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
6723-
parser->lex_modes.current->as.list.nesting++;
6724-
break;
6725-
}
67266695

6727-
// In this case we've hit the terminator.
6728-
assert(*breakpoint == parser->lex_modes.current->as.list.terminator);
6696+
// Otherwise, switch back to the default state and return
6697+
// the end of the list.
6698+
parser->current.end = breakpoint + 1;
6699+
lex_mode_pop(parser);
6700+
lex_state_set(parser, YP_LEX_STATE_END);
6701+
LEX(YP_TOKEN_STRING_END);
6702+
}
67296703

6730-
// If this terminator doesn't actually close the list, then we need
6731-
// to continue on past it.
6732-
if (parser->lex_modes.current->as.list.nesting > 0) {
6733-
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
6734-
parser->lex_modes.current->as.list.nesting--;
6735-
break;
6736-
}
6704+
// If we hit escapes, then we need to treat the next token
6705+
// literally. In this case we'll skip past the next character
6706+
// and find the next breakpoint.
6707+
if (*breakpoint == '\\') {
6708+
yp_unescape_type_t unescape_type = lex_mode->as.list.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
6709+
size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, unescape_type, false, &parser->error_list);
67376710

6738-
// If we've hit the terminator and we've already skipped past
6739-
// content, then we can return a list node.
6740-
if (breakpoint > parser->current.start) {
6741-
parser->current.end = breakpoint;
6742-
LEX(YP_TOKEN_STRING_CONTENT);
6743-
}
6711+
// If the result is an escaped newline, then we need to
6712+
// track that newline.
6713+
if (breakpoint[difference - 1] == '\n') {
6714+
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
6715+
}
67446716

6745-
// Otherwise, switch back to the default state and return the end of
6746-
// the list.
6747-
parser->current.end = breakpoint + 1;
6748-
lex_mode_pop(parser);
6717+
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
6718+
continue;
6719+
}
67496720

6750-
lex_state_set(parser, YP_LEX_STATE_END);
6751-
LEX(YP_TOKEN_STRING_END);
6721+
// If we hit a #, then we will attempt to lex interpolation.
6722+
if (*breakpoint == '#') {
6723+
yp_token_type_t type = lex_interpolation(parser, breakpoint);
6724+
if (type != YP_TOKEN_NOT_PROVIDED) {
6725+
LEX(type);
6726+
}
6727+
6728+
// If we haven't returned at this point then we had something
6729+
// that looked like an interpolated class or instance variable
6730+
// like "#@" but wasn't actually. In this case we'll just skip
6731+
// to the next breakpoint.
6732+
breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
6733+
continue;
67526734
}
6735+
6736+
// If we've hit the incrementor, then we need to skip past it
6737+
// and find the next breakpoint.
6738+
assert(*breakpoint == lex_mode->as.list.incrementor);
6739+
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
6740+
lex_mode->as.list.nesting++;
6741+
continue;
67536742
}
67546743

67556744
// If we were unable to find a breakpoint, then this token hits the end of

0 commit comments

Comments
 (0)