Skip to content

Commit 087cd8f

Browse files
committed
Use token buffer for string lexing
1 parent 341e027 commit 087cd8f

File tree

1 file changed

+17
-43
lines changed

1 file changed

+17
-43
lines changed

src/prism.c

Lines changed: 17 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -8228,8 +8228,7 @@ parser_lex(pm_parser_t *parser) {
82288228

82298229
// If we haven't found an escape yet, then this buffer will be
82308230
// unallocated since we can refer directly to the source string.
8231-
pm_buffer_t buffer = (pm_buffer_t) { .value = NULL, .length = 0, .capacity = 0 };
8232-
const uint8_t *buffer_cursor = NULL;
8231+
pm_token_buffer_t token_buffer = { 0 };
82338232

82348233
while (breakpoint != NULL) {
82358234
// If we hit the incrementor, then we'll increment the nesting and
@@ -8256,14 +8255,7 @@ parser_lex(pm_parser_t *parser) {
82568255
// then we need to return that content as string content first.
82578256
if (breakpoint > parser->current.start) {
82588257
parser->current.end = breakpoint;
8259-
8260-
if (buffer_cursor == NULL) {
8261-
pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
8262-
} else {
8263-
pm_buffer_append_bytes(&buffer, buffer_cursor, (size_t) (parser->current.end - buffer_cursor));
8264-
pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
8265-
}
8266-
8258+
pm_token_buffer_flush(parser, &token_buffer);
82678259
LEX(PM_TOKEN_STRING_CONTENT);
82688260
}
82698261

@@ -8300,14 +8292,7 @@ parser_lex(pm_parser_t *parser) {
83008292
} else {
83018293
parser->current.end = breakpoint + 1;
83028294
parser_flush_heredoc_end(parser);
8303-
8304-
if (buffer_cursor == NULL) {
8305-
pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
8306-
} else {
8307-
pm_buffer_append_bytes(&buffer, buffer_cursor, (size_t) (parser->current.end - buffer_cursor));
8308-
pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
8309-
}
8310-
8295+
pm_token_buffer_flush(parser, &token_buffer);
83118296
LEX(PM_TOKEN_STRING_CONTENT);
83128297
}
83138298
}
@@ -8319,14 +8304,8 @@ parser_lex(pm_parser_t *parser) {
83198304
break;
83208305
case '\\': {
83218306
// Here we hit escapes.
8322-
if (buffer_cursor == NULL) {
8323-
pm_buffer_init_capacity(&buffer, 16);
8324-
pm_buffer_append_bytes(&buffer, parser->current.start, (size_t) (breakpoint - parser->current.start));
8325-
} else {
8326-
pm_buffer_append_bytes(&buffer, buffer_cursor, (size_t) (breakpoint - buffer_cursor));
8327-
}
8328-
83298307
parser->current.end = breakpoint + 1;
8308+
pm_token_buffer_escape(parser, &token_buffer);
83308309

83318310
// If we've hit the end of the file, then break out of
83328311
// the loop by setting the breakpoint to NULL.
@@ -8338,31 +8317,31 @@ parser_lex(pm_parser_t *parser) {
83388317
uint8_t peeked = peek(parser);
83398318
switch (peeked) {
83408319
case '\\':
8341-
pm_buffer_append_u8(&buffer, '\\');
8320+
pm_token_buffer_push(&token_buffer, '\\');
83428321
parser->current.end++;
83438322
break;
83448323
case '\r':
83458324
parser->current.end++;
83468325
if (peek(parser) != '\n') {
83478326
if (!lex_mode->as.string.interpolation) {
8348-
pm_buffer_append_u8(&buffer, '\\');
8327+
pm_token_buffer_push(&token_buffer, '\\');
83498328
}
8350-
pm_buffer_append_u8(&buffer, '\r');
8329+
pm_token_buffer_push(&token_buffer, '\r');
83518330
break;
83528331
}
83538332
/* fallthrough */
83548333
case '\n':
83558334
if (!lex_mode->as.string.interpolation) {
8356-
pm_buffer_append_u8(&buffer, '\\');
8357-
pm_buffer_append_u8(&buffer, '\n');
8335+
pm_token_buffer_push(&token_buffer, '\\');
8336+
pm_token_buffer_push(&token_buffer, '\n');
83588337
}
83598338

83608339
if (parser->heredoc_end) {
83618340
// ... if we are on the same line as a heredoc,
83628341
// flush the heredoc and continue parsing after
83638342
// heredoc_end.
83648343
parser_flush_heredoc_end(parser);
8365-
pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
8344+
pm_token_buffer_copy(parser, &token_buffer);
83668345
LEX(PM_TOKEN_STRING_CONTENT);
83678346
} else {
83688347
// ... else track the newline.
@@ -8373,23 +8352,23 @@ parser_lex(pm_parser_t *parser) {
83738352
break;
83748353
default:
83758354
if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
8376-
pm_buffer_append_u8(&buffer, peeked);
8355+
pm_token_buffer_push(&token_buffer, peeked);
83778356
parser->current.end++;
83788357
} else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
8379-
pm_buffer_append_u8(&buffer, peeked);
8358+
pm_token_buffer_push(&token_buffer, peeked);
83808359
parser->current.end++;
83818360
} else if (lex_mode->as.string.interpolation) {
8382-
escape_read(parser, &buffer, PM_ESCAPE_FLAG_NONE);
8361+
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
83838362
} else {
8384-
pm_buffer_append_u8(&buffer, '\\');
8385-
pm_buffer_append_u8(&buffer, peeked);
8363+
pm_token_buffer_push(&token_buffer, '\\');
8364+
pm_token_buffer_push(&token_buffer, peeked);
83868365
parser->current.end++;
83878366
}
83888367

83898368
break;
83908369
}
83918370

8392-
buffer_cursor = parser->current.end;
8371+
token_buffer.cursor = parser->current.end;
83938372
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
83948373
break;
83958374
}
@@ -8406,12 +8385,7 @@ parser_lex(pm_parser_t *parser) {
84068385
}
84078386

84088387
if (type == PM_TOKEN_STRING_CONTENT) {
8409-
if (buffer_cursor == NULL) {
8410-
pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
8411-
} else {
8412-
pm_buffer_append_bytes(&buffer, buffer_cursor, (size_t) (parser->current.end - buffer_cursor));
8413-
pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
8414-
}
8388+
pm_token_buffer_flush(parser, &token_buffer);
84158389
}
84168390

84178391
LEX(type);

0 commit comments

Comments
 (0)