@@ -17402,6 +17402,51 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17402
17402
}
17403
17403
}
17404
17404
17405
+ /**
17406
+ * This struct is used to pass information between the regular expression parser
17407
+ * and the error callback.
17408
+ */
17409
+ typedef struct {
17410
+ pm_parser_t *parser;
17411
+ const uint8_t *start;
17412
+ const uint8_t *end;
17413
+ bool shared;
17414
+ } parse_regular_expression_error_data_t;
17415
+
17416
+ /**
17417
+ * This callback is called when the regular expression parser encounters a
17418
+ * syntax error.
17419
+ */
17420
+ static void
17421
+ parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
17422
+ parse_regular_expression_error_data_t *callback_data = (parse_regular_expression_error_data_t *) data;
17423
+ pm_location_t location;
17424
+
17425
+ if (callback_data->shared) {
17426
+ location = (pm_location_t) { .start = start, .end = end };
17427
+ } else {
17428
+ location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
17429
+ }
17430
+
17431
+ PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
17432
+ }
17433
+
17434
+ /**
17435
+ * Parse the errors for the regular expression and add them to the parser.
17436
+ */
17437
+ static void
17438
+ parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
17439
+ const pm_string_t *unescaped = &node->unescaped;
17440
+ parse_regular_expression_error_data_t error_data = {
17441
+ .parser = parser,
17442
+ .start = node->base.location.start,
17443
+ .end = node->base.location.end,
17444
+ .shared = unescaped->type == PM_STRING_SHARED
17445
+ };
17446
+
17447
+ pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), NULL, NULL, parse_regular_expression_error, &error_data);
17448
+ }
17449
+
17405
17450
/**
17406
17451
* Parse an expression that begins with the previous node that we just lexed.
17407
17452
*/
@@ -19523,13 +19568,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19523
19568
bool ascii_only = parser->current_regular_expression_ascii_only;
19524
19569
parser_lex(parser);
19525
19570
19526
- // If we hit an end, then we can create a regular expression node
19527
- // without interpolation, which can be represented more succinctly and
19528
- // more easily compiled.
19571
+ // If we hit an end, then we can create a regular expression
19572
+ // node without interpolation, which can be represented more
19573
+ // succinctly and more easily compiled.
19529
19574
if (accept1(parser, PM_TOKEN_REGEXP_END)) {
19530
- pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
19531
- pm_node_flag_set(node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->flags));
19532
- return node;
19575
+ pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
19576
+
19577
+ // If we're not immediately followed by a =~, then we want
19578
+ // to parse all of the errors at this point. If it is
19579
+ // followed by a =~, then it will get parsed higher up while
19580
+ // parsing the named captures as well.
19581
+ if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
19582
+ parse_regular_expression_errors(parser, node);
19583
+ }
19584
+
19585
+ pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
19586
+ return (pm_node_t *) node;
19533
19587
}
19534
19588
19535
19589
// If we get here, then we have interpolation so we'll need to create
@@ -20095,38 +20149,6 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
20095
20149
}
20096
20150
}
20097
20151
20098
- /**
20099
- * This struct is used to pass information between the regular expression parser
20100
- * and the error callback.
20101
- */
20102
- typedef struct {
20103
- pm_parser_t *parser;
20104
- const pm_string_t *content;
20105
- const pm_call_node_t *call;
20106
- } parse_regular_expression_error_data_t;
20107
-
20108
- /**
20109
- * This callback is called when the regular expression parser encounters a
20110
- * syntax error.
20111
- */
20112
- static void
20113
- parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
20114
- parse_regular_expression_error_data_t *callback_data = (parse_regular_expression_error_data_t *) data;
20115
-
20116
- pm_parser_t *parser = callback_data->parser;
20117
- const pm_string_t *content = callback_data->content;
20118
- const pm_call_node_t *call = callback_data->call;
20119
-
20120
- pm_location_t location;
20121
- if (content->type == PM_STRING_SHARED) {
20122
- location = (pm_location_t) { .start = start, .end = end };
20123
- } else {
20124
- location = call->receiver->location;
20125
- }
20126
-
20127
- PM_PARSER_ERR_FORMAT(parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
20128
- }
20129
-
20130
20152
/**
20131
20153
* Potentially change a =~ with a regular expression with named captures into a
20132
20154
* match write node.
@@ -20142,8 +20164,9 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
20142
20164
20143
20165
parse_regular_expression_error_data_t error_data = {
20144
20166
.parser = parser,
20145
- .content = content,
20146
- .call = call
20167
+ .start = call->receiver->location.start,
20168
+ .end = call->receiver->location.end,
20169
+ .shared = content->type == PM_STRING_SHARED
20147
20170
};
20148
20171
20149
20172
pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
0 commit comments