Skip to content

Commit 0af69bd

Browse files
committed
fix: handling escaped whitespace in a %w list
Introduces a new flavor of unescaping, YP_UNESCAPE_WHITESPACE, which is the same as MINIMAL but also unescapes whitespace. Note that a spanning_heredoc.txt fixture test is updated to be less wrong, but YARP's behavior doesn't yet fully match Ruby in this case. Fixes #1505
1 parent 0d8d1be commit 0af69bd

File tree

8 files changed

+45
-8
lines changed

8 files changed

+45
-8
lines changed

ext/yarp/extension.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,12 @@ unescape_minimal(VALUE self, VALUE source) {
491491
return unescape(source, YP_UNESCAPE_MINIMAL);
492492
}
493493

494+
// Unescape the given string minimally, plus escaped whitespace. Returns the unescaped string.
495+
static VALUE
496+
unescape_whitespace(VALUE self, VALUE source) {
497+
return unescape(source, YP_UNESCAPE_WHITESPACE);
498+
}
499+
494500
// Unescape everything in the given string. Return the unescaped string.
495501
static VALUE
496502
unescape_all(VALUE self, VALUE source) {
@@ -608,6 +614,7 @@ Init_yarp(void) {
608614
rb_define_singleton_method(rb_cYARPDebug, "named_captures", named_captures, 1);
609615
rb_define_singleton_method(rb_cYARPDebug, "unescape_none", unescape_none, 1);
610616
rb_define_singleton_method(rb_cYARPDebug, "unescape_minimal", unescape_minimal, 1);
617+
rb_define_singleton_method(rb_cYARPDebug, "unescape_whitespace", unescape_whitespace, 1);
611618
rb_define_singleton_method(rb_cYARPDebug, "unescape_all", unescape_all, 1);
612619
rb_define_singleton_method(rb_cYARPDebug, "memsize", memsize, 1);
613620
rb_define_singleton_method(rb_cYARPDebug, "profile_file", profile_file, 1);

include/yarp/unescape.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,13 @@ typedef enum {
2424
// single quotes and backslashes.
2525
YP_UNESCAPE_MINIMAL,
2626

27+
// When we're unescaping a string list, in addition to MINIMAL, we need to
28+
// unescape whitespace.
29+
YP_UNESCAPE_WHITESPACE,
30+
2731
// When we're unescaping a double-quoted string, we need to unescape all
2832
// escapes.
29-
YP_UNESCAPE_ALL
33+
YP_UNESCAPE_ALL,
3034
} yp_unescape_type_t;
3135

3236
// Unescape the contents of the given token into the given string using the given unescape mode.

src/unescape.c

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,17 @@ yp_unescape_manipulate_string_or_char_literal(yp_parser_t *parser, yp_string_t *
509509
cursor = backslash + 2;
510510
break;
511511
default:
512-
if (unescape_type == YP_UNESCAPE_MINIMAL) {
512+
if (unescape_type == YP_UNESCAPE_WHITESPACE) {
513+
if (backslash[1] == '\r' && backslash[2] == '\n') {
514+
cursor = backslash + 2;
515+
break;
516+
}
517+
if (yp_strspn_whitespace(backslash + 1, 1)) {
518+
cursor = backslash + 1;
519+
break;
520+
}
521+
}
522+
if (unescape_type == YP_UNESCAPE_WHITESPACE || unescape_type == YP_UNESCAPE_MINIMAL) {
513523
// In this case we're escaping something that doesn't need escaping.
514524
dest[dest_length++] = '\\';
515525
cursor = backslash + 1;
@@ -579,7 +589,16 @@ yp_unescape_calculate_difference(yp_parser_t *parser, const uint8_t *backslash,
579589
case '\'':
580590
return 2;
581591
default: {
582-
if (unescape_type == YP_UNESCAPE_MINIMAL) {
592+
if (unescape_type == YP_UNESCAPE_WHITESPACE) {
593+
if (backslash[1] == '\r' && backslash[2] == '\n') {
594+
return 2;
595+
}
596+
size_t whitespace = yp_strspn_whitespace(backslash + 1, 1);
597+
if (whitespace > 0) {
598+
return whitespace;
599+
}
600+
}
601+
if (unescape_type == YP_UNESCAPE_WHITESPACE || unescape_type == YP_UNESCAPE_MINIMAL) {
583602
return 1 + yp_char_width(parser, backslash + 1, parser->end);
584603
}
585604

src/yarp.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12958,7 +12958,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
1295812958

1295912959
yp_token_t opening = not_provided(parser);
1296012960
yp_token_t closing = not_provided(parser);
12961-
yp_node_t *string = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_MINIMAL);
12961+
yp_node_t *string = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_WHITESPACE);
1296212962
yp_array_node_elements_append(array, string);
1296312963
}
1296412964

test/yarp/snapshots/spanning_heredoc.txt

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/yarp/snapshots/strings.txt

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/yarp/snapshots/whitequark/parser_slash_slash_n_escaping_in_literals.txt

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/yarp/unescape_test.rb

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,13 @@ def test_escaping_normal_characters
136136
assert_unescape_all("g", "\\g")
137137
end
138138

139+
def test_whitespace_escaping_string_list
140+
assert_equal("a b", Debug.unescape_whitespace("a\\ b"))
141+
assert_equal("a\tb", Debug.unescape_whitespace("a\\\tb"))
142+
assert_equal("a\nb", Debug.unescape_whitespace("a\\\nb"))
143+
assert_equal("a\nb", Debug.unescape_whitespace("a\\\r\nb"))
144+
end
145+
139146
private
140147

141148
def unescape_all(source)

0 commit comments

Comments
 (0)