Skip to content

Commit

Permalink
Fix handling of control/meta escapes in literal regexps
Browse files Browse the repository at this point in the history
Ruby uses a recursive algorithm for handling control/meta escapes
in strings (read_escape).  However, the equivalent code for regexps
(tokadd_escape) did not use a recursive algorithm.  Due to this,
handling of control/meta escapes in regexps did not have the same
behavior as in strings, leading to behavior such as the following
returning nil:

```ruby
/\c\xFF/ =~ "\c\xFF"
```

Switch the code for handling \c, \C and \M in literal regexps to
use the same code as for strings (read_escape), to keep behavior
consistent between the two.

Fixes [Bug #14367]
  • Loading branch information
jeremyevans committed May 13, 2021
1 parent 9484f9e commit 11ae581
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 34 deletions.
50 changes: 17 additions & 33 deletions parse.y
Original file line number Diff line number Diff line change
Expand Up @@ -6902,10 +6902,8 @@ static int
tokadd_escape(struct parser_params *p, rb_encoding **encp)
{
int c;
int flags = 0;
size_t numlen;

first:
switch (c = nextc(p)) {
case '\n':
return 0; /* just ignore */
Expand All @@ -6928,37 +6926,6 @@ tokadd_escape(struct parser_params *p, rb_encoding **encp)
}
return 0;

case 'M':
if (flags & ESCAPE_META) goto eof;
if ((c = nextc(p)) != '-') {
pushback(p, c);
goto eof;
}
tokcopy(p, 3);
flags |= ESCAPE_META;
goto escaped;

case 'C':
if (flags & ESCAPE_CONTROL) goto eof;
if ((c = nextc(p)) != '-') {
pushback(p, c);
goto eof;
}
tokcopy(p, 3);
goto escaped;

case 'c':
if (flags & ESCAPE_CONTROL) goto eof;
tokcopy(p, 2);
flags |= ESCAPE_CONTROL;
escaped:
if ((c = nextc(p)) == '\\') {
goto first;
}
else if (c == -1) goto eof;
tokadd(p, c);
return 0;

eof:
case -1:
yyerror0("Invalid escape character syntax");
Expand Down Expand Up @@ -7151,6 +7118,23 @@ tokadd_string(struct parser_params *p,
goto non_ascii;
}
if (func & STR_FUNC_REGEXP) {
switch (c) {
case 'c':
case 'C':
case 'M': {
pushback(p, c);
c = read_escape(p, 0, enc);

int i;
char escbuf[5];
snprintf(escbuf, sizeof(escbuf), "\\x%02X", c);
for(i = 0; i < 4; i++) {
tokadd(p, escbuf[i]);
}
continue;
}
}

if (c == term && !simple_re_meta(c)) {
tokadd(p, c);
continue;
Expand Down
2 changes: 1 addition & 1 deletion spec/ruby/language/regexp/interpolation_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def o.to_s

# Spec: an escape sequence takes precedence over "#{...}" substitution, so the
# text "{str}" is expected to remain in the regexp's string form.
it "gives precedence to escape sequences over substitution" do
str = "J"
# NOTE(review): this hunk is shown without +/- markers and its header reports
# 1 addition & 1 deletion; presumably the "==" assertion below is the removed
# line and the "include" assertion is its replacement -- confirm against the commit.
/\c#{str}/.to_s.should == '(?-mix:\c#' + '{str})'
/\c#{str}/.to_s.should include('{str}')
end

it "throws RegexpError for malformed interpolation" do
Expand Down
18 changes: 18 additions & 0 deletions test/ruby/test_regexp.rb
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,24 @@ def test_initialize
assert_raise(RegexpError) { Regexp.new("((?<v>))\\g<0>") }
end

def test_match_control_meta_escape
  # Each pair spells the same control (\c, \C-) / meta (\M-) escape once as a
  # regexp literal and once as a string literal; the regexp must match the
  # string at offset 0.
  same_escape = [
    [/\c\xFF/,       "\c\xFF"],
    [/\c\M-\xFF/,    "\c\M-\xFF"],
    [/\C-\xFF/,      "\C-\xFF"],
    [/\C-\M-\xFF/,   "\C-\M-\xFF"],
    [/\M-\xFF/,      "\M-\xFF"],
    [/\M-\C-\xFF/,   "\M-\C-\xFF"],
    [/\M-\c\xFF/,    "\M-\c\xFF"],
  ]
  same_escape.each do |pattern, subject|
    assert_equal(0, pattern =~ subject)
  end

  # Same escape prefixes, but the regexp is built on \xFE while the string is
  # built on \xFF, so no match is expected.
  different_escape = [
    [/\c\xFE/,       "\c\xFF"],
    [/\c\M-\xFE/,    "\c\M-\xFF"],
    [/\C-\xFE/,      "\C-\xFF"],
    [/\C-\M-\xFE/,   "\C-\M-\xFF"],
    [/\M-\xFE/,      "\M-\xFF"],
    [/\M-\C-\xFE/,   "\M-\C-\xFF"],
    [/\M-\c\xFE/,    "\M-\c\xFF"],
  ]
  different_escape.each do |pattern, subject|
    assert_nil(pattern =~ subject)
  end
end

def test_unescape
assert_raise(ArgumentError) { s = '\\'; /#{ s }/ }
assert_equal(/\xFF/n, /#{ s="\\xFF" }/n)
Expand Down

0 comments on commit 11ae581

Please sign in to comment.