Skip to content

Commit

Permalink
Avoid race condition in Regexp#match
Browse files Browse the repository at this point in the history
In certain conditions, Regexp#match could return a MatchData with
missing captures.  This seems to require at the least, multiple
threads calling a method that calls the same block/proc/lambda
which calls Regexp#match.

The race condition happens because the MatchData is passed from
indirectly via the backref, and other threads can modify the
backref.

Fix the issue by:

1. Not reusing the existing MatchData from the backref, and always
   allocating a new MatchData.
2. Passing the MatchData directly to the caller using a VALUE*,
   instead of indirectly through the backref.

It's likely that variants of this issue exist for other Regexp
methods.  Anywhere that MatchData is passed implicitly through
the backref is probably vulnerable to this issue.

Fixes [Bug #17507]
  • Loading branch information
jeremyevans committed Oct 2, 2021
1 parent d087214 commit abc0304
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 27 deletions.
46 changes: 19 additions & 27 deletions re.c
Expand Up @@ -1615,8 +1615,8 @@ rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int reverse)
}

/* returns byte offset */
long
rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str)
static long
rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_backref_str, VALUE *set_match)
{
long result;
VALUE match;
Expand All @@ -1638,18 +1638,7 @@ rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str)
tmpreg = reg != RREGEXP_PTR(re);
if (!tmpreg) RREGEXP(re)->usecnt++;

match = rb_backref_get();
if (!NIL_P(match)) {
if (FL_TEST(match, MATCH_BUSY)) {
match = Qnil;
}
else {
regs = RMATCH_REGS(match);
}
}
if (NIL_P(match)) {
MEMZERO(regs, struct re_registers, 1);
}
MEMZERO(regs, struct re_registers, 1);
if (!reverse) {
range += len;
}
Expand Down Expand Up @@ -1682,24 +1671,28 @@ rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str)
}
}

if (NIL_P(match)) {
int err;
match = match_alloc(rb_cMatch);
err = rb_reg_region_copy(RMATCH_REGS(match), regs);
onig_region_free(regs, 0);
if (err) rb_memerror();
}
match = match_alloc(rb_cMatch);
int copy_err = rb_reg_region_copy(RMATCH_REGS(match), regs);
onig_region_free(regs, 0);
if (copy_err) rb_memerror();

if (set_backref_str) {
RMATCH(match)->str = rb_str_new4(str);
}

RMATCH(match)->regexp = re;
rb_backref_set(match);
if (set_match) *set_match = match;

return result;
}

long
rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str)
{
return rb_reg_search_set_match(re, str, pos, reverse, set_backref_str, NULL);
}

long
rb_reg_search(VALUE re, VALUE str, long pos, int reverse)
{
Expand Down Expand Up @@ -3193,7 +3186,7 @@ reg_operand(VALUE s, int check)
}

static long
reg_match_pos(VALUE re, VALUE *strp, long pos)
reg_match_pos(VALUE re, VALUE *strp, long pos, VALUE* set_match)
{
VALUE str = *strp;

Expand All @@ -3212,7 +3205,7 @@ reg_match_pos(VALUE re, VALUE *strp, long pos)
}
pos = rb_str_offset(str, pos);
}
return rb_reg_search(re, str, pos, 0);
return rb_reg_search_set_match(re, str, pos, 0, 1, set_match);
}

/*
Expand Down Expand Up @@ -3266,7 +3259,7 @@ reg_match_pos(VALUE re, VALUE *strp, long pos)
VALUE
rb_reg_match(VALUE re, VALUE str)
{
long pos = reg_match_pos(re, &str, 0);
long pos = reg_match_pos(re, &str, 0, NULL);
if (pos < 0) return Qnil;
pos = rb_str_sublen(str, pos);
return LONG2FIX(pos);
Expand Down Expand Up @@ -3377,7 +3370,7 @@ rb_reg_match2(VALUE re)
static VALUE
rb_reg_match_m(int argc, VALUE *argv, VALUE re)
{
VALUE result, str, initpos;
VALUE result = Qnil, str, initpos;
long pos;

if (rb_scan_args(argc, argv, "11", &str, &initpos) == 2) {
Expand All @@ -3387,12 +3380,11 @@ rb_reg_match_m(int argc, VALUE *argv, VALUE re)
pos = 0;
}

pos = reg_match_pos(re, &str, pos);
pos = reg_match_pos(re, &str, pos, &result);
if (pos < 0) {
rb_backref_set(Qnil);
return Qnil;
}
result = rb_backref_get();
rb_match_busy(result);
if (!NIL_P(result) && rb_block_given_p()) {
return rb_yield(result);
Expand Down
21 changes: 21 additions & 0 deletions test/ruby/test_regexp.rb
Expand Up @@ -265,6 +265,27 @@ def test_match_regexp
assert_equal(re, re.match("foo").regexp)
end

def test_match_lambda_multithread
bug17507 = "[ruby-core:101901]"
str = "a-x-foo-bar-baz-z-b"

worker = lambda do
m = /foo-([A-Za-z0-9_\.]+)-baz/.match(str)
assert_equal("bar", m[1], bug17507)

# These two lines are needed to trigger the bug
File.exist? "/tmp"
str.gsub(/foo-bar-baz/, "foo-abc-baz")
end

def self. threaded_test(worker)
6.times.map {Thread.new {10_000.times {worker.call}}}.each(&:join)
end

# The bug only occurs in a method calling a block/proc/lambda
threaded_test(worker)
end

def test_source
bug5484 = '[ruby-core:40364]'
assert_equal('', //.source)
Expand Down

0 comments on commit abc0304

Please sign in to comment.