Skip to content

Commit

Permalink
Always issue deprecation warning when calling Regexp.new with 3rd pos…
Browse files Browse the repository at this point in the history
…itional argument

Previously, only certain values of the 3rd argument triggered a
deprecation warning.

First step for fix for bug #18797.  Support for the 3rd argument
will be removed after the release of Ruby 3.2.

Fix minor fallout discovered by the tests.

Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
  • Loading branch information
jeremyevans and nobu committed Dec 22, 2022
1 parent 9dcee2d commit 7e8fa06
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 63 deletions.
2 changes: 1 addition & 1 deletion lib/racc/statetransitiontable.rb
Expand Up @@ -216,7 +216,7 @@ def mkmapexp(arr)
end
i = ii
end
Regexp.compile(map, nil, 'n')
Regexp.compile(map, Regexp::NOENCODING)
end

def set_table(entries, dummy, tbl, chk, ptr)
Expand Down
24 changes: 10 additions & 14 deletions re.c
Expand Up @@ -3759,10 +3759,11 @@ struct reg_init_args {

static VALUE reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args);
static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags);
void rb_warn_deprecated_to_remove(const char *removal, const char *fmt, const char *suggest, ...);

/*
* call-seq:
* Regexp.new(string, options = 0, n_flag = nil, timeout: nil) -> regexp
* Regexp.new(string, options = 0, timeout: nil) -> regexp
* Regexp.new(regexp, timeout: nil) -> regexp
*
* With argument +string+ given, returns a new regexp with the given string
Expand All @@ -3780,24 +3781,18 @@ static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags);
* Regexp.new('foo', 'im') # => /foo/im
*
* - The logical OR of one or more of the constants
* Regexp::EXTENDED, Regexp::IGNORECASE, and Regexp::MULTILINE:
* Regexp::EXTENDED, Regexp::IGNORECASE, Regexp::MULTILINE, and
* Regexp::NOENCODING:
*
* Regexp.new('foo', Regexp::IGNORECASE) # => /foo/i
* Regexp.new('foo', Regexp::EXTENDED) # => /foo/x
* Regexp.new('foo', Regexp::MULTILINE) # => /foo/m
* Regexp.new('foo', Regexp::NOENCODING) # => /foo/n
* flags = Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE
* Regexp.new('foo', flags) # => /foo/mix
*
* - +nil+ or +false+, which is ignored.
*
* If optional argument +n_flag+ if it is a string starts with
* <code>'n'</code> or <code>'N'</code>, the encoding of +string+ is
* ignored and the new regexp encoding is fixed to +ASCII-8BIT+ or
* +US-ASCII+, by its content.
*
* Regexp.new('foo', nil, 'n') # => /foo/n
* Regexp.new("\u3042", nil, 'n') # => /\xE3\x81\x82/n
*
* If optional keyword argument +timeout+ is given,
* its float value overrides the timeout interval for the class,
* Regexp.timeout.
Expand Down Expand Up @@ -3841,7 +3836,7 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
VALUE str, src, opts = Qundef, n_flag = Qundef, kwargs;
VALUE re = Qnil;

rb_scan_args(argc, argv, "12:", &src, &opts, &n_flag, &kwargs);
argc = rb_scan_args(argc, argv, "12:", &src, &opts, &n_flag, &kwargs);

args->timeout = Qnil;
if (!NIL_P(kwargs)) {
Expand All @@ -3852,6 +3847,10 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
rb_get_kwargs(kwargs, keywords, 0, 1, &args->timeout);
}

if (argc == 3) {
rb_warn_deprecated_to_remove("3.3", "3rd argument to Regexp.new", "2nd argument");
}

if (RB_TYPE_P(src, T_REGEXP)) {
re = src;

Expand All @@ -3876,9 +3875,6 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
enc = rb_ascii8bit_encoding();
flags |= ARG_ENCODING_NONE;
}
else {
rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "encoding option is ignored - %s", kcode);
}
}
str = StringValue(src);
}
Expand Down
82 changes: 43 additions & 39 deletions spec/ruby/core/regexp/shared/new.rb
Expand Up @@ -197,48 +197,50 @@ def obj.to_int() ScratchPad.record(:called) end
end
end

it "ignores the third argument if it is 'e' or 'euc' (case-insensitive)" do
-> {
Regexp.send(@method, 'Hi', nil, 'e').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'euc').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'E').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'EUC').encoding.should == Encoding::US_ASCII
}.should complain(/encoding option is ignored/)
end
ruby_version_is ""..."3.2" do
it "ignores the third argument if it is 'e' or 'euc' (case-insensitive)" do
-> {
Regexp.send(@method, 'Hi', nil, 'e').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'euc').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'E').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'EUC').encoding.should == Encoding::US_ASCII
}.should complain(/encoding option is ignored/)
end

it "ignores the third argument if it is 's' or 'sjis' (case-insensitive)" do
-> {
Regexp.send(@method, 'Hi', nil, 's').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'sjis').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'S').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'SJIS').encoding.should == Encoding::US_ASCII
}.should complain(/encoding option is ignored/)
end
it "ignores the third argument if it is 's' or 'sjis' (case-insensitive)" do
-> {
Regexp.send(@method, 'Hi', nil, 's').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'sjis').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'S').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'SJIS').encoding.should == Encoding::US_ASCII
}.should complain(/encoding option is ignored/)
end

it "ignores the third argument if it is 'u' or 'utf8' (case-insensitive)" do
-> {
Regexp.send(@method, 'Hi', nil, 'u').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'utf8').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'U').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'UTF8').encoding.should == Encoding::US_ASCII
}.should complain(/encoding option is ignored/)
end
it "ignores the third argument if it is 'u' or 'utf8' (case-insensitive)" do
-> {
Regexp.send(@method, 'Hi', nil, 'u').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'utf8').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'U').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'UTF8').encoding.should == Encoding::US_ASCII
}.should complain(/encoding option is ignored/)
end

it "uses US_ASCII encoding if third argument is 'n' or 'none' (case insensitive) and only ascii characters" do
Regexp.send(@method, 'Hi', nil, 'n').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'none').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'N').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'NONE').encoding.should == Encoding::US_ASCII
end
it "uses US_ASCII encoding if third argument is 'n' or 'none' (case insensitive) and only ascii characters" do
Regexp.send(@method, 'Hi', nil, 'n').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'none').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'N').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'NONE').encoding.should == Encoding::US_ASCII
end

it "uses ASCII_8BIT encoding if third argument is 'n' or 'none' (case insensitive) and non-ascii characters" do
a = "(?:[\x8E\xA1-\xFE])"
str = "\A(?:#{a}|x*)\z"
it "uses ASCII_8BIT encoding if third argument is 'n' or 'none' (case insensitive) and non-ascii characters" do
a = "(?:[\x8E\xA1-\xFE])"
str = "\A(?:#{a}|x*)\z"

Regexp.send(@method, str, nil, 'N').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'n').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'none').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'NONE').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'N').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'n').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'none').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'NONE').encoding.should == Encoding::BINARY
end
end

describe "with escaped characters" do
Expand Down Expand Up @@ -598,8 +600,10 @@ def obj.to_int() ScratchPad.record(:called) end
Regexp.send(@method, /Hi/n).encoding.should == Encoding::US_ASCII
end

it "sets the encoding to source String's encoding if the Regexp literal has the 'n' option and the source String is not ASCII only" do
Regexp.send(@method, Regexp.new("\\xff", nil, 'n')).encoding.should == Encoding::BINARY
ruby_version_is ''...'3.2' do
it "sets the encoding to source String's encoding if the Regexp literal has the 'n' option and the source String is not ASCII only" do
Regexp.send(@method, Regexp.new("\\xff", nil, 'n')).encoding.should == Encoding::BINARY
end
end
end
end
2 changes: 1 addition & 1 deletion test/psych/test_yaml.rb
Expand Up @@ -34,7 +34,7 @@ def test_multiline_regexp

# [ruby-core:34969]
def test_regexp_with_n
assert_cycle(Regexp.new('',0,'n'))
assert_cycle(Regexp.new('',Regexp::NOENCODING))
end
#
# Tests modified from 00basic.t in Psych.pm
Expand Down
40 changes: 32 additions & 8 deletions test/ruby/test_regexp.rb
Expand Up @@ -42,14 +42,14 @@ def test_ruby_dev_24887

def test_yoshidam_net_20041111_1
s = "[\xC2\xA0-\xC3\xBE]"
r = assert_deprecated_warning(/ignored/) {Regexp.new(s, nil, "u")}
r = assert_deprecated_warning(/3\.3/) {Regexp.new(s, nil, "u")}
assert_match(r, "\xC3\xBE")
end

def test_yoshidam_net_20041111_2
assert_raise(RegexpError) do
s = "[\xFF-\xFF]".force_encoding("utf-8")
assert_warning(/ignored/) {Regexp.new(s, nil, "u")}
assert_warning(/3\.3/) {Regexp.new(s, nil, "u")}
end
end

Expand Down Expand Up @@ -646,13 +646,37 @@ def test_initialize
assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/)})
assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/, timeout: nil)})

assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n").encoding)
assert_equal("bar", "foobarbaz"[Regexp.new("b..", nil, "n")])
assert_equal(//n, Regexp.new("", nil, "n"))
arg_encoding_none = //n.options # ARG_ENCODING_NONE is implementation defined value

arg_encoding_none = 32 # ARG_ENCODING_NONE is implementation defined value
assert_equal(arg_encoding_none, Regexp.new("", nil, "n").options)
assert_equal(arg_encoding_none, Regexp.new("", nil, "N").options)
assert_deprecated_warning('') do
assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", Regexp::NOENCODING).encoding)
assert_equal("bar", "foobarbaz"[Regexp.new("b..", Regexp::NOENCODING)])
assert_equal(//, Regexp.new(""))
assert_equal(//, Regexp.new("", timeout: 1))
assert_equal(//n, Regexp.new("", Regexp::NOENCODING))
assert_equal(//n, Regexp.new("", Regexp::NOENCODING, timeout: 1))

assert_equal(arg_encoding_none, Regexp.new("", Regexp::NOENCODING).options)
end

assert_deprecated_warning(/3\.3/) do
assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n").encoding)
end
assert_deprecated_warning(/3\.3/) do
assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n", timeout: 1).encoding)
end
assert_deprecated_warning(/3\.3/) do
assert_equal("bar", "foobarbaz"[Regexp.new("b..", nil, "n")])
end
assert_deprecated_warning(/3\.3/) do
assert_equal(//n, Regexp.new("", nil, "n"))
end
assert_deprecated_warning(/3\.3/) do
assert_equal(arg_encoding_none, Regexp.new("", nil, "n").options)
end
assert_deprecated_warning(/3\.3/) do
assert_equal(arg_encoding_none, Regexp.new("", nil, "N").options)
end

assert_raise(RegexpError) { Regexp.new(")(") }
assert_raise(RegexpError) { Regexp.new('[\\40000000000') }
Expand Down

0 comments on commit 7e8fa06

Please sign in to comment.