From cb899a990a02ad86ffc27fa17308fe514cc6415c Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 6 Nov 2022 10:16:12 +0900 Subject: [PATCH 001/104] Disable YJIT support when cross-compiling As the target-list of `rustc` is different from `config.guess` and `config.sub`, `$target` cannot be used directly. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 9e2ba81a5101f9..bb83865010f165 100644 --- a/configure.ac +++ b/configure.ac @@ -3768,6 +3768,7 @@ AC_ARG_ENABLE(yjit, CARGO= CARGO_BUILD_ARGS= YJIT_LIBS= +AS_IF([test "$cross_compiling" = yes], [YJIT_SUPPORT=no]) AS_CASE(["${YJIT_SUPPORT}"], [yes|dev|stats|dev_nodebug], [ AS_IF([test x"$enable_jit_support" = "xno"], @@ -3776,7 +3777,6 @@ AS_CASE(["${YJIT_SUPPORT}"], AS_IF([test x"$RUSTC" = "xno"], AC_MSG_ERROR([rustc is required. Installation instructions available at https://www.rust-lang.org/tools/install]) ) - AS_IF([test "$cross_compiling" = yes], [RUSTC="$RUSTC --target=$target"]) AS_CASE(["${YJIT_SUPPORT}"], [yes], [ From 9627aab82524e71b702479b4fa5e24b36cced398 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 6 Nov 2022 15:59:32 +0900 Subject: [PATCH 002/104] `--disable-jit-support` should disable YJIT successfully Even if `rustc` is available, it should not be an error unless `--enable-yjit` is explicitly given. 
--- common.mk | 2 +- configure.ac | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/common.mk b/common.mk index aea4d02c5fa1d0..88934378f4bf35 100644 --- a/common.mk +++ b/common.mk @@ -303,7 +303,7 @@ showflags: " LC_ALL = $(LC_ALL)" \ " LC_CTYPE = $(LC_CTYPE)" \ " MFLAGS = $(MFLAGS)" \ - " RUST = $(RUST)" \ + " RUSTC = $(RUSTC)" \ " YJIT_RUSTC_ARGS = $(YJIT_RUSTC_ARGS)" \ $(MESSAGE_END) -@$(CC_VERSION) diff --git a/configure.ac b/configure.ac index bb83865010f165..8c176db836f3c5 100644 --- a/configure.ac +++ b/configure.ac @@ -3754,15 +3754,19 @@ AC_ARG_ENABLE(yjit, AS_HELP_STRING([--enable-yjit], [enable experimental in-process JIT compiler that requires Rust build tools [default=no]]), [YJIT_SUPPORT=$enableval], - [ - AS_IF([test x"$RUSTC" != "xno"], + [AS_CASE(["$enable_jit_support:$YJIT_TARGET_OK:$RUSTC"], + [no:*|yes:no:*|yes:yes:no], [ + YJIT_SUPPORT=no + ], + [yes:yes:*], [ AS_IF([ echo "fn main() { let x = 1; format!(\"{x}\"); }" | $RUSTC - --emit asm=/dev/null ], - [YJIT_SUPPORT="$YJIT_TARGET_OK"], + [YJIT_SUPPORT=yes], [YJIT_SUPPORT=no] - ), + ) + ], [ [YJIT_SUPPORT=no] - ) - ] + ] + )] ) CARGO= From fc842c9ccc2059e20ec6487b6678e6b71cbd77e1 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 6 Nov 2022 19:32:50 +0900 Subject: [PATCH 003/104] Check `rustc` with the target --- configure.ac | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/configure.ac b/configure.ac index 8c176db836f3c5..85e1359f5b59d0 100644 --- a/configure.ac +++ b/configure.ac @@ -3736,17 +3736,28 @@ AC_SUBST(MJIT_SUPPORT) AC_CHECK_PROG(RUSTC, [rustc], [rustc], [no]) dnl no ac_tool_prefix dnl check if we can build YJIT on this target platform -AS_CASE(["$target_cpu-$target_os"], - [arm64-darwin*|aarch64-darwin*|x86_64-darwin*], [ - YJIT_TARGET_OK=yes +AS_CASE(["$target_cpu"], + [arm64|aarch64], [ + YJIT_TARGET=aarch64 + ], + [x86_64], [ + YJIT_TARGET="$target_cpu" + ], + [YJIT_TARGET=] +) 
+AS_CASE(["$YJIT_TARGET:$target_os"], + [:*], [ # unsupported CPU + ], + [darwin*], [ + YJIT_TARGET=${YJIT_TARGET}-apple-darwin ], - [arm64-*linux*|aarch64-*linux*|x86_64-*linux*], [ - YJIT_TARGET_OK=yes + [linux-android], [ # no target_vendor + YJIT_TARGET=${YJIT_TARGET}-${target_os} ], - [arm64-*bsd*|aarch64-*bsd*|x86_64-*bsd*], [ - YJIT_TARGET_OK=yes + [*linux*], [ + YJIT_TARGET=${YJIT_TARGET}-${target_vendor}-${target_os} ], - [YJIT_TARGET_OK=no] + [YJIT_TARGET=] ) dnl build YJIT in release mode if rustc >= 1.58.0 is present and we are on a supported platform @@ -3754,12 +3765,12 @@ AC_ARG_ENABLE(yjit, AS_HELP_STRING([--enable-yjit], [enable experimental in-process JIT compiler that requires Rust build tools [default=no]]), [YJIT_SUPPORT=$enableval], - [AS_CASE(["$enable_jit_support:$YJIT_TARGET_OK:$RUSTC"], - [no:*|yes:no:*|yes:yes:no], [ + [AS_CASE(["$enable_jit_support:$YJIT_TARGET:$RUSTC"], + [no:*|yes::*|yes:*:no], [ YJIT_SUPPORT=no ], [yes:yes:*], [ - AS_IF([ echo "fn main() { let x = 1; format!(\"{x}\"); }" | $RUSTC - --emit asm=/dev/null ], + AS_IF([ echo "fn main() { let x = 1; format!(\"{x}\"); }" | $RUSTC - --target=$YJIT_TARGET --emit asm=/dev/null ], [YJIT_SUPPORT=yes], [YJIT_SUPPORT=no] ) @@ -3772,7 +3783,6 @@ AC_ARG_ENABLE(yjit, CARGO= CARGO_BUILD_ARGS= YJIT_LIBS= -AS_IF([test "$cross_compiling" = yes], [YJIT_SUPPORT=no]) AS_CASE(["${YJIT_SUPPORT}"], [yes|dev|stats|dev_nodebug], [ AS_IF([test x"$enable_jit_support" = "xno"], From 12883f8fa6222324880e2b0f161f8c6d6cf365c7 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 6 Nov 2022 23:39:55 +0900 Subject: [PATCH 004/104] GNU make 4.4 now uses a fifo for the jobserver --- tool/lib/test/unit.rb | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tool/lib/test/unit.rb b/tool/lib/test/unit.rb index 59409f016c85be..fb1a53ce0565c3 100644 --- a/tool/lib/test/unit.rb +++ b/tool/lib/test/unit.rb @@ -286,10 +286,15 @@ def non_options(files, options) @jobserver = nil 
makeflags = ENV.delete("MAKEFLAGS") if !options[:parallel] and - /(?:\A|\s)--jobserver-(?:auth|fds)=(\d+),(\d+)/ =~ makeflags + /(?:\A|\s)--jobserver-(?:auth|fds)=(?:(\d+),(\d+)|fifo:(.*))/ =~ makeflags begin - r = IO.for_fd($1.to_i(10), "rb", autoclose: false) - w = IO.for_fd($2.to_i(10), "wb", autoclose: false) + if fifo = $3 + r = File.open(fifo, IO::RDONLY|IO::NONBLOCK|IO::BINARY, autoclose: false) + w = File.open(fifo, IO::WRONLY|IO::NONBLOCK|IO::BINARY, autoclose: false) + else + r = IO.for_fd($1.to_i(10), "rb", autoclose: false) + w = IO.for_fd($2.to_i(10), "wb", autoclose: false) + end rescue r.close if r nil From 180d37c817dc55f27b7f4ea1278a36a691491148 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 7 Nov 2022 09:06:15 +0900 Subject: [PATCH 005/104] Set `autoclose:` for inherited FDs only [ci skip] --- tool/lib/test/unit.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tool/lib/test/unit.rb b/tool/lib/test/unit.rb index fb1a53ce0565c3..80d69dd38aa4b0 100644 --- a/tool/lib/test/unit.rb +++ b/tool/lib/test/unit.rb @@ -289,8 +289,8 @@ def non_options(files, options) /(?:\A|\s)--jobserver-(?:auth|fds)=(?:(\d+),(\d+)|fifo:(.*))/ =~ makeflags begin if fifo = $3 - r = File.open(fifo, IO::RDONLY|IO::NONBLOCK|IO::BINARY, autoclose: false) - w = File.open(fifo, IO::WRONLY|IO::NONBLOCK|IO::BINARY, autoclose: false) + r = File.open(fifo, IO::RDONLY|IO::NONBLOCK|IO::BINARY) + w = File.open(fifo, IO::WRONLY|IO::NONBLOCK|IO::BINARY) else r = IO.for_fd($1.to_i(10), "rb", autoclose: false) w = IO.for_fd($2.to_i(10), "wb", autoclose: false) From ab01b8f23f1e218fa5a360806120a1d6eaaa228f Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 7 Nov 2022 09:43:45 +0900 Subject: [PATCH 006/104] jobserver option may not be at the last --- tool/lib/test/unit.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tool/lib/test/unit.rb b/tool/lib/test/unit.rb index 80d69dd38aa4b0..0449fa215a0078 100644 --- 
a/tool/lib/test/unit.rb +++ b/tool/lib/test/unit.rb @@ -286,9 +286,10 @@ def non_options(files, options) @jobserver = nil makeflags = ENV.delete("MAKEFLAGS") if !options[:parallel] and - /(?:\A|\s)--jobserver-(?:auth|fds)=(?:(\d+),(\d+)|fifo:(.*))/ =~ makeflags + /(?:\A|\s)--jobserver-(?:auth|fds)=(?:(\d+),(\d+)|fifo:((?:\\.|\S)+))/ =~ makeflags begin if fifo = $3 + fifo.gsub!(/\\(?=.)/, '') r = File.open(fifo, IO::RDONLY|IO::NONBLOCK|IO::BINARY) w = File.open(fifo, IO::WRONLY|IO::NONBLOCK|IO::BINARY) else From b02b8e77562661865b380d78e7621edfd8149a8f Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 7 Nov 2022 10:08:30 +0900 Subject: [PATCH 007/104] Let other test runners follow the change of GNU make 4.4 jobserver --- bootstraptest/runner.rb | 12 +++++++++--- spec/default.mspec | 14 +++++++++++--- spec/ruby/optional/capi/spec_helper.rb | 14 +++++++++++--- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/bootstraptest/runner.rb b/bootstraptest/runner.rb index 1d219be71efbe7..f9b3e919b89e89 100755 --- a/bootstraptest/runner.rb +++ b/bootstraptest/runner.rb @@ -108,10 +108,16 @@ def putc(c) def wn=(wn) unless wn == 1 - if /(?:\A|\s)--jobserver-(?:auth|fds)=\K(\d+),(\d+)/ =~ ENV.delete("MAKEFLAGS") + if /(?:\A|\s)--jobserver-(?:auth|fds)=(?:(\d+),(\d+)|fifo:((?:\\.|\S)+))/ =~ ENV.delete("MAKEFLAGS") begin - r = IO.for_fd($1.to_i(10), "rb", autoclose: false) - w = IO.for_fd($2.to_i(10), "wb", autoclose: false) + if fifo = $3 + fifo.gsub!(/\\(?=.)/, '') + r = File.open(fifo, IO::RDONLY|IO::NONBLOCK|IO::BINARY) + w = File.open(fifo, IO::WRONLY|IO::NONBLOCK|IO::BINARY) + else + r = IO.for_fd($1.to_i(10), "rb", autoclose: false) + w = IO.for_fd($2.to_i(10), "wb", autoclose: false) + end rescue => e r.close if r else diff --git a/spec/default.mspec b/spec/default.mspec index 0dba98306c1a33..33ac691b9421dd 100644 --- a/spec/default.mspec +++ b/spec/default.mspec @@ -29,11 +29,19 @@ end module MSpecScript::JobServer def cores(max = 1) - if max > 1 and 
/(?:\A|\s)--jobserver-(?:auth|fds)=(\d+),(\d+)/ =~ ENV["MAKEFLAGS"] + if max > 1 and /(?:\A|\s)--jobserver-(?:auth|fds)=(?:(\d+),(\d+)|fifo:((?:\\.|\S)+))/ =~ ENV["MAKEFLAGS"] cores = 1 begin - r = IO.for_fd($1.to_i(10), "rb", autoclose: false) - w = IO.for_fd($2.to_i(10), "wb", autoclose: false) + if fifo = $3 + fifo.gsub!(/\\(?=.)/, '') + r = File.open(fifo, IO::RDONLY|IO::NONBLOCK|IO::BINARY) + w = File.open(fifo, IO::WRONLY|IO::NONBLOCK|IO::BINARY) + else + r = IO.for_fd($1.to_i(10), "rb", autoclose: false) + w = IO.for_fd($2.to_i(10), "wb", autoclose: false) + end + r.close_on_exec = true + w.close_on_exec = true jobtokens = r.read_nonblock(max - 1) cores = jobtokens.size if cores > 0 diff --git a/spec/ruby/optional/capi/spec_helper.rb b/spec/ruby/optional/capi/spec_helper.rb index ec6b9093977c3d..9c857519746b64 100644 --- a/spec/ruby/optional/capi/spec_helper.rb +++ b/spec/ruby/optional/capi/spec_helper.rb @@ -113,12 +113,20 @@ def setup_make end opts = {} - if /(?:\A|\s)--jobserver-(?:auth|fds)=(\d+),(\d+)/ =~ make_flags + if /(?:\A|\s)--jobserver-(?:auth|fds)=(?:(\d+),(\d+)|fifo:((?:\\.|\S)+))/ =~ make_flags begin - r = IO.for_fd($1.to_i(10), "rb", autoclose: false) - w = IO.for_fd($2.to_i(10), "wb", autoclose: false) + if fifo = $3 + fifo.gsub!(/\\(?=.)/, '') + r = File.open(fifo, IO::RDONLY|IO::NONBLOCK|IO::BINARY) + w = File.open(fifo, IO::WRONLY|IO::NONBLOCK|IO::BINARY) + else + r = IO.for_fd($1.to_i(10), "rb", autoclose: false) + w = IO.for_fd($2.to_i(10), "wb", autoclose: false) + end rescue Errno::EBADF else + r.close_on_exec = true + w.close_on_exec = true opts[r] = r opts[w] = w end From ee86b57ee5df34390ba1d20343593adf8331c010 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 7 Nov 2022 15:51:04 +0900 Subject: [PATCH 008/104] Revert jobserver handling in spec --- spec/default.mspec | 14 +++----------- spec/ruby/optional/capi/spec_helper.rb | 14 +++----------- 2 files changed, 6 insertions(+), 22 deletions(-) diff --git 
a/spec/default.mspec b/spec/default.mspec index 33ac691b9421dd..0dba98306c1a33 100644 --- a/spec/default.mspec +++ b/spec/default.mspec @@ -29,19 +29,11 @@ end module MSpecScript::JobServer def cores(max = 1) - if max > 1 and /(?:\A|\s)--jobserver-(?:auth|fds)=(?:(\d+),(\d+)|fifo:((?:\\.|\S)+))/ =~ ENV["MAKEFLAGS"] + if max > 1 and /(?:\A|\s)--jobserver-(?:auth|fds)=(\d+),(\d+)/ =~ ENV["MAKEFLAGS"] cores = 1 begin - if fifo = $3 - fifo.gsub!(/\\(?=.)/, '') - r = File.open(fifo, IO::RDONLY|IO::NONBLOCK|IO::BINARY) - w = File.open(fifo, IO::WRONLY|IO::NONBLOCK|IO::BINARY) - else - r = IO.for_fd($1.to_i(10), "rb", autoclose: false) - w = IO.for_fd($2.to_i(10), "wb", autoclose: false) - end - r.close_on_exec = true - w.close_on_exec = true + r = IO.for_fd($1.to_i(10), "rb", autoclose: false) + w = IO.for_fd($2.to_i(10), "wb", autoclose: false) jobtokens = r.read_nonblock(max - 1) cores = jobtokens.size if cores > 0 diff --git a/spec/ruby/optional/capi/spec_helper.rb b/spec/ruby/optional/capi/spec_helper.rb index 9c857519746b64..ec6b9093977c3d 100644 --- a/spec/ruby/optional/capi/spec_helper.rb +++ b/spec/ruby/optional/capi/spec_helper.rb @@ -113,20 +113,12 @@ def setup_make end opts = {} - if /(?:\A|\s)--jobserver-(?:auth|fds)=(?:(\d+),(\d+)|fifo:((?:\\.|\S)+))/ =~ make_flags + if /(?:\A|\s)--jobserver-(?:auth|fds)=(\d+),(\d+)/ =~ make_flags begin - if fifo = $3 - fifo.gsub!(/\\(?=.)/, '') - r = File.open(fifo, IO::RDONLY|IO::NONBLOCK|IO::BINARY) - w = File.open(fifo, IO::WRONLY|IO::NONBLOCK|IO::BINARY) - else - r = IO.for_fd($1.to_i(10), "rb", autoclose: false) - w = IO.for_fd($2.to_i(10), "wb", autoclose: false) - end + r = IO.for_fd($1.to_i(10), "rb", autoclose: false) + w = IO.for_fd($2.to_i(10), "wb", autoclose: false) rescue Errno::EBADF else - r.close_on_exec = true - w.close_on_exec = true opts[r] = r opts[w] = w end From 011d4c57d21220249600dfb76db84840550da019 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 7 Nov 2022 13:23:23 +0900 Subject: [PATCH 
009/104] [Bug #19106] Normalize time at 24:00:00 with a timezone object --- test/ruby/test_time_tz.rb | 5 +++++ time.c | 24 ++++++++++++++++-------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/test/ruby/test_time_tz.rb b/test/ruby/test_time_tz.rb index 6ae12dea5de622..6fdb95bafe7748 100644 --- a/test/ruby/test_time_tz.rb +++ b/test/ruby/test_time_tz.rb @@ -612,6 +612,11 @@ def subtest_new(time_class, tz, tzarg, tzname, abbr, utc_offset) assert_raise(ArgumentError) {time_class.new(2018, 9, 1, 12, 0, 0, tzarg, in: tzarg)} end + def subtest_hour24(time_class, tz, tzarg, tzname, abbr, utc_offset) + t = time_class.new(2000, 1, 1, 24, 0, 0, tzarg) + assert_equal([0, 0, 0, 2, 1, 2000], [t.sec, t.min, t.hour, t.mday, t.mon, t.year]) + end + def subtest_now(time_class, tz, tzarg, tzname, abbr, utc_offset) t = time_class.now(in: tzarg) assert_equal(tz, t.zone) diff --git a/time.c b/time.c index b16a0865378ce7..b3604dd48874af 100644 --- a/time.c +++ b/time.c @@ -2331,6 +2331,19 @@ find_timezone(VALUE time, VALUE zone) return rb_check_funcall_default(klass, id_find_timezone, 1, &zone, Qnil); } +/* Turn the special case 24:00:00 of already validated vtm into + * 00:00:00 the next day */ +static void +vtm_day_wraparound(struct vtm *vtm) +{ + if (vtm->hour < 24) return; + + /* Assuming UTC and no care of DST, just reset hour and advance + * date, not to discard the validated vtm. 
*/ + vtm->hour = 0; + vtm_add_day(vtm, 1); +} + static VALUE time_init_args(rb_execution_context_t *ec, VALUE time, VALUE year, VALUE mon, VALUE mday, VALUE hour, VALUE min, VALUE sec, VALUE zone) { @@ -2386,6 +2399,7 @@ time_init_args(rb_execution_context_t *ec, VALUE time, VALUE year, VALUE mon, VA if (!NIL_P(zone)) { tobj->timew = timegmw(&vtm); + vtm_day_wraparound(&vtm); tobj->vtm = vtm; tobj->tm_got = 1; TZMODE_SET_LOCALTIME(tobj); @@ -2400,13 +2414,7 @@ time_init_args(rb_execution_context_t *ec, VALUE time, VALUE year, VALUE mon, VA if (utc == UTC_ZONE) { tobj->timew = timegmw(&vtm); - if (vtm.hour == 24) { /* special case: 24:00:00 only */ - /* Since no need to take care of DST in UTC, just reset - * hour and advance date, not to discard the validated - * vtm. */ - vtm.hour = 0; - vtm_add_day(&vtm, 1); - } + vtm_day_wraparound(&vtm); tobj->vtm = vtm; tobj->tm_got = 1; TZMODE_SET_UTC(tobj); @@ -4105,7 +4113,7 @@ time_inspect(VALUE time) GetTimeval(time, tobj); str = strftimev("%Y-%m-%d %H:%M:%S", time, rb_usascii_encoding()); subsec = w2v(wmod(tobj->timew, WINT2FIXWV(TIME_SCALE))); - if (FIXNUM_P(subsec) && FIX2LONG(subsec) == 0) { + if (subsec == INT2FIX(0)) { } else if (FIXNUM_P(subsec) && FIX2LONG(subsec) < TIME_SCALE) { long len; From 0fb7a1c77c8d080f1d5099e07ed705cfd652f029 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Thu, 3 Nov 2022 14:41:40 -0400 Subject: [PATCH 010/104] [rubygems/rubygems] Drop support for HP-UX Support for HP-UX was dropped in Ruby in ruby/ruby#5457. 
https://github.com/rubygems/rubygems/commit/a3a8df3582 --- lib/bundler/vendor/thor/lib/thor/shell/basic.rb | 2 +- lib/rubygems/platform.rb | 1 - test/rubygems/test_gem_platform.rb | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/bundler/vendor/thor/lib/thor/shell/basic.rb b/lib/bundler/vendor/thor/lib/thor/shell/basic.rb index 8eff00bf3db4e8..6ffb21672ffe5b 100644 --- a/lib/bundler/vendor/thor/lib/thor/shell/basic.rb +++ b/lib/bundler/vendor/thor/lib/thor/shell/basic.rb @@ -425,7 +425,7 @@ def dynamic_width_tput end def unix? - RUBY_PLATFORM =~ /(aix|darwin|linux|(net|free|open)bsd|cygwin|solaris|irix|hpux)/i + RUBY_PLATFORM =~ /(aix|darwin|linux|(net|free|open)bsd|cygwin|solaris|irix)/i end def truncate(string, width) diff --git a/lib/rubygems/platform.rb b/lib/rubygems/platform.rb index 6f4ead1af81639..93330c2865a4ac 100644 --- a/lib/rubygems/platform.rb +++ b/lib/rubygems/platform.rb @@ -97,7 +97,6 @@ def initialize(arch) when /darwin(\d+)?/ then [ "darwin", $1 ] when /^macruby$/ then [ "macruby", nil ] when /freebsd(\d+)?/ then [ "freebsd", $1 ] - when /hpux(\d+)?/ then [ "hpux", $1 ] when /^java$/, /^jruby$/ then [ "java", nil ] when /^java([\d.]*)/ then [ "java", $1 ] when /^dalvik(\d+)?$/ then [ "dalvik", $1 ] diff --git a/test/rubygems/test_gem_platform.rb b/test/rubygems/test_gem_platform.rb index ecb902ddbe8f08..6e8d598c3350c9 100644 --- a/test/rubygems/test_gem_platform.rb +++ b/test/rubygems/test_gem_platform.rb @@ -86,7 +86,6 @@ def test_self_new def test_initialize test_cases = { "amd64-freebsd6" => ["amd64", "freebsd", "6"], - "hppa2.0w-hpux11.31" => ["hppa2.0w", "hpux", "11"], "java" => [nil, "java", nil], "jruby" => [nil, "java", nil], "universal-dotnet" => ["universal", "dotnet", nil], From 1e53ebae57aa8bfb6724f346099e35757184cfd7 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Thu, 3 Nov 2022 14:43:03 -0400 Subject: [PATCH 011/104] [rubygems/rubygems] Drop support for bitrig The bitrig OS is no longer maintained with the last 
release being 7 years ago. https://github.com/rubygems/rubygems/commit/85ed90ddd0 --- lib/rubygems/platform.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/rubygems/platform.rb b/lib/rubygems/platform.rb index 93330c2865a4ac..1e7a5c503abb31 100644 --- a/lib/rubygems/platform.rb +++ b/lib/rubygems/platform.rb @@ -111,7 +111,6 @@ def initialize(arch) [os, version] when /netbsdelf/ then [ "netbsdelf", nil ] when /openbsd(\d+\.\d+)?/ then [ "openbsd", $1 ] - when /bitrig(\d+\.\d+)?/ then [ "bitrig", $1 ] when /solaris(\d+\.\d+)?/ then [ "solaris", $1 ] # test when /^(\w+_platform)(\d+)?/ then [ $1, $2 ] From b228effd0cf9ec72f51ce3ca8ff597a00a653339 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Thu, 3 Nov 2022 14:44:46 -0400 Subject: [PATCH 012/104] [rubygems/rubygems] Drop support for IRIX The IRIX OS is no longer maintained with the last release being 16 years ago. https://github.com/rubygems/rubygems/commit/5381c6a871 --- lib/bundler/vendor/thor/lib/thor/shell/basic.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bundler/vendor/thor/lib/thor/shell/basic.rb b/lib/bundler/vendor/thor/lib/thor/shell/basic.rb index 6ffb21672ffe5b..ef97d52ae72065 100644 --- a/lib/bundler/vendor/thor/lib/thor/shell/basic.rb +++ b/lib/bundler/vendor/thor/lib/thor/shell/basic.rb @@ -425,7 +425,7 @@ def dynamic_width_tput end def unix? 
- RUBY_PLATFORM =~ /(aix|darwin|linux|(net|free|open)bsd|cygwin|solaris|irix)/i + RUBY_PLATFORM =~ /(aix|darwin|linux|(net|free|open)bsd|cygwin|solaris)/i end def truncate(string, width) From 33bc398b731d699cbdd37110240dd54df15a8f9c Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Thu, 3 Nov 2022 14:54:14 -0400 Subject: [PATCH 013/104] [rubygems/rubygems] Fix rubocop violations https://github.com/rubygems/rubygems/commit/cc12e68637 --- test/rubygems/test_gem_platform.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/rubygems/test_gem_platform.rb b/test/rubygems/test_gem_platform.rb index 6e8d598c3350c9..2fbf5c817736e1 100644 --- a/test/rubygems/test_gem_platform.rb +++ b/test/rubygems/test_gem_platform.rb @@ -85,8 +85,8 @@ def test_self_new def test_initialize test_cases = { - "amd64-freebsd6" => ["amd64", "freebsd", "6"], - "java" => [nil, "java", nil], + "amd64-freebsd6" => ["amd64", "freebsd", "6"], + "java" => [nil, "java", nil], "jruby" => [nil, "java", nil], "universal-dotnet" => ["universal", "dotnet", nil], "universal-dotnet2.0" => ["universal", "dotnet", "2.0"], From 72c7dba436a5ebb53dfb37f3e400e84b0c2f9f45 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Tue, 30 Aug 2022 16:02:02 +0900 Subject: [PATCH 014/104] [ruby/fileutils] Revert "FileUtils.rm* methods swallows only Errno::ENOENT when force is true" This reverts commit https://github.com/ruby/fileutils/commit/fa65d676ece9. This caused some incompatibility problems in real-world cases. 
https://bugs.ruby-lang.org/issues/18784#change-98927 https://bugs.ruby-lang.org/issues/18784#change-98967 https://github.com/ruby/fileutils/commit/42983c2553 --- lib/fileutils.rb | 17 ++++++----------- test/fileutils/test_fileutils.rb | 20 -------------------- 2 files changed, 6 insertions(+), 31 deletions(-) diff --git a/lib/fileutils.rb b/lib/fileutils.rb index 745170a121c1bf..a33f086a3ea529 100644 --- a/lib/fileutils.rb +++ b/lib/fileutils.rb @@ -1165,7 +1165,7 @@ def mv(src, dest, force: nil, noop: nil, verbose: nil, secure: nil) # # Keyword arguments: # - # - force: true - ignores raised exceptions of Errno::ENOENT + # - force: true - ignores raised exceptions of StandardError # and its descendants. # - noop: true - does not remove files; returns +nil+. # - verbose: true - prints an equivalent command: @@ -1248,7 +1248,7 @@ def rm_f(list, noop: nil, verbose: nil) # # Keyword arguments: # - # - force: true - ignores raised exceptions of Errno::ENOENT + # - force: true - ignores raised exceptions of StandardError # and its descendants. # - noop: true - does not remove entries; returns +nil+. # - secure: true - removes +src+ securely; @@ -1315,7 +1315,7 @@ def rm_rf(list, noop: nil, verbose: nil, secure: nil) # see {Avoiding the TOCTTOU Vulnerability}[rdoc-ref:FileUtils@Avoiding+the+TOCTTOU+Vulnerability]. # # Optional argument +force+ specifies whether to ignore - # raised exceptions of Errno::ENOENT and its descendants. + # raised exceptions of StandardError and its descendants. # # Related: {methods for deleting}[rdoc-ref:FileUtils@Deleting]. # @@ -1384,12 +1384,10 @@ def remove_entry_secure(path, force = false) ent.remove rescue raise unless force - raise unless Errno::ENOENT === $! end end rescue raise unless force - raise unless Errno::ENOENT === $! end module_function :remove_entry_secure @@ -1415,7 +1413,7 @@ def fu_stat_identical_entry?(a, b) #:nodoc: # should be {interpretable as a path}[rdoc-ref:FileUtils@Path+Arguments]. 
# # Optional argument +force+ specifies whether to ignore - # raised exceptions of Errno::ENOENT and its descendants. + # raised exceptions of StandardError and its descendants. # # Related: FileUtils.remove_entry_secure. # @@ -1425,12 +1423,10 @@ def remove_entry(path, force = false) ent.remove rescue raise unless force - raise unless Errno::ENOENT === $! end end rescue raise unless force - raise unless Errno::ENOENT === $! end module_function :remove_entry @@ -1441,7 +1437,7 @@ def remove_entry(path, force = false) # should be {interpretable as a path}[rdoc-ref:FileUtils@Path+Arguments]. # # Optional argument +force+ specifies whether to ignore - # raised exceptions of Errno::ENOENT and its descendants. + # raised exceptions of StandardError and its descendants. # # Related: {methods for deleting}[rdoc-ref:FileUtils@Deleting]. # @@ -1449,7 +1445,6 @@ def remove_file(path, force = false) Entry_.new(path).remove_file rescue raise unless force - raise unless Errno::ENOENT === $! end module_function :remove_file @@ -1461,7 +1456,7 @@ def remove_file(path, force = false) # should be {interpretable as a path}[rdoc-ref:FileUtils@Path+Arguments]. # # Optional argument +force+ specifies whether to ignore - # raised exceptions of Errno::ENOENT and its descendants. + # raised exceptions of StandardError and its descendants. # # Related: {methods for deleting}[rdoc-ref:FileUtils@Deleting]. 
# diff --git a/test/fileutils/test_fileutils.rb b/test/fileutils/test_fileutils.rb index bce7271a3bda46..05ba8d184ae307 100644 --- a/test/fileutils/test_fileutils.rb +++ b/test/fileutils/test_fileutils.rb @@ -1822,26 +1822,6 @@ def test_rm_rf assert_file_not_exist 'tmpdatadir' end - def test_rm_rf_no_permissions - check_singleton :rm_rf - - return if /mswin|mingw/ =~ RUBY_PLATFORM - - mkdir 'tmpdatadir' - touch 'tmpdatadir/tmpdata' - chmod "-x", 'tmpdatadir' - - begin - assert_raise Errno::EACCES do - rm_rf 'tmpdatadir' - end - - assert_file_exist 'tmpdatadir' - ensure - chmod "+x", 'tmpdatadir' - end - end - def test_rmdir check_singleton :rmdir From ca0b59267352dd23bc53b5cb6f09aacd0025f536 Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Sun, 6 Nov 2022 14:55:24 +0000 Subject: [PATCH 015/104] [ruby/irb] Don't lazily retrieve gem specs for completion There are a few downsides of the current approach: 1. Because gem specs are lazily retrieved, this computation happens in every irb completion test case, which is not necessary. (In tests we don't cache the result of `retrieve_files_to_require_from_load_path`) 2. Gem::Specification.latest_specs is sensitive to the content of LOAD_PATH. And when combined with 1, tests fail "randomly" if they try to mutate LOAD_PATH, even though the test subject is something else. So by pre-computing and storing the gem paths in a constant, it guarantees that the computation only happens once and it doesn't get affected by test cases. One argument that could be made against the change is that it'll store unnecessary data for users that disable autocompletion. But the counter-arguments are: 1. Since autocompletion is enabled by default, this should not be the case for most users. 2. For users with autocompletion enabled, IRB already caches the result of `retrieve_files_to_require_from_load_path` in memory, which should have a similar size to GEM_PATHS. 
And we currently haven't received any report about problems caused by such memory consumption. https://github.com/ruby/irb/commit/c671d39020 --- lib/irb/completion.rb | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/lib/irb/completion.rb b/lib/irb/completion.rb index dbd652769eb34c..b41d5de35e3119 100644 --- a/lib/irb/completion.rb +++ b/lib/irb/completion.rb @@ -64,25 +64,27 @@ def self.absolute_path?(p) # TODO Remove this method after 2.6 EOL. if File.respond_to?(:absolute_path?) File.absolute_path?(p) else - if File.absolute_path(p) == p - true - else - false - end + File.absolute_path(p) == p end end + GEM_PATHS = + if defined?(Gem::Specification) + Gem::Specification.latest_specs(true).map { |s| + s.require_paths.map { |p| + if absolute_path?(p) + p + else + File.join(s.full_gem_path, p) + end + } + }.flatten + else + [] + end.freeze + def self.retrieve_gem_and_system_load_path - gem_paths = Gem::Specification.latest_specs(true).map { |s| - s.require_paths.map { |p| - if absolute_path?(p) - p - else - File.join(s.full_gem_path, p) - end - } - }.flatten if defined?(Gem::Specification) - candidates = (gem_paths.to_a | $LOAD_PATH) + candidates = (GEM_PATHS | $LOAD_PATH) candidates.map do |p| if p.respond_to?(:to_path) p.to_path From 7442cb461b32de2eec3b37f52d80752d30627de0 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Mon, 7 Nov 2022 07:48:26 -0800 Subject: [PATCH 016/104] YJIT: Free pages after ObjectSpace API usages (#6676) --- yjit/src/asm/mod.rs | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs index 3d7de7cd79bed8..497f7687edb4cb 100644 --- a/yjit/src/asm/mod.rs +++ b/yjit/src/asm/mod.rs @@ -563,23 +563,14 @@ impl CodeBlock { pages_in_use[*page] = true; } }); - // Outlined code generated by CodegenGlobals::init() should also be kept. 
- for page in CodegenGlobals::get_ocb_pages() { - pages_in_use[*page] = true; - } - - // Let VirtuamMem free the pages - let mut freed_pages: Vec = pages_in_use.iter().enumerate() - .filter(|&(_, &in_use)| !in_use).map(|(page, _)| page).collect(); - self.free_pages(&freed_pages); // Avoid accumulating freed pages for future code GC for_each_off_stack_iseq_payload(|iseq_payload: &mut IseqPayload| { iseq_payload.pages.clear(); }); - - // Append virtual pages in case RubyVM::YJIT.code_gc is manually triggered. - let mut virtual_pages: Vec = (self.num_mapped_pages()..self.num_virtual_pages()).collect(); - freed_pages.append(&mut virtual_pages); + // Outlined code generated by CodegenGlobals::init() should also be kept. + for page in CodegenGlobals::get_ocb_pages() { + pages_in_use[*page] = true; + } // Invalidate everything to have more compact code after code GC. // This currently patches every ISEQ, which works, but in the future, @@ -591,6 +582,17 @@ impl CodeBlock { // can be safely reset to pass the frozen bytes check on invalidation. CodegenGlobals::set_inline_frozen_bytes(0); + // Let VirtuamMem free the pages + let mut freed_pages: Vec = pages_in_use.iter().enumerate() + .filter(|&(_, &in_use)| !in_use).map(|(page, _)| page).collect(); + // ObjectSpace API may trigger Ruby's GC, which marks gc_offsets in JIT code. + // So this should be called after for_each_*_iseq_payload and rb_yjit_tracing_invalidate_all. + self.free_pages(&freed_pages); + + // Append virtual pages in case RubyVM::YJIT.code_gc is manually triggered. 
+ let mut virtual_pages: Vec = (self.num_mapped_pages()..self.num_virtual_pages()).collect(); + freed_pages.append(&mut virtual_pages); + if let Some(&first_page) = freed_pages.first() { let mut cb = CodegenGlobals::get_inline_cb(); cb.write_pos = cb.get_page_pos(first_page); From 9001e53e68d282493f513ed67824e4014fd01d57 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Mon, 7 Nov 2022 09:29:24 -0800 Subject: [PATCH 017/104] [ruby/irb] Support non-string input in show_source (https://github.com/ruby/irb/pull/430) * Support non-string input in show_source * Test show_source as a method --- lib/irb/cmd/show_source.rb | 18 ++++++++++++++++++ lib/irb/context.rb | 9 ++++++++- lib/irb/extend-command.rb | 14 ++++++++++++++ test/irb/test_cmd.rb | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 74 insertions(+), 1 deletion(-) diff --git a/lib/irb/cmd/show_source.rb b/lib/irb/cmd/show_source.rb index f8a17822dfe4d7..1fcff3e8979a1a 100644 --- a/lib/irb/cmd/show_source.rb +++ b/lib/irb/cmd/show_source.rb @@ -9,6 +9,24 @@ module IRB module ExtendCommand class ShowSource < Nop + class << self + def transform_args(args) + # Return a string literal as is for backward compatibility + if args.empty? 
|| string_literal?(args) + args + else # Otherwise, consider the input as a String for convenience + args.strip.dump + end + end + + private + + def string_literal?(args) + sexp = Ripper.sexp(args) + sexp && sexp.size == 2 && sexp.last&.first&.first == :string_literal + end + end + def execute(str = nil) unless str.is_a?(String) puts "Error: Expected a string but got #{str.inspect}" diff --git a/lib/irb/context.rb b/lib/irb/context.rb index d1ae2cb605b936..72c74f081dca0e 100644 --- a/lib/irb/context.rb +++ b/lib/irb/context.rb @@ -484,9 +484,16 @@ def evaluate(line, line_no, exception: nil) # :nodoc: end # Transform a non-identifier alias (ex: @, $) - command = line.split(/\s/, 2).first + command, args = line.split(/\s/, 2) if original = symbol_alias(command) line = line.gsub(/\A#{Regexp.escape(command)}/, original.to_s) + command = original + end + + # Hook command-specific transformation + command_class = ExtendCommandBundle.load_command(command) + if command_class&.respond_to?(:transform_args) + line = "#{command} #{command_class.transform_args(args)}" end set_last_value(@workspace.evaluate(self, line, irb_path, line_no)) diff --git a/lib/irb/extend-command.rb b/lib/irb/extend-command.rb index 08a258fc53212c..acc23c9920f1ca 100644 --- a/lib/irb/extend-command.rb +++ b/lib/irb/extend-command.rb @@ -147,6 +147,20 @@ def irb_context ] + # Convert a command name to its implementation class if such command exists + def self.load_command(command) + command = command.to_sym + @EXTEND_COMMANDS.each do |cmd_name, cmd_class, load_file, *aliases| + next if cmd_name != command && aliases.all? 
{ |alias_name, _| alias_name != command } + + if !defined?(ExtendCommand) || !ExtendCommand.const_defined?(cmd_class, false) + require_relative load_file + end + return ExtendCommand.const_get(cmd_class, false) + end + nil + end + # Installs the default irb commands: # # +irb_current_working_workspace+:: Context#main diff --git a/test/irb/test_cmd.rb b/test/irb/test_cmd.rb index 2728aa656a8f5f..531ea519f31194 100644 --- a/test/irb/test_cmd.rb +++ b/test/irb/test_cmd.rb @@ -547,6 +547,40 @@ def test_ls_with_no_singleton_class end def test_show_source + input = TestInputMethod.new([ + "show_source IRB.conf\n", + ]) + IRB.init_config(nil) + workspace = IRB::WorkSpace.new(self) + IRB.conf[:VERBOSE] = false + irb = IRB::Irb.new(workspace, input) + IRB.conf[:MAIN_CONTEXT] = irb.context + irb.context.return_format = "=> %s\n" + out, err = capture_output do + irb.eval_input + end + assert_empty err + assert_match(%r[/irb\.rb], out) + end + + def test_show_source_method + input = TestInputMethod.new([ + "p show_source('IRB.conf')\n", + ]) + IRB.init_config(nil) + workspace = IRB::WorkSpace.new(self) + IRB.conf[:VERBOSE] = false + irb = IRB::Irb.new(workspace, input) + IRB.conf[:MAIN_CONTEXT] = irb.context + irb.context.return_format = "=> %s\n" + out, err = capture_output do + irb.eval_input + end + assert_empty err + assert_match(%r[/irb\.rb], out) + end + + def test_show_source_string input = TestInputMethod.new([ "show_source 'IRB.conf'\n", ]) From c99e4c427897e82a3419abed894d28705f70fa13 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Mon, 7 Nov 2022 20:05:18 +0100 Subject: [PATCH 018/104] Update to ruby/mspec@1e16420 --- spec/mspec/tool/tag_from_output.rb | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/spec/mspec/tool/tag_from_output.rb b/spec/mspec/tool/tag_from_output.rb index ebe13434c2d738..23a5dc0fb333d2 100755 --- a/spec/mspec/tool/tag_from_output.rb +++ b/spec/mspec/tool/tag_from_output.rb @@ -11,6 +11,11 @@ 
output = ARGF.readlines +# Automatically strip datetime of GitHub Actions +if output.first =~ /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d+Z / + output = output.map { |line| line.split(' ', 2).last } +end + NUMBER = /^\d+\)$/ ERROR_OR_FAILED = / (ERROR|FAILED)$/ SPEC_FILE = /^(\/.+_spec\.rb)\:\d+/ @@ -22,11 +27,24 @@ description = error_line.match(ERROR_OR_FAILED).pre_match spec_file = rest.find { |line| line =~ SPEC_FILE } - unless spec_file - warn "Could not find file for:\n#{error_line}" - next + if spec_file + spec_file = spec_file[SPEC_FILE, 1] or raise + else + if error_line =~ /^(\w+)#(\w+) / + module_method = error_line.split(' ', 2).first + file = "#{$1.downcase}/#{$2}_spec.rb" + spec_file = ['spec/ruby/core', 'spec/ruby/library', *Dir.glob('spec/ruby/library/*')].find { |dir| + path = "#{dir}/#{file}" + break path if File.exist?(path) + } + end + + unless spec_file + warn "Could not find file for:\n#{error_line}" + next + end end - spec_file = spec_file[SPEC_FILE, 1] + prefix = spec_file.index('spec/ruby/') || spec_file.index('spec/truffle/') spec_file = spec_file[prefix..-1] From 83decbb62b8b3f1638927033f12b55f9b11f78c6 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Mon, 7 Nov 2022 20:05:30 +0100 Subject: [PATCH 019/104] Update to ruby/spec@740ccc8 --- spec/ruby/.rubocop.yml | 4 + spec/ruby/README.md | 1 - spec/ruby/core/array/pack/m_spec.rb | 10 +- spec/ruby/core/enumerable/each_cons_spec.rb | 6 + spec/ruby/core/enumerable/each_slice_spec.rb | 6 + .../ruby/core/enumerator/lazy/compact_spec.rb | 11 ++ spec/ruby/core/enumerator/lazy/lazy_spec.rb | 10 -- spec/ruby/core/false/case_compare_spec.rb | 14 ++ spec/ruby/core/fiber/blocking_spec.rb | 4 +- spec/ruby/core/float/shared/to_i.rb | 4 + spec/ruby/core/hash/hash_spec.rb | 9 ++ spec/ruby/core/io/read_spec.rb | 7 - spec/ruby/core/io/readpartial_spec.rb | 10 +- spec/ruby/core/io/set_encoding_spec.rb | 32 ++++- spec/ruby/core/kernel/Complex_spec.rb | 89 +++++++++++- spec/ruby/core/kernel/fixtures/Complex.rb | 
5 + spec/ruby/core/kernel/shared/load.rb | 16 +++ .../core/matchdata/element_reference_spec.rb | 15 ++ spec/ruby/core/method/fixtures/classes.rb | 6 + spec/ruby/core/method/private_spec.rb | 21 +++ spec/ruby/core/method/protected_spec.rb | 21 +++ spec/ruby/core/method/public_spec.rb | 21 +++ spec/ruby/core/method/super_method_spec.rb | 10 +- spec/ruby/core/process/_fork_spec.rb | 24 ++++ spec/ruby/core/process/spawn_spec.rb | 18 +++ spec/ruby/core/regexp/timeout_spec.rb | 35 +++++ spec/ruby/core/string/element_set_spec.rb | 4 +- spec/ruby/core/string/fixtures/to_c.rb | 5 + spec/ruby/core/string/gsub_spec.rb | 35 ++++- spec/ruby/core/string/index_spec.rb | 8 ++ spec/ruby/core/string/partition_spec.rb | 22 +++ spec/ruby/core/string/rindex_spec.rb | 8 ++ spec/ruby/core/string/rpartition_spec.rb | 22 +++ spec/ruby/core/string/sub_spec.rb | 32 +++++ spec/ruby/core/string/to_c_spec.rb | 105 ++++---------- spec/ruby/core/string/unpack/b_spec.rb | 6 +- spec/ruby/core/string/unpack/m_spec.rb | 5 + spec/ruby/core/struct/initialize_spec.rb | 8 ++ spec/ruby/core/struct/keyword_init_spec.rb | 21 +++ spec/ruby/core/thread/backtrace/limit_spec.rb | 15 ++ .../ruby/core/thread/native_thread_id_spec.rb | 15 ++ spec/ruby/core/time/at_spec.rb | 5 + spec/ruby/core/time/new_spec.rb | 51 +++++++ spec/ruby/core/time/now_spec.rb | 45 ++++++ spec/ruby/core/time/shared/local.rb | 11 +- spec/ruby/core/time/strftime_spec.rb | 9 ++ .../core/tracepoint/allow_reentry_spec.rb | 32 +++++ .../core/unboundmethod/fixtures/classes.rb | 6 + spec/ruby/core/unboundmethod/private_spec.rb | 21 +++ .../ruby/core/unboundmethod/protected_spec.rb | 21 +++ spec/ruby/core/unboundmethod/public_spec.rb | 21 +++ .../core/unboundmethod/super_method_spec.rb | 10 +- spec/ruby/language/block_spec.rb | 43 ++++++ spec/ruby/language/keyword_arguments_spec.rb | 15 ++ spec/ruby/language/method_spec.rb | 29 ++++ spec/ruby/library/cmath/math/acos_spec.rb | 1 - spec/ruby/library/cmath/math/acosh_spec.rb | 1 - 
spec/ruby/library/cmath/math/asin_spec.rb | 1 - spec/ruby/library/cmath/math/asinh_spec.rb | 1 - spec/ruby/library/cmath/math/atan2_spec.rb | 1 - spec/ruby/library/cmath/math/atan_spec.rb | 1 - spec/ruby/library/cmath/math/atanh_spec.rb | 1 - spec/ruby/library/cmath/math/cos_spec.rb | 1 - spec/ruby/library/cmath/math/cosh_spec.rb | 1 - spec/ruby/library/cmath/math/exp_spec.rb | 1 - .../library/cmath/math/fixtures/classes.rb | 4 - spec/ruby/library/cmath/math/log10_spec.rb | 1 - spec/ruby/library/cmath/math/log_spec.rb | 1 - spec/ruby/library/cmath/math/shared/acos.rb | 41 ------ spec/ruby/library/cmath/math/shared/acosh.rb | 37 ----- spec/ruby/library/cmath/math/shared/asin.rb | 47 ------- spec/ruby/library/cmath/math/shared/asinh.rb | 32 ----- spec/ruby/library/cmath/math/shared/atan.rb | 32 ----- spec/ruby/library/cmath/math/shared/atan2.rb | 34 ----- spec/ruby/library/cmath/math/shared/atanh.rb | 30 ---- spec/ruby/library/cmath/math/shared/cos.rb | 30 ---- spec/ruby/library/cmath/math/shared/cosh.rb | 28 ---- spec/ruby/library/cmath/math/shared/exp.rb | 28 ---- spec/ruby/library/cmath/math/shared/log.rb | 39 ----- spec/ruby/library/cmath/math/shared/log10.rb | 41 ------ spec/ruby/library/cmath/math/shared/sin.rb | 30 ---- spec/ruby/library/cmath/math/shared/sinh.rb | 28 ---- spec/ruby/library/cmath/math/shared/sqrt.rb | 34 ----- spec/ruby/library/cmath/math/shared/tan.rb | 28 ---- spec/ruby/library/cmath/math/shared/tanh.rb | 32 ----- spec/ruby/library/cmath/math/sin_spec.rb | 1 - spec/ruby/library/cmath/math/sinh_spec.rb | 1 - spec/ruby/library/cmath/math/sqrt_spec.rb | 1 - spec/ruby/library/cmath/math/tan_spec.rb | 1 - spec/ruby/library/cmath/math/tanh_spec.rb | 1 - spec/ruby/library/erb/new_spec.rb | 16 +++ .../ruby/library/scanf/io/block_scanf_spec.rb | 1 - spec/ruby/library/scanf/io/fixtures/date.txt | 4 - .../library/scanf/io/fixtures/helloworld.txt | 1 - spec/ruby/library/scanf/io/scanf_spec.rb | 1 - .../library/scanf/io/shared/block_scanf.rb | 28 ---- 
.../library/scanf/string/block_scanf_spec.rb | 1 - spec/ruby/library/scanf/string/scanf_spec.rb | 1 - .../scanf/string/shared/block_scanf.rb | 25 ---- spec/ruby/library/stringio/putc_spec.rb | 15 ++ spec/ruby/library/stringio/puts_spec.rb | 14 ++ spec/ruby/library/stringio/shared/write.rb | 15 ++ spec/ruby/optional/capi/class_spec.rb | 27 ++++ spec/ruby/optional/capi/ext/encoding_spec.c | 13 +- spec/ruby/optional/capi/ext/gc_spec.c | 31 ++++ spec/ruby/optional/capi/ext/globals_spec.c | 34 +++++ spec/ruby/optional/capi/ext/rubyspec.h | 30 ---- spec/ruby/optional/capi/gc_spec.rb | 30 +++- spec/ruby/optional/capi/globals_spec.rb | 54 ++++++- spec/ruby/shared/kernel/complex.rb | 133 ++++++++++++++++++ 110 files changed, 1274 insertions(+), 831 deletions(-) create mode 100644 spec/ruby/core/enumerator/lazy/compact_spec.rb create mode 100644 spec/ruby/core/false/case_compare_spec.rb create mode 100644 spec/ruby/core/kernel/fixtures/Complex.rb create mode 100644 spec/ruby/core/method/private_spec.rb create mode 100644 spec/ruby/core/method/protected_spec.rb create mode 100644 spec/ruby/core/method/public_spec.rb create mode 100644 spec/ruby/core/process/_fork_spec.rb create mode 100644 spec/ruby/core/regexp/timeout_spec.rb create mode 100644 spec/ruby/core/string/fixtures/to_c.rb create mode 100644 spec/ruby/core/struct/keyword_init_spec.rb create mode 100644 spec/ruby/core/thread/backtrace/limit_spec.rb create mode 100644 spec/ruby/core/thread/native_thread_id_spec.rb create mode 100644 spec/ruby/core/tracepoint/allow_reentry_spec.rb create mode 100644 spec/ruby/core/unboundmethod/private_spec.rb create mode 100644 spec/ruby/core/unboundmethod/protected_spec.rb create mode 100644 spec/ruby/core/unboundmethod/public_spec.rb delete mode 100644 spec/ruby/library/cmath/math/acos_spec.rb delete mode 100644 spec/ruby/library/cmath/math/acosh_spec.rb delete mode 100644 spec/ruby/library/cmath/math/asin_spec.rb delete mode 100644 spec/ruby/library/cmath/math/asinh_spec.rb delete 
mode 100644 spec/ruby/library/cmath/math/atan2_spec.rb delete mode 100644 spec/ruby/library/cmath/math/atan_spec.rb delete mode 100644 spec/ruby/library/cmath/math/atanh_spec.rb delete mode 100644 spec/ruby/library/cmath/math/cos_spec.rb delete mode 100644 spec/ruby/library/cmath/math/cosh_spec.rb delete mode 100644 spec/ruby/library/cmath/math/exp_spec.rb delete mode 100644 spec/ruby/library/cmath/math/fixtures/classes.rb delete mode 100644 spec/ruby/library/cmath/math/log10_spec.rb delete mode 100644 spec/ruby/library/cmath/math/log_spec.rb delete mode 100644 spec/ruby/library/cmath/math/shared/acos.rb delete mode 100644 spec/ruby/library/cmath/math/shared/acosh.rb delete mode 100644 spec/ruby/library/cmath/math/shared/asin.rb delete mode 100644 spec/ruby/library/cmath/math/shared/asinh.rb delete mode 100644 spec/ruby/library/cmath/math/shared/atan.rb delete mode 100644 spec/ruby/library/cmath/math/shared/atan2.rb delete mode 100644 spec/ruby/library/cmath/math/shared/atanh.rb delete mode 100644 spec/ruby/library/cmath/math/shared/cos.rb delete mode 100644 spec/ruby/library/cmath/math/shared/cosh.rb delete mode 100644 spec/ruby/library/cmath/math/shared/exp.rb delete mode 100644 spec/ruby/library/cmath/math/shared/log.rb delete mode 100644 spec/ruby/library/cmath/math/shared/log10.rb delete mode 100644 spec/ruby/library/cmath/math/shared/sin.rb delete mode 100644 spec/ruby/library/cmath/math/shared/sinh.rb delete mode 100644 spec/ruby/library/cmath/math/shared/sqrt.rb delete mode 100644 spec/ruby/library/cmath/math/shared/tan.rb delete mode 100644 spec/ruby/library/cmath/math/shared/tanh.rb delete mode 100644 spec/ruby/library/cmath/math/sin_spec.rb delete mode 100644 spec/ruby/library/cmath/math/sinh_spec.rb delete mode 100644 spec/ruby/library/cmath/math/sqrt_spec.rb delete mode 100644 spec/ruby/library/cmath/math/tan_spec.rb delete mode 100644 spec/ruby/library/cmath/math/tanh_spec.rb delete mode 100644 spec/ruby/library/scanf/io/block_scanf_spec.rb delete 
mode 100644 spec/ruby/library/scanf/io/fixtures/date.txt delete mode 100644 spec/ruby/library/scanf/io/fixtures/helloworld.txt delete mode 100644 spec/ruby/library/scanf/io/scanf_spec.rb delete mode 100644 spec/ruby/library/scanf/io/shared/block_scanf.rb delete mode 100644 spec/ruby/library/scanf/string/block_scanf_spec.rb delete mode 100644 spec/ruby/library/scanf/string/scanf_spec.rb delete mode 100644 spec/ruby/library/scanf/string/shared/block_scanf.rb create mode 100644 spec/ruby/shared/kernel/complex.rb diff --git a/spec/ruby/.rubocop.yml b/spec/ruby/.rubocop.yml index 3a16fc43f8ee07..82733c4b4d9205 100644 --- a/spec/ruby/.rubocop.yml +++ b/spec/ruby/.rubocop.yml @@ -115,6 +115,10 @@ Lint/EmptyWhen: - language/case_spec.rb - optional/capi/spec_helper.rb +Lint/ErbNewArguments: + Exclude: + - 'library/erb/new_spec.rb' + Lint/FormatParameterMismatch: Exclude: - 'core/kernel/shared/sprintf.rb' diff --git a/spec/ruby/README.md b/spec/ruby/README.md index 24b4719fdda515..018bf0ca3e12f4 100644 --- a/spec/ruby/README.md +++ b/spec/ruby/README.md @@ -1,7 +1,6 @@ # The Ruby Spec Suite [![Actions Build Status](https://github.com/ruby/spec/workflows/CI/badge.svg)](https://github.com/ruby/spec/actions) -[![Gitter](https://badges.gitter.im/ruby/spec.svg)](https://gitter.im/ruby/spec) The Ruby Spec Suite, abbreviated `ruby/spec`, is a test suite for the behavior of the Ruby programming language. 
diff --git a/spec/ruby/core/array/pack/m_spec.rb b/spec/ruby/core/array/pack/m_spec.rb index 2b1a84abcab802..c6364af12da7f2 100644 --- a/spec/ruby/core/array/pack/m_spec.rb +++ b/spec/ruby/core/array/pack/m_spec.rb @@ -80,8 +80,16 @@ ].should be_computed_by(:pack, "M") end - it "encodes a tab followed by a newline with an encoded newline" do + it "encodes a tab at the end of a line with an encoded newline" do + ["\t"].pack("M").should == "\t=\n" ["\t\n"].pack("M").should == "\t=\n\n" + ["abc\t\nxyz"].pack("M").should == "abc\t=\n\nxyz=\n" + end + + it "encodes a space at the end of a line with an encoded newline" do + [" "].pack("M").should == " =\n" + [" \n"].pack("M").should == " =\n\n" + ["abc \nxyz"].pack("M").should == "abc =\n\nxyz=\n" end it "encodes 127..255 in hex format" do diff --git a/spec/ruby/core/enumerable/each_cons_spec.rb b/spec/ruby/core/enumerable/each_cons_spec.rb index ba658203a26598..8fb31fb9257738 100644 --- a/spec/ruby/core/enumerable/each_cons_spec.rb +++ b/spec/ruby/core/enumerable/each_cons_spec.rb @@ -56,6 +56,12 @@ multi.each_cons(2).to_a.should == [[[1, 2], [3, 4, 5]], [[3, 4, 5], [6, 7, 8, 9]]] end + ruby_version_is "3.1" do + it "returns self when a block is given" do + @enum.each_cons(3){}.should == @enum + end + end + describe "when no block is given" do it "returns an enumerator" do e = @enum.each_cons(3) diff --git a/spec/ruby/core/enumerable/each_slice_spec.rb b/spec/ruby/core/enumerable/each_slice_spec.rb index 2ea89f5e72e43f..a57a1dba81fbad 100644 --- a/spec/ruby/core/enumerable/each_slice_spec.rb +++ b/spec/ruby/core/enumerable/each_slice_spec.rb @@ -57,6 +57,12 @@ e.to_a.should == @sliced end + ruby_version_is "3.1" do + it "returns self when a block is given" do + @enum.each_slice(3){}.should == @enum + end + end + it "gathers whole arrays as elements when each yields multiple" do multi = EnumerableSpecs::YieldsMulti.new multi.each_slice(2).to_a.should == [[[1, 2], [3, 4, 5]], [[6, 7, 8, 9]]] diff --git 
a/spec/ruby/core/enumerator/lazy/compact_spec.rb b/spec/ruby/core/enumerator/lazy/compact_spec.rb new file mode 100644 index 00000000000000..80b6f9481d4fc5 --- /dev/null +++ b/spec/ruby/core/enumerator/lazy/compact_spec.rb @@ -0,0 +1,11 @@ +require_relative '../../../spec_helper' + +ruby_version_is '3.1' do + describe "Enumerator::Lazy#compact" do + it 'returns array without nil elements' do + arr = [1, nil, 3, false, 5].to_enum.lazy.compact + arr.should be_an_instance_of(Enumerator::Lazy) + arr.force.should == [1, 3, false, 5] + end + end +end diff --git a/spec/ruby/core/enumerator/lazy/lazy_spec.rb b/spec/ruby/core/enumerator/lazy/lazy_spec.rb index 683dfb81d76d7a..0fb104e25ab7cf 100644 --- a/spec/ruby/core/enumerator/lazy/lazy_spec.rb +++ b/spec/ruby/core/enumerator/lazy/lazy_spec.rb @@ -30,13 +30,3 @@ lazy.lazy.should equal(lazy) end end - -ruby_version_is '3.1' do - describe "Enumerator::Lazy#compact" do - it 'returns array without nil elements' do - arr = [1, nil, 3, false, 5].to_enum.lazy.compact - arr.should be_an_instance_of(Enumerator::Lazy) - arr.force.should == [1, 3, false, 5] - end - end -end diff --git a/spec/ruby/core/false/case_compare_spec.rb b/spec/ruby/core/false/case_compare_spec.rb new file mode 100644 index 00000000000000..0bd0ab44aec906 --- /dev/null +++ b/spec/ruby/core/false/case_compare_spec.rb @@ -0,0 +1,14 @@ +require_relative '../../spec_helper' + +describe "FalseClass#===" do + it "returns true for false" do + (false === false).should == true + end + + it "returns false for non-false object" do + (false === 0).should == false + (false === "").should == false + (false === Object).should == false + (false === nil).should == false + end +end diff --git a/spec/ruby/core/fiber/blocking_spec.rb b/spec/ruby/core/fiber/blocking_spec.rb index 852861d12f1ec0..eeee5a71c13abb 100644 --- a/spec/ruby/core/fiber/blocking_spec.rb +++ b/spec/ruby/core/fiber/blocking_spec.rb @@ -66,8 +66,8 @@ context "when fiber is non-blocking" do it "can become 
blocking" do fiber = Fiber.new(blocking: false) do - Fiber.blocking do |fiber| - fiber.blocking? ? :blocking : :non_blocking + Fiber.blocking do |f| + f.blocking? ? :blocking : :non_blocking end end diff --git a/spec/ruby/core/float/shared/to_i.rb b/spec/ruby/core/float/shared/to_i.rb index 960295f09585e7..33b32ca5332d8d 100644 --- a/spec/ruby/core/float/shared/to_i.rb +++ b/spec/ruby/core/float/shared/to_i.rb @@ -7,4 +7,8 @@ -9223372036854775808.1.send(@method).should eql(-9223372036854775808) 9223372036854775808.1.send(@method).should eql(9223372036854775808) end + + it "raises a FloatDomainError for NaN" do + -> { nan_value.send(@method) }.should raise_error(FloatDomainError) + end end diff --git a/spec/ruby/core/hash/hash_spec.rb b/spec/ruby/core/hash/hash_spec.rb index 3649d4d8de0f62..2ccb4831208d2c 100644 --- a/spec/ruby/core/hash/hash_spec.rb +++ b/spec/ruby/core/hash/hash_spec.rb @@ -41,4 +41,13 @@ h.hash.should == {x: [h]}.hash # Like above, because h.eql?(x: [h]) end + + ruby_version_is "3.1" do + it "allows ommiting values" do + a = 1 + b = 2 + + eval('{a:, b:}.should == { a: 1, b: 2 }') + end + end end diff --git a/spec/ruby/core/io/read_spec.rb b/spec/ruby/core/io/read_spec.rb index d34f7bd0eb5022..529afbf0ffc385 100644 --- a/spec/ruby/core/io/read_spec.rb +++ b/spec/ruby/core/io/read_spec.rb @@ -402,13 +402,6 @@ xE2 = [226].pack('C*') result.should == ("abc" + xE2 + "def").force_encoding(Encoding::BINARY) end - - it "does not transcode file contents when an internal encoding is specified" do - result = File.open(@name, "r:binary:utf-8") { |f| f.read }.chomp - result.encoding.should == Encoding::BINARY - xE2 = [226].pack('C*') - result.should == ("abc" + xE2 + "def").force_encoding(Encoding::BINARY) - end end describe "IO#read in text mode" do diff --git a/spec/ruby/core/io/readpartial_spec.rb b/spec/ruby/core/io/readpartial_spec.rb index 324ae0b6e6e5ee..2901b429c25181 100644 --- a/spec/ruby/core/io/readpartial_spec.rb +++ 
b/spec/ruby/core/io/readpartial_spec.rb @@ -93,10 +93,12 @@ @rd.readpartial(0).should == "" end - it "clears and returns the given buffer if the length argument is 0" do - buffer = "existing content" - @rd.readpartial(0, buffer).should == buffer - buffer.should == "" + ruby_bug "#18421", ""..."3.0.4" do + it "clears and returns the given buffer if the length argument is 0" do + buffer = "existing content" + @rd.readpartial(0, buffer).should == buffer + buffer.should == "" + end end it "preserves the encoding of the given buffer" do diff --git a/spec/ruby/core/io/set_encoding_spec.rb b/spec/ruby/core/io/set_encoding_spec.rb index bc448acfceef94..22d9017635709f 100644 --- a/spec/ruby/core/io/set_encoding_spec.rb +++ b/spec/ruby/core/io/set_encoding_spec.rb @@ -1,7 +1,7 @@ require_relative '../../spec_helper' describe :io_set_encoding_write, shared: true do - it "sets the encodings to nil" do + it "sets the encodings to nil when they were set previously" do @io = new_io @name, "#{@object}:ibm437:ibm866" @io.set_encoding nil, nil @@ -9,6 +9,19 @@ @io.internal_encoding.should be_nil end + it "sets the encodings to nil when the IO is built with no explicit encoding" do + @io = new_io @name, @object + + # Checking our assumptions first + @io.external_encoding.should be_nil + @io.internal_encoding.should be_nil + + @io.set_encoding nil, nil + + @io.external_encoding.should be_nil + @io.internal_encoding.should be_nil + end + it "prevents the encodings from changing when Encoding defaults are changed" do @io = new_io @name, "#{@object}:utf-8:us-ascii" @io.set_encoding nil, nil @@ -38,6 +51,7 @@ @external = Encoding.default_external @internal = Encoding.default_internal + # The defaults Encoding.default_external = Encoding::UTF_8 Encoding.default_internal = nil @@ -113,6 +127,22 @@ describe "with 'a+' mode" do it_behaves_like :io_set_encoding_write, nil, "a+" end + + describe "with standard IOs" do + it "correctly resets them" do + STDOUT.external_encoding.should == nil + 
STDOUT.internal_encoding.should == nil + + begin + STDOUT.set_encoding(Encoding::US_ASCII, Encoding::ISO_8859_1) + ensure + STDOUT.set_encoding(nil, nil) + end + + STDOUT.external_encoding.should == nil + STDOUT.internal_encoding.should == nil + end + end end describe "IO#set_encoding" do diff --git a/spec/ruby/core/kernel/Complex_spec.rb b/spec/ruby/core/kernel/Complex_spec.rb index 4f043526b8fe85..cc8177fa02b28e 100644 --- a/spec/ruby/core/kernel/Complex_spec.rb +++ b/spec/ruby/core/kernel/Complex_spec.rb @@ -1,4 +1,6 @@ require_relative '../../spec_helper' +require_relative '../../shared/kernel/complex' +require_relative 'fixtures/Complex' describe "Kernel.Complex()" do describe "when passed [Complex, Complex]" do @@ -58,7 +60,92 @@ end end - describe "when passed a String" do + describe "when passed [String]" do + it_behaves_like :kernel_complex, :Complex_method, KernelSpecs + + context "invalid argument" do + it "raises Encoding::CompatibilityError if String is in not ASCII-compatible encoding" do + -> { + Complex("79+4i".encode("UTF-16")) + }.should raise_error(Encoding::CompatibilityError, "ASCII incompatible encoding: UTF-16") + end + + it "raises ArgumentError for unrecognised Strings" do + -> { + Complex("ruby") + }.should raise_error(ArgumentError, 'invalid value for convert(): "ruby"') + end + + it "raises ArgumentError for trailing garbage" do + -> { + Complex("79+4iruby") + }.should raise_error(ArgumentError, 'invalid value for convert(): "79+4iruby"') + end + + it "does not understand Float::INFINITY" do + -> { + Complex("Infinity") + }.should raise_error(ArgumentError, 'invalid value for convert(): "Infinity"') + + -> { + Complex("-Infinity") + }.should raise_error(ArgumentError, 'invalid value for convert(): "-Infinity"') + end + + it "does not understand Float::NAN" do + -> { + Complex("NaN") + }.should raise_error(ArgumentError, 'invalid value for convert(): "NaN"') + end + + it "does not understand a sequence of _" do + -> { + 
Complex("7__9+4__0i") + }.should raise_error(ArgumentError, 'invalid value for convert(): "7__9+4__0i"') + end + + it "does not allow null-byte" do + -> { + Complex("1-2i\0") + }.should raise_error(ArgumentError, "string contains null byte") + end + end + + context "invalid argument and exception: false passed" do + it "raises Encoding::CompatibilityError if String is in not ASCII-compatible encoding" do + -> { + Complex("79+4i".encode("UTF-16"), exception: false) + }.should raise_error(Encoding::CompatibilityError, "ASCII incompatible encoding: UTF-16") + end + + it "returns nil for unrecognised Strings" do + Complex("ruby", exception: false).should == nil + end + + it "returns nil when trailing garbage" do + Complex("79+4iruby", exception: false).should == nil + end + + it "returns nil for Float::INFINITY" do + Complex("Infinity", exception: false).should == nil + Complex("-Infinity", exception: false).should == nil + end + + it "returns nil for Float::NAN" do + Complex("NaN", exception: false).should == nil + end + + it "returns nil when there is a sequence of _" do + Complex("7__9+4__0i", exception: false).should == nil + end + + it "returns nil when String contains null-byte" do + Complex("1-2i\0", exception: false).should == nil + end + end + end + + describe "when passes [String, String]" do it "needs to be reviewed for spec completeness" end diff --git a/spec/ruby/core/kernel/fixtures/Complex.rb b/spec/ruby/core/kernel/fixtures/Complex.rb new file mode 100644 index 00000000000000..bf14d55ad57a5c --- /dev/null +++ b/spec/ruby/core/kernel/fixtures/Complex.rb @@ -0,0 +1,5 @@ +module KernelSpecs + def self.Complex_method(string) + Complex(string) + end +end diff --git a/spec/ruby/core/kernel/shared/load.rb b/spec/ruby/core/kernel/shared/load.rb index 120619abef6e93..cc84daeb886605 100644 --- a/spec/ruby/core/kernel/shared/load.rb +++ b/spec/ruby/core/kernel/shared/load.rb @@ -154,6 +154,22 @@ end end + describe "when passed a module for 'wrap'" do + 
ruby_version_is "3.1" do + it "sets the enclosing scope to the supplied module" do + path = File.expand_path "wrap_fixture.rb", CODE_LOADING_DIR + mod = Module.new + @object.load(path, mod) + + Object.const_defined?(:LoadSpecWrap).should be_false + mod.const_defined?(:LoadSpecWrap).should be_true + + wrap_module = ScratchPad.recorded[1] + wrap_module.should == mod + end + end + end + describe "(shell expansion)" do before :each do @env_home = ENV["HOME"] diff --git a/spec/ruby/core/matchdata/element_reference_spec.rb b/spec/ruby/core/matchdata/element_reference_spec.rb index 8965f902a0d8ec..7c0f089bb4b0d9 100644 --- a/spec/ruby/core/matchdata/element_reference_spec.rb +++ b/spec/ruby/core/matchdata/element_reference_spec.rb @@ -26,6 +26,21 @@ it "supports ranges [start..end]" do /(.)(.)(\d+)(\d)/.match("THX1138.")[1..3].should == %w|H X 113| + /(.)(.)(\d+)(\d)/.match("THX1138.")[3..10].should == %w|113 8| + /(.)(.)(\d+)(\d)/.match("THX1138.")[-30..2].should == nil + /(.)(.)(\d+)(\d)/.match("THX1138.")[3..1].should == [] + end + + it "supports endless ranges [start..]" do + /(.)(.)(\d+)(\d)/.match("THX1138.")[3..].should == %w|113 8| + end + + it "supports beginningless ranges [..end]" do + /(.)(.)(\d+)(\d)/.match("THX1138.")[..1].should == %w|HX1138 H| + end + + it "supports beginningless endless ranges [nil..nil]" do + /(.)(.)(\d+)(\d)/.match("THX1138.")[nil..nil].should == %w|HX1138 H X 113 8| end ruby_version_is "3.0" do diff --git a/spec/ruby/core/method/fixtures/classes.rb b/spec/ruby/core/method/fixtures/classes.rb index 50daa773e1179e..464a519aeacbac 100644 --- a/spec/ruby/core/method/fixtures/classes.rb +++ b/spec/ruby/core/method/fixtures/classes.rb @@ -84,6 +84,12 @@ def one_req_one_opt_with_splat_and_block(a, b=nil, *c, &blk); end def two_req_one_opt_with_splat_and_block(a, b, c=nil, *d, &blk); end def one_req_two_opt_with_splat_and_block(a, b=nil, c=nil, *d, &blk); end + def my_public_method; end + def my_protected_method; end + def my_private_method; 
end + protected :my_protected_method + private :my_private_method + define_method(:zero_defined_method, Proc.new {||}) define_method(:zero_with_splat_defined_method, Proc.new {|*x|}) define_method(:one_req_defined_method, Proc.new {|x|}) diff --git a/spec/ruby/core/method/private_spec.rb b/spec/ruby/core/method/private_spec.rb new file mode 100644 index 00000000000000..230a4e9e81185b --- /dev/null +++ b/spec/ruby/core/method/private_spec.rb @@ -0,0 +1,21 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +ruby_version_is "3.1"..."3.2" do + describe "Method#private?" do + it "returns false when the method is public" do + obj = MethodSpecs::Methods.new + obj.method(:my_public_method).private?.should == false + end + + it "returns false when the method is protected" do + obj = MethodSpecs::Methods.new + obj.method(:my_protected_method).private?.should == false + end + + it "returns true when the method is private" do + obj = MethodSpecs::Methods.new + obj.method(:my_private_method).private?.should == true + end + end +end diff --git a/spec/ruby/core/method/protected_spec.rb b/spec/ruby/core/method/protected_spec.rb new file mode 100644 index 00000000000000..6ee85f77387931 --- /dev/null +++ b/spec/ruby/core/method/protected_spec.rb @@ -0,0 +1,21 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +ruby_version_is "3.1"..."3.2" do + describe "Method#protected?" 
do + it "returns false when the method is public" do + obj = MethodSpecs::Methods.new + obj.method(:my_public_method).protected?.should == false + end + + it "returns true when the method is protected" do + obj = MethodSpecs::Methods.new + obj.method(:my_protected_method).protected?.should == true + end + + it "returns false when the method is private" do + obj = MethodSpecs::Methods.new + obj.method(:my_private_method).protected?.should == false + end + end +end diff --git a/spec/ruby/core/method/public_spec.rb b/spec/ruby/core/method/public_spec.rb new file mode 100644 index 00000000000000..3988468551da56 --- /dev/null +++ b/spec/ruby/core/method/public_spec.rb @@ -0,0 +1,21 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +ruby_version_is "3.1"..."3.2" do + describe "Method#public?" do + it "returns true when the method is public" do + obj = MethodSpecs::Methods.new + obj.method(:my_public_method).public?.should == true + end + + it "returns false when the method is protected" do + obj = MethodSpecs::Methods.new + obj.method(:my_protected_method).public?.should == false + end + + it "returns false when the method is private" do + obj = MethodSpecs::Methods.new + obj.method(:my_private_method).public?.should == false + end + end +end diff --git a/spec/ruby/core/method/super_method_spec.rb b/spec/ruby/core/method/super_method_spec.rb index c63a7aaa0fc4af..f9a18f38785861 100644 --- a/spec/ruby/core/method/super_method_spec.rb +++ b/spec/ruby/core/method/super_method_spec.rb @@ -55,10 +55,12 @@ def overridden; end end end - context "after aliasing an inherited method" do - it "returns the expected super_method" do - method = MethodSpecs::InheritedMethods::C.new.method(:meow) - method.super_method.owner.should == MethodSpecs::InheritedMethods::A + ruby_version_is "2.7.3" do + context "after aliasing an inherited method" do + it "returns the expected super_method" do + method = MethodSpecs::InheritedMethods::C.new.method(:meow) + 
method.super_method.owner.should == MethodSpecs::InheritedMethods::A + end end end end diff --git a/spec/ruby/core/process/_fork_spec.rb b/spec/ruby/core/process/_fork_spec.rb new file mode 100644 index 00000000000000..6f711ad2dd7d0f --- /dev/null +++ b/spec/ruby/core/process/_fork_spec.rb @@ -0,0 +1,24 @@ +require_relative '../../spec_helper' + +ruby_version_is "3.1" do + describe "Process._fork" do + it "for #respond_to? returns the same as Process.respond_to?(:fork)" do + Process.respond_to?(:_fork).should == Process.respond_to?(:fork) + end + + guard_not -> { Process.respond_to?(:fork) } do + it "raises a NotImplementedError when called" do + -> { Process._fork }.should raise_error(NotImplementedError) + end + end + + guard -> { Process.respond_to?(:fork) } do + it "is called by Process#fork" do + Process.should_receive(:_fork).once.and_return(42) + + pid = Process.fork {} + pid.should equal(42) + end + end + end +end diff --git a/spec/ruby/core/process/spawn_spec.rb b/spec/ruby/core/process/spawn_spec.rb index 9aa8da81251c5b..ad4800d4f8a3bb 100644 --- a/spec/ruby/core/process/spawn_spec.rb +++ b/spec/ruby/core/process/spawn_spec.rb @@ -567,6 +567,24 @@ def child_pids(pid) end end + platform_is_not :windows do + it "redirects non-default file descriptor to itself" do + File.open(@name, 'w') do |file| + -> do + Process.wait Process.spawn( + ruby_cmd("f = IO.new(#{file.fileno}, 'w'); f.print(:bang); f.flush"), file.fileno => file.fileno) + end.should output_to_fd("bang", file) + end + end + end + + it "redirects default file descriptor to itself" do + -> do + Process.wait Process.spawn( + ruby_cmd("f = IO.new(#{STDOUT.fileno}, 'w'); f.print(:bang); f.flush"), STDOUT.fileno => STDOUT.fileno) + end.should output_to_fd("bang", STDOUT) + end + # :close_others platform_is_not :windows do diff --git a/spec/ruby/core/regexp/timeout_spec.rb b/spec/ruby/core/regexp/timeout_spec.rb new file mode 100644 index 00000000000000..6fce261814ef48 --- /dev/null +++ 
b/spec/ruby/core/regexp/timeout_spec.rb @@ -0,0 +1,35 @@ +require_relative '../../spec_helper' + +ruby_version_is "3.2" do + describe "Regexp.timeout" do + after :each do + Regexp.timeout = nil + end + + it "returns global timeout" do + Regexp.timeout = 3 + Regexp.timeout.should == 3 + end + + it "raises Regexp::TimeoutError after global timeout elapsed" do + Regexp.timeout = 0.001 + Regexp.timeout.should == 0.001 + + -> { + # A typical ReDoS case + /^(a*)*$/ =~ "a" * 1000000 + "x" + }.should raise_error(Regexp::TimeoutError, "regexp match timeout") + end + + it "raises Regexp::TimeoutError after timeout keyword value elapsed" do + Regexp.timeout = 3 # This should be ignored + Regexp.timeout.should == 3 + + re = Regexp.new("^a*b?a*$", timeout: 0.001) + + -> { + re =~ "a" * 1000000 + "x" + }.should raise_error(Regexp::TimeoutError, "regexp match timeout") + end + end +end diff --git a/spec/ruby/core/string/element_set_spec.rb b/spec/ruby/core/string/element_set_spec.rb index 881b4343d4f27c..fa041fa31da2f9 100644 --- a/spec/ruby/core/string/element_set_spec.rb +++ b/spec/ruby/core/string/element_set_spec.rb @@ -357,11 +357,11 @@ end it "raises a RangeError if negative Range begin is out of range" do - -> { "abc"[-4..-2] = "x" }.should raise_error(RangeError) + -> { "abc"[-4..-2] = "x" }.should raise_error(RangeError, "-4..-2 out of range") end it "raises a RangeError if positive Range begin is greater than String size" do - -> { "abc"[4..2] = "x" }.should raise_error(RangeError) + -> { "abc"[4..2] = "x" }.should raise_error(RangeError, "4..2 out of range") end it "uses the Range end as an index rather than a count" do diff --git a/spec/ruby/core/string/fixtures/to_c.rb b/spec/ruby/core/string/fixtures/to_c.rb new file mode 100644 index 00000000000000..77769332637515 --- /dev/null +++ b/spec/ruby/core/string/fixtures/to_c.rb @@ -0,0 +1,5 @@ +module StringSpecs + def self.to_c_method(string) + string.to_c + end +end diff --git a/spec/ruby/core/string/gsub_spec.rb 
b/spec/ruby/core/string/gsub_spec.rb index 3211ebbd0ac8ce..c87a56659105c0 100644 --- a/spec/ruby/core/string/gsub_spec.rb +++ b/spec/ruby/core/string/gsub_spec.rb @@ -210,8 +210,6 @@ def replacement.to_str() "hello_replacement" end end end - # Note: $~ cannot be tested because mspec messes with it - it "sets $~ to MatchData of last match and nil when there's none" do 'hello.'.gsub('hello', 'x') $~[0].should == 'hello' @@ -225,6 +223,18 @@ def replacement.to_str() "hello_replacement" end 'hello.'.gsub(/not/, 'x') $~.should == nil end + + it "handles a pattern in a superset encoding" do + result = 'abc'.force_encoding(Encoding::US_ASCII).gsub('é', 'è') + result.should == 'abc' + result.encoding.should == Encoding::US_ASCII + end + + it "handles a pattern in a subset encoding" do + result = 'été'.gsub('t'.force_encoding(Encoding::US_ASCII), 'u') + result.should == 'éué' + result.encoding.should == Encoding::UTF_8 + end end describe "String#gsub with pattern and Hash" do @@ -521,6 +531,27 @@ def obj.to_s() "ok" end -> { s.gsub!(/e/, "e") }.should raise_error(FrozenError) -> { s.gsub!(/[aeiou]/, '*') }.should raise_error(FrozenError) end + + it "handles a pattern in a superset encoding" do + string = 'abc'.force_encoding(Encoding::US_ASCII) + + result = string.gsub!('é', 'è') + + result.should == nil + string.should == 'abc' + string.encoding.should == Encoding::US_ASCII + end + + it "handles a pattern in a subset encoding" do + string = 'été' + pattern = 't'.force_encoding(Encoding::US_ASCII) + + result = string.gsub!(pattern, 'u') + + result.should == string + string.should == 'éué' + string.encoding.should == Encoding::UTF_8 + end end describe "String#gsub! 
with pattern and block" do diff --git a/spec/ruby/core/string/index_spec.rb b/spec/ruby/core/string/index_spec.rb index 5d77a88e4e000d..2eeee9be8741b6 100644 --- a/spec/ruby/core/string/index_spec.rb +++ b/spec/ruby/core/string/index_spec.rb @@ -159,6 +159,14 @@ "あれ".index char end.should raise_error(Encoding::CompatibilityError) end + + it "handles a substring in a superset encoding" do + 'abc'.force_encoding(Encoding::US_ASCII).index('é').should == nil + end + + it "handles a substring in a subset encoding" do + 'été'.index('t'.force_encoding(Encoding::US_ASCII)).should == 1 + end end describe "String#index with Regexp" do diff --git a/spec/ruby/core/string/partition_spec.rb b/spec/ruby/core/string/partition_spec.rb index 98311f2be4fdf0..9cb3672881ff02 100644 --- a/spec/ruby/core/string/partition_spec.rb +++ b/spec/ruby/core/string/partition_spec.rb @@ -38,4 +38,26 @@ it "takes precedence over a given block" do "hello world".partition("o") { true }.should == ["hell", "o", " world"] end + + it "handles a pattern in a superset encoding" do + string = "hello".force_encoding(Encoding::US_ASCII) + + result = string.partition("é") + + result.should == ["hello", "", ""] + result[0].encoding.should == Encoding::US_ASCII + result[1].encoding.should == Encoding::US_ASCII + result[2].encoding.should == Encoding::US_ASCII + end + + it "handles a pattern in a subset encoding" do + pattern = "o".force_encoding(Encoding::US_ASCII) + + result = "héllo world".partition(pattern) + + result.should == ["héll", "o", " world"] + result[0].encoding.should == Encoding::UTF_8 + result[1].encoding.should == Encoding::US_ASCII + result[2].encoding.should == Encoding::UTF_8 + end end diff --git a/spec/ruby/core/string/rindex_spec.rb b/spec/ruby/core/string/rindex_spec.rb index a3b437a1e478c6..e795105e1da113 100644 --- a/spec/ruby/core/string/rindex_spec.rb +++ b/spec/ruby/core/string/rindex_spec.rb @@ -196,6 +196,14 @@ def obj.method_missing(*args) 5 end it "raises a TypeError when given 
offset is nil" do -> { "str".rindex("st", nil) }.should raise_error(TypeError) end + + it "handles a substring in a superset encoding" do + 'abc'.force_encoding(Encoding::US_ASCII).rindex('é').should == nil + end + + it "handles a substring in a subset encoding" do + 'été'.rindex('t'.force_encoding(Encoding::US_ASCII)).should == 1 + end end describe "String#rindex with Regexp" do diff --git a/spec/ruby/core/string/rpartition_spec.rb b/spec/ruby/core/string/rpartition_spec.rb index c8f9afaee9225a..21e87f530a8013 100644 --- a/spec/ruby/core/string/rpartition_spec.rb +++ b/spec/ruby/core/string/rpartition_spec.rb @@ -46,4 +46,26 @@ ->{ "hello".rpartition(5) }.should raise_error(TypeError) ->{ "hello".rpartition(nil) }.should raise_error(TypeError) end + + it "handles a pattern in a superset encoding" do + string = "hello".force_encoding(Encoding::US_ASCII) + + result = string.rpartition("é") + + result.should == ["", "", "hello"] + result[0].encoding.should == Encoding::US_ASCII + result[1].encoding.should == Encoding::US_ASCII + result[2].encoding.should == Encoding::US_ASCII + end + + it "handles a pattern in a subset encoding" do + pattern = "o".force_encoding(Encoding::US_ASCII) + + result = "héllo world".rpartition(pattern) + + result.should == ["héllo w", "o", "rld"] + result[0].encoding.should == Encoding::UTF_8 + result[1].encoding.should == Encoding::US_ASCII + result[2].encoding.should == Encoding::UTF_8 + end end diff --git a/spec/ruby/core/string/sub_spec.rb b/spec/ruby/core/string/sub_spec.rb index 9effe88c27891f..99dd7b45a83e06 100644 --- a/spec/ruby/core/string/sub_spec.rb +++ b/spec/ruby/core/string/sub_spec.rb @@ -214,6 +214,17 @@ "ababa".sub(/(b)/, '\\\\\1').should == "a\\baba" end + it "handles a pattern in a superset encoding" do + result = 'abc'.force_encoding(Encoding::US_ASCII).sub('é', 'è') + result.should == 'abc' + result.encoding.should == Encoding::US_ASCII + end + + it "handles a pattern in a subset encoding" do + result = 
'été'.sub('t'.force_encoding(Encoding::US_ASCII), 'u') + result.should == 'éué' + result.encoding.should == Encoding::UTF_8 + end end describe "String#sub with pattern and block" do @@ -299,6 +310,27 @@ -> { s.sub!(/e/, "e") }.should raise_error(FrozenError) -> { s.sub!(/[aeiou]/, '*') }.should raise_error(FrozenError) end + + it "handles a pattern in a superset encoding" do + string = 'abc'.force_encoding(Encoding::US_ASCII) + + result = string.sub!('é', 'è') + + result.should == nil + string.should == 'abc' + string.encoding.should == Encoding::US_ASCII + end + + it "handles a pattern in a subset encoding" do + string = 'été' + pattern = 't'.force_encoding(Encoding::US_ASCII) + + result = string.sub!(pattern, 'u') + + result.should == string + string.should == 'éué' + string.encoding.should == Encoding::UTF_8 + end end describe "String#sub! with pattern and block" do diff --git a/spec/ruby/core/string/to_c_spec.rb b/spec/ruby/core/string/to_c_spec.rb index 9c84b14f4d7ca5..994bdf99f661b9 100644 --- a/spec/ruby/core/string/to_c_spec.rb +++ b/spec/ruby/core/string/to_c_spec.rb @@ -1,99 +1,42 @@ require_relative '../../spec_helper' +require_relative '../../shared/kernel/complex' +require_relative 'fixtures/to_c' describe "String#to_c" do - it "returns a Complex object" do - '9'.to_c.should be_an_instance_of(Complex) - end - - it "understands integers" do - '20'.to_c.should == Complex(20) - end - - it "understands negative integers" do - '-3'.to_c.should == Complex(-3) - end - - it "understands fractions (numerator/denominator) for the real part" do - '2/3'.to_c.should == Complex(Rational(2, 3)) - end - - it "understands fractions (numerator/denominator) for the imaginary part" do - '4+2/3i'.to_c.should == Complex(4, Rational(2, 3)) - end - - it "understands negative fractions (-numerator/denominator) for the real part" do - '-2/3'.to_c.should == Complex(Rational(-2, 3)) - end - - it "understands negative fractions (-numerator/denominator) for the imaginary part" do - 
'7-2/3i'.to_c.should == Complex(7, Rational(-2, 3)) - end - - it "understands floats (a.b) for the real part" do - '2.3'.to_c.should == Complex(2.3) - end - - it "understands floats (a.b) for the imaginary part" do - '4+2.3i'.to_c.should == Complex(4, 2.3) - end - - it "understands negative floats (-a.b) for the real part" do - '-2.33'.to_c.should == Complex(-2.33) - end - - it "understands negative floats (-a.b) for the imaginary part" do - '7-28.771i'.to_c.should == Complex(7, -28.771) - end - - it "understands an integer followed by 'i' to mean that integer is the imaginary part" do - '35i'.to_c.should == Complex(0,35) - end - - it "understands a negative integer followed by 'i' to mean that negative integer is the imaginary part" do - '-29i'.to_c.should == Complex(0,-29) - end - - it "understands an 'i' by itself as denoting a complex number with an imaginary part of 1" do - 'i'.to_c.should == Complex(0,1) - end - - it "understands a '-i' by itself as denoting a complex number with an imaginary part of -1" do - '-i'.to_c.should == Complex(0,-1) - end - - it "understands 'a+bi' to mean a complex number with 'a' as the real part, 'b' as the imaginary" do - '79+4i'.to_c.should == Complex(79,4) - end - - it "understands 'a-bi' to mean a complex number with 'a' as the real part, '-b' as the imaginary" do - '79-4i'.to_c.should == Complex(79,-4) - end + it_behaves_like :kernel_complex, :to_c_method, StringSpecs +end - it "understands scientific notation for the real part" do - '2e3+4i'.to_c.should == Complex(2e3,4) +describe "String#to_c" do + it "returns a complex number with 0 as the real part, 0 as the imaginary part for unrecognised Strings" do + 'ruby'.to_c.should == Complex(0, 0) end - it "understands negative scientific notation for the real part" do - '-2e3+4i'.to_c.should == Complex(-2e3,4) + it "ignores trailing garbage" do + '79+4iruby'.to_c.should == Complex(79, 4) end - it "understands scientific notation for the imaginary part" do - '4+2e3i'.to_c.should 
== Complex(4, 2e3) + it "understands Float::INFINITY" do + 'Infinity'.to_c.should == Complex(0, 1) + '-Infinity'.to_c.should == Complex(0, -1) end - it "understands negative scientific notation for the imaginary part" do - '4-2e3i'.to_c.should == Complex(4, -2e3) + it "understands Float::NAN" do + 'NaN'.to_c.should == Complex(0, 0) end - it "understands scientific notation for the real and imaginary part in the same String" do - '2e3+2e4i'.to_c.should == Complex(2e3,2e4) + it "understands a sequence of _" do + '7__9+4__0i'.to_c.should == Complex(79, 40) end - it "understands negative scientific notation for the real and imaginary part in the same String" do - '-2e3-2e4i'.to_c.should == Complex(-2e3,-2e4) + it "allows null-byte" do + "1-2i\0".to_c.should == Complex(1, -2) + "1\0-2i".to_c.should == Complex(1, 0) + "\01-2i".to_c.should == Complex(0, 0) end - it "returns a complex number with 0 as the real part, 0 as the imaginary part for unrecognised Strings" do - 'ruby'.to_c.should == Complex(0,0) + it "raises Encoding::CompatibilityError if String is in not ASCII-compatible encoding" do + -> { + '79+4i'.encode("UTF-16").to_c + }.should raise_error(Encoding::CompatibilityError, "ASCII incompatible encoding: UTF-16") end end diff --git a/spec/ruby/core/string/unpack/b_spec.rb b/spec/ruby/core/string/unpack/b_spec.rb index 1a838d6c7c3c20..fcabc99731f761 100644 --- a/spec/ruby/core/string/unpack/b_spec.rb +++ b/spec/ruby/core/string/unpack/b_spec.rb @@ -93,6 +93,11 @@ it "ignores spaces between directives" do "\x80\x00".unpack("B B").should == ["1", "0"] end + + it "decodes into US-ASCII string values" do + str = "s".force_encoding('UTF-8').unpack("B*")[0] + str.encoding.name.should == 'US-ASCII' + end end describe "String#unpack with format 'b'" do @@ -189,5 +194,4 @@ str = "s".force_encoding('UTF-8').unpack("b*")[0] str.encoding.name.should == 'US-ASCII' end - end diff --git a/spec/ruby/core/string/unpack/m_spec.rb b/spec/ruby/core/string/unpack/m_spec.rb index 
21134514a19cca..c551c755d16ee9 100644 --- a/spec/ruby/core/string/unpack/m_spec.rb +++ b/spec/ruby/core/string/unpack/m_spec.rb @@ -97,6 +97,11 @@ ["=FF=\n", ["\xff"]] ].should be_computed_by(:unpack, "M") end + + it "unpacks incomplete escape sequences as literal characters" do + "foo=".unpack("M").should == ["foo="] + "foo=4".unpack("M").should == ["foo=4"] + end end describe "String#unpack with format 'm'" do diff --git a/spec/ruby/core/struct/initialize_spec.rb b/spec/ruby/core/struct/initialize_spec.rb index e82289071ae085..cfb302209e6f6e 100644 --- a/spec/ruby/core/struct/initialize_spec.rb +++ b/spec/ruby/core/struct/initialize_spec.rb @@ -40,4 +40,12 @@ it "can be overridden" do StructClasses::SubclassX.new(:y).new.key.should == :value end + + ruby_version_is "3.1"..."3.2" do + it "warns about passing only keyword arguments" do + -> { + StructClasses::Ruby.new(version: "3.1", platform: "OS") + }.should complain(/warning: Passing only keyword arguments/) + end + end end diff --git a/spec/ruby/core/struct/keyword_init_spec.rb b/spec/ruby/core/struct/keyword_init_spec.rb new file mode 100644 index 00000000000000..061f4c56e0df8a --- /dev/null +++ b/spec/ruby/core/struct/keyword_init_spec.rb @@ -0,0 +1,21 @@ +require_relative '../../spec_helper' + +ruby_version_is "3.1" do + # See https://bugs.ruby-lang.org/issues/18008 + describe "StructClass#keyword_init?" 
do + it "returns true for a struct that accepts keyword arguments to initialize" do + struct = Struct.new(:arg, keyword_init: true) + struct.keyword_init?.should be_true + end + + it "returns false for a struct that does not accept keyword arguments to initialize" do + struct = Struct.new(:arg, keyword_init: false) + struct.keyword_init?.should be_false + end + + it "returns nil for a struct that did not explicitly specify keyword_init" do + struct = Struct.new(:arg) + struct.keyword_init?.should be_nil + end + end +end diff --git a/spec/ruby/core/thread/backtrace/limit_spec.rb b/spec/ruby/core/thread/backtrace/limit_spec.rb new file mode 100644 index 00000000000000..26a87a806c174d --- /dev/null +++ b/spec/ruby/core/thread/backtrace/limit_spec.rb @@ -0,0 +1,15 @@ +require_relative '../../../spec_helper' + +ruby_version_is "3.1" do + describe "Thread::Backtrace.limit" do + it "returns maximum backtrace length set by --backtrace-limit command-line option" do + out = ruby_exe("print Thread::Backtrace.limit", options: "--backtrace-limit=2") + out.should == "2" + end + + it "returns -1 when --backtrace-limit command-line option is not set" do + out = ruby_exe("print Thread::Backtrace.limit") + out.should == "-1" + end + end +end diff --git a/spec/ruby/core/thread/native_thread_id_spec.rb b/spec/ruby/core/thread/native_thread_id_spec.rb new file mode 100644 index 00000000000000..5a6c0c86326a13 --- /dev/null +++ b/spec/ruby/core/thread/native_thread_id_spec.rb @@ -0,0 +1,15 @@ +require_relative '../../spec_helper' + +ruby_version_is "3.1" do + describe "Thread#native_thread_id" do + it "returns an integer when the thread is alive" do + Thread.current.native_thread_id.should be_kind_of(Integer) + end + + it "returns nil when the thread is not running" do + t = Thread.new {} + t.join + t.native_thread_id.should == nil + end + end +end diff --git a/spec/ruby/core/time/at_spec.rb b/spec/ruby/core/time/at_spec.rb index 2cc46ab8c966b1..74b1962a95fdca 100644 --- 
a/spec/ruby/core/time/at_spec.rb +++ b/spec/ruby/core/time/at_spec.rb @@ -266,5 +266,10 @@ time.zone.should == zone time.to_i.should == @epoch_time end + + it "raises ArgumentError if format is invalid" do + -> { Time.at(@epoch_time, in: "+09:99") }.should raise_error(ArgumentError) + -> { Time.at(@epoch_time, in: "ABC") }.should raise_error(ArgumentError) + end end end diff --git a/spec/ruby/core/time/new_spec.rb b/spec/ruby/core/time/new_spec.rb index 09b4d03a44db2d..aabf28e71286e5 100644 --- a/spec/ruby/core/time/new_spec.rb +++ b/spec/ruby/core/time/new_spec.rb @@ -332,4 +332,55 @@ def zone.local_to_utc(t) end end end + + ruby_version_is '3.1' do # https://bugs.ruby-lang.org/issues/17485 + describe ":in keyword argument" do + it "could be UTC offset as a String in '+HH:MM or '-HH:MM' format" do + time = Time.new(2000, 1, 1, 12, 0, 0, in: "+05:00") + + time.utc_offset.should == 5*60*60 + time.zone.should == nil + + time = Time.new(2000, 1, 1, 12, 0, 0, in: "-09:00") + + time.utc_offset.should == -9*60*60 + time.zone.should == nil + end + + it "could be UTC offset as a number of seconds" do + time = Time.new(2000, 1, 1, 12, 0, 0, in: 5*60*60) + + time.utc_offset.should == 5*60*60 + time.zone.should == nil + + time = Time.new(2000, 1, 1, 12, 0, 0, in: -9*60*60) + + time.utc_offset.should == -9*60*60 + time.zone.should == nil + end + + it "could be a timezone object" do + zone = TimeSpecs::TimezoneWithName.new(name: "Asia/Colombo") + time = Time.new(2000, 1, 1, 12, 0, 0, in: zone) + + time.utc_offset.should == 5*3600+30*60 + time.zone.should == zone + + zone = TimeSpecs::TimezoneWithName.new(name: "PST") + time = Time.new(2000, 1, 1, 12, 0, 0, in: zone) + + time.utc_offset.should == -9*60*60 + time.zone.should == zone + end + + it "raises ArgumentError if format is invalid" do + -> { Time.new(2000, 1, 1, 12, 0, 0, in: "+09:99") }.should raise_error(ArgumentError) + -> { Time.new(2000, 1, 1, 12, 0, 0, in: "ABC") }.should raise_error(ArgumentError) + end + + it 
"raises ArgumentError if two offset arguments are given" do + -> { Time.new(2000, 1, 1, 12, 0, 0, "+05:00", in: "+05:00") }.should raise_error(ArgumentError) + end + end + end end diff --git a/spec/ruby/core/time/now_spec.rb b/spec/ruby/core/time/now_spec.rb index 7dc79519967ab0..2b2e53a17c83ed 100644 --- a/spec/ruby/core/time/now_spec.rb +++ b/spec/ruby/core/time/now_spec.rb @@ -3,4 +3,49 @@ describe "Time.now" do it_behaves_like :time_now, :now + + describe ":in keyword argument" do + it "could be UTC offset as a String in '+HH:MM or '-HH:MM' format" do + time = Time.now(in: "+05:00") + + time.utc_offset.should == 5*60*60 + time.zone.should == nil + + time = Time.now(in: "-09:00") + + time.utc_offset.should == -9*60*60 + time.zone.should == nil + end + + it "could be UTC offset as a number of seconds" do + time = Time.now(in: 5*60*60) + + time.utc_offset.should == 5*60*60 + time.zone.should == nil + + time = Time.now(in: -9*60*60) + + time.utc_offset.should == -9*60*60 + time.zone.should == nil + end + + it "could be a timezone object" do + zone = TimeSpecs::TimezoneWithName.new(name: "Asia/Colombo") + time = Time.now(in: zone) + + time.utc_offset.should == 5*3600+30*60 + time.zone.should == zone + + zone = TimeSpecs::TimezoneWithName.new(name: "PST") + time = Time.now(in: zone) + + time.utc_offset.should == -9*60*60 + time.zone.should == zone + end + + it "raises ArgumentError if format is invalid" do + -> { Time.now(in: "+09:99") }.should raise_error(ArgumentError) + -> { Time.now(in: "ABC") }.should raise_error(ArgumentError) + end + end end diff --git a/spec/ruby/core/time/shared/local.rb b/spec/ruby/core/time/shared/local.rb index c4aa7a7ea9b4d2..068e31499911a6 100644 --- a/spec/ruby/core/time/shared/local.rb +++ b/spec/ruby/core/time/shared/local.rb @@ -7,12 +7,10 @@ end platform_is_not :windows do - describe "timezone changes" do - it "correctly adjusts the timezone change to 'CET' on 'Europe/Amsterdam'" do - with_timezone("Europe/Amsterdam") do - 
Time.send(@method, 1970, 5, 16).to_a.should == - [0, 0, 0, 16, 5, 1970, 6, 136, false, "CET"] - end + it "uses the 'CET' timezone with TZ=Europe/Amsterdam in 1970" do + with_timezone("Europe/Amsterdam") do + Time.send(@method, 1970, 5, 16).to_a.should == + [0, 0, 0, 16, 5, 1970, 6, 136, false, "CET"] end end end @@ -41,5 +39,4 @@ end end end - end diff --git a/spec/ruby/core/time/strftime_spec.rb b/spec/ruby/core/time/strftime_spec.rb index 1bd24b05385edd..c133e220082626 100644 --- a/spec/ruby/core/time/strftime_spec.rb +++ b/spec/ruby/core/time/strftime_spec.rb @@ -49,4 +49,13 @@ time = @new_time_with_offset[2012, 1, 1, 0, 0, 0, Rational(36645, 10)] time.strftime("%::z").should == "+01:01:05" end + + ruby_version_is "3.1" do + it "supports RFC 3339 UTC for unknown offset local time, -0000, as %-z" do + @time.strftime("%z").should == "+0000" + @time.strftime("%-z").should == "-0000" + @time.strftime("%-:z").should == "-00:00" + @time.strftime("%-::z").should == "-00:00:00" + end + end end diff --git a/spec/ruby/core/tracepoint/allow_reentry_spec.rb b/spec/ruby/core/tracepoint/allow_reentry_spec.rb new file mode 100644 index 00000000000000..6bff1bed7649ff --- /dev/null +++ b/spec/ruby/core/tracepoint/allow_reentry_spec.rb @@ -0,0 +1,32 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +ruby_version_is "3.1" do + describe 'TracePoint.allow_reentry' do + it 'allows the reentrance in a given block' do + event_lines = [] + l1 = l2 = l3 = l4 = nil + TracePoint.new(:line) do |tp| + next unless TracePointSpec.target_thread? + + event_lines << tp.lineno + next if (__LINE__ + 2 .. 
__LINE__ + 4).cover?(tp.lineno) + TracePoint.allow_reentry do + a = 1; l3 = __LINE__ + b = 2; l4 = __LINE__ + end + end.enable do + c = 3; l1 = __LINE__ + d = 4; l2 = __LINE__ + end + + event_lines.should == [l1, l3, l4, l2, l3, l4] + end + + it 'raises RuntimeError when not called inside a TracePoint' do + -> { + TracePoint.allow_reentry{} + }.should raise_error(RuntimeError) + end + end +end diff --git a/spec/ruby/core/unboundmethod/fixtures/classes.rb b/spec/ruby/core/unboundmethod/fixtures/classes.rb index 1f466e39d86e8d..6ab958d447b9cc 100644 --- a/spec/ruby/core/unboundmethod/fixtures/classes.rb +++ b/spec/ruby/core/unboundmethod/fixtures/classes.rb @@ -53,6 +53,12 @@ def neg_four(a, b, *c, &d); end def discard_1(); :discard; end def discard_2(); :discard; end + + def my_public_method; end + def my_protected_method; end + def my_private_method; end + protected :my_protected_method + private :my_private_method end class Parent diff --git a/spec/ruby/core/unboundmethod/private_spec.rb b/spec/ruby/core/unboundmethod/private_spec.rb new file mode 100644 index 00000000000000..fa735846bbac3c --- /dev/null +++ b/spec/ruby/core/unboundmethod/private_spec.rb @@ -0,0 +1,21 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +ruby_version_is "3.1"..."3.2" do + describe "UnboundMethod#private?" 
do + it "returns false when the method is public" do + obj = UnboundMethodSpecs::Methods.new + obj.method(:my_public_method).unbind.private?.should == false + end + + it "returns false when the method is protected" do + obj = UnboundMethodSpecs::Methods.new + obj.method(:my_protected_method).unbind.private?.should == false + end + + it "returns true when the method is private" do + obj = UnboundMethodSpecs::Methods.new + obj.method(:my_private_method).unbind.private?.should == true + end + end +end diff --git a/spec/ruby/core/unboundmethod/protected_spec.rb b/spec/ruby/core/unboundmethod/protected_spec.rb new file mode 100644 index 00000000000000..db00e7ef43b45f --- /dev/null +++ b/spec/ruby/core/unboundmethod/protected_spec.rb @@ -0,0 +1,21 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +ruby_version_is "3.1"..."3.2" do + describe "UnboundMethod#protected?" do + it "returns false when the method is public" do + obj = UnboundMethodSpecs::Methods.new + obj.method(:my_public_method).unbind.protected?.should == false + end + + it "returns true when the method is protected" do + obj = UnboundMethodSpecs::Methods.new + obj.method(:my_protected_method).unbind.protected?.should == true + end + + it "returns false when the method is private" do + obj = UnboundMethodSpecs::Methods.new + obj.method(:my_private_method).unbind.protected?.should == false + end + end +end diff --git a/spec/ruby/core/unboundmethod/public_spec.rb b/spec/ruby/core/unboundmethod/public_spec.rb new file mode 100644 index 00000000000000..7b87a03b1584a2 --- /dev/null +++ b/spec/ruby/core/unboundmethod/public_spec.rb @@ -0,0 +1,21 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +ruby_version_is "3.1"..."3.2" do + describe "UnboundMethod#public?" 
do + it "returns true when the method is public" do + obj = UnboundMethodSpecs::Methods.new + obj.method(:my_public_method).unbind.public?.should == true + end + + it "returns false when the method is protected" do + obj = UnboundMethodSpecs::Methods.new + obj.method(:my_protected_method).unbind.public?.should == false + end + + it "returns false when the method is private" do + obj = UnboundMethodSpecs::Methods.new + obj.method(:my_private_method).unbind.public?.should == false + end + end +end diff --git a/spec/ruby/core/unboundmethod/super_method_spec.rb b/spec/ruby/core/unboundmethod/super_method_spec.rb index aa7c1293772782..101c83b8b333eb 100644 --- a/spec/ruby/core/unboundmethod/super_method_spec.rb +++ b/spec/ruby/core/unboundmethod/super_method_spec.rb @@ -40,10 +40,12 @@ end end - context "after aliasing an inherited method" do - it "returns the expected super_method" do - method = MethodSpecs::InheritedMethods::C.instance_method(:meow) - method.super_method.owner.should == MethodSpecs::InheritedMethods::A + ruby_version_is "2.7.3" do + context "after aliasing an inherited method" do + it "returns the expected super_method" do + method = MethodSpecs::InheritedMethods::C.instance_method(:meow) + method.super_method.owner.should == MethodSpecs::InheritedMethods::A + end end end end diff --git a/spec/ruby/language/block_spec.rb b/spec/ruby/language/block_spec.rb index d918c12beb2ec9..8488b945d5df3e 100644 --- a/spec/ruby/language/block_spec.rb +++ b/spec/ruby/language/block_spec.rb @@ -263,12 +263,55 @@ def m(a) yield a end m(obj) { |a, b, c| [a, b, c] }.should == [obj, nil, nil] end + it "receives the object if it does not respond to #to_ary" do + obj = Object.new + + m(obj) { |a, b, c| [a, b, c] }.should == [obj, nil, nil] + end + + it "calls #respond_to? 
to check if object has method #to_ary" do + obj = mock("destructure block arguments") + obj.should_receive(:respond_to?).with(:to_ary, true).and_return(true) + obj.should_receive(:to_ary).and_return([1, 2]) + + m(obj) { |a, b, c| [a, b, c] }.should == [1, 2, nil] + end + + it "receives the object if it does not respond to #respond_to?" do + obj = BasicObject.new + + m(obj) { |a, b, c| [a, b, c] }.should == [obj, nil, nil] + end + + it "calls #to_ary on the object when it is defined dynamically" do + obj = Object.new + def obj.method_missing(name, *args, &block) + if name == :to_ary + [1, 2] + else + super + end + end + def obj.respond_to_missing?(name, include_private) + name == :to_ary + end + + m(obj) { |a, b, c| [a, b, c] }.should == [1, 2, nil] + end + it "raises a TypeError if #to_ary does not return an Array" do obj = mock("destructure block arguments") obj.should_receive(:to_ary).and_return(1) -> { m(obj) { |a, b| } }.should raise_error(TypeError) end + + it "raises error transparently if #to_ary raises error on its own" do + obj = Object.new + def obj.to_ary; raise "Exception raised in #to_ary" end + + -> { m(obj) { |a, b| } }.should raise_error(RuntimeError, "Exception raised in #to_ary") + end end end diff --git a/spec/ruby/language/keyword_arguments_spec.rb b/spec/ruby/language/keyword_arguments_spec.rb index 8771c5806c0c36..c47b7b0ae95145 100644 --- a/spec/ruby/language/keyword_arguments_spec.rb +++ b/spec/ruby/language/keyword_arguments_spec.rb @@ -321,6 +321,21 @@ def m(*args) m({a: 1}).should == [[{a: 1}], {}] end + ruby_version_is "3.1" do + describe "omitted values" do + it "accepts short notation 'key' for 'key: value' syntax" do + def m(a:, b:) + [a, b] + end + + a = 1 + b = 2 + + eval('m(a:, b:).should == [1, 2]') + end + end + end + ruby_version_is "3.2" do it "does not work with call(*ruby2_keyword_args) with missing ruby2_keywords in between" do class << self diff --git a/spec/ruby/language/method_spec.rb b/spec/ruby/language/method_spec.rb 
index acca074974743e..b80b314f6f40e4 100644 --- a/spec/ruby/language/method_spec.rb +++ b/spec/ruby/language/method_spec.rb @@ -1679,6 +1679,15 @@ def m() = 42 m.should == 42 end + + context "without parenthesis" do + evaluate <<-ruby do + def m = 42 + ruby + + m.should == 42 + end + end end context "with arguments" do @@ -1716,6 +1725,16 @@ def m(...) = mm(...) + mm(...) m("meow", num: 2).should == "meow" * 4 end end + + ruby_version_is ""..."3.0" do + context "inside 'endless' method definitions" do + it "does not allow method calls without parenthesis" do + -> { + eval("def greet(person) = 'Hi, '.concat person") + }.should raise_error(SyntaxError) + end + end + end end describe "Keyword arguments are now separated from positional arguments" do @@ -1824,4 +1843,14 @@ def foo(val) end end end + + describe "Inside 'endless' method definitions" do + it "allows method calls without parenthesis" do + eval <<-ruby + def greet(person) = "Hi, ".concat person + ruby + + greet("Homer").should == "Hi, Homer" + end + end end diff --git a/spec/ruby/library/cmath/math/acos_spec.rb b/spec/ruby/library/cmath/math/acos_spec.rb deleted file mode 100644 index e15f14f95f5d6f..00000000000000 --- a/spec/ruby/library/cmath/math/acos_spec.rb +++ /dev/null @@ -1 +0,0 @@ -require_relative '../../../spec_helper' diff --git a/spec/ruby/library/cmath/math/acosh_spec.rb b/spec/ruby/library/cmath/math/acosh_spec.rb deleted file mode 100644 index e15f14f95f5d6f..00000000000000 --- a/spec/ruby/library/cmath/math/acosh_spec.rb +++ /dev/null @@ -1 +0,0 @@ -require_relative '../../../spec_helper' diff --git a/spec/ruby/library/cmath/math/asin_spec.rb b/spec/ruby/library/cmath/math/asin_spec.rb deleted file mode 100644 index e15f14f95f5d6f..00000000000000 --- a/spec/ruby/library/cmath/math/asin_spec.rb +++ /dev/null @@ -1 +0,0 @@ -require_relative '../../../spec_helper' diff --git a/spec/ruby/library/cmath/math/asinh_spec.rb b/spec/ruby/library/cmath/math/asinh_spec.rb deleted file mode 100644 index 
e15f14f95f5d6f..00000000000000 --- a/spec/ruby/library/cmath/math/asinh_spec.rb +++ /dev/null @@ -1 +0,0 @@ -require_relative '../../../spec_helper' diff --git a/spec/ruby/library/cmath/math/atan2_spec.rb b/spec/ruby/library/cmath/math/atan2_spec.rb deleted file mode 100644 index e15f14f95f5d6f..00000000000000 --- a/spec/ruby/library/cmath/math/atan2_spec.rb +++ /dev/null @@ -1 +0,0 @@ -require_relative '../../../spec_helper' diff --git a/spec/ruby/library/cmath/math/atan_spec.rb b/spec/ruby/library/cmath/math/atan_spec.rb deleted file mode 100644 index e15f14f95f5d6f..00000000000000 --- a/spec/ruby/library/cmath/math/atan_spec.rb +++ /dev/null @@ -1 +0,0 @@ -require_relative '../../../spec_helper' diff --git a/spec/ruby/library/cmath/math/atanh_spec.rb b/spec/ruby/library/cmath/math/atanh_spec.rb deleted file mode 100644 index e15f14f95f5d6f..00000000000000 --- a/spec/ruby/library/cmath/math/atanh_spec.rb +++ /dev/null @@ -1 +0,0 @@ -require_relative '../../../spec_helper' diff --git a/spec/ruby/library/cmath/math/cos_spec.rb b/spec/ruby/library/cmath/math/cos_spec.rb deleted file mode 100644 index e15f14f95f5d6f..00000000000000 --- a/spec/ruby/library/cmath/math/cos_spec.rb +++ /dev/null @@ -1 +0,0 @@ -require_relative '../../../spec_helper' diff --git a/spec/ruby/library/cmath/math/cosh_spec.rb b/spec/ruby/library/cmath/math/cosh_spec.rb deleted file mode 100644 index e15f14f95f5d6f..00000000000000 --- a/spec/ruby/library/cmath/math/cosh_spec.rb +++ /dev/null @@ -1 +0,0 @@ -require_relative '../../../spec_helper' diff --git a/spec/ruby/library/cmath/math/exp_spec.rb b/spec/ruby/library/cmath/math/exp_spec.rb deleted file mode 100644 index e15f14f95f5d6f..00000000000000 --- a/spec/ruby/library/cmath/math/exp_spec.rb +++ /dev/null @@ -1 +0,0 @@ -require_relative '../../../spec_helper' diff --git a/spec/ruby/library/cmath/math/fixtures/classes.rb b/spec/ruby/library/cmath/math/fixtures/classes.rb deleted file mode 100644 index 443c1a9ace3007..00000000000000 --- 
a/spec/ruby/library/cmath/math/fixtures/classes.rb +++ /dev/null @@ -1,4 +0,0 @@ -require 'cmath' -class IncludesMath - include CMath -end diff --git a/spec/ruby/library/cmath/math/log10_spec.rb b/spec/ruby/library/cmath/math/log10_spec.rb deleted file mode 100644 index e15f14f95f5d6f..00000000000000 --- a/spec/ruby/library/cmath/math/log10_spec.rb +++ /dev/null @@ -1 +0,0 @@ -require_relative '../../../spec_helper' diff --git a/spec/ruby/library/cmath/math/log_spec.rb b/spec/ruby/library/cmath/math/log_spec.rb deleted file mode 100644 index e15f14f95f5d6f..00000000000000 --- a/spec/ruby/library/cmath/math/log_spec.rb +++ /dev/null @@ -1 +0,0 @@ -require_relative '../../../spec_helper' diff --git a/spec/ruby/library/cmath/math/shared/acos.rb b/spec/ruby/library/cmath/math/shared/acos.rb deleted file mode 100644 index 65637fa838d188..00000000000000 --- a/spec/ruby/library/cmath/math/shared/acos.rb +++ /dev/null @@ -1,41 +0,0 @@ -require_relative '../fixtures/classes' - -describe :complex_math_acos, shared: true do - it "returns the arccosine of the passed argument" do - @object.send(:acos, 1).should be_close(0.0, TOLERANCE) - @object.send(:acos, 0).should be_close(1.5707963267949, TOLERANCE) - @object.send(:acos, -1).should be_close(Math::PI,TOLERANCE) - end - - it "returns the arccosine for Complex numbers" do - @object.send(:acos, Complex(3, 4)).should be_close(Complex(0.93681246115572, -2.30550903124348), TOLERANCE) - end - - it "returns the arccosine for numbers greater than 1.0 as a Complex number" do - @object.send(:acos, 1.0001).should be_close(Complex(0.0, 0.0141420177752494), TOLERANCE) - end - - it "returns the arccosine for numbers less than -1.0 as a Complex number" do - @object.send(:acos, -1.0001).should be_close(Complex(3.14159265358979, -0.0141420177752495), TOLERANCE) - end -end - -describe :complex_math_acos_bang, shared: true do - it "returns the arccosine of the argument" do - @object.send(:acos!, 1).should be_close(0.0, TOLERANCE) - 
@object.send(:acos!, 0).should be_close(1.5707963267949, TOLERANCE) - @object.send(:acos!, -1).should be_close(Math::PI,TOLERANCE) - end - - it "raises a TypeError when passed a Complex number" do - -> { @object.send(:acos!, Complex(4, 5)) }.should raise_error(TypeError) - end - - it "raises an Errno::EDOM for numbers greater than 1.0" do - -> { @object.send(:acos!, 1.0001) }.should raise_error(Errno::EDOM) - end - - it "raises an Errno::EDOM for numbers less than -1.0" do - -> { @object.send(:acos!, -1.0001) }.should raise_error(Errno::EDOM) - end -end diff --git a/spec/ruby/library/cmath/math/shared/acosh.rb b/spec/ruby/library/cmath/math/shared/acosh.rb deleted file mode 100644 index 285b0b823f6fee..00000000000000 --- a/spec/ruby/library/cmath/math/shared/acosh.rb +++ /dev/null @@ -1,37 +0,0 @@ -require_relative '../fixtures/classes' - -describe :complex_math_acosh, shared: true do - it "returns the principle value of the inverse hyperbolic cosine of the argument" do - @object.send(:acosh, 14.2).should be_close(3.345146999647, TOLERANCE) - @object.send(:acosh, 1.0).should be_close(0.0, TOLERANCE) - end - - it "returns the principle value of the inverse hyperbolic cosine for numbers less than 1.0 as a Complex number" do - @object.send(:acosh, 1.0 - TOLERANCE).should be_close(Complex(0.0, 0.00774598605746135), TOLERANCE) - @object.send(:acosh, 0).should be_close(Complex(0.0, 1.5707963267949), TOLERANCE) - @object.send(:acosh, -1.0).should be_close(Complex(0.0, 3.14159265358979), TOLERANCE) - end - - it "returns the principle value of the inverse hyperbolic cosine for Complex numbers" do - @object.send(:acosh, Complex(3, 4)) - @object.send(:acosh, Complex(3, 4)).imaginary.should be_close(0.93681246115572, TOLERANCE) - @object.send(:acosh, Complex(3, 4)).real.should be_close(2.305509031243477, TOLERANCE) - end -end - -describe :complex_math_acosh_bang, shared: true do - it "returns the principle value of the inverse hyperbolic cosine of the argument" do - 
@object.send(:acosh!, 14.2).should be_close(3.345146999647, TOLERANCE) - @object.send(:acosh!, 1.0).should be_close(0.0, TOLERANCE) - end - - it "raises Errno::EDOM for numbers less than 1.0" do - -> { @object.send(:acosh!, 1.0 - TOLERANCE) }.should raise_error(Errno::EDOM) - -> { @object.send(:acosh!, 0) }.should raise_error(Errno::EDOM) - -> { @object.send(:acosh!, -1.0) }.should raise_error(Errno::EDOM) - end - - it "raises a TypeError when passed a Complex number" do - -> { @object.send(:acosh!, Complex(4, 5)) }.should raise_error(TypeError) - end -end diff --git a/spec/ruby/library/cmath/math/shared/asin.rb b/spec/ruby/library/cmath/math/shared/asin.rb deleted file mode 100644 index 91fed7aa06903f..00000000000000 --- a/spec/ruby/library/cmath/math/shared/asin.rb +++ /dev/null @@ -1,47 +0,0 @@ -require_relative '../fixtures/classes' - -describe :complex_math_asin, shared: true do - it "returns the arcsine of the argument" do - @object.send(:asin, 1).should be_close(Math::PI/2, TOLERANCE) - @object.send(:asin, 0).should be_close(0.0, TOLERANCE) - @object.send(:asin, -1).should be_close(-Math::PI/2, TOLERANCE) - @object.send(:asin, 0.25).should be_close(0.252680255142079, TOLERANCE) - @object.send(:asin, 0.50).should be_close(0.523598775598299, TOLERANCE) - @object.send(:asin, 0.75).should be_close(0.8480620789814816,TOLERANCE) - end - - it "returns the arcsine for Complex numbers" do - @object.send(:asin, Complex(3, 4)).should be_close(Complex(0.633983865639174, 2.30550903124347), TOLERANCE) - end - - it "returns a Complex number when the argument is greater than 1.0" do - @object.send(:asin, 1.0001).should be_close(Complex(1.5707963267949, -0.0141420177752494), TOLERANCE) - end - - it "returns a Complex number when the argument is less than -1.0" do - @object.send(:asin, -1.0001).should be_close(Complex(-1.5707963267949, 0.0141420177752494), TOLERANCE) - end -end - -describe :complex_math_asin_bang, shared: true do - it "returns the arcsine of the argument" do 
- @object.send(:asin!, 1).should be_close(Math::PI/2, TOLERANCE) - @object.send(:asin!, 0).should be_close(0.0, TOLERANCE) - @object.send(:asin!, -1).should be_close(-Math::PI/2, TOLERANCE) - @object.send(:asin!, 0.25).should be_close(0.252680255142079, TOLERANCE) - @object.send(:asin!, 0.50).should be_close(0.523598775598299, TOLERANCE) - @object.send(:asin!, 0.75).should be_close(0.8480620789814816,TOLERANCE) - end - - it "raises an Errno::EDOM if the argument is greater than 1.0" do - -> { @object.send(:asin!, 1.0001) }.should raise_error( Errno::EDOM) - end - - it "raises an Errno::EDOM if the argument is less than -1.0" do - -> { @object.send(:asin!, -1.0001) }.should raise_error( Errno::EDOM) - end - - it "raises a TypeError when passed a Complex number" do - -> { @object.send(:asin!, Complex(4, 5)) }.should raise_error(TypeError) - end -end diff --git a/spec/ruby/library/cmath/math/shared/asinh.rb b/spec/ruby/library/cmath/math/shared/asinh.rb deleted file mode 100644 index b4ddd3a22ea6a9..00000000000000 --- a/spec/ruby/library/cmath/math/shared/asinh.rb +++ /dev/null @@ -1,32 +0,0 @@ -require_relative '../fixtures/classes' - -describe :complex_math_asinh, shared: true do - it "returns the inverse hyperbolic sin of the argument" do - @object.send(:asinh, 1.5).should be_close(1.19476321728711, TOLERANCE) - @object.send(:asinh, -2.97).should be_close(-1.8089166921397, TOLERANCE) - @object.send(:asinh, 0.0).should == 0.0 - @object.send(:asinh, -0.0).should == -0.0 - @object.send(:asinh, 1.05367e-08).should be_close(1.05367e-08, TOLERANCE) - @object.send(:asinh, -1.05367e-08).should be_close(-1.05367e-08, TOLERANCE) - end - - it "returns the inverse hyperbolic sin for Complex numbers" do - @object.send(:asinh, Complex(3, 4)).should be_close(Complex(2.29991404087927, 0.917616853351479), TOLERANCE) - @object.send(:asinh, Complex(3.5, -4)).should be_close(Complex(2.36263337274419, -0.843166327537659), TOLERANCE) - end -end - -describe :complex_math_asinh_bang, 
shared: true do - it "returns the inverse hyperbolic sin of the argument" do - @object.send(:asinh!, 1.5).should be_close(1.19476321728711, TOLERANCE) - @object.send(:asinh!, -2.97).should be_close(-1.8089166921397, TOLERANCE) - @object.send(:asinh!, 0.0).should == 0.0 - @object.send(:asinh!, -0.0).should == -0.0 - @object.send(:asinh!, 1.05367e-08).should be_close(1.05367e-08, TOLERANCE) - @object.send(:asinh!, -1.05367e-08).should be_close(-1.05367e-08, TOLERANCE) - end - - it "raises a TypeError when passed a Complex number" do - -> { @object.send(:asinh!, Complex(4, 5)) }.should raise_error(TypeError) - end -end diff --git a/spec/ruby/library/cmath/math/shared/atan.rb b/spec/ruby/library/cmath/math/shared/atan.rb deleted file mode 100644 index 63a496e841cd6d..00000000000000 --- a/spec/ruby/library/cmath/math/shared/atan.rb +++ /dev/null @@ -1,32 +0,0 @@ -require_relative '../fixtures/classes' - -describe :complex_math_atan, shared: true do - it "returns the arctangent of the argument" do - @object.send(:atan, 1).should be_close(Math::PI/4, TOLERANCE) - @object.send(:atan, 0).should be_close(0.0, TOLERANCE) - @object.send(:atan, -1).should be_close(-Math::PI/4, TOLERANCE) - @object.send(:atan, 0.25).should be_close(0.244978663126864, TOLERANCE) - @object.send(:atan, 0.50).should be_close(0.463647609000806, TOLERANCE) - @object.send(:atan, 0.75).should be_close(0.643501108793284, TOLERANCE) - end - - it "returns the arctangent for Complex numbers" do - @object.send(:atan, Complex(3, 4)).should be_close(Complex(1.44830699523146, 0.158997191679999), TOLERANCE) - @object.send(:atan, Complex(3.5, -4)).should be_close(Complex(1.44507428165589, -0.140323762363786), TOLERANCE) - end -end - -describe :complex_math_atan_bang, shared: true do - it "returns the arctangent of the argument" do - @object.send(:atan!, 1).should be_close(Math::PI/4, TOLERANCE) - @object.send(:atan!, 0).should be_close(0.0, TOLERANCE) - @object.send(:atan!, -1).should be_close(-Math::PI/4, 
TOLERANCE) - @object.send(:atan!, 0.25).should be_close(0.244978663126864, TOLERANCE) - @object.send(:atan!, 0.50).should be_close(0.463647609000806, TOLERANCE) - @object.send(:atan!, 0.75).should be_close(0.643501108793284, TOLERANCE) - end - - it "raises a TypeError when passed a Complex number" do - -> { @object.send(:atan!, Complex(4, 5)) }.should raise_error(TypeError) - end -end diff --git a/spec/ruby/library/cmath/math/shared/atan2.rb b/spec/ruby/library/cmath/math/shared/atan2.rb deleted file mode 100644 index 6d89423924f252..00000000000000 --- a/spec/ruby/library/cmath/math/shared/atan2.rb +++ /dev/null @@ -1,34 +0,0 @@ -require_relative '../fixtures/classes' - -describe :complex_math_atan2, shared: true do - it "returns the arc tangent of the passed arguments" do - @object.send(:atan2, 4.2, 0.3).should be_close(1.49948886200961, TOLERANCE) - @object.send(:atan2, 0.0, 1.0).should be_close(0.0, TOLERANCE) - @object.send(:atan2, -9.1, 3.2).should be_close(-1.23265379809025, TOLERANCE) - @object.send(:atan2, 7.22, -3.3).should be_close(1.99950888779256, TOLERANCE) - end - - it "returns the arc tangent for two Complex numbers" do - CMath.atan2(Complex(3, 4), Complex(3.5, -4)).should be_close(Complex(-0.641757436698881, 1.10829873031207), TOLERANCE) - end - - it "returns the arc tangent for Complex and real numbers" do - CMath.atan2(Complex(3, 4), -7).should be_close(Complex(2.61576754731561, -0.494290673139855), TOLERANCE) - CMath.atan2(5, Complex(3.5, -4)).should be_close(Complex(0.739102348493673, 0.487821626522923), TOLERANCE) - end -end - -describe :complex_math_atan2_bang, shared: true do - it "returns the arc tangent of the passed arguments" do - @object.send(:atan2!, 4.2, 0.3).should be_close(1.49948886200961, TOLERANCE) - @object.send(:atan2!, 0.0, 1.0).should be_close(0.0, TOLERANCE) - @object.send(:atan2!, -9.1, 3.2).should be_close(-1.23265379809025, TOLERANCE) - @object.send(:atan2!, 7.22, -3.3).should be_close(1.99950888779256, TOLERANCE) - end - 
- it "raises a TypeError when passed a Complex number" do - -> { @object.send(:atan2!, Complex(4, 5), Complex(4, 5)) }.should raise_error(TypeError) - -> { @object.send(:atan2!, 4, Complex(4, 5)) }.should raise_error(TypeError) - -> { @object.send(:atan2!, Complex(4, 5), 5) }.should raise_error(TypeError) - end -end diff --git a/spec/ruby/library/cmath/math/shared/atanh.rb b/spec/ruby/library/cmath/math/shared/atanh.rb deleted file mode 100644 index ae80e61bec9ade..00000000000000 --- a/spec/ruby/library/cmath/math/shared/atanh.rb +++ /dev/null @@ -1,30 +0,0 @@ -require_relative '../fixtures/classes' - -describe :complex_math_atanh_complex, shared: true do - it "returns the inverse hyperbolic tangent as a Complex number for arguments greater than 1.0" do - value = Complex(18.36840028483855, 1.5707963267948966) - @object.send(@method, 1.0 + Float::EPSILON).should be_close(value, TOLERANCE) - - value = Complex(0.100335347731076, 1.5707963267949) - @object.send(@method, 10).should be_close(value, TOLERANCE) - end - - it "returns the inverse hyperbolic tangent as a Complex number for arguments greater than 1.0" do - value = Complex(-18.36840028483855, 1.5707963267948966) - @object.send(@method, -1.0 - Float::EPSILON).should be_close(value, TOLERANCE) - - value = Complex(0.100335347731076, 1.5707963267949) - @object.send(@method, 10).should be_close(value, TOLERANCE) - end - - it "returns the inverse hyperbolic tangent for Complex numbers" do - value = Complex(0.117500907311434, 1.40992104959658) - @object.send(@method, Complex(3, 4)).should be_close(value, TOLERANCE) - end -end - -describe :complex_math_atanh_no_complex, shared: true do - it "raises a TypeError when passed a Complex number" do - -> { @object.send(:atanh!, Complex(4, 5)) }.should raise_error(TypeError) - end -end diff --git a/spec/ruby/library/cmath/math/shared/cos.rb b/spec/ruby/library/cmath/math/shared/cos.rb deleted file mode 100644 index 31cb5ab1e52dce..00000000000000 --- 
a/spec/ruby/library/cmath/math/shared/cos.rb +++ /dev/null @@ -1,30 +0,0 @@ -require_relative '../fixtures/classes' - -describe :complex_math_cos, shared: true do - it "returns the cosine of the argument expressed in radians" do - @object.send(:cos, CMath::PI).should be_close(-1.0, TOLERANCE) - @object.send(:cos, 0).should be_close(1.0, TOLERANCE) - @object.send(:cos, CMath::PI/2).should be_close(0.0, TOLERANCE) - @object.send(:cos, 3*Math::PI/2).should be_close(0.0, TOLERANCE) - @object.send(:cos, 2*Math::PI).should be_close(1.0, TOLERANCE) - end - - it "returns the cosine for Complex numbers" do - @object.send(:cos, Complex(0, CMath::PI)).should be_close(Complex(11.5919532755215, 0.0), TOLERANCE) - @object.send(:cos, Complex(3, 4)).should be_close(Complex(-27.0349456030742, -3.85115333481178), TOLERANCE) - end -end - -describe :complex_math_cos_bang, shared: true do - it "returns the cosine of the argument expressed in radians" do - @object.send(:cos!, CMath::PI).should be_close(-1.0, TOLERANCE) - @object.send(:cos!, 0).should be_close(1.0, TOLERANCE) - @object.send(:cos!, CMath::PI/2).should be_close(0.0, TOLERANCE) - @object.send(:cos!, 3*Math::PI/2).should be_close(0.0, TOLERANCE) - @object.send(:cos!, 2*Math::PI).should be_close(1.0, TOLERANCE) - end - - it "raises a TypeError when passed a Complex number" do - -> { @object.send(:cos!, Complex(3, 4)) }.should raise_error(TypeError) - end -end diff --git a/spec/ruby/library/cmath/math/shared/cosh.rb b/spec/ruby/library/cmath/math/shared/cosh.rb deleted file mode 100644 index 7cf561c9851ad7..00000000000000 --- a/spec/ruby/library/cmath/math/shared/cosh.rb +++ /dev/null @@ -1,28 +0,0 @@ -require_relative '../fixtures/classes' - -describe :complex_math_cosh, shared: true do - it "returns the hyperbolic cosine of the passed argument" do - @object.send(:cosh, 0.0).should == 1.0 - @object.send(:cosh, -0.0).should == 1.0 - @object.send(:cosh, 1.5).should be_close(2.35240961524325, TOLERANCE) - @object.send(:cosh, 
-2.99).should be_close(9.96798496414416, TOLERANCE) - end - - it "returns the hyperbolic cosine for Complex numbers" do - @object.send(:cosh, Complex(0, CMath::PI)).should be_close(Complex(-1.0, 0.0), TOLERANCE) - @object.send(:cosh, Complex(3, 4)).should be_close(Complex(-6.58066304055116, -7.58155274274654), TOLERANCE) - end -end - -describe :complex_math_cosh_bang, shared: true do - it "returns the hyperbolic cosine of the passed argument" do - @object.send(:cosh!, 0.0).should == 1.0 - @object.send(:cosh!, -0.0).should == 1.0 - @object.send(:cosh!, 1.5).should be_close(2.35240961524325, TOLERANCE) - @object.send(:cosh!, -2.99).should be_close(9.96798496414416, TOLERANCE) - end - - it "raises a TypeError when passed a Complex number" do - -> { @object.send(:cosh!, Complex(4, 5)) }.should raise_error(TypeError) - end -end diff --git a/spec/ruby/library/cmath/math/shared/exp.rb b/spec/ruby/library/cmath/math/shared/exp.rb deleted file mode 100644 index 6715ac63d32ba5..00000000000000 --- a/spec/ruby/library/cmath/math/shared/exp.rb +++ /dev/null @@ -1,28 +0,0 @@ -require_relative '../fixtures/classes' - -describe :complex_math_exp, shared: true do - it "returns the base-e exponential of the passed argument" do - @object.send(:exp, 0.0).should == 1.0 - @object.send(:exp, -0.0).should == 1.0 - @object.send(:exp, -1.8).should be_close(0.165298888221587, TOLERANCE) - @object.send(:exp, 1.25).should be_close(3.49034295746184, TOLERANCE) - end - - it "returns the base-e exponential for Complex numbers" do - @object.send(:exp, Complex(0, 0)).should == Complex(1.0, 0.0) - @object.send(:exp, Complex(1, 3)).should be_close(Complex(-2.69107861381979, 0.383603953541131), TOLERANCE) - end -end - -describe :complex_math_exp_bang, shared: true do - it "returns the base-e exponential of the passed argument" do - @object.send(:exp!, 0.0).should == 1.0 - @object.send(:exp!, -0.0).should == 1.0 - @object.send(:exp!, -1.8).should be_close(0.165298888221587, TOLERANCE) - 
@object.send(:exp!, 1.25).should be_close(3.49034295746184, TOLERANCE) - end - - it "raises a TypeError when passed a Complex number" do - -> { @object.send(:exp!, Complex(1, 3)) }.should raise_error(TypeError) - end -end diff --git a/spec/ruby/library/cmath/math/shared/log.rb b/spec/ruby/library/cmath/math/shared/log.rb deleted file mode 100644 index 4b23e8c5f2f6dc..00000000000000 --- a/spec/ruby/library/cmath/math/shared/log.rb +++ /dev/null @@ -1,39 +0,0 @@ -require_relative '../fixtures/classes' - -describe :complex_math_log, shared: true do - it "returns the natural logarithm of the passed argument" do - @object.send(:log, 0.0001).should be_close(-9.21034037197618, TOLERANCE) - @object.send(:log, 0.000000000001e-15).should be_close(-62.1697975108392, TOLERANCE) - @object.send(:log, 1).should be_close(0.0, TOLERANCE) - @object.send(:log, 10).should be_close( 2.30258509299405, TOLERANCE) - @object.send(:log, 10e15).should be_close(36.8413614879047, TOLERANCE) - end - - it "returns the natural logarithm for Complex numbers" do - @object.send(:log, Complex(3, 4)).should be_close(Complex(1.6094379124341, 0.927295218001612), TOLERANCE) - @object.send(:log, Complex(-3, 4)).should be_close(Complex(1.6094379124341, 2.21429743558818), TOLERANCE) - end - - it "returns the natural logarithm for negative numbers as a Complex number" do - @object.send(:log, -10).should be_close(Complex(2.30258509299405, 3.14159265358979), TOLERANCE) - @object.send(:log, -20).should be_close(Complex(2.99573227355399, 3.14159265358979), TOLERANCE) - end -end - -describe :complex_math_log_bang, shared: true do - it "returns the natural logarithm of the argument" do - @object.send(:log!, 0.0001).should be_close(-9.21034037197618, TOLERANCE) - @object.send(:log!, 0.000000000001e-15).should be_close(-62.1697975108392, TOLERANCE) - @object.send(:log!, 1).should be_close(0.0, TOLERANCE) - @object.send(:log!, 10).should be_close( 2.30258509299405, TOLERANCE) - @object.send(:log!, 10e15).should 
be_close(36.8413614879047, TOLERANCE) - end - - it "raises an Errno::EDOM if the argument is less than 0" do - -> { @object.send(:log!, -10) }.should raise_error(Errno::EDOM) - end - - it "raises a TypeError when passed a Complex number" do - -> { @object.send(:log!, Complex(4, 5)) }.should raise_error(TypeError) - end -end diff --git a/spec/ruby/library/cmath/math/shared/log10.rb b/spec/ruby/library/cmath/math/shared/log10.rb deleted file mode 100644 index f49934d958224d..00000000000000 --- a/spec/ruby/library/cmath/math/shared/log10.rb +++ /dev/null @@ -1,41 +0,0 @@ -require_relative '../fixtures/classes' - -describe :complex_math_log10, shared: true do - it "returns the base-10 logarithm of the passed argument" do - @object.send(:log10, 0.0001).should be_close(-4.0, TOLERANCE) - @object.send(:log10, 0.000000000001e-15).should be_close(-27.0, TOLERANCE) - @object.send(:log10, 1).should be_close(0.0, TOLERANCE) - @object.send(:log10, 10).should be_close(1.0, TOLERANCE) - @object.send(:log10, 10e15).should be_close(16.0, TOLERANCE) - end - - it "returns the base-10 logarithm for Complex numbers" do - @object.send(:log10, Complex(3, 4)).should be_close(Complex(0.698970004336019, 0.402719196273373), TOLERANCE) - @object.send(:log10, Complex(-3, 4)).should be_close(Complex(0.698970004336019, 0.961657157568468), TOLERANCE) - end - - # BUG: does not work correctly, because Math#log10 - # does not check for negative values - #it "returns the base-10 logarithm for negative numbers as a Complex number" do - # @object.send(:log10, -10).should be_close(Complex(2.30258509299405, 3.14159265358979), TOLERANCE) - # @object.send(:log10, -20).should be_close(Complex(2.99573227355399, 3.14159265358979), TOLERANCE) - #end -end - -describe :complex_math_log10_bang, shared: true do - it "returns the base-10 logarithm of the argument" do - @object.send(:log10!, 0.0001).should be_close(-4.0, TOLERANCE) - @object.send(:log10!, 0.000000000001e-15).should be_close(-27.0, TOLERANCE) - 
@object.send(:log10!, 1).should be_close(0.0, TOLERANCE) - @object.send(:log10!, 10).should be_close(1.0, TOLERANCE) - @object.send(:log10!, 10e15).should be_close(16.0, TOLERANCE) - end - - it "raises an Errno::EDOM when the passed argument is negative" do - -> { @object.send(:log10!, -10) }.should raise_error(Errno::EDOM) - end - - it "raises a TypeError when passed a Complex number" do - -> { @object.send(:log10!, Complex(4, 5)) }.should raise_error(TypeError) - end -end diff --git a/spec/ruby/library/cmath/math/shared/sin.rb b/spec/ruby/library/cmath/math/shared/sin.rb deleted file mode 100644 index 1cb1b29cda94ea..00000000000000 --- a/spec/ruby/library/cmath/math/shared/sin.rb +++ /dev/null @@ -1,30 +0,0 @@ -require_relative '../fixtures/classes' - -describe :complex_math_sin, shared: true do - it "returns the sine of the passed argument expressed in radians" do - @object.send(:sin, CMath::PI).should be_close(0.0, TOLERANCE) - @object.send(:sin, 0).should be_close(0.0, TOLERANCE) - @object.send(:sin, CMath::PI/2).should be_close(1.0, TOLERANCE) - @object.send(:sin, 3*Math::PI/2).should be_close(-1.0, TOLERANCE) - @object.send(:sin, 2*Math::PI).should be_close(0.0, TOLERANCE) - end - - it "returns the sine for Complex numbers" do - @object.send(:sin, Complex(0, CMath::PI)).should be_close(Complex(0.0, 11.5487393572577), TOLERANCE) - @object.send(:sin, Complex(3, 4)).should be_close(Complex(3.85373803791938, -27.0168132580039), TOLERANCE) - end -end - -describe :complex_math_sin_bang, shared: true do - it "returns the sine of the passed argument expressed in radians" do - @object.send(:sin!, CMath::PI).should be_close(0.0, TOLERANCE) - @object.send(:sin!, 0).should be_close(0.0, TOLERANCE) - @object.send(:sin!, CMath::PI/2).should be_close(1.0, TOLERANCE) - @object.send(:sin!, 3*Math::PI/2).should be_close(-1.0, TOLERANCE) - @object.send(:sin!, 2*Math::PI).should be_close(0.0, TOLERANCE) - end - - it "raises a TypeError when passed a Complex number" do - -> { 
@object.send(:sin!, Complex(4, 5)) }.should raise_error(TypeError) - end -end diff --git a/spec/ruby/library/cmath/math/shared/sinh.rb b/spec/ruby/library/cmath/math/shared/sinh.rb deleted file mode 100644 index de80a376da0995..00000000000000 --- a/spec/ruby/library/cmath/math/shared/sinh.rb +++ /dev/null @@ -1,28 +0,0 @@ -require_relative '../fixtures/classes' - -describe :complex_math_sinh, shared: true do - it "returns the hyperbolic sin of the argument" do - @object.send(:sinh, 0.0).should == 0.0 - @object.send(:sinh, -0.0).should == 0.0 - @object.send(:sinh, 1.5).should be_close(2.12927945509482, TOLERANCE) - @object.send(:sinh, -2.8).should be_close(-8.19191835423591, TOLERANCE) - end - - it "returns the hyperbolic sin for Complex numbers" do - @object.send(:sinh, Complex(0, CMath::PI)).should be_close(Complex(-0.0, 1.22464679914735e-16), TOLERANCE) - @object.send(:sinh, Complex(3, 4)).should be_close(Complex(-6.548120040911, -7.61923172032141), TOLERANCE) - end -end - -describe :complex_math_sinh_bang, shared: true do - it "returns the hyperbolic sin of the argument" do - @object.send(:sinh!, 0.0).should == 0.0 - @object.send(:sinh!, -0.0).should == 0.0 - @object.send(:sinh!, 1.5).should be_close(2.12927945509482, TOLERANCE) - @object.send(:sinh!, -2.8).should be_close(-8.19191835423591, TOLERANCE) - end - - it "raises a TypeError when passed a Complex number" do - -> { @object.send(:sinh!, Complex(4, 5)) }.should raise_error(TypeError) - end -end diff --git a/spec/ruby/library/cmath/math/shared/sqrt.rb b/spec/ruby/library/cmath/math/shared/sqrt.rb deleted file mode 100644 index 23b1ba48ffef75..00000000000000 --- a/spec/ruby/library/cmath/math/shared/sqrt.rb +++ /dev/null @@ -1,34 +0,0 @@ -require_relative '../fixtures/classes' - -describe :complex_math_sqrt, shared: true do - it "returns the square root for positive numbers" do - @object.send(:sqrt, 4).should == 2 - @object.send(:sqrt, 19.36).should == 4.4 - end - - it "returns the square root for negative 
numbers" do - @object.send(:sqrt, -4).should == Complex(0, 2.0) - @object.send(:sqrt, -19.36).should == Complex(0, 4.4) - end - - it "returns the square root for Complex numbers" do - @object.send(:sqrt, Complex(4, 5)).should be_close(Complex(2.2806933416653, 1.09615788950152), TOLERANCE) - @object.send(:sqrt, Complex(4, -5)).should be_close(Complex(2.2806933416653, -1.09615788950152), TOLERANCE) - end -end - -describe :complex_math_sqrt_bang, shared: true do - it "returns the square root for positive numbers" do - @object.send(:sqrt!, 4).should == 2 - @object.send(:sqrt!, 19.36).should == 4.4 - end - - it "raises Errno::EDOM when the passed argument is negative" do - -> { @object.send(:sqrt!, -4) }.should raise_error(Errno::EDOM) - -> { @object.send(:sqrt!, -19.36) }.should raise_error(Errno::EDOM) - end - - it "raises a TypeError when passed a Complex number" do - -> { @object.send(:sqrt!, Complex(4, 5)) }.should raise_error(TypeError) - end -end diff --git a/spec/ruby/library/cmath/math/shared/tan.rb b/spec/ruby/library/cmath/math/shared/tan.rb deleted file mode 100644 index 9022c84fc9b384..00000000000000 --- a/spec/ruby/library/cmath/math/shared/tan.rb +++ /dev/null @@ -1,28 +0,0 @@ -require_relative '../fixtures/classes' - -describe :complex_math_tan, shared: true do - it "returns the tangent of the argument" do - @object.send(:tan, 0.0).should == 0.0 - @object.send(:tan, -0.0).should == -0.0 - @object.send(:tan, 4.22).should be_close(1.86406937682395, TOLERANCE) - @object.send(:tan, -9.65).should be_close(-0.229109052606441, TOLERANCE) - end - - it "returns the tangent for Complex numbers" do - @object.send(:tan, Complex(0, CMath::PI)).should be_close(Complex(0.0, 0.99627207622075), TOLERANCE) - @object.send(:tan, Complex(3, 4)).should be_close(Complex(-0.000187346204629452, 0.999355987381473), TOLERANCE) - end -end - -describe :complex_math_tan_bang, shared: true do - it "returns the tangent of the argument" do - @object.send(:tan!, 0.0).should == 0.0 - 
@object.send(:tan!, -0.0).should == -0.0 - @object.send(:tan!, 4.22).should be_close(1.86406937682395, TOLERANCE) - @object.send(:tan!, -9.65).should be_close(-0.229109052606441, TOLERANCE) - end - - it "raises a TypeError when passed a Complex number" do - -> { @object.send(:tan!, Complex(4, 5)) }.should raise_error(TypeError) - end -end diff --git a/spec/ruby/library/cmath/math/shared/tanh.rb b/spec/ruby/library/cmath/math/shared/tanh.rb deleted file mode 100644 index f2c9a5abb1f356..00000000000000 --- a/spec/ruby/library/cmath/math/shared/tanh.rb +++ /dev/null @@ -1,32 +0,0 @@ -require_relative '../fixtures/classes' - -describe :complex_math_tanh, shared: true do - it "returns the hyperbolic tangent of the argument" do - @object.send(:tanh, 0.0).should == 0.0 - @object.send(:tanh, -0.0).should == -0.0 - @object.send(:tanh, infinity_value).should == 1.0 - @object.send(:tanh, -infinity_value).should == -1.0 - @object.send(:tanh, 2.5).should be_close(0.98661429815143, TOLERANCE) - @object.send(:tanh, -4.892).should be_close(-0.999887314427707, TOLERANCE) - end - - it "returns the hyperbolic tangent for Complex numbers" do - @object.send(:tanh, Complex(0, CMath::PI)).should be_close(Complex(0.0, -1.22464679914735e-16), TOLERANCE) - @object.send(:tanh, Complex(3, 4)).should be_close(Complex(1.00070953606723, 0.00490825806749599), TOLERANCE) - end -end - -describe :complex_math_tanh_bang, shared: true do - it "returns the hyperbolic tangent of the argument" do - @object.send(:tanh!, 0.0).should == 0.0 - @object.send(:tanh!, -0.0).should == -0.0 - @object.send(:tanh!, infinity_value).should == 1.0 - @object.send(:tanh!, -infinity_value).should == -1.0 - @object.send(:tanh!, 2.5).should be_close(0.98661429815143, TOLERANCE) - @object.send(:tanh!, -4.892).should be_close(-0.999887314427707, TOLERANCE) - end - - it "raises a TypeError when passed a Complex number" do - -> { @object.send(:tanh!, Complex(4, 5)) }.should raise_error(TypeError) - end -end diff --git 
a/spec/ruby/library/cmath/math/sin_spec.rb b/spec/ruby/library/cmath/math/sin_spec.rb deleted file mode 100644 index e15f14f95f5d6f..00000000000000 --- a/spec/ruby/library/cmath/math/sin_spec.rb +++ /dev/null @@ -1 +0,0 @@ -require_relative '../../../spec_helper' diff --git a/spec/ruby/library/cmath/math/sinh_spec.rb b/spec/ruby/library/cmath/math/sinh_spec.rb deleted file mode 100644 index e15f14f95f5d6f..00000000000000 --- a/spec/ruby/library/cmath/math/sinh_spec.rb +++ /dev/null @@ -1 +0,0 @@ -require_relative '../../../spec_helper' diff --git a/spec/ruby/library/cmath/math/sqrt_spec.rb b/spec/ruby/library/cmath/math/sqrt_spec.rb deleted file mode 100644 index e15f14f95f5d6f..00000000000000 --- a/spec/ruby/library/cmath/math/sqrt_spec.rb +++ /dev/null @@ -1 +0,0 @@ -require_relative '../../../spec_helper' diff --git a/spec/ruby/library/cmath/math/tan_spec.rb b/spec/ruby/library/cmath/math/tan_spec.rb deleted file mode 100644 index e15f14f95f5d6f..00000000000000 --- a/spec/ruby/library/cmath/math/tan_spec.rb +++ /dev/null @@ -1 +0,0 @@ -require_relative '../../../spec_helper' diff --git a/spec/ruby/library/cmath/math/tanh_spec.rb b/spec/ruby/library/cmath/math/tanh_spec.rb deleted file mode 100644 index e15f14f95f5d6f..00000000000000 --- a/spec/ruby/library/cmath/math/tanh_spec.rb +++ /dev/null @@ -1 +0,0 @@ -require_relative '../../../spec_helper' diff --git a/spec/ruby/library/erb/new_spec.rb b/spec/ruby/library/erb/new_spec.rb index f18e25939ef594..4d7f7bf36a3346 100644 --- a/spec/ruby/library/erb/new_spec.rb +++ b/spec/ruby/library/erb/new_spec.rb @@ -138,4 +138,20 @@ ERB.new(@eruby_str).result ->{ ERB.new("<%= list %>").result }.should raise_error(NameError) end + + describe "warning about arguments" do + ruby_version_is "3.1" do + it "warns when passed safe_level and later arguments" do + -> { + ERB.new(@eruby_str, nil, '%') + }.should complain(/warning: Passing safe_level with the 2nd argument of ERB.new is deprecated. 
Do not use it, and specify other arguments as keyword arguments./) + end + + it "does not warn when passed arguments as keyword argument" do + -> { + ERB.new(@eruby_str, trim_mode: '%') + }.should_not complain(/warning: Passing safe_level with the 2nd argument of ERB.new is deprecated. Do not use it, and specify other arguments as keyword arguments./) + end + end + end end diff --git a/spec/ruby/library/scanf/io/block_scanf_spec.rb b/spec/ruby/library/scanf/io/block_scanf_spec.rb deleted file mode 100644 index e15f14f95f5d6f..00000000000000 --- a/spec/ruby/library/scanf/io/block_scanf_spec.rb +++ /dev/null @@ -1 +0,0 @@ -require_relative '../../../spec_helper' diff --git a/spec/ruby/library/scanf/io/fixtures/date.txt b/spec/ruby/library/scanf/io/fixtures/date.txt deleted file mode 100644 index a1bd635c0c3af2..00000000000000 --- a/spec/ruby/library/scanf/io/fixtures/date.txt +++ /dev/null @@ -1,4 +0,0 @@ -Beethoven 1770 -Bach 1685 -Handel 1685 - diff --git a/spec/ruby/library/scanf/io/fixtures/helloworld.txt b/spec/ruby/library/scanf/io/fixtures/helloworld.txt deleted file mode 100644 index 3b18e512dba79e..00000000000000 --- a/spec/ruby/library/scanf/io/fixtures/helloworld.txt +++ /dev/null @@ -1 +0,0 @@ -hello world diff --git a/spec/ruby/library/scanf/io/scanf_spec.rb b/spec/ruby/library/scanf/io/scanf_spec.rb deleted file mode 100644 index e15f14f95f5d6f..00000000000000 --- a/spec/ruby/library/scanf/io/scanf_spec.rb +++ /dev/null @@ -1 +0,0 @@ -require_relative '../../../spec_helper' diff --git a/spec/ruby/library/scanf/io/shared/block_scanf.rb b/spec/ruby/library/scanf/io/shared/block_scanf.rb deleted file mode 100644 index d938f4373460b3..00000000000000 --- a/spec/ruby/library/scanf/io/shared/block_scanf.rb +++ /dev/null @@ -1,28 +0,0 @@ -require 'scanf' - -describe :scanf_io_block_scanf, shared: true do - before :each do - @data = File.open(fixture(__FILE__, 'date.txt'), 'rb') - end - - after :each do - @data.close unless @data.closed? 
- end - - it "passes each match to the block as an array" do - res = @data.send(@method, "%s%d") { |name, year| "#{name} was born in #{year}." } - res.should == ["Beethoven was born in 1770.", "Bach was born in 1685.", "Handel was born in 1685."] - end - - it "keeps scanning the input and cycling back to the beginning of the input string" do - a = [] - @data.send(@method, "%s"){|w| a << w} - a.should == [["Beethoven"], ["1770"], ["Bach"], ["1685"], ["Handel"], ["1685"]] - end - - it "returns an empty array when a wrong specifier is passed" do - a = [] - @data.send(@method, "%z"){|w| a << w} - a.empty?.should be_true - end -end diff --git a/spec/ruby/library/scanf/string/block_scanf_spec.rb b/spec/ruby/library/scanf/string/block_scanf_spec.rb deleted file mode 100644 index e15f14f95f5d6f..00000000000000 --- a/spec/ruby/library/scanf/string/block_scanf_spec.rb +++ /dev/null @@ -1 +0,0 @@ -require_relative '../../../spec_helper' diff --git a/spec/ruby/library/scanf/string/scanf_spec.rb b/spec/ruby/library/scanf/string/scanf_spec.rb deleted file mode 100644 index e15f14f95f5d6f..00000000000000 --- a/spec/ruby/library/scanf/string/scanf_spec.rb +++ /dev/null @@ -1 +0,0 @@ -require_relative '../../../spec_helper' diff --git a/spec/ruby/library/scanf/string/shared/block_scanf.rb b/spec/ruby/library/scanf/string/shared/block_scanf.rb deleted file mode 100644 index 25ab3f442a88e7..00000000000000 --- a/spec/ruby/library/scanf/string/shared/block_scanf.rb +++ /dev/null @@ -1,25 +0,0 @@ -require 'scanf' - -describe :scanf_string_block_scanf, shared: true do - it "passes each match to the block as an array" do - a = [] - "hello world".send(@method, "%s%s"){|w| a << w} - a.should == [["hello", "world"]] - end - - it "keeps scanning the input and cycling back to the beginning of the input string" do - a = [] - "hello world".send(@method, "%s"){|w| a << w} - a.should == [["hello"], ["world"]] - - string = "123 abc 456 def 789 ghi" - s = string.send(@method, "%d%s"){|num,str| [num 
* 2, str.upcase]} - s.should == [[246, "ABC"], [912, "DEF"], [1578, "GHI"]] - end - - it "returns an empty array when a wrong specifier is passed" do - a = [] - "hello world".send(@method, "%z"){|w| a << w} - a.empty?.should be_true - end -end diff --git a/spec/ruby/library/stringio/putc_spec.rb b/spec/ruby/library/stringio/putc_spec.rb index 223b3523e52735..1ce53b7ef20ed4 100644 --- a/spec/ruby/library/stringio/putc_spec.rb +++ b/spec/ruby/library/stringio/putc_spec.rb @@ -35,6 +35,21 @@ @io.putc("t") @io.pos.should == 3 end + + it "handles concurrent writes correctly" do + @io = StringIO.new + n = 8 + go = false + threads = n.times.map { |i| + Thread.new { + Thread.pass until go + @io.putc i.to_s + } + } + go = true + threads.each(&:join) + @io.string.size.should == n + end end describe "StringIO#putc when passed [Object]" do diff --git a/spec/ruby/library/stringio/puts_spec.rb b/spec/ruby/library/stringio/puts_spec.rb index a9f289a5a56398..9c890262dd996f 100644 --- a/spec/ruby/library/stringio/puts_spec.rb +++ b/spec/ruby/library/stringio/puts_spec.rb @@ -101,6 +101,20 @@ @io.puts '' @io.string.should == "\n" end + + it "handles concurrent writes correctly" do + n = 8 + go = false + threads = n.times.map { |i| + Thread.new { + Thread.pass until go + @io.puts i + } + } + go = true + threads.each(&:join) + @io.string.size.should == n.times.map { |i| "#{i}\n" }.join.size + end end describe "StringIO#puts when passed no arguments" do diff --git a/spec/ruby/library/stringio/shared/write.rb b/spec/ruby/library/stringio/shared/write.rb index 0eb71466e36032..c5a0f8f513d68a 100644 --- a/spec/ruby/library/stringio/shared/write.rb +++ b/spec/ruby/library/stringio/shared/write.rb @@ -45,6 +45,21 @@ @io.pos.should eql(4) end + it "handles concurrent writes correctly" do + @io = StringIO.new + n = 8 + go = false + threads = n.times.map { |i| + Thread.new { + Thread.pass until go + @io.write i.to_s + } + } + go = true + threads.each(&:join) + @io.string.size.should == 
n.times.map(&:to_s).join.size + end + ruby_version_is ""..."3.0" do it "does not taint self when the passed argument is tainted" do @io.send(@method, "test".taint) diff --git a/spec/ruby/optional/capi/class_spec.rb b/spec/ruby/optional/capi/class_spec.rb index abeba0f741b622..66af3812430c96 100644 --- a/spec/ruby/optional/capi/class_spec.rb +++ b/spec/ruby/optional/capi/class_spec.rb @@ -323,6 +323,15 @@ @s.rb_define_class("ClassSpecDefineClass4", nil) }.should raise_error(ArgumentError) end + + it "allows arbitrary names, including constant names not valid in Ruby" do + cls = @s.rb_define_class("_INVALID_CLASS", CApiClassSpecs::Super) + cls.name.should == "_INVALID_CLASS" + + -> { + Object.const_get(cls.name) + }.should raise_error(NameError, /wrong constant name/) + end end describe "rb_define_class_under" do @@ -367,6 +376,15 @@ it "raises a TypeError if class is defined and its superclass mismatches the given one" do -> { @s.rb_define_class_under(CApiClassSpecs, "Sub", Object) }.should raise_error(TypeError) end + + it "allows arbitrary names, including constant names not valid in Ruby" do + cls = @s.rb_define_class_under(CApiClassSpecs, "_INVALID_CLASS", CApiClassSpecs::Super) + cls.name.should == "CApiClassSpecs::_INVALID_CLASS" + + -> { + CApiClassSpecs.const_get(cls.name) + }.should raise_error(NameError, /wrong constant name/) + end end describe "rb_define_class_id_under" do @@ -394,6 +412,15 @@ it "raises a TypeError if class is defined and its superclass mismatches the given one" do -> { @s.rb_define_class_id_under(CApiClassSpecs, :Sub, Object) }.should raise_error(TypeError) end + + it "allows arbitrary names, including constant names not valid in Ruby" do + cls = @s.rb_define_class_id_under(CApiClassSpecs, :_INVALID_CLASS2, CApiClassSpecs::Super) + cls.name.should == "CApiClassSpecs::_INVALID_CLASS2" + + -> { + CApiClassSpecs.const_get(cls.name) + }.should raise_error(NameError, /wrong constant name/) + end end describe "rb_define_class_variable" do 
diff --git a/spec/ruby/optional/capi/ext/encoding_spec.c b/spec/ruby/optional/capi/ext/encoding_spec.c index 865fc484be2f19..a0136530f2812e 100644 --- a/spec/ruby/optional/capi/ext/encoding_spec.c +++ b/spec/ruby/optional/capi/ext/encoding_spec.c @@ -71,11 +71,9 @@ static VALUE encoding_spec_rb_default_external_encoding(VALUE self) { return rb_str_new2(enc->name); } -#ifdef RUBY_VERSION_IS_2_6 static VALUE encoding_spec_rb_enc_alias(VALUE self, VALUE alias, VALUE orig) { return INT2NUM(rb_enc_alias(RSTRING_PTR(alias), RSTRING_PTR(orig))); } -#endif static VALUE encoding_spec_rb_enc_associate(VALUE self, VALUE obj, VALUE enc) { return rb_enc_associate(obj, NIL_P(enc) ? NULL : rb_enc_find(RSTRING_PTR(enc))); @@ -327,16 +325,9 @@ void Init_encoding_spec(void) { rb_define_method(cls, "rb_locale_encindex", encoding_spec_rb_locale_encindex, 0); rb_define_method(cls, "rb_filesystem_encoding", encoding_spec_rb_filesystem_encoding, 0); rb_define_method(cls, "rb_filesystem_encindex", encoding_spec_rb_filesystem_encindex, 0); - rb_define_method(cls, "rb_default_internal_encoding", - encoding_spec_rb_default_internal_encoding, 0); - - rb_define_method(cls, "rb_default_external_encoding", - encoding_spec_rb_default_external_encoding, 0); - -#ifdef RUBY_VERSION_IS_2_6 + rb_define_method(cls, "rb_default_internal_encoding", encoding_spec_rb_default_internal_encoding, 0); + rb_define_method(cls, "rb_default_external_encoding", encoding_spec_rb_default_external_encoding, 0); rb_define_method(cls, "rb_enc_alias", encoding_spec_rb_enc_alias, 2); -#endif - rb_define_method(cls, "MBCLEN_CHARFOUND_P", encoding_spec_MBCLEN_CHARFOUND_P, 1); rb_define_method(cls, "rb_enc_associate", encoding_spec_rb_enc_associate, 2); rb_define_method(cls, "rb_enc_associate_index", encoding_spec_rb_enc_associate_index, 2); diff --git a/spec/ruby/optional/capi/ext/gc_spec.c b/spec/ruby/optional/capi/ext/gc_spec.c index 7dc9c347c76f43..082e4af59ce25b 100644 --- a/spec/ruby/optional/capi/ext/gc_spec.c +++ 
b/spec/ruby/optional/capi/ext/gc_spec.c @@ -7,6 +7,9 @@ extern "C" { VALUE registered_tagged_value; VALUE registered_reference_value; +VALUE registered_before_rb_gc_register_address; +VALUE registered_before_rb_global_variable; +VALUE rb_gc_register_address_outside_init; static VALUE registered_tagged_address(VALUE self) { return registered_tagged_value; @@ -16,6 +19,25 @@ static VALUE registered_reference_address(VALUE self) { return registered_reference_value; } +static VALUE get_registered_before_rb_gc_register_address(VALUE self) { + return registered_before_rb_gc_register_address; +} + +static VALUE get_registered_before_rb_global_variable(VALUE self) { + return registered_before_rb_global_variable; +} + +static VALUE gc_spec_rb_gc_register_address(VALUE self) { + rb_gc_register_address_outside_init = rb_str_new_cstr("rb_gc_register_address() outside Init_"); + rb_gc_register_address(&rb_gc_register_address_outside_init); + return rb_gc_register_address_outside_init; +} + +static VALUE gc_spec_rb_gc_unregister_address(VALUE self) { + rb_gc_unregister_address(&rb_gc_register_address_outside_init); + return Qnil; +} + static VALUE gc_spec_rb_gc_enable(VALUE self) { return rb_gc_enable(); } @@ -50,9 +72,18 @@ void Init_gc_spec(void) { rb_gc_register_address(&registered_tagged_value); rb_gc_register_address(&registered_reference_value); + rb_gc_register_address(&registered_before_rb_gc_register_address); + rb_global_variable(&registered_before_rb_global_variable); + + registered_before_rb_gc_register_address = rb_str_new_cstr("registered before rb_gc_register_address()"); + registered_before_rb_global_variable = rb_str_new_cstr("registered before rb_global_variable()"); rb_define_method(cls, "registered_tagged_address", registered_tagged_address, 0); rb_define_method(cls, "registered_reference_address", registered_reference_address, 0); + rb_define_method(cls, "registered_before_rb_gc_register_address", get_registered_before_rb_gc_register_address, 0); + rb_define_method(cls,
"registered_before_rb_global_variable", get_registered_before_rb_global_variable, 0); + rb_define_method(cls, "rb_gc_register_address", gc_spec_rb_gc_register_address, 0); + rb_define_method(cls, "rb_gc_unregister_address", gc_spec_rb_gc_unregister_address, 0); rb_define_method(cls, "rb_gc_enable", gc_spec_rb_gc_enable, 0); rb_define_method(cls, "rb_gc_disable", gc_spec_rb_gc_disable, 0); rb_define_method(cls, "rb_gc", gc_spec_rb_gc, 0); diff --git a/spec/ruby/optional/capi/ext/globals_spec.c b/spec/ruby/optional/capi/ext/globals_spec.c index 28a9633f98b67a..20dea1a05adaf3 100644 --- a/spec/ruby/optional/capi/ext/globals_spec.c +++ b/spec/ruby/optional/capi/ext/globals_spec.c @@ -20,6 +20,16 @@ static VALUE sb_define_hooked_variable(VALUE self, VALUE var_name) { return Qnil; } +static VALUE sb_define_hooked_variable_default_accessors(VALUE self, VALUE var_name) { + rb_define_hooked_variable(StringValuePtr(var_name), &g_hooked_var, (rb_gvar_getter_t*) NULL, (rb_gvar_setter_t*) NULL); + return Qnil; +} + +static VALUE sb_define_hooked_variable_null_var(VALUE self, VALUE var_name) { + rb_define_hooked_variable(StringValuePtr(var_name), NULL, (rb_gvar_getter_t*) NULL, (rb_gvar_setter_t*) NULL); + return Qnil; +} + VALUE g_ro_var; static VALUE sb_define_readonly_variable(VALUE self, VALUE var_name, VALUE val) { @@ -40,6 +50,26 @@ static VALUE sb_define_variable(VALUE self, VALUE var_name, VALUE val) { return Qnil; } +long virtual_var_storage; + +VALUE incrementing_getter(ID id, VALUE *data) { + return LONG2FIX(virtual_var_storage++); +} + +void incrementing_setter(VALUE val, ID id, VALUE *data) { + virtual_var_storage = FIX2LONG(val); +} + +static VALUE sb_define_virtual_variable_default_accessors(VALUE self, VALUE name) { + rb_define_virtual_variable(StringValuePtr(name), (rb_gvar_getter_t*) NULL, (rb_gvar_setter_t*) NULL); + return Qnil; +} + +static VALUE sb_define_virtual_variable_incrementing_accessors(VALUE self, VALUE name) { + 
rb_define_virtual_variable(StringValuePtr(name), incrementing_getter, incrementing_setter); + return Qnil; +} + static VALUE sb_f_global_variables(VALUE self) { return rb_f_global_variables(); } @@ -101,10 +131,14 @@ void Init_globals_spec(void) { VALUE cls = rb_define_class("CApiGlobalSpecs", rb_cObject); g_hooked_var = Qnil; rb_define_method(cls, "rb_define_hooked_variable_2x", sb_define_hooked_variable, 1); + rb_define_method(cls, "rb_define_hooked_variable_default_accessors", sb_define_hooked_variable_default_accessors, 1); + rb_define_method(cls, "rb_define_hooked_variable_null_var", sb_define_hooked_variable_null_var, 1); g_ro_var = Qnil; rb_define_method(cls, "rb_define_readonly_variable", sb_define_readonly_variable, 2); g_var = Qnil; rb_define_method(cls, "rb_define_variable", sb_define_variable, 2); + rb_define_method(cls, "rb_define_virtual_variable_default_accessors", sb_define_virtual_variable_default_accessors, 1); + rb_define_method(cls, "rb_define_virtual_variable_incrementing_accessors", sb_define_virtual_variable_incrementing_accessors, 1); rb_define_method(cls, "sb_get_global_value", sb_get_global_value, 0); rb_define_method(cls, "rb_f_global_variables", sb_f_global_variables, 0); rb_define_method(cls, "sb_gv_get", sb_gv_get, 1); diff --git a/spec/ruby/optional/capi/ext/rubyspec.h b/spec/ruby/optional/capi/ext/rubyspec.h index 426b1ddc04b3b7..245669d2007b1c 100644 --- a/spec/ruby/optional/capi/ext/rubyspec.h +++ b/spec/ruby/optional/capi/ext/rubyspec.h @@ -34,34 +34,4 @@ #define RUBY_VERSION_IS_3_0 #endif -#if RUBY_VERSION_MAJOR > 2 || (RUBY_VERSION_MAJOR == 2 && RUBY_VERSION_MINOR >= 7) -#define RUBY_VERSION_IS_2_7 -#endif - -#if RUBY_VERSION_MAJOR > 2 || (RUBY_VERSION_MAJOR == 2 && RUBY_VERSION_MINOR >= 6) -#define RUBY_VERSION_IS_2_6 -#endif - -#if defined(__cplusplus) && !defined(RUBY_VERSION_IS_2_7) -/* Ruby < 2.7 needs this to let these function with callbacks and compile in C++ code */ -#define rb_define_method(mod, name, func, argc) 
rb_define_method(mod, name, RUBY_METHOD_FUNC(func), argc) -#define rb_define_protected_method(mod, name, func, argc) rb_define_protected_method(mod, name, RUBY_METHOD_FUNC(func), argc) -#define rb_define_private_method(mod, name, func, argc) rb_define_private_method(mod, name, RUBY_METHOD_FUNC(func), argc) -#define rb_define_singleton_method(mod, name, func, argc) rb_define_singleton_method(mod, name, RUBY_METHOD_FUNC(func), argc) -#define rb_define_module_function(mod, name, func, argc) rb_define_module_function(mod, name, RUBY_METHOD_FUNC(func), argc) -#define rb_define_global_function(name, func, argc) rb_define_global_function(name, RUBY_METHOD_FUNC(func), argc) -#define rb_hash_foreach(hash, func, farg) rb_hash_foreach(hash, (int (*)(...))func, farg) -#define st_foreach(tab, func, arg) st_foreach(tab, (int (*)(...))func, arg) -#define rb_block_call(object, name, args_count, args, block_call_func, data) rb_block_call(object, name, args_count, args, RUBY_METHOD_FUNC(block_call_func), data) -#define rb_ensure(b_proc, data1, e_proc, data2) rb_ensure(RUBY_METHOD_FUNC(b_proc), data1, RUBY_METHOD_FUNC(e_proc), data2) -#define rb_rescue(b_proc, data1, e_proc, data2) rb_rescue(RUBY_METHOD_FUNC(b_proc), data1, RUBY_METHOD_FUNC(e_proc), data2) -#define rb_rescue2(b_proc, data1, e_proc, data2, ...) 
rb_rescue2(RUBY_METHOD_FUNC(b_proc), data1, RUBY_METHOD_FUNC(e_proc), data2, __VA_ARGS__) -#define rb_catch(tag, func, data) rb_catch(tag, RUBY_METHOD_FUNC(func), data) -#define rb_catch_obj(tag, func, data) rb_catch_obj(tag, RUBY_METHOD_FUNC(func), data) -#define rb_proc_new(fn, arg) rb_proc_new(RUBY_METHOD_FUNC(fn), arg) -#define rb_fiber_new(fn, arg) rb_fiber_new(RUBY_METHOD_FUNC(fn), arg) -#define rb_thread_create(fn, arg) rb_thread_create(RUBY_METHOD_FUNC(fn), arg) -#define rb_define_hooked_variable(name, var, getter, setter) rb_define_hooked_variable(name, var, RUBY_METHOD_FUNC(getter), (void (*)(...))setter) -#endif - #endif diff --git a/spec/ruby/optional/capi/gc_spec.rb b/spec/ruby/optional/capi/gc_spec.rb index 23e2b7c9ab9116..d76ea7394f304e 100644 --- a/spec/ruby/optional/capi/gc_spec.rb +++ b/spec/ruby/optional/capi/gc_spec.rb @@ -7,15 +7,33 @@ @f = CApiGCSpecs.new end - it "correctly gets the value from a registered address" do - @f.registered_tagged_address.should == 10 - @f.registered_tagged_address.should equal(@f.registered_tagged_address) - @f.registered_reference_address.should == "Globally registered data" - @f.registered_reference_address.should equal(@f.registered_reference_address) + describe "rb_gc_register_address" do + it "correctly gets the value from a registered address" do + @f.registered_tagged_address.should == 10 + @f.registered_tagged_address.should equal(@f.registered_tagged_address) + @f.registered_reference_address.should == "Globally registered data" + @f.registered_reference_address.should equal(@f.registered_reference_address) + end + + it "keeps the value alive even if the value is assigned after rb_gc_register_address() is called" do + GC.start + @f.registered_before_rb_gc_register_address.should == "registered before rb_gc_register_address()" + end + + it "can be called outside Init_" do + @f.rb_gc_register_address.should == "rb_gc_register_address() outside Init_" + @f.rb_gc_unregister_address + end end - describe 
"rb_gc_enable" do + describe "rb_global_variable" do + it "keeps the value alive even if the value is assigned after rb_global_variable() is called" do + GC.start + @f.registered_before_rb_global_variable.should == "registered before rb_global_variable()" + end + end + describe "rb_gc_enable" do after do GC.enable end diff --git a/spec/ruby/optional/capi/globals_spec.rb b/spec/ruby/optional/capi/globals_spec.rb index cc6f6ef3a83841..48677620bcf8a2 100644 --- a/spec/ruby/optional/capi/globals_spec.rb +++ b/spec/ruby/optional/capi/globals_spec.rb @@ -9,7 +9,7 @@ end it "correctly gets global values" do - @f.sb_gv_get("$BLAH").should == nil + suppress_warning { @f.sb_gv_get("$BLAH") }.should == nil @f.sb_gv_get("$\\").should == nil @f.sb_gv_get("\\").should == nil # rb_gv_get should change \ to $\ end @@ -21,7 +21,7 @@ end it "correctly sets global values" do - @f.sb_gv_get("$BLAH").should == nil + suppress_warning { @f.sb_gv_get("$BLAH") }.should == nil @f.sb_gv_set("$BLAH", 10) begin @f.sb_gv_get("$BLAH").should == 10 @@ -42,6 +42,10 @@ end it "rb_define_readonly_variable should define a new readonly global variable" do + # Check the gvar doesn't exist and ensure rb_gv_get doesn't implicitly declare the gvar, + # otherwise the rb_define_readonly_variable call will conflict. 
+ suppress_warning { @f.sb_gv_get("ro_gvar") } .should == nil + @f.rb_define_readonly_variable("ro_gvar", 15) $ro_gvar.should == 15 -> { $ro_gvar = 10 }.should raise_error(NameError) @@ -53,6 +57,52 @@ $hooked_gvar.should == 4 end + it "rb_define_hooked_variable should use default accessors if NULL ones are supplied" do + @f.rb_define_hooked_variable_default_accessors("$hooked_gvar_default_accessors") + $hooked_gvar_default_accessors = 10 + $hooked_gvar_default_accessors.should == 10 + end + + it "rb_define_hooked_variable with default accessors should return nil for NULL variables" do + @f.rb_define_hooked_variable_null_var("$hooked_gvar_null_value") + $hooked_gvar_null_value.should == nil + end + + describe "rb_define_virtual_variable" do + describe "with default accessors" do + before :all do + @f.rb_define_virtual_variable_default_accessors("$virtual_variable_default_accessors") + end + + it "is read-only" do + -> { $virtual_variable_default_accessors = 10 }.should raise_error(NameError, /read-only/) + end + + it "returns false with the default getter" do + $virtual_variable_default_accessors.should == false + $virtual_variable_default_accessors.should == false + end + end + + describe "with supplied accessors" do + before :all do + @f.rb_define_virtual_variable_incrementing_accessors("$virtual_variable_incrementing_accessors") + end + + it "returns a dynamically changing value" do + $virtual_variable_incrementing_accessors = 20 + $virtual_variable_incrementing_accessors.should == 20 + $virtual_variable_incrementing_accessors.should == 21 + $virtual_variable_incrementing_accessors.should == 22 + + $virtual_variable_incrementing_accessors = 100 + $virtual_variable_incrementing_accessors.should == 100 + $virtual_variable_incrementing_accessors.should == 101 + $virtual_variable_incrementing_accessors.should == 102 + end + end + end + describe "rb_fs" do before :each do @field_separator = $; diff --git a/spec/ruby/shared/kernel/complex.rb 
b/spec/ruby/shared/kernel/complex.rb new file mode 100644 index 00000000000000..98ee0b2b3fbea7 --- /dev/null +++ b/spec/ruby/shared/kernel/complex.rb @@ -0,0 +1,133 @@ +# Specs shared by Kernel#Complex() and String#to_c() +describe :kernel_complex, shared: true do + + it "returns a Complex object" do + @object.send(@method, '9').should be_an_instance_of(Complex) + end + + it "understands integers" do + @object.send(@method, '20').should == Complex(20) + end + + it "understands negative integers" do + @object.send(@method, '-3').should == Complex(-3) + end + + it "understands fractions (numerator/denominator) for the real part" do + @object.send(@method, '2/3').should == Complex(Rational(2, 3)) + end + + it "understands fractions (numerator/denominator) for the imaginary part" do + @object.send(@method, '4+2/3i').should == Complex(4, Rational(2, 3)) + end + + it "understands negative fractions (-numerator/denominator) for the real part" do + @object.send(@method, '-2/3').should == Complex(Rational(-2, 3)) + end + + it "understands negative fractions (-numerator/denominator) for the imaginary part" do + @object.send(@method, '7-2/3i').should == Complex(7, Rational(-2, 3)) + end + + it "understands floats (a.b) for the real part" do + @object.send(@method, '2.3').should == Complex(2.3) + end + + it "understands floats (a.b) for the imaginary part" do + @object.send(@method, '4+2.3i').should == Complex(4, 2.3) + end + + it "understands negative floats (-a.b) for the real part" do + @object.send(@method, '-2.33').should == Complex(-2.33) + end + + it "understands negative floats (-a.b) for the imaginary part" do + @object.send(@method, '7-28.771i').should == Complex(7, -28.771) + end + + it "understands an integer followed by 'i' to mean that integer is the imaginary part" do + @object.send(@method, '35i').should == Complex(0,35) + end + + it "understands a negative integer followed by 'i' to mean that negative integer is the imaginary part" do + @object.send(@method, 
'-29i').should == Complex(0,-29) + end + + it "understands an 'i' by itself as denoting a complex number with an imaginary part of 1" do + @object.send(@method, 'i').should == Complex(0,1) + end + + it "understands a '-i' by itself as denoting a complex number with an imaginary part of -1" do + @object.send(@method, '-i').should == Complex(0,-1) + end + + it "understands 'a+bi' to mean a complex number with 'a' as the real part, 'b' as the imaginary" do + @object.send(@method, '79+4i').should == Complex(79,4) + end + + it "understands 'a-bi' to mean a complex number with 'a' as the real part, '-b' as the imaginary" do + @object.send(@method, '79-4i').should == Complex(79,-4) + end + + it "understands 'a+i' to mean a complex number with 'a' as the real part, 1i as the imaginary" do + @object.send(@method, '79+i').should == Complex(79, 1) + end + + it "understands 'a-i' to mean a complex number with 'a' as the real part, -1i as the imaginary" do + @object.send(@method, '79-i').should == Complex(79, -1) + end + + it "understands i, I, j, and J imaginary units" do + @object.send(@method, '79+4i').should == Complex(79, 4) + @object.send(@method, '79+4I').should == Complex(79, 4) + @object.send(@method, '79+4j').should == Complex(79, 4) + @object.send(@method, '79+4J').should == Complex(79, 4) + end + + it "understands scientific notation for the real part" do + @object.send(@method, '2e3+4i').should == Complex(2e3,4) + end + + it "understands negative scientific notation for the real part" do + @object.send(@method, '-2e3+4i').should == Complex(-2e3,4) + end + + it "understands scientific notation for the imaginary part" do + @object.send(@method, '4+2e3i').should == Complex(4, 2e3) + end + + it "understands negative scientific notation for the imaginary part" do + @object.send(@method, '4-2e3i').should == Complex(4, -2e3) + end + + it "understands scientific notation for the real and imaginary part in the same String" do + @object.send(@method, '2e3+2e4i').should == 
Complex(2e3,2e4) + end + + it "understands negative scientific notation for the real and imaginary part in the same String" do + @object.send(@method, '-2e3-2e4i').should == Complex(-2e3,-2e4) + end + + it "understands scientific notation with e and E" do + @object.send(@method, '2e3+2e4i').should == Complex(2e3, 2e4) + @object.send(@method, '2E3+2E4i').should == Complex(2e3, 2e4) + end + + it "understands 'm@a' to mean a complex number in polar form with 'm' as the modulus, 'a' as the argument" do + @object.send(@method, '79@4').should == Complex.polar(79, 4) + @object.send(@method, '-79@4').should == Complex.polar(-79, 4) + @object.send(@method, '79@-4').should == Complex.polar(79, -4) + end + + it "ignores leading whitespaces" do + @object.send(@method, ' 79+4i').should == Complex(79, 4) + end + + it "ignores trailing whitespaces" do + @object.send(@method, '79+4i ').should == Complex(79, 4) + end + + it "understands _" do + @object.send(@method, '7_9+4_0i').should == Complex(79, 40) + end +end From b14f133054bb04e1187f9897fa546faa433d37e7 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 7 Nov 2022 14:35:21 -0500 Subject: [PATCH 020/104] [DOC] Improve building_ruby.md --- doc/contributing/building_ruby.md | 55 +++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 10 deletions(-) diff --git a/doc/contributing/building_ruby.md b/doc/contributing/building_ruby.md index ace5fbed37c5e8..4fbb4bd2ccc446 100644 --- a/doc/contributing/building_ruby.md +++ b/doc/contributing/building_ruby.md @@ -10,7 +10,7 @@ * gperf - 3.0.3 or later * ruby - 2.7 or later -2. Install optional, recommended dependencies: +1. Install optional, recommended dependencies: * OpenSSL/LibreSSL * readline/editline (libedit) @@ -20,29 +20,64 @@ * libexecinfo (FreeBSD) * rustc - 1.58.1 or later (if you wish to build [YJIT](/doc/yjit/yjit.md)) -3. Checkout the CRuby source code: +1. Checkout the CRuby source code: ``` git clone https://github.com/ruby/ruby.git ``` -4. 
Generate the configuration files and build. It's generally advisable to use a build directory: +1. Generate the configure file: ``` ./autogen.sh - mkdir build && cd build # it's good practice to build outside of source dir - mkdir ~/.rubies # we will install to .rubies/ruby-master in our home dir + ``` + +1. Create a `build` directory outside of the source directory: + + ``` + mkdir build && cd build + ``` + + While it's not necessary to build in a separate directory, it's good practice to do so. + +1. We'll install Ruby in `~/.rubies/ruby-master`, so create the directory: + + ``` + mkdir ~/.rubies + ``` + +1. Run configure: + + ``` ../configure --prefix="${HOME}/.rubies/ruby-master" - make install ``` -5. Optional: If you are frequently building Ruby, disabling documentation will reduce the time it takes to `make`: + - If you are frequently building Ruby, add the `--disable-install-doc` flag to not build documentation which will speed up the build process. - ``` shell - ../configure --prefix="${HOME}/.rubies/ruby-master" --disable-install-doc +1. Build Ruby: + + ``` + make install ``` -6. [Run tests](testing_ruby.md) to confirm your build succeeded + - If you're on macOS and installed \OpenSSL through Homebrew, you may encounter failure to build \OpenSSL that look like this: + + ``` + openssl: + Could not be configured. It will not be installed. + ruby/ext/openssl/extconf.rb: OpenSSL library could not be found. You might want to use --with-openssl-dir= option to specify the prefix where OpenSSL is installed. + Check ext/openssl/mkmf.log for more details. + ``` + + Running the following command may solve the issue: + + ``` + brew link openssl --force + ``` + + Remember to delete your `build` directory and start again from the configure step. + +6. [Run tests](testing_ruby.md) to confirm your build succeeded. 
### Unexplainable Build Errors From cb2323a98366bb7afb7b2fee2547804bbbc9d345 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 8 Nov 2022 09:31:58 +0900 Subject: [PATCH 021/104] Thread#native_thread_id is very platform specific --- spec/ruby/core/thread/native_thread_id_spec.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/spec/ruby/core/thread/native_thread_id_spec.rb b/spec/ruby/core/thread/native_thread_id_spec.rb index 5a6c0c86326a13..d6cc332bf6b533 100644 --- a/spec/ruby/core/thread/native_thread_id_spec.rb +++ b/spec/ruby/core/thread/native_thread_id_spec.rb @@ -1,6 +1,8 @@ require_relative '../../spec_helper' -ruby_version_is "3.1" do +if ruby_version_is "3.1" and Thread.method_defined?(:native_thread_id) + # This method is very platform specific + describe "Thread#native_thread_id" do it "returns an integer when the thread is alive" do Thread.current.native_thread_id.should be_kind_of(Integer) From 7456647effc8c0d0fd85eb16b47635b96d2401df Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 7 Nov 2022 19:47:12 -0500 Subject: [PATCH 022/104] [DOC] Properly number the list in building_ruby.md --- doc/contributing/building_ruby.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/contributing/building_ruby.md b/doc/contributing/building_ruby.md index 4fbb4bd2ccc446..f7b489c4a986cf 100644 --- a/doc/contributing/building_ruby.md +++ b/doc/contributing/building_ruby.md @@ -10,7 +10,7 @@ * gperf - 3.0.3 or later * ruby - 2.7 or later -1. Install optional, recommended dependencies: +2. Install optional, recommended dependencies: * OpenSSL/LibreSSL * readline/editline (libedit) @@ -20,19 +20,19 @@ * libexecinfo (FreeBSD) * rustc - 1.58.1 or later (if you wish to build [YJIT](/doc/yjit/yjit.md)) -1. Checkout the CRuby source code: +3. Checkout the CRuby source code: ``` git clone https://github.com/ruby/ruby.git ``` -1. Generate the configure file: +4. 
Generate the configure file: ``` ./autogen.sh ``` -1. Create a `build` directory outside of the source directory: +5. Create a `build` directory outside of the source directory: ``` mkdir build && cd build @@ -40,13 +40,13 @@ While it's not necessary to build in a separate directory, it's good practice to do so. -1. We'll install Ruby in `~/.rubies/ruby-master`, so create the directory: +6. We'll install Ruby in `~/.rubies/ruby-master`, so create the directory: ``` mkdir ~/.rubies ``` -1. Run configure: +7. Run configure: ``` ../configure --prefix="${HOME}/.rubies/ruby-master" @@ -54,7 +54,7 @@ - If you are frequently building Ruby, add the `--disable-install-doc` flag to not build documentation which will speed up the build process. -1. Build Ruby: +8. Build Ruby: ``` make install @@ -77,7 +77,7 @@ Remember to delete your `build` directory and start again from the configure step. -6. [Run tests](testing_ruby.md) to confirm your build succeeded. +9. [Run tests](testing_ruby.md) to confirm your build succeeded. ### Unexplainable Build Errors From f7db1affd10767d729866e95c02ffb26266829ab Mon Sep 17 00:00:00 2001 From: yui-knk Date: Fri, 23 Sep 2022 23:01:55 +0900 Subject: [PATCH 023/104] Set default %printer for NODE nterms Before: ``` Reducing stack by rule 639 (line 5062): $1 = token "integer literal" (1.0-1.1: 1) -> $$ = nterm simple_numeric (1.0-1.1: ) ``` After: ``` Reducing stack by rule 641 (line 5078): $1 = token "integer literal" (1.0-1.1: 1) -> $$ = nterm simple_numeric (1.0-1.1: NODE_LIT) ``` `"<*>"` is supported by Bison 2.3b (2008-05-27) or later. https://git.savannah.gnu.org/cgit/bison.git/commit/?id=12e3584054c16ab255672c07af0ffc7bb220e8bc Therefore developers need to install Bison 2.3b+ to build ruby from source codes if their Bison is older. Minimum version requirement for Bison is changed to 3.0. 
See: https://bugs.ruby-lang.org/issues/19068 [Feature #19068] --- .github/workflows/macos.yml | 3 ++- ext/ripper/tools/preproc.rb | 2 +- parse.y | 8 ++++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index e5f055f8c43242..6865c218b1a799 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -51,11 +51,12 @@ jobs: - name: Install libraries run: | brew upgrade - brew install gmp libffi openssl@1.1 zlib autoconf automake libtool readline + brew install gmp libffi openssl@1.1 zlib autoconf automake libtool readline bison working-directory: src - name: Set ENV run: | echo "MAKEFLAGS=-j$((1 + $(sysctl -n hw.activecpu)))" >> $GITHUB_ENV + echo "PATH="/usr/local/opt/bison/bin:$PATH"" >> $GITHUB_ENV - run: ./autogen.sh working-directory: src - name: Run configure diff --git a/ext/ripper/tools/preproc.rb b/ext/ripper/tools/preproc.rb index b838a78db71cfd..cd85a5da613c33 100644 --- a/ext/ripper/tools/preproc.rb +++ b/ext/ripper/tools/preproc.rb @@ -47,7 +47,7 @@ def prelude(f, out) when /\A%%/ out << "%%\n" return - when /\A%token/ + when /\A%token/, /\A} / out << line.sub(/<\w+>/, '') when /\A%type/ out << line.sub(/<\w+>/, '') diff --git a/parse.y b/parse.y index f4b4b8f3d15055..f6b32d5c979818 100644 --- a/parse.y +++ b/parse.y @@ -1140,6 +1140,14 @@ static int looking_at_eol_p(struct parser_params *p); %define api.pure %define parse.error verbose %printer { +#ifndef RIPPER + if ($$) { + rb_parser_printf(p, "%s", ruby_node_name(nd_type($$))); + } +#else +#endif +} +%printer { #ifndef RIPPER rb_parser_printf(p, "%"PRIsVALUE, rb_id2str($$)); #else From 4a7d6c2852aa734506be83c932168e8f974687b5 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Tue, 8 Nov 2022 11:52:22 +0900 Subject: [PATCH 024/104] Fix false LocalJumpError when branch coverage is enabled `throw TAG_BREAK` instruction makes a jump only if the continuation of catch of TAG_BREAK exactly matches the instruction 
immediately following the "send" instruction that is currently being executed. Otherwise, it seems to determine break from proc-closure. Branch coverage may insert some recording instructions after "send" instruction, which broke the conditions for TAG_BREAK to work properly. This change forces to set the continuation of catch of TAG_BREAK immediately after "send" (or "invokesuper") instruction. [Bug #18991] --- compile.c | 25 ++++++++++++++++++++++++- test/coverage/test_coverage.rb | 14 ++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/compile.c b/compile.c index 359d55c0f26eeb..d8d2738eb8919b 100644 --- a/compile.c +++ b/compile.c @@ -7449,7 +7449,30 @@ compile_iter(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, in ISEQ_TYPE_BLOCK, line); CHECK(COMPILE(ret, "iter caller", node->nd_iter)); } - ADD_LABEL(ret, retry_end_l); + + { + // We need to put the label "retry_end_l" immediately after the last "send" instruction. + // This because vm_throw checks if the break cont is equal to the index of next insn of the "send". + // (Otherwise, it is considered "break from proc-closure". See "TAG_BREAK" handling in "vm_throw_start".) + // + // Normally, "send" instruction is at the last. + // However, qcall under branch coverage measurement adds some instructions after the "send". + // + // Note that "invokesuper" appears instead of "send". + INSN *iobj; + LINK_ELEMENT *last_elem = LAST_ELEMENT(ret); + iobj = IS_INSN(last_elem) ? (INSN*) last_elem : (INSN*) get_prev_insn((INSN*) last_elem); + while (INSN_OF(iobj) != BIN(send) && INSN_OF(iobj) != BIN(invokesuper)) { + iobj = (INSN*) get_prev_insn(iobj); + } + ELEM_INSERT_NEXT(&iobj->link, (LINK_ELEMENT*) retry_end_l); + + // LINK_ANCHOR has a pointer to the last element, but ELEM_INSERT_NEXT does not update it + // even if we add an insn to the last of LINK_ANCHOR. So this updates it manually. 
+ if (&iobj->link == LAST_ELEMENT(ret)) { + ret->last = (LINK_ELEMENT*) retry_end_l; + } + } if (popped) { ADD_INSN(ret, line_node, pop); diff --git a/test/coverage/test_coverage.rb b/test/coverage/test_coverage.rb index 1a21235d0a983d..a2a7718a30a099 100644 --- a/test/coverage/test_coverage.rb +++ b/test/coverage/test_coverage.rb @@ -964,4 +964,18 @@ def test_double_suspend p :NG end; end + + def test_tag_break_with_branch_coverage + result = { + :branches => { + [:"&.", 0, 1, 0, 1, 6] => { + [:then, 1, 1, 0, 1, 6] => 1, + [:else, 2, 1, 0, 1, 6] => 0, + }, + }, + } + assert_coverage(<<~"end;", { branches: true }, result) + 1&.tap do break end + end; + end end From 001606097b3239b84a5910e2f2bc814074cb6973 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 8 Nov 2022 15:06:15 +0900 Subject: [PATCH 025/104] Suppress false warning by a bug of gcc GCC [Bug 99578] seems triggered by calling `rb_reg_last_match` before `match_check(match)`, probably by `NIL_P(match)` in `rb_reg_nth_match`. 
[Bug 99578]: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99578 --- re.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/re.c b/re.c index c65e4a58eb1ef7..1b8314832156a5 100644 --- a/re.c +++ b/re.c @@ -1066,12 +1066,13 @@ update_char_offset(VALUE match) } } -static void +static VALUE match_check(VALUE match) { if (!RMATCH(match)->regexp) { rb_raise(rb_eTypeError, "uninitialized MatchData"); } + return match; } /* :nodoc: */ @@ -2268,16 +2269,16 @@ match_values_at(int argc, VALUE *argv, VALUE match) static VALUE match_to_s(VALUE match) { - VALUE str = rb_reg_last_match(match); + VALUE str = rb_reg_last_match(match_check(match)); - match_check(match); if (NIL_P(str)) str = rb_str_new(0,0); return str; } static int match_named_captures_iter(const OnigUChar *name, const OnigUChar *name_end, - int back_num, int *back_refs, OnigRegex regex, void *arg) { + int back_num, int *back_refs, OnigRegex regex, void *arg) +{ struct MEMO *memo = MEMO_CAST(arg); VALUE hash = memo->v1; VALUE match = memo->v2; From 4e728486b93eaec876ea8f876df9ecad350da269 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Tue, 8 Nov 2022 14:41:04 +0900 Subject: [PATCH 026/104] [ruby/error_highlight] Bump version https://github.com/ruby/error_highlight/commit/59c291cce1 --- lib/error_highlight/version.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/error_highlight/version.rb b/lib/error_highlight/version.rb index 4279b6d05fc9ff..abd85116af8c6b 100644 --- a/lib/error_highlight/version.rb +++ b/lib/error_highlight/version.rb @@ -1,3 +1,3 @@ module ErrorHighlight - VERSION = "0.4.0" + VERSION = "0.5.0" end From cdb3ec3af84ce5ab1ae9c2bd72f8f0a29620f580 Mon Sep 17 00:00:00 2001 From: git Date: Tue, 8 Nov 2022 08:08:48 +0000 Subject: [PATCH 027/104] Update default gems list at 4e728486b93eaec876ea8f876df9ec [ci skip] --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 0367e8bfd0eb1b..2b3bbc8390c05f 100644 
--- a/NEWS.md +++ b/NEWS.md @@ -232,7 +232,7 @@ Note: We're only listing outstanding class updates. * cgi 0.3.3 * date 3.2.3 * erb 3.0.0 - * error_highlight 0.4.0 + * error_highlight 0.5.0 * etc 1.4.0 * fiddle 1.1.1 * io-console 0.5.11 From eacedcfe44a0ae22bf54ddb7df193c48d4c857c6 Mon Sep 17 00:00:00 2001 From: Jean byroot Boussier Date: Tue, 8 Nov 2022 20:43:16 +0900 Subject: [PATCH 028/104] mutex: Raise a ThreadError when detecting a fiber deadlock (#6680) [Bug #19105] If no fiber scheduler is registered and the fiber that owns the lock and the one that try to acquire it both belong to the same thread, we're in a deadlock case. Co-authored-by: Jean Boussier --- test/fiber/test_mutex.rb | 22 +++++++++++++++++++++- thread_sync.c | 4 ++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/test/fiber/test_mutex.rb b/test/fiber/test_mutex.rb index b0655f06a5a84f..449c49f38bc81e 100644 --- a/test/fiber/test_mutex.rb +++ b/test/fiber/test_mutex.rb @@ -194,7 +194,7 @@ def test_queue_pop_waits end def test_mutex_deadlock - error_pattern = /No live threads left. 
Deadlock\?/ + error_pattern = /lock already owned by another fiber/ assert_in_out_err %W[-I#{__dir__} -], <<-RUBY, ['in synchronize'], error_pattern, success: false require 'scheduler' @@ -217,4 +217,24 @@ def test_mutex_deadlock thread.join RUBY end + + def test_mutex_fiber_deadlock_no_scheduler + thr = Thread.new do + loop do + sleep 1 + end + end + + mutex = Mutex.new + mutex.synchronize do + error = assert_raise ThreadError do + Fiber.new do + mutex.lock + end.resume + end + assert_includes error.message, "deadlock; lock already owned by another fiber belonging to the same thread" + end + ensure + thr&.kill&.join + end end diff --git a/thread_sync.c b/thread_sync.c index 2bcf59137eea97..2f43896cfb2ac0 100644 --- a/thread_sync.c +++ b/thread_sync.c @@ -327,6 +327,10 @@ do_mutex_lock(VALUE self, int interruptible_p) } } else { + if (!th->vm->thread_ignore_deadlock && rb_fiber_threadptr(mutex->fiber) == th) { + rb_raise(rb_eThreadError, "deadlock; lock already owned by another fiber belonging to the same thread"); + } + enum rb_thread_status prev_status = th->status; rb_hrtime_t *timeout = 0; rb_hrtime_t rel = rb_msec2hrtime(100); From 3703a81491a16554674e4b15bac87efa3eb18f3b Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Tue, 8 Nov 2022 11:57:11 -0500 Subject: [PATCH 029/104] YJIT: improve/fix code to automatically build YJIT when available (#6684) * YJIT: improve/fix code to automatically build YJIT when available * Set YJIT_SUPPORT=no * Fix rustc => $RUSTC --- configure.ac | 62 ++++++++++++++++++++++++---------------------------- 1 file changed, 29 insertions(+), 33 deletions(-) diff --git a/configure.ac b/configure.ac index 85e1359f5b59d0..45b9a3a631a568 100644 --- a/configure.ac +++ b/configure.ac @@ -3735,29 +3735,32 @@ AC_SUBST(MJIT_SUPPORT) AC_CHECK_PROG(RUSTC, [rustc], [rustc], [no]) dnl no ac_tool_prefix -dnl check if we can build YJIT on this target platform -AS_CASE(["$target_cpu"], - [arm64|aarch64], [ - YJIT_TARGET=aarch64 - ], - 
[x86_64], [ - YJIT_TARGET="$target_cpu" - ], - [YJIT_TARGET=] +dnl check if rustc is recent enough to build YJIT (rustc >= 1.58.0) +YJIT_RUSTC_OK=no +AS_IF([test "$RUSTC" != "no"], + AS_IF([echo "fn main() { let x = 1; format!(\"{x}\"); }" | $RUSTC - --emit asm=/dev/null], + [YJIT_RUSTC_OK=yes] + ) ) -AS_CASE(["$YJIT_TARGET:$target_os"], - [:*], [ # unsupported CPU - ], - [darwin*], [ - YJIT_TARGET=${YJIT_TARGET}-apple-darwin - ], - [linux-android], [ # no target_vendor - YJIT_TARGET=${YJIT_TARGET}-${target_os} - ], - [*linux*], [ - YJIT_TARGET=${YJIT_TARGET}-${target_vendor}-${target_os} - ], - [YJIT_TARGET=] + +dnl check if we can build YJIT on this target platform +dnl we can't easily cross-compile with rustc so we don't support that +YJIT_TARGET_OK=no +AS_IF([test "$cross_compiling" = no], + AS_CASE(["$target_cpu-$target_os"], + [*android*], [ + YJIT_TARGET_OK=no + ], + [arm64-darwin*|aarch64-darwin*|x86_64-darwin*], [ + YJIT_TARGET_OK=yes + ], + [arm64-*linux*|aarch64-*linux*|x86_64-*linux*], [ + YJIT_TARGET_OK=yes + ], + [arm64-*bsd*|aarch64-*bsd*|x86_64-*bsd*], [ + YJIT_TARGET_OK=yes + ] + ) ) dnl build YJIT in release mode if rustc >= 1.58.0 is present and we are on a supported platform @@ -3765,18 +3768,11 @@ AC_ARG_ENABLE(yjit, AS_HELP_STRING([--enable-yjit], [enable experimental in-process JIT compiler that requires Rust build tools [default=no]]), [YJIT_SUPPORT=$enableval], - [AS_CASE(["$enable_jit_support:$YJIT_TARGET:$RUSTC"], - [no:*|yes::*|yes:*:no], [ - YJIT_SUPPORT=no + [AS_CASE(["$enable_jit_support:$YJIT_TARGET_OK:$YJIT_RUSTC_OK"], + [yes:yes:yes|:yes:yes], [ + YJIT_SUPPORT=yes ], - [yes:yes:*], [ - AS_IF([ echo "fn main() { let x = 1; format!(\"{x}\"); }" | $RUSTC - --target=$YJIT_TARGET --emit asm=/dev/null ], - [YJIT_SUPPORT=yes], - [YJIT_SUPPORT=no] - ) - ], [ - [YJIT_SUPPORT=no] - ] + [YJIT_SUPPORT=no] )] ) From 2244d5084e32fea801bff1060ef528769bebaa36 Mon Sep 17 00:00:00 2001 From: Bo Anderson Date: Wed, 12 Oct 2022 14:08:35 +0100 Subject: 
[PATCH 030/104] [rubygems/rubygems] Map 'universal' to the real arch in Bundler for prebuilt gem selection https://github.com/rubygems/rubygems/commit/dd0c94f16a --- lib/bundler/rubygems_ext.rb | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/lib/bundler/rubygems_ext.rb b/lib/bundler/rubygems_ext.rb index d53d6880095853..22cb797b5eb273 100644 --- a/lib/bundler/rubygems_ext.rb +++ b/lib/bundler/rubygems_ext.rb @@ -308,6 +308,28 @@ def match_platforms?(platform, platforms) end end + # On universal Rubies, resolve the "universal" arch to the real CPU arch, without changing the extension directory. + class Specification + if /^universal\.(?<arch>.*?)-/ =~ RUBY_PLATFORM + local_platform = Platform.local + if local_platform.cpu == "universal" + ORIGINAL_LOCAL_PLATFORM = local_platform.to_s.freeze + + local_platform.cpu = if arch == "arm64e" # arm64e is only permitted for Apple system binaries + "arm64" + else + arch + end + + def extensions_dir + Gem.default_ext_dir_for(base_dir) || + File.join(base_dir, "extensions", ORIGINAL_LOCAL_PLATFORM, + Gem.extension_api_version) + end + end + end + end + require "rubygems/util" Util.singleton_class.module_eval do From 0df47fdaf9c8bcfad0180aab81f9ceb162b360a1 Mon Sep 17 00:00:00 2001 From: Bo Anderson Date: Sat, 15 Oct 2022 22:51:39 +0100 Subject: [PATCH 031/104] [rubygems/rubygems] Add tests for universal Ruby with arch-specific prebuilt gems https://github.com/rubygems/rubygems/commit/11229b16c3 --- lib/bundler/rubygems_ext.rb | 2 +- spec/bundler/install/gemfile/platform_spec.rb | 75 +++++++++++++++++++ spec/bundler/support/hax.rb | 5 ++ spec/bundler/support/helpers.rb | 8 ++ 4 files changed, 89 insertions(+), 1 deletion(-) diff --git a/lib/bundler/rubygems_ext.rb b/lib/bundler/rubygems_ext.rb index 22cb797b5eb273..12d67890650fc6 100644 --- a/lib/bundler/rubygems_ext.rb +++ b/lib/bundler/rubygems_ext.rb @@ -310,7 +310,7 @@ def match_platforms?(platform, platforms) # On universal Rubies, resolve the
"universal" arch to the real CPU arch, without changing the extension directory. class Specification - if /^universal\.(?<arch>.*?)-/ =~ RUBY_PLATFORM + if /^universal\.(?<arch>.*?)-/ =~ (CROSS_COMPILING || RUBY_PLATFORM) local_platform = Platform.local if local_platform.cpu == "universal" ORIGINAL_LOCAL_PLATFORM = local_platform.to_s.freeze diff --git a/spec/bundler/install/gemfile/platform_spec.rb b/spec/bundler/install/gemfile/platform_spec.rb index 1bae0bb3715d40..69918cf9f9b563 100644 --- a/spec/bundler/install/gemfile/platform_spec.rb +++ b/spec/bundler/install/gemfile/platform_spec.rb @@ -75,6 +75,81 @@ expect(the_bundle).to include_gems "platform_specific 1.0 RUBY" end + context "on universal Rubies" do + before do + build_repo4 do + build_gem "darwin_single_arch" do |s| + s.platform = "ruby" + s.write "lib/darwin_single_arch.rb", "DARWIN_SINGLE_ARCH = '1.0 RUBY'" + end + build_gem "darwin_single_arch" do |s| + s.platform = "arm64-darwin" + s.write "lib/darwin_single_arch.rb", "DARWIN_SINGLE_ARCH = '1.0 arm64-darwin'" + end + build_gem "darwin_single_arch" do |s| + s.platform = "x86_64-darwin" + s.write "lib/darwin_single_arch.rb", "DARWIN_SINGLE_ARCH = '1.0 x86_64-darwin'" + end + end + end + + it "pulls in the correct architecture gem" do + lockfile <<-G + GEM + remote: #{file_uri_for(gem_repo4)} + specs: + darwin_single_arch (1.0) + darwin_single_arch (1.0-arm64-darwin) + darwin_single_arch (1.0-x86_64-darwin) + + PLATFORMS + ruby + + DEPENDENCIES + darwin_single_arch + G + + simulate_platform "universal-darwin-21" + simulate_ruby_platform "universal.x86_64-darwin21" do + install_gemfile <<-G + source "#{file_uri_for(gem_repo4)}" + + gem "darwin_single_arch" + G + + expect(the_bundle).to include_gems "darwin_single_arch 1.0 x86_64-darwin" + end + end + + it "pulls in the correct architecture gem on arm64e macOS Ruby" do + lockfile <<-G + GEM + remote: #{file_uri_for(gem_repo4)} + specs: + darwin_single_arch (1.0) + darwin_single_arch (1.0-arm64-darwin) +
darwin_single_arch (1.0-x86_64-darwin) + + PLATFORMS + ruby + + DEPENDENCIES + darwin_single_arch + G + + simulate_platform "universal-darwin-21" + simulate_ruby_platform "universal.arm64e-darwin21" do + install_gemfile <<-G + source "#{file_uri_for(gem_repo4)}" + + gem "darwin_single_arch" + G + + expect(the_bundle).to include_gems "darwin_single_arch 1.0 arm64-darwin" + end + end + end + it "works with gems that have different dependencies" do simulate_platform "java" install_gemfile <<-G diff --git a/spec/bundler/support/hax.rb b/spec/bundler/support/hax.rb index da67e8c5d1bfc0..76e3b05ee12af6 100644 --- a/spec/bundler/support/hax.rb +++ b/spec/bundler/support/hax.rb @@ -1,5 +1,10 @@ # frozen_string_literal: true +if ENV["BUNDLER_SPEC_RUBY_PLATFORM"] + Object.send(:remove_const, :RUBY_PLATFORM) + RUBY_PLATFORM = ENV["BUNDLER_SPEC_RUBY_PLATFORM"] +end + module Gem def self.ruby=(ruby) @ruby = ruby diff --git a/spec/bundler/support/helpers.rb b/spec/bundler/support/helpers.rb index dfc358796af539..1541f903c7fd7b 100644 --- a/spec/bundler/support/helpers.rb +++ b/spec/bundler/support/helpers.rb @@ -432,6 +432,14 @@ def simulate_new_machine pristine_system_gems :bundler end + def simulate_ruby_platform(ruby_platform) + old = ENV["BUNDLER_SPEC_RUBY_PLATFORM"] + ENV["BUNDLER_SPEC_RUBY_PLATFORM"] = ruby_platform.to_s + yield + ensure + ENV["BUNDLER_SPEC_RUBY_PLATFORM"] = old + end + def simulate_platform(platform) old = ENV["BUNDLER_SPEC_PLATFORM"] ENV["BUNDLER_SPEC_PLATFORM"] = platform.to_s From b7b78f062ff66c7a6da80e8d1dea734fcb7366c4 Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Tue, 8 Nov 2022 17:19:55 +0000 Subject: [PATCH 032/104] [ruby/irb] Add execute_lines to reduce command tests' boilerplate code (https://github.com/ruby/irb/pull/436) https://github.com/ruby/irb/commit/1595337149 --- test/irb/test_cmd.rb | 423 ++++++++++++++++--------------------------- 1 file changed, 157 insertions(+), 266 deletions(-) diff --git a/test/irb/test_cmd.rb 
b/test/irb/test_cmd.rb index 531ea519f31194..f2d8a0299bc068 100644 --- a/test/irb/test_cmd.rb +++ b/test/irb/test_cmd.rb @@ -219,145 +219,139 @@ def test_irb_info_lang end def test_measure - IRB.init_config(nil) - IRB.conf[:PROMPT] = { - DEFAULT: { - PROMPT_I: '> ', - PROMPT_S: '> ', - PROMPT_C: '> ', - PROMPT_N: '> ' - } + conf = { + PROMPT: { + DEFAULT: { + PROMPT_I: '> ', + PROMPT_S: '> ', + PROMPT_C: '> ', + PROMPT_N: '> ' + } + }, + PROMPT_MODE: :DEFAULT, + MEASURE: false } - IRB.conf[:VERBOSE] = false - IRB.conf[:PROMPT_MODE] = :DEFAULT - IRB.conf[:MEASURE] = false - input = TestInputMethod.new([ + + c = Class.new(Object) + out, err = execute_lines( "3\n", "measure\n", "3\n", "measure :off\n", "3\n", - ]) - c = Class.new(Object) - irb = IRB::Irb.new(IRB::WorkSpace.new(c.new), input) - irb.context.return_format = "=> %s\n" - out, err = capture_output do - irb.eval_input - end + conf: conf, + main: c + ) + assert_empty err assert_match(/\A=> 3\nTIME is added\.\n=> nil\nprocessing time: .+\n=> 3\n=> nil\n=> 3\n/, out) assert_empty(c.class_variables) end def test_measure_enabled_by_rc - IRB.init_config(nil) - IRB.conf[:PROMPT] = { - DEFAULT: { - PROMPT_I: '> ', - PROMPT_S: '> ', - PROMPT_C: '> ', - PROMPT_N: '> ' - } + conf = { + PROMPT: { + DEFAULT: { + PROMPT_I: '> ', + PROMPT_S: '> ', + PROMPT_C: '> ', + PROMPT_N: '> ' + } + }, + PROMPT_MODE: :DEFAULT, + MEASURE: true } - IRB.conf[:VERBOSE] = false - IRB.conf[:PROMPT_MODE] = :DEFAULT - IRB.conf[:MEASURE] = true - input = TestInputMethod.new([ + + out, err = execute_lines( "3\n", "measure :off\n", "3\n", - ]) - irb = IRB::Irb.new(IRB::WorkSpace.new(Object.new), input) - irb.context.return_format = "=> %s\n" - out, err = capture_output do - irb.eval_input - end + conf: conf, + ) + assert_empty err assert_match(/\Aprocessing time: .+\n=> 3\n=> nil\n=> 3\n/, out) end def test_measure_enabled_by_rc_with_custom - IRB.init_config(nil) - IRB.conf[:PROMPT] = { - DEFAULT: { - PROMPT_I: '> ', - PROMPT_S: '> ', - 
PROMPT_C: '> ', - PROMPT_N: '> ' - } - } - IRB.conf[:VERBOSE] = false - IRB.conf[:PROMPT_MODE] = :DEFAULT - IRB.conf[:MEASURE] = true - IRB.conf[:MEASURE_PROC][:CUSTOM] = proc { |line, line_no, &block| + measuring_proc = proc { |line, line_no, &block| time = Time.now result = block.() puts 'custom processing time: %fs' % (Time.now - time) if IRB.conf[:MEASURE] result } - input = TestInputMethod.new([ + conf = { + PROMPT: { + DEFAULT: { + PROMPT_I: '> ', + PROMPT_S: '> ', + PROMPT_C: '> ', + PROMPT_N: '> ' + } + }, + PROMPT_MODE: :DEFAULT, + MEASURE: true, + MEASURE_PROC: { CUSTOM: measuring_proc } + } + + out, err = execute_lines( "3\n", "measure :off\n", "3\n", - ]) - irb = IRB::Irb.new(IRB::WorkSpace.new(Object.new), input) - irb.context.return_format = "=> %s\n" - out, err = capture_output do - irb.eval_input - end + conf: conf, + ) assert_empty err assert_match(/\Acustom processing time: .+\n=> 3\n=> nil\n=> 3\n/, out) end def test_measure_with_custom - IRB.init_config(nil) - IRB.conf[:PROMPT] = { - DEFAULT: { - PROMPT_I: '> ', - PROMPT_S: '> ', - PROMPT_C: '> ', - PROMPT_N: '> ' - } - } - IRB.conf[:VERBOSE] = false - IRB.conf[:PROMPT_MODE] = :DEFAULT - IRB.conf[:MEASURE] = false - IRB.conf[:MEASURE_PROC][:CUSTOM] = proc { |line, line_no, &block| + measuring_proc = proc { |line, line_no, &block| time = Time.now result = block.() puts 'custom processing time: %fs' % (Time.now - time) if IRB.conf[:MEASURE] result } - input = TestInputMethod.new([ + conf = { + PROMPT: { + DEFAULT: { + PROMPT_I: '> ', + PROMPT_S: '> ', + PROMPT_C: '> ', + PROMPT_N: '> ' + } + }, + PROMPT_MODE: :DEFAULT, + MEASURE: false, + MEASURE_PROC: { CUSTOM: measuring_proc } + } + out, err = execute_lines( "3\n", "measure\n", "3\n", "measure :off\n", "3\n", - ]) - irb = IRB::Irb.new(IRB::WorkSpace.new(Object.new), input) - irb.context.return_format = "=> %s\n" - out, err = capture_output do - irb.eval_input - end + conf: conf + ) + assert_empty err assert_match(/\A=> 3\nCUSTOM is added\.\n=> 
nil\ncustom processing time: .+\n=> 3\n=> nil\n=> 3\n/, out) end def test_measure_with_proc - IRB.init_config(nil) - IRB.conf[:PROMPT] = { - DEFAULT: { - PROMPT_I: '> ', - PROMPT_S: '> ', - PROMPT_C: '> ', - PROMPT_N: '> ' - } + conf = { + PROMPT: { + DEFAULT: { + PROMPT_I: '> ', + PROMPT_S: '> ', + PROMPT_C: '> ', + PROMPT_N: '> ' + } + }, + PROMPT_MODE: :DEFAULT, + MEASURE: false, } - IRB.conf[:VERBOSE] = false - IRB.conf[:PROMPT_MODE] = :DEFAULT - IRB.conf[:MEASURE] = false - input = TestInputMethod.new([ + c = Class.new(Object) + out, err = execute_lines( "3\n", "measure { |context, code, line_no, &block|\n", " result = block.()\n", @@ -373,56 +367,38 @@ def test_measure_with_proc "3\n", "measure :off\n", "3\n", - ]) - c = Class.new(Object) - irb = IRB::Irb.new(IRB::WorkSpace.new(c.new), input) - irb.context.return_format = "=> %s\n" - out, err = capture_output do - irb.eval_input - end + conf: conf, + main: c + ) + assert_empty err assert_match(/\A=> 3\nBLOCK is added\.\n=> nil\naaa\n=> 3\nBLOCK is added.\naaa\n=> nil\nbbb\n=> 3\n=> nil\n=> 3\n/, out) assert_empty(c.class_variables) end def test_irb_source - IRB.init_config(nil) File.write("#{@tmpdir}/a.rb", "a = 'hi'\n") - input = TestInputMethod.new([ - "a = 'bug17564'\n", - "a\n", - "irb_source '#{@tmpdir}/a.rb'\n", - "a\n", - ]) - IRB.conf[:VERBOSE] = false - IRB.conf[:PROMPT_MODE] = :SIMPLE - irb = IRB::Irb.new(IRB::WorkSpace.new(self), input) - IRB.conf[:MAIN_CONTEXT] = irb.context - out, err = capture_output do - irb.eval_input - end + out, err = execute_lines( + "a = 'bug17564'\n", + "a\n", + "irb_source '#{@tmpdir}/a.rb'\n", + "a\n", + ) assert_empty err assert_pattern_list([ - /=> "bug17564"\n/, - /=> "bug17564"\n/, - / => "hi"\n/, - / => nil\n/, - /=> "hi"\n/, - ], out) + /=> "bug17564"\n/, + /=> "bug17564"\n/, + / => "hi"\n/, + / => nil\n/, + /=> "hi"\n/, + ], out) end def test_help - IRB.init_config(nil) - input = TestInputMethod.new([ - "help 'String#gsub'\n", - "\n", - ]) - 
IRB.conf[:PROMPT_MODE] = :SIMPLE - IRB.conf[:VERBOSE] = false - irb = IRB::Irb.new(IRB::WorkSpace.new(self), input) - out, _ = capture_output do - irb.eval_input - end + out, _ = execute_lines( + "help 'String#gsub'\n", + "\n", + ) # the former is what we'd get without document content installed, like on CI # the latter is what we may get locally @@ -434,18 +410,11 @@ def test_help end def test_help_without_rdoc - IRB.init_config(nil) - input = TestInputMethod.new([ + out, _ = without_rdoc do + execute_lines( "help 'String#gsub'\n", "\n", - ]) - IRB.conf[:PROMPT_MODE] = :SIMPLE - IRB.conf[:VERBOSE] = false - irb = IRB::Irb.new(IRB::WorkSpace.new(self), input) - out, _ = capture_output do - without_rdoc do - irb.eval_input - end + ) end # if it fails to require rdoc, it only returns the command object @@ -456,21 +425,13 @@ def test_help_without_rdoc end def test_irb_load - IRB.init_config(nil) File.write("#{@tmpdir}/a.rb", "a = 'hi'\n") - input = TestInputMethod.new([ - "a = 'bug17564'\n", - "a\n", - "irb_load '#{@tmpdir}/a.rb'\n", - "a\n", - ]) - IRB.conf[:VERBOSE] = false - IRB.conf[:PROMPT_MODE] = :SIMPLE - irb = IRB::Irb.new(IRB::WorkSpace.new(self), input) - IRB.conf[:MAIN_CONTEXT] = irb.context - out, err = capture_output do - irb.eval_input - end + out, err = execute_lines( + "a = 'bug17564'\n", + "a\n", + "irb_load '#{@tmpdir}/a.rb'\n", + "a\n", + ) assert_empty err assert_pattern_list([ /=> "bug17564"\n/, @@ -482,7 +443,7 @@ def test_irb_load end def test_ls - input = TestInputMethod.new([ + out, err = execute_lines( "class P\n", " def m() end\n", " def m2() end\n", @@ -508,16 +469,8 @@ def test_ls "obj.extend M2\n", "def obj.m5() end\n", "ls obj\n", - ]) - IRB.init_config(nil) - workspace = IRB::WorkSpace.new(self) - IRB.conf[:VERBOSE] = false - irb = IRB::Irb.new(workspace, input) - IRB.conf[:MAIN_CONTEXT] = irb.context - irb.context.return_format = "=> %s\n" - out, err = capture_output do - irb.eval_input - end + ) + assert_empty err 
assert_match(/^instance variables:\s+@a\n/m, out) assert_match(/P#methods:\s+m\n/m, out) @@ -528,18 +481,9 @@ def test_ls end def test_ls_with_no_singleton_class - input = TestInputMethod.new([ + out, err = execute_lines( "ls 42", - ]) - IRB.init_config(nil) - workspace = IRB::WorkSpace.new(self) - IRB.conf[:VERBOSE] = false - irb = IRB::Irb.new(workspace, input) - IRB.conf[:MAIN_CONTEXT] = irb.context - irb.context.return_format = "=> %s\n" - out, err = capture_output do - irb.eval_input - end + ) assert_empty err assert_match(/Comparable#methods:\s+/, out) assert_match(/Numeric#methods:\s+/, out) @@ -547,70 +491,34 @@ def test_ls_with_no_singleton_class end def test_show_source - input = TestInputMethod.new([ + out, err = execute_lines( "show_source IRB.conf\n", - ]) - IRB.init_config(nil) - workspace = IRB::WorkSpace.new(self) - IRB.conf[:VERBOSE] = false - irb = IRB::Irb.new(workspace, input) - IRB.conf[:MAIN_CONTEXT] = irb.context - irb.context.return_format = "=> %s\n" - out, err = capture_output do - irb.eval_input - end + ) assert_empty err assert_match(%r[/irb\.rb], out) end def test_show_source_method - input = TestInputMethod.new([ + out, err = execute_lines( "p show_source('IRB.conf')\n", - ]) - IRB.init_config(nil) - workspace = IRB::WorkSpace.new(self) - IRB.conf[:VERBOSE] = false - irb = IRB::Irb.new(workspace, input) - IRB.conf[:MAIN_CONTEXT] = irb.context - irb.context.return_format = "=> %s\n" - out, err = capture_output do - irb.eval_input - end + ) assert_empty err assert_match(%r[/irb\.rb], out) end def test_show_source_string - input = TestInputMethod.new([ + out, err = execute_lines( "show_source 'IRB.conf'\n", - ]) - IRB.init_config(nil) - workspace = IRB::WorkSpace.new(self) - IRB.conf[:VERBOSE] = false - irb = IRB::Irb.new(workspace, input) - IRB.conf[:MAIN_CONTEXT] = irb.context - irb.context.return_format = "=> %s\n" - out, err = capture_output do - irb.eval_input - end + ) assert_empty err assert_match(%r[/irb\.rb], out) end def 
test_show_source_alias - input = TestInputMethod.new([ + out, err = execute_lines( "$ 'IRB.conf'\n", - ]) - IRB.init_config(nil) - IRB.conf[:COMMAND_ALIASES] = { :'$' => :show_source } - workspace = IRB::WorkSpace.new(Object.new) - IRB.conf[:VERBOSE] = false - irb = IRB::Irb.new(workspace, input) - IRB.conf[:MAIN_CONTEXT] = irb.context - irb.context.return_format = "=> %s\n" - out, err = capture_output do - irb.eval_input - end + conf: { COMMAND_ALIASES: { :'$' => :show_source } } + ) assert_empty err assert_match(%r[/irb\.rb], out) end @@ -623,79 +531,62 @@ def show_source_test_method end end EOS - input = TestInputMethod.new([ + + out, err = execute_lines( "show_source 'TestIRB::ExtendCommand#show_source_test_method'\n", - ]) - IRB.init_config(nil) - workspace = IRB::WorkSpace.new(self) - IRB.conf[:VERBOSE] = false - irb = IRB::Irb.new(workspace, input) - IRB.conf[:MAIN_CONTEXT] = irb.context - irb.context.return_format = "=> %s\n" - out, err = capture_output do - irb.eval_input - end + ) + assert_empty err assert_include(out, code) end def test_whereami - input = TestInputMethod.new([ + out, err = execute_lines( "whereami\n", - ]) - IRB.init_config(nil) - workspace = IRB::WorkSpace.new(self) - IRB.conf[:VERBOSE] = false - irb = IRB::Irb.new(workspace, input) - IRB.conf[:MAIN_CONTEXT] = irb.context - irb.context.return_format = "=> %s\n" - out, err = capture_output do - irb.eval_input - end + ) assert_empty err assert_match(/^From: .+ @ line \d+ :\n/, out) end def test_whereami_alias - input = TestInputMethod.new([ + out, err = execute_lines( "@\n", - ]) - IRB.init_config(nil) - IRB.conf[:COMMAND_ALIASES] = { :'@' => :whereami } - workspace = IRB::WorkSpace.new(Object.new) - IRB.conf[:VERBOSE] = false - irb = IRB::Irb.new(workspace, input) - IRB.conf[:MAIN_CONTEXT] = irb.context - out, err = capture_output do - irb.eval_input - end + conf: { COMMAND_ALIASES: { :'@' => :whereami } } + ) assert_empty err assert_match(/^From: .+ @ line \d+ :\n/, out) end def 
test_vars_with_aliases - input = TestInputMethod.new([ + @foo = "foo" + $bar = "bar" + out, err = execute_lines( "@foo\n", "$bar\n", - ]) + conf: { COMMAND_ALIASES: { :'$' => :show_source, :'@' => :whereami } } + ) + assert_empty err + assert_match(/"foo"/, out) + assert_match(/"bar"/, out) + ensure + remove_instance_variable(:@foo) + $bar = nil + end + + private + + def execute_lines(*lines, conf: {}, main: self) IRB.init_config(nil) - IRB.conf[:COMMAND_ALIASES] = { - :'@' => :whereami, - :'$' => :show_source, - } - main = Object.new - main.instance_variable_set(:@foo, "foo") - $bar = "bar" - workspace = IRB::WorkSpace.new(main) IRB.conf[:VERBOSE] = false - irb = IRB::Irb.new(workspace, input) + IRB.conf[:PROMPT_MODE] = :SIMPLE + IRB.conf.merge!(conf) + input = TestInputMethod.new(lines) + irb = IRB::Irb.new(IRB::WorkSpace.new(main), input) + irb.context.return_format = "=> %s\n" IRB.conf[:MAIN_CONTEXT] = irb.context - out, err = capture_output do + capture_output do irb.eval_input end - assert_empty err - assert_match(/"foo"/, out) - assert_match(/"bar"/, out) end end end From 5643d2bb9aae3417cfc1b9dc85bf28dfb2574a55 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Tue, 8 Nov 2022 09:36:29 -0800 Subject: [PATCH 033/104] YJIT: Make more stats accessible from Ruby code (#6685) --- yjit.rb | 49 +++++++++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/yjit.rb b/yjit.rb index 71cbaf7edb96fb..5a00dcca575045 100644 --- a/yjit.rb +++ b/yjit.rb @@ -133,7 +133,30 @@ def self.dump_exit_locations(filename) # Return a hash for statistics generated for the --yjit-stats command line option. # Return nil when option is not passed or unavailable. def self.runtime_stats - Primitive.rb_yjit_get_stats + stats = Primitive.rb_yjit_get_stats + return stats if stats.nil? 
|| !Primitive.rb_yjit_stats_enabled_p + + side_exits = total_exit_count(stats) + total_exits = side_exits + stats[:leave_interp_return] + + # Number of instructions that finish executing in YJIT. + # See :count-placement: about the subtraction. + retired_in_yjit = stats[:exec_instruction] - side_exits + + # Average length of instruction sequences executed by YJIT + avg_len_in_yjit = retired_in_yjit.to_f / total_exits + + # Proportion of instructions that retire in YJIT + total_insns_count = retired_in_yjit + stats[:vm_insns_count] + yjit_ratio_pct = 100.0 * retired_in_yjit.to_f / total_insns_count + + # Make those stats available in RubyVM::YJIT.runtime_stats as well + stats[:side_exit_count] = side_exits + stats[:total_exit_count] = total_exits + stats[:ratio_in_yjit] = yjit_ratio_pct + stats[:avg_len_in_yjit] = avg_len_in_yjit + + stats end # Produce disassembly for an iseq @@ -198,20 +221,6 @@ def _print_stats print_counters(stats, prefix: 'opt_getinlinecache_', prompt: 'opt_getinlinecache exit reasons: ') print_counters(stats, prefix: 'invalidate_', prompt: 'invalidation reasons: ') - side_exits = total_exit_count(stats) - total_exits = side_exits + stats[:leave_interp_return] - - # Number of instructions that finish executing in YJIT. - # See :count-placement: about the subtraction. 
- retired_in_yjit = stats[:exec_instruction] - side_exits - - # Average length of instruction sequences executed by YJIT - avg_len_in_yjit = retired_in_yjit.to_f / total_exits - - # Proportion of instructions that retire in YJIT - total_insns_count = retired_in_yjit + stats[:vm_insns_count] - yjit_ratio_pct = 100.0 * retired_in_yjit.to_f / total_insns_count - # Number of failed compiler invocations compilation_failure = stats[:compilation_failure] @@ -231,13 +240,13 @@ def _print_stats $stderr.puts "code_gc_count: " + ("%10d" % stats[:code_gc_count]) $stderr.puts "num_gc_obj_refs: " + ("%10d" % stats[:num_gc_obj_refs]) - $stderr.puts "side_exit_count: " + ("%10d" % side_exits) - $stderr.puts "total_exit_count: " + ("%10d" % total_exits) - $stderr.puts "total_insns_count: " + ("%10d" % total_insns_count) + $stderr.puts "side_exit_count: " + ("%10d" % stats[:side_exit_count]) + $stderr.puts "total_exit_count: " + ("%10d" % stats[:total_exit_count]) + $stderr.puts "total_insns_count: " + ("%10d" % (stats[:exec_instruction] - stats[:side_exit_count] + stats[:vm_insns_count])) # retired_in_yjit + vm_insns_count; not stored in stats $stderr.puts "vm_insns_count: " + ("%10d" % stats[:vm_insns_count]) $stderr.puts "yjit_insns_count: " + ("%10d" % stats[:exec_instruction]) - $stderr.puts "ratio_in_yjit: " + ("%9.1f" % yjit_ratio_pct) + "%" - $stderr.puts "avg_len_in_yjit: " + ("%10.1f" % avg_len_in_yjit) + $stderr.puts "ratio_in_yjit: " + ("%9.1f" % stats[:ratio_in_yjit]) + "%" + $stderr.puts "avg_len_in_yjit: " + ("%10.1f" % stats[:avg_len_in_yjit]) print_sorted_exit_counts(stats, prefix: "exit_") end From aada904d94ece1d1b48d6275169d3f906a062247 Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Tue, 8 Nov 2022 17:51:58 +0000 Subject: [PATCH 034/104] [doc] Clarify how to build OpenSSL on macOS --- doc/contributing/building_ruby.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/doc/contributing/building_ruby.md b/doc/contributing/building_ruby.md index f7b489c4a986cf..322ffbb0644206 100644 --- a/doc/contributing/building_ruby.md +++
b/doc/contributing/building_ruby.md @@ -69,11 +69,7 @@ Check ext/openssl/mkmf.log for more details. ``` - Running the following command may solve the issue: - - ``` - brew link openssl --force - ``` + Adding `--with-openssl-dir=$(brew --prefix openssl)` to the list of options passed to configure may solve the issue. Remember to delete your `build` directory and start again from the configure step. From 1a65ab20cb6519ab3d4e58141cfd812eaea5f7e0 Mon Sep 17 00:00:00 2001 From: Jimmy Miller Date: Tue, 8 Nov 2022 15:28:28 -0500 Subject: [PATCH 035/104] Implement optimize call (#6691) This dispatches to a c func for doing the dynamic lookup. I experimented with chain on the proc but wasn't able to detect which call sites would be monomorphic vs polymorphic. There is definitely room for optimization here, but it does reduce exits. --- yjit.c | 9 +++++++ yjit/bindgen/src/main.rs | 2 +- yjit/src/codegen.rs | 49 ++++++++++++++++++++++++++++++++-- yjit/src/cruby_bindings.inc.rs | 10 +++++++ yjit/src/stats.rs | 3 +++ 5 files changed, 70 insertions(+), 3 deletions(-) diff --git a/yjit.c b/yjit.c index b943277d61167b..aa49b3cfdca4f6 100644 --- a/yjit.c +++ b/yjit.c @@ -716,6 +716,15 @@ rb_get_iseq_body_param_opt_table(const rb_iseq_t *iseq) return iseq->body->param.opt_table; } +VALUE +rb_optimized_call(VALUE *recv, rb_execution_context_t *ec, int argc, VALUE *argv, int kw_splat, VALUE block_handler) +{ + rb_proc_t *proc; + GetProcPtr(recv, proc); + return rb_vm_invoke_proc(ec, proc, argc, argv, kw_splat, block_handler); +} + + // If true, the iseq is leaf and it can be replaced by a single C call. 
bool rb_leaf_invokebuiltin_iseq_p(const rb_iseq_t *iseq) diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index 167ab2a74f20d1..acbbaa613b4024 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -288,7 +288,7 @@ fn main() { .allowlist_function("rb_yjit_get_proc_ptr") .allowlist_function("rb_yjit_exit_locations_dict") .allowlist_function("rb_yjit_icache_invalidate") - + .allowlist_function("rb_optimized_call") // from vm_sync.h .allowlist_function("rb_vm_barrier") diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 46f5ed64d3dd3e..7a1673c5cb520c 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -5557,8 +5557,53 @@ fn gen_send_general( } OPTIMIZED_METHOD_TYPE_CALL => { - gen_counter_incr!(asm, send_optimized_method_call); - return CantCompile; + + if block.is_some() { + gen_counter_incr!(asm, send_call_block); + return CantCompile; + } + + if flags & VM_CALL_KWARG != 0 { + gen_counter_incr!(asm, send_call_kwarg); + return CantCompile; + } + + // Optimize for single ractor mode and avoid runtime check for + // "defined with an un-shareable Proc in a different Ractor" + if !assume_single_ractor_mode(jit, ocb) { + gen_counter_incr!(asm, send_call_multi_ractor); + return CantCompile; + } + + // About to reset the SP, need to load this here + let recv_load = asm.load(recv); + + let sp = asm.lea(ctx.sp_opnd(0)); + + // Write interpreter SP into CFP. + // Needed in case the callee yields to the block. + jit_save_pc(jit, asm); + // Store incremented PC into current control frame in case callee raises. 
+ gen_save_sp(jit, asm, ctx); + + let kw_splat = flags & VM_CALL_KW_SPLAT; + let stack_argument_pointer = asm.lea(Opnd::mem(64, sp, -(argc) * SIZEOF_VALUE_I32)); + + let ret = asm.ccall(rb_optimized_call as *const u8, vec![ + recv_load, + EC, + argc.into(), + stack_argument_pointer, + kw_splat.into(), + VM_BLOCK_HANDLER_NONE.into(), + ]); + + ctx.stack_pop(argc as usize + 1); + + let stack_ret = ctx.stack_push(Type::Unknown); + asm.mov(stack_ret, ret); + return KeepCompiling; + } OPTIMIZED_METHOD_TYPE_BLOCK_CALL => { gen_counter_incr!(asm, send_optimized_method_block_call); diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 3373c6d76e41bc..9ede3030ff796d 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -1480,6 +1480,16 @@ extern "C" { extern "C" { pub fn rb_get_iseq_body_param_opt_table(iseq: *const rb_iseq_t) -> *const VALUE; } +extern "C" { + pub fn rb_optimized_call( + recv: *mut VALUE, + ec: *mut rb_execution_context_t, + argc: ::std::os::raw::c_int, + argv: *mut VALUE, + kw_splat: ::std::os::raw::c_int, + block_handler: VALUE, + ) -> VALUE; +} extern "C" { pub fn rb_leaf_invokebuiltin_iseq_p(iseq: *const rb_iseq_t) -> bool; } diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs index a60fcaf8364f84..b7bbb4ae3ec3e7 100644 --- a/yjit/src/stats.rs +++ b/yjit/src/stats.rs @@ -173,6 +173,9 @@ make_counters! { send_optimized_method, send_optimized_method_call, send_optimized_method_block_call, + send_call_block, + send_call_kwarg, + send_call_multi_ractor, send_missing_method, send_refined_method, send_cfunc_ruby_array_varg, From 1466682a23ce0d7bf1f30d8b9627b4597c037e4d Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Tue, 8 Nov 2022 15:29:30 -0500 Subject: [PATCH 036/104] YJIT: Improve checking message for rustc version (#6693) Previously we didn't have a "checking ...." line for this check and when rustc was too old, we would dump the error message to the console like: checking for rustc...
rustc error: there is no argument named `x` --> <anon>:1:33 | 1 | fn main() { let x = 1; format!("{x}"); } | ^^^ error: aborting due to previous error `configure` checks usually don't do this and this might be confusing. With this commit it now says something like: checking whether rustc is new enough for YJIT... no --- configure.ac | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 45b9a3a631a568..59a6508ad8c785 100644 --- a/configure.ac +++ b/configure.ac @@ -3738,9 +3738,11 @@ AC_CHECK_PROG(RUSTC, [rustc], [rustc], [no]) dnl no ac_tool_prefix dnl check if rustc is recent enough to build YJIT (rustc >= 1.58.0) YJIT_RUSTC_OK=no AS_IF([test "$RUSTC" != "no"], - AS_IF([echo "fn main() { let x = 1; format!(\"{x}\"); }" | $RUSTC - --emit asm=/dev/null], + AC_MSG_CHECKING([whether ${RUSTC} is new enough for YJIT]) + AS_IF([echo "fn main() { let x = 1; format!(\"{x}\"); }" | $RUSTC - --emit asm=/dev/null 2>/dev/null], [YJIT_RUSTC_OK=yes] ) + AC_MSG_RESULT($YJIT_RUSTC_OK) ) dnl check if we can build YJIT on this target platform From 5d95cd99f4ed425c416cc91e8986a3402d5b557a Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Mon, 7 Nov 2022 16:49:48 -0500 Subject: [PATCH 037/104] YJIT: Reset dropped_bytes when patching code We switch to a new page when we detect dropped_bytes flipping from false to true. Previously, when we patch code for invalidation during code gc, we start with the flag being set to true, so we failed to apply patches that straddle pages. We would write out jumps half way and then stop, which left the code corrupted. Reset the flag before patching so we patch across pages properly.
--- yjit/src/asm/mod.rs | 6 ++++++ yjit/src/core.rs | 16 ++++++++++------ yjit/src/invariants.rs | 3 +++ 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs index 497f7687edb4cb..4f3d20f5e5f772 100644 --- a/yjit/src/asm/mod.rs +++ b/yjit/src/asm/mod.rs @@ -463,6 +463,12 @@ impl CodeBlock { self.dropped_bytes } + /// To patch code that straddle pages correctly, we need to start with + /// the dropped bytes flag unset so we can detect when to switch to a new page. + pub fn set_dropped_bytes(&mut self, dropped_bytes: bool) { + self.dropped_bytes = dropped_bytes; + } + /// Allocate a new label with a given name pub fn new_label(&mut self, name: String) -> usize { assert!(!name.contains(' '), "use underscores in label names, not spaces"); diff --git a/yjit/src/core.rs b/yjit/src/core.rs index c0e48e87b2bc20..eca58f813502ad 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -1599,18 +1599,13 @@ fn regenerate_branch(cb: &mut CodeBlock, branch: &mut Branch) { } */ - let old_write_pos = cb.get_write_pos(); - let mut block = branch.block.borrow_mut(); let branch_terminates_block = branch.end_addr == block.end_addr; - - // Rewrite the branch assert!(branch.dst_addrs[0].is_some()); - cb.set_write_ptr(branch.start_addr.unwrap()); + // Generate the branch let mut asm = Assembler::new(); asm.comment("regenerate_branch"); - (branch.gen_fn)( &mut asm, branch.dst_addrs[0].unwrap(), @@ -1618,6 +1613,11 @@ fn regenerate_branch(cb: &mut CodeBlock, branch: &mut Branch) { branch.shape, ); + // Rewrite the branch + let old_write_pos = cb.get_write_pos(); + let old_dropped_bytes = cb.has_dropped_bytes(); + cb.set_write_ptr(branch.start_addr.unwrap()); + cb.set_dropped_bytes(false); asm.compile(cb); branch.end_addr = Some(cb.get_write_ptr()); @@ -1638,6 +1638,7 @@ fn regenerate_branch(cb: &mut CodeBlock, branch: &mut Branch) { if old_write_pos > cb.get_write_pos() { // We rewound cb->write_pos to generate the branch, now restore it. 
cb.set_pos(old_write_pos); + cb.set_dropped_bytes(old_dropped_bytes); } else { // The branch sits at the end of cb and consumed some memory. // Keep cb.write_pos. @@ -2193,10 +2194,12 @@ pub fn invalidate_block_version(blockref: &BlockRef) { // Patch in a jump to block.entry_exit. let cur_pos = cb.get_write_ptr(); + let cur_dropped_bytes = cb.has_dropped_bytes(); cb.set_write_ptr(block_start); let mut asm = Assembler::new(); asm.jmp(block_entry_exit.into()); + cb.set_dropped_bytes(false); asm.compile(&mut cb); assert!( @@ -2206,6 +2209,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) { cb.get_write_ptr().into_i64() - block_start.into_i64(), ); cb.set_write_ptr(cur_pos); + cb.set_dropped_bytes(cur_dropped_bytes); } } diff --git a/yjit/src/invariants.rs b/yjit/src/invariants.rs index 0d8577924cf6ba..cd3214feae546d 100644 --- a/yjit/src/invariants.rs +++ b/yjit/src/invariants.rs @@ -558,6 +558,7 @@ pub extern "C" fn rb_yjit_tracing_invalidate_all() { // Apply patches let old_pos = cb.get_write_pos(); + let old_dropped_bytes = cb.has_dropped_bytes(); let mut patches = CodegenGlobals::take_global_inval_patches(); patches.sort_by_cached_key(|patch| patch.inline_patch_pos.raw_ptr()); let mut last_patch_end = std::ptr::null(); @@ -568,10 +569,12 @@ pub extern "C" fn rb_yjit_tracing_invalidate_all() { asm.jmp(patch.outlined_target_pos.into()); cb.set_write_ptr(patch.inline_patch_pos); + cb.set_dropped_bytes(false); asm.compile(cb); last_patch_end = cb.get_write_ptr().raw_ptr(); } cb.set_pos(old_pos); + cb.set_dropped_bytes(old_dropped_bytes); // Freeze invalidated part of the codepage. 
We only want to wait for // running instances of the code to exit from now on, so we shouldn't From ef1c1ddf68e7219b055ae707b3f8c825d7c787b7 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 9 Nov 2022 11:58:37 +0900 Subject: [PATCH 038/104] Use `rb_sprintf` instead of deprecated `sprintf` --- ext/socket/raddrinfo.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ext/socket/raddrinfo.c b/ext/socket/raddrinfo.c index a4e1ed37a352d6..636d1edda3296c 100644 --- a/ext/socket/raddrinfo.c +++ b/ext/socket/raddrinfo.c @@ -618,8 +618,7 @@ rsock_ipaddr(struct sockaddr *sockaddr, socklen_t sockaddrlen, int norevlookup) family = rb_str_dup(rb_id2str(id)); } else { - sprintf(pbuf, "unknown:%d", sockaddr->sa_family); - family = rb_str_new2(pbuf); + family = rb_sprintf("unknown:%d", sockaddr->sa_family); } addr1 = Qnil; From 558137d5f3b77294949bd042bb99ae12d37faa2f Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 9 Nov 2022 12:04:22 +0900 Subject: [PATCH 039/104] [DOC] Fix missing type name --- include/ruby/random.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ruby/random.h b/include/ruby/random.h index 657b37f034d2c0..e3176dbe6bcc85 100644 --- a/include/ruby/random.h +++ b/include/ruby/random.h @@ -189,7 +189,7 @@ typedef const rb_data_type_t rb_random_data_type_t; * 0, RB_RANDOM_INTERFACE_DEFINE(your), * }; * - * static inline constexpr your_prng = { + * static inline constexpr rb_random_data_type_t your_prng_type = { * "your PRNG", * { rb_random_mark, }, * RB_RANDOM_PARENT, // <<-- HERE From c3de7a3c58bf9a138ff8720ed56c0045d2b8e01d Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Tue, 8 Nov 2022 13:35:37 +0900 Subject: [PATCH 040/104] Make pending_interrupt?(Exception) work A patch from katsu (Katsuhiro Ueno) [Bug #19110] --- test/ruby/test_thread.rb | 8 ++++++++ thread.c | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/test/ruby/test_thread.rb b/test/ruby/test_thread.rb index 
f6156a16fdb949..8ca23e7d025733 100644 --- a/test/ruby/test_thread.rb +++ b/test/ruby/test_thread.rb @@ -1515,4 +1515,12 @@ def test_signal_at_join end }; end + + def test_pending_interrupt? + t = Thread.handle_interrupt(Exception => :never) { Thread.new { Thread.stop } } + t.raise(StandardError) + assert_equal(true, t.pending_interrupt?) + assert_equal(true, t.pending_interrupt?(Exception)) + assert_equal(false, t.pending_interrupt?(ArgumentError)) + end end diff --git a/thread.c b/thread.c index d8925e618e8e6c..624c070877bfc7 100644 --- a/thread.c +++ b/thread.c @@ -1922,7 +1922,7 @@ rb_threadptr_pending_interrupt_include_p(rb_thread_t *th, VALUE err) int i; for (i=0; i<RARRAY_LEN(th->pending_interrupt_queue); i++) { VALUE e = RARRAY_AREF(th->pending_interrupt_queue, i); - if (rb_class_inherited_p(e, err)) { + if (rb_obj_is_kind_of(e, err)) { return TRUE; } } From 230267d1a8f2b8245e911513926c06299ddeebc8 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 9 Nov 2022 20:15:48 +0900 Subject: [PATCH 041/104] Now bison 3.0 or later is required --- parse.y | 2 ++ 1 file changed, 2 insertions(+) diff --git a/parse.y b/parse.y index f6b32d5c979818..526a25ac3b861d 100644 --- a/parse.y +++ b/parse.y @@ -9,6 +9,8 @@ **********************************************************************/ +%require "3.0" + %{ #if !YYPURE From 881bf9a0b8b52c05a5917b95d988ae4b9a391a47 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Mon, 3 Oct 2022 22:21:56 +0900 Subject: [PATCH 042/104] Implement cache optimization for regexp matching --- regexec.c | 469 +++++++++++++++++++++++++++++++++++++++++++++++++++++- regint.h | 8 + 2 files changed, 476 insertions(+), 1 deletion(-) diff --git a/regexec.c b/regexec.c index c77d48b1d9e921..db3881e18a2d66 100644 --- a/regexec.c +++ b/regexec.c @@ -231,6 +231,404 @@ onig_get_capture_tree(OnigRegion* region) } #endif /* USE_CAPTURE_HISTORY */ +#ifdef USE_CACHE_MATCH_OPT + +/* count number of jump-like opcodes for allocation of cache memory.
*/ +/* return -1 if we cannot optimize the regex matching by using cache. */ +int count_num_cache_opcode(regex_t* reg) +{ + int num = 0; + UChar* pbegin; + UChar* p = reg->p; + UChar* pend = p + reg->used; + LengthType len; + RelAddrType addr; + OnigEncoding enc = reg->enc; + + while (p < pend) { + pbegin = p; + switch (*p++) { + case OP_FINISH: + case OP_END: + break; + + case OP_EXACT1: p++; break; + case OP_EXACT2: p += 2; break; + case OP_EXACT3: p += 3; break; + case OP_EXACT4: p += 4; break; + case OP_EXACT5: p += 5; break; + case OP_EXACTN: + GET_LENGTH_INC(len, p); p += len; break; + case OP_EXACTMB2N1: p += 2; break; + case OP_EXACTMB2N2: p += 4; break; + case OP_EXACTMB2N3: p += 6; break; + case OP_EXACTMB2N: + GET_LENGTH_INC(len, p); p += len * 2; break; + case OP_EXACTMB3N: + GET_LENGTH_INC(len, p); p += len * 3; break; + case OP_EXACTMBN: + { + int mb_len; + GET_LENGTH_INC(mb_len, p); + GET_LENGTH_INC(len, p); + p += mb_len * len; + } + break; + + case OP_EXACT1_IC: + len = enclen(enc, p, pend); p += len; break; + case OP_EXACTN_IC: + GET_LENGTH_INC(len, p); p += len; break; + + case OP_CCLASS: + case OP_CCLASS_NOT: + p += SIZE_BITSET; break; + case OP_CCLASS_MB: + case OP_CCLASS_MB_NOT: + case OP_CCLASS_MIX: + case OP_CCLASS_MIX_NOT: + GET_LENGTH_INC(len, p); p += len; break; + + case OP_ANYCHAR: + case OP_ANYCHAR_ML: + break; + case OP_ANYCHAR_STAR: + case OP_ANYCHAR_ML_STAR: + num++; break; + case OP_ANYCHAR_STAR_PEEK_NEXT: + case OP_ANYCHAR_ML_STAR_PEEK_NEXT: + p++; num++; break; + + case OP_WORD: + case OP_NOT_WORD: + case OP_WORD_BOUND: + case OP_NOT_WORD_BOUND: + case OP_WORD_BEGIN: + case OP_WORD_END: + break; + + case OP_ASCII_WORD: + case OP_NOT_ASCII_WORD: + case OP_ASCII_WORD_BOUND: + case OP_NOT_ASCII_WORD_BOUND: + case OP_ASCII_WORD_BEGIN: + case OP_ASCII_WORD_END: + break; + + case OP_BEGIN_BUF: + case OP_END_BUF: + case OP_BEGIN_LINE: + case OP_END_LINE: + case OP_SEMI_END_BUF: + case OP_BEGIN_POSITION: + break; + + case OP_BACKREF1: + 
case OP_BACKREF2: + case OP_BACKREFN: + case OP_BACKREFN_IC: + case OP_BACKREF_MULTI: + case OP_BACKREF_MULTI_IC: + case OP_BACKREF_WITH_LEVEL: + return NUM_CACHE_OPCODE_FAIL; + + case OP_MEMORY_START: + case OP_MEMORY_START_PUSH: + case OP_MEMORY_END_PUSH: + case OP_MEMORY_END_PUSH_REC: + case OP_MEMORY_END: + case OP_MEMORY_END_REC: + p += SIZE_MEMNUM; break; + + case OP_KEEP: + break; + + case OP_FAIL: + break; + case OP_JUMP: + GET_RELADDR_INC(addr, p); + break; + case OP_PUSH: + GET_RELADDR_INC(addr, p); + num++; + break; + case OP_POP: + break; + case OP_PUSH_OR_JUMP_EXACT1: + case OP_PUSH_IF_PEEK_NEXT: + p += SIZE_RELADDR + 1; num++; break; + case OP_REPEAT: + case OP_REPEAT_NG: + case OP_REPEAT_INC: + case OP_REPEAT_INC_NG: + case OP_REPEAT_INC_SG: + case OP_REPEAT_INC_NG_SG: + // TODO: support OP_REPEAT opcodes. + return NUM_CACHE_OPCODE_FAIL; + case OP_NULL_CHECK_START: + case OP_NULL_CHECK_END: + case OP_NULL_CHECK_END_MEMST: + case OP_NULL_CHECK_END_MEMST_PUSH: + p += SIZE_MEMNUM; break; + + case OP_PUSH_POS: + case OP_POP_POS: + break; + case OP_PUSH_POS_NOT: + p += SIZE_RELADDR; break; + case OP_FAIL_POS: + break; + case OP_PUSH_STOP_BT: + case OP_POP_STOP_BT: + return NUM_CACHE_OPCODE_FAIL; + case OP_LOOK_BEHIND: + /* GET_LENGTH_INC(len, p); break; */ + return NUM_CACHE_OPCODE_FAIL; + case OP_PUSH_LOOK_BEHIND_NOT: + // Since optimization assumes a string offset does not back, + // we cannot optimize look-behind opcodes. 
+ /* + GET_RELADDR_INC(addr, p); + GET_LENGTH_INC(len, p); + break; + */ + return NUM_CACHE_OPCODE_FAIL; + case OP_FAIL_LOOK_BEHIND_NOT: + return NUM_CACHE_OPCODE_FAIL; + case OP_PUSH_ABSENT_POS: + case OP_ABSENT_END: + break; + case OP_ABSENT: + p += SIZE_RELADDR; break; + + case OP_CALL: + case OP_RETURN: + return NUM_CACHE_OPCODE_FAIL; + + case OP_CONDITION: + return NUM_CACHE_OPCODE_FAIL; + + case OP_STATE_CHECK_PUSH: + case OP_STATE_CHECK_PUSH_OR_JUMP: + case OP_STATE_CHECK: + case OP_STATE_CHECK_ANYCHAR_STAR: + case OP_STATE_CHECK_ANYCHAR_ML_STAR: + return NUM_CACHE_OPCODE_FAIL; + + case OP_SET_OPTION_PUSH: + case OP_SET_OPTION: + p += SIZE_OPTION; + break; + } + } + + return num; +} + +void init_cache_index_table(regex_t* reg, UChar **table) +{ + UChar* pbegin; + UChar* p = reg->p; + UChar* pend = p + reg->used; + LengthType len; + RelAddrType addr; + OnigEncoding enc = reg->enc; + + while (p < pend) { + pbegin = p; + switch (*p++) { + case OP_FINISH: + case OP_END: + break; + + case OP_EXACT1: p++; break; + case OP_EXACT2: p += 2; break; + case OP_EXACT3: p += 3; break; + case OP_EXACT4: p += 4; break; + case OP_EXACT5: p += 5; break; + case OP_EXACTN: + GET_LENGTH_INC(len, p); p += len; break; + case OP_EXACTMB2N1: p += 2; break; + case OP_EXACTMB2N2: p += 4; break; + case OP_EXACTMB2N3: p += 6; break; + case OP_EXACTMB2N: + GET_LENGTH_INC(len, p); p += len * 2; break; + case OP_EXACTMB3N: + GET_LENGTH_INC(len, p); p += len * 3; break; + case OP_EXACTMBN: + { + int mb_len; + GET_LENGTH_INC(mb_len, p); + GET_LENGTH_INC(len, p); + p += mb_len * len; + } + break; + + case OP_EXACT1_IC: + len = enclen(enc, p, pend); p += len; break; + case OP_EXACTN_IC: + GET_LENGTH_INC(len, p); p += len; break; + + case OP_CCLASS: + case OP_CCLASS_NOT: + p += SIZE_BITSET; break; + case OP_CCLASS_MB: + case OP_CCLASS_MB_NOT: + case OP_CCLASS_MIX: + case OP_CCLASS_MIX_NOT: + GET_LENGTH_INC(len, p); p += len; break; + + case OP_ANYCHAR: + case OP_ANYCHAR_ML: + break; + case 
OP_ANYCHAR_STAR: + case OP_ANYCHAR_ML_STAR: + *table++ = pbegin; break; + case OP_ANYCHAR_STAR_PEEK_NEXT: + case OP_ANYCHAR_ML_STAR_PEEK_NEXT: + p++; + *table++ = pbegin; + break; + + case OP_WORD: + case OP_NOT_WORD: + case OP_WORD_BOUND: + case OP_NOT_WORD_BOUND: + case OP_WORD_BEGIN: + case OP_WORD_END: + break; + + case OP_ASCII_WORD: + case OP_NOT_ASCII_WORD: + case OP_ASCII_WORD_BOUND: + case OP_NOT_ASCII_WORD_BOUND: + case OP_ASCII_WORD_BEGIN: + case OP_ASCII_WORD_END: + break; + + case OP_BEGIN_BUF: + case OP_END_BUF: + case OP_BEGIN_LINE: + case OP_END_LINE: + case OP_SEMI_END_BUF: + case OP_BEGIN_POSITION: + break; + + case OP_BACKREF1: + case OP_BACKREF2: + case OP_BACKREFN: + case OP_BACKREFN_IC: + case OP_BACKREF_MULTI: + case OP_BACKREF_MULTI_IC: + case OP_BACKREF_WITH_LEVEL: + return; + + case OP_MEMORY_START: + case OP_MEMORY_START_PUSH: + case OP_MEMORY_END_PUSH: + case OP_MEMORY_END_PUSH_REC: + case OP_MEMORY_END: + case OP_MEMORY_END_REC: + p += SIZE_MEMNUM; break; + + case OP_KEEP: + break; + + case OP_FAIL: + break; + case OP_JUMP: + GET_RELADDR_INC(addr, p); + break; + case OP_PUSH: + GET_RELADDR_INC(addr, p); + *table++ = pbegin; + break; + case OP_POP: + break; + case OP_PUSH_OR_JUMP_EXACT1: + case OP_PUSH_IF_PEEK_NEXT: + p += SIZE_RELADDR + 1; *table++ = pbegin; break; + case OP_REPEAT: + case OP_REPEAT_NG: + case OP_REPEAT_INC: + case OP_REPEAT_INC_NG: + case OP_REPEAT_INC_SG: + case OP_REPEAT_INC_NG_SG: + // TODO: support OP_REPEAT opcodes. 
+ return; + case OP_NULL_CHECK_START: + case OP_NULL_CHECK_END: + case OP_NULL_CHECK_END_MEMST: + case OP_NULL_CHECK_END_MEMST_PUSH: + p += SIZE_MEMNUM; break; + + case OP_PUSH_POS: + case OP_POP_POS: + break; + case OP_PUSH_POS_NOT: + p += SIZE_RELADDR; break; + case OP_FAIL_POS: + break; + case OP_PUSH_STOP_BT: + case OP_POP_STOP_BT: + return; + case OP_LOOK_BEHIND: + /* GET_LENGTH_INC(len, p); break; */ + return; + case OP_PUSH_LOOK_BEHIND_NOT: + // Since optimization assumes a string offset does not back, + // we cannot optimize look-behind opcodes. + /* + GET_RELADDR_INC(addr, p); + GET_LENGTH_INC(len, p); + break; + */ + return; + case OP_FAIL_LOOK_BEHIND_NOT: + return; + case OP_PUSH_ABSENT_POS: + case OP_ABSENT_END: + break; + case OP_ABSENT: + p += SIZE_RELADDR; break; + + case OP_CALL: + case OP_RETURN: + return; + + case OP_CONDITION: + return; + + case OP_STATE_CHECK_PUSH: + case OP_STATE_CHECK_PUSH_OR_JUMP: + case OP_STATE_CHECK: + case OP_STATE_CHECK_ANYCHAR_STAR: + case OP_STATE_CHECK_ANYCHAR_ML_STAR: + return; + + case OP_SET_OPTION_PUSH: + case OP_SET_OPTION: + p += SIZE_OPTION; + break; + } + } +} + +int find_cache_index_table(UChar** table, int num_cache_table, UChar* p) +{ + int l = 0, r = num_cache_table - 1, m; + + while (l <= r) { + m = (l + r) / 2; + if (table[m] == p) return m; + if (table[m] < p) l = m + 1; + else r = m - 1; + } + + return -1; +} +#endif /* USE_MATCH_CACHE */ + extern void onig_region_clear(OnigRegion* region) { @@ -686,6 +1084,22 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \ STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep) +#define DO_CACHE_MATCH_OPT(enable,p,num_cache_table,table,pos,match_cache) do {\ + if (enable) {\ + int cache_index = find_cache_index_table((table), (num_cache_table), (p));\ + int key = (num_cache_table) * (pos) + cache_index;\ + int index = key >> 3;\ + int mask = 1 << (key & 7);\ + if ((match_cache)[index] & mask) 
{\ + /* fprintf(stderr, "Use cache at %d (%d, %d, %d, %d, %d)\n", (pos), index, mask, key, cache_index, p - pstart); */\ + goto fail;\ + } else {\ + /* fprintf(stderr, "Cache at %d (%d, %d, %d, %d, %d)\n", (pos), index, mask, key, cache_index, p - pstart); */\ + }\ + (match_cache)[index] |= mask;\ + }\ +} while (0) + #define STACK_PUSH_REPEAT(id, pat) do {\ STACK_ENSURE(1);\ stk->type = STK_REPEAT;\ @@ -1448,6 +1862,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, OnigCaseFoldType case_fold_flag = reg->case_fold_flag; UChar *s, *q, *sbegin; UChar *p = reg->p; + UChar *pbegin = p; UChar *pkeep; char *alloca_base; char *xmalloc_base = NULL; @@ -1461,6 +1876,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, unsigned char* state_check_buff = msa->state_check_buff; int num_comb_exp_check = reg->num_comb_exp_check; #endif +#ifdef USE_CACHE_MATCH_OPT + UChar *pstart = reg->p; + int num_fail = 0; + int enable_cache_match_opt = 0; + int num_cache_opcode = NUM_CACHE_OPCODE_UNINIT; + UChar** cache_index_table = (UChar **)0; /* array of pointer to p (regex program) */ + uint8_t *match_cache = (uint8_t *)0; +#endif #if USE_TOKEN_THREADED_VM # define OP_OFFSET 1 @@ -1469,7 +1892,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, # define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK; # define DEFAULT L_DEFAULT: # define NEXT sprev = sbegin; JUMP -# define JUMP RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++]) +# define JUMP pbegin = p; RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++]) RB_GNUC_EXTENSION static const void *oplabels[] = { &&L_OP_FINISH, /* matching process terminator (no more alternative) */ @@ -1645,6 +2068,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, # define VM_LOOP \ while (1) { \ OPCODE_EXEC_HOOK; \ + pbegin = p; \ sbegin = s; \ switch (*p++) { # define VM_LOOP_END } sprev = sbegin; } @@ -2843,6 +3267,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_PUSH) MOP_IN(OP_PUSH); 
GET_RELADDR_INC(addr, p); + DO_CACHE_MATCH_OPT(enable_cache_match_opt, pbegin, num_cache_opcode, cache_index_table, s - sstart, match_cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); MOP_OUT; JUMP; @@ -3173,6 +3598,40 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, sprev = stk->u.state.pstr_prev; pkeep = stk->u.state.pkeep; +#ifdef USE_CACHE_MATCH_OPT + if (++num_fail == (int)(end - sstart) + 1 && num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { + enable_cache_match_opt = 1; + if (num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { + num_cache_opcode = count_num_cache_opcode(reg); + } + if (num_cache_opcode == NUM_CACHE_OPCODE_FAIL || num_cache_opcode == 0) { + enable_cache_match_opt = 0; + goto fail_match_cache_opt; + } + if (cache_index_table == NULL) { + UChar **table = xmalloc(num_cache_opcode * sizeof(UChar*)); + if (table == NULL) { + enable_cache_match_opt = 0; + goto fail_match_cache_opt; + } + init_cache_index_table(reg, table); + cache_index_table = table; + } + // TODO: check arithemetic overflow. + int match_cache_size8 = num_cache_opcode * ((int)(end - sstart) + 1); + int match_cache_size = (match_cache_size8 >> 3) + (match_cache_size8 & 7 ? 
1 : 0); + // fprintf(stderr, "match_cache_size8: %d, match_cache_size: %d\n", match_cache_size8, match_cache_size); + match_cache = (uint8_t*)xmalloc(match_cache_size * sizeof(uint8_t)); + if (match_cache == NULL) { + enable_cache_match_opt = 0; + goto fail_match_cache_opt; + } + xmemset(match_cache, 0, match_cache_size * sizeof(uint8_t)); + /* fprintf(stderr, "enable cache match opt\n"); */ + } + fail_match_cache_opt: +#endif + #ifdef USE_COMBINATION_EXPLOSION_CHECK if (stk->u.state.state_check != 0) { stk->type = STK_STATE_CHECK_MARK; @@ -3191,23 +3650,31 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, finish: STACK_SAVE; if (xmalloc_base) xfree(xmalloc_base); + if (cache_index_table) xfree(cache_index_table); + if (match_cache) xfree(match_cache); return best_len; #ifdef ONIG_DEBUG stack_error: STACK_SAVE; if (xmalloc_base) xfree(xmalloc_base); + if (cache_index_table) xfree(cache_index_table); + if (match_cache) xfree(match_cache); return ONIGERR_STACK_BUG; #endif bytecode_error: STACK_SAVE; if (xmalloc_base) xfree(xmalloc_base); + if (cache_index_table) xfree(cache_index_table); + if (match_cache) xfree(match_cache); return ONIGERR_UNDEFINED_BYTECODE; unexpected_bytecode_error: STACK_SAVE; if (xmalloc_base) xfree(xmalloc_base); + if (cache_index_table) xfree(cache_index_table); + if (match_cache) xfree(match_cache); return ONIGERR_UNEXPECTED_BYTECODE; } diff --git a/regint.h b/regint.h index 00b4b6ed9b16f9..1f0d815db03eb7 100644 --- a/regint.h +++ b/regint.h @@ -41,6 +41,14 @@ /* for byte-code statistical data. */ /* #define ONIG_DEBUG_STATISTICS */ +/* enable matching optimization by using cache. 
*/ +#define USE_CACHE_MATCH_OPT + +#ifdef USE_CACHE_MATCH_OPT +# define NUM_CACHE_OPCODE_FAIL -1 +# define NUM_CACHE_OPCODE_UNINIT -2 +#endif + #if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \ defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \ defined(ONIG_DEBUG_STATISTICS) || defined(ONIG_DEBUG_MEMLEAK) From f07dea16e3f70fbc33c51dbc73ef0f33f965e010 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Tue, 4 Oct 2022 01:02:51 +0900 Subject: [PATCH 043/104] Keep cache optimization info to MatchArg for global matching --- regexec.c | 61 ++++++++++++++++++++++++------------------------------- regint.h | 7 +++++++ 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/regexec.c b/regexec.c index db3881e18a2d66..70fd1a63a6d2a0 100644 --- a/regexec.c +++ b/regexec.c @@ -821,6 +821,11 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from) (msa).best_len = ONIG_MISMATCH;\ (msa).counter = 0;\ (msa).end_time = 0;\ + (msa).enable_cache_match_opt = 0;\ + (msa).num_fail = 0;\ + (msa).num_cache_opcode = NUM_CACHE_OPCODE_UNINIT;\ + (msa).cache_index_table = (UChar **)0;\ + (msa).match_cache = (uint8_t *)0;\ } while(0) #else # define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\ @@ -871,7 +876,11 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from) }\ } while(0) #else /* USE_COMBINATION_EXPLOSION_CHECK */ -# define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) +# define MATCH_ARG_FREE(msa) do {\ + if ((msa).stack_p) xfree((msa).stack_p);\ + if ((msa).cache_index_table) xfree((msa).cache_index_table);\ + if ((msa).match_cache) xfree((msa).match_cache);\ +} while (0) #endif /* USE_COMBINATION_EXPLOSION_CHECK */ @@ -1091,10 +1100,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, int index = key >> 3;\ int mask = 1 << (key & 7);\ if ((match_cache)[index] & mask) {\ - /* fprintf(stderr, "Use cache at %d (%d, %d, %d, %d, %d)\n", (pos), index, mask, key, cache_index, p 
- pstart); */\ goto fail;\ - } else {\ - /* fprintf(stderr, "Cache at %d (%d, %d, %d, %d, %d)\n", (pos), index, mask, key, cache_index, p - pstart); */\ }\ (match_cache)[index] |= mask;\ }\ @@ -1878,11 +1884,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif #ifdef USE_CACHE_MATCH_OPT UChar *pstart = reg->p; - int num_fail = 0; - int enable_cache_match_opt = 0; - int num_cache_opcode = NUM_CACHE_OPCODE_UNINIT; - UChar** cache_index_table = (UChar **)0; /* array of pointer to p (regex program) */ - uint8_t *match_cache = (uint8_t *)0; #endif #if USE_TOKEN_THREADED_VM @@ -3267,7 +3268,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_PUSH) MOP_IN(OP_PUSH); GET_RELADDR_INC(addr, p); - DO_CACHE_MATCH_OPT(enable_cache_match_opt, pbegin, num_cache_opcode, cache_index_table, s - sstart, match_cache); + DO_CACHE_MATCH_OPT(msa->enable_cache_match_opt, pbegin, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); MOP_OUT; JUMP; @@ -3599,35 +3600,33 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, pkeep = stk->u.state.pkeep; #ifdef USE_CACHE_MATCH_OPT - if (++num_fail == (int)(end - sstart) + 1 && num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { - enable_cache_match_opt = 1; - if (num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { - num_cache_opcode = count_num_cache_opcode(reg); + if (++msa->num_fail == (int)(end - str) + 1 && msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { + msa->enable_cache_match_opt = 1; + if (msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { + msa->num_cache_opcode = count_num_cache_opcode(reg); } - if (num_cache_opcode == NUM_CACHE_OPCODE_FAIL || num_cache_opcode == 0) { - enable_cache_match_opt = 0; + if (msa->num_cache_opcode == NUM_CACHE_OPCODE_FAIL || msa->num_cache_opcode == 0) { + msa->enable_cache_match_opt = 0; goto fail_match_cache_opt; } - if (cache_index_table == NULL) { - UChar **table = xmalloc(num_cache_opcode * 
sizeof(UChar*)); + if (msa->cache_index_table == NULL) { + UChar **table = xmalloc(msa->num_cache_opcode * sizeof(UChar*)); if (table == NULL) { - enable_cache_match_opt = 0; + msa->enable_cache_match_opt = 0; goto fail_match_cache_opt; } init_cache_index_table(reg, table); - cache_index_table = table; + msa->cache_index_table = table; } // TODO: check arithemetic overflow. - int match_cache_size8 = num_cache_opcode * ((int)(end - sstart) + 1); + int match_cache_size8 = msa->num_cache_opcode * ((int)(end - str) + 1); int match_cache_size = (match_cache_size8 >> 3) + (match_cache_size8 & 7 ? 1 : 0); - // fprintf(stderr, "match_cache_size8: %d, match_cache_size: %d\n", match_cache_size8, match_cache_size); - match_cache = (uint8_t*)xmalloc(match_cache_size * sizeof(uint8_t)); - if (match_cache == NULL) { - enable_cache_match_opt = 0; + msa->match_cache = (uint8_t*)xmalloc(match_cache_size * sizeof(uint8_t)); + if (msa->match_cache == NULL) { + msa->enable_cache_match_opt = 0; goto fail_match_cache_opt; } - xmemset(match_cache, 0, match_cache_size * sizeof(uint8_t)); - /* fprintf(stderr, "enable cache match opt\n"); */ + xmemset(msa->match_cache, 0, match_cache_size * sizeof(uint8_t)); } fail_match_cache_opt: #endif @@ -3650,31 +3649,23 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, finish: STACK_SAVE; if (xmalloc_base) xfree(xmalloc_base); - if (cache_index_table) xfree(cache_index_table); - if (match_cache) xfree(match_cache); return best_len; #ifdef ONIG_DEBUG stack_error: STACK_SAVE; if (xmalloc_base) xfree(xmalloc_base); - if (cache_index_table) xfree(cache_index_table); - if (match_cache) xfree(match_cache); return ONIGERR_STACK_BUG; #endif bytecode_error: STACK_SAVE; if (xmalloc_base) xfree(xmalloc_base); - if (cache_index_table) xfree(cache_index_table); - if (match_cache) xfree(match_cache); return ONIGERR_UNDEFINED_BYTECODE; unexpected_bytecode_error: STACK_SAVE; if (xmalloc_base) xfree(xmalloc_base); - if (cache_index_table) 
xfree(cache_index_table); - if (match_cache) xfree(match_cache); return ONIGERR_UNEXPECTED_BYTECODE; } diff --git a/regint.h b/regint.h index 1f0d815db03eb7..c310946886dbae 100644 --- a/regint.h +++ b/regint.h @@ -893,6 +893,13 @@ typedef struct { #else uint64_t end_time; #endif +#ifdef USE_CACHE_MATCH_OPT + int num_fail; + int enable_cache_match_opt; + int num_cache_opcode; + UChar** cache_index_table; /* array of pointer to p (regex program) */ + uint8_t *match_cache; +#endif } OnigMatchArg; From 8c9e4d37a3be68574e0a84d2321d694b3f87793f Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Tue, 4 Oct 2022 11:20:49 +0900 Subject: [PATCH 044/104] Fix look-around like operators and cclass --- regexec.c | 68 ++++++++++++++++++++----------------------------------- 1 file changed, 25 insertions(+), 43 deletions(-) diff --git a/regexec.c b/regexec.c index 70fd1a63a6d2a0..e098cc734a273e 100644 --- a/regexec.c +++ b/regexec.c @@ -285,9 +285,13 @@ int count_num_cache_opcode(regex_t* reg) p += SIZE_BITSET; break; case OP_CCLASS_MB: case OP_CCLASS_MB_NOT: + GET_LENGTH_INC(len, p); p += len; break; case OP_CCLASS_MIX: case OP_CCLASS_MIX_NOT: - GET_LENGTH_INC(len, p); p += len; break; + p += SIZE_BITSET; + GET_LENGTH_INC(len, p); + p += len; + break; case OP_ANYCHAR: case OP_ANYCHAR_ML: @@ -373,33 +377,17 @@ int count_num_cache_opcode(regex_t* reg) case OP_PUSH_POS: case OP_POP_POS: - break; case OP_PUSH_POS_NOT: - p += SIZE_RELADDR; break; case OP_FAIL_POS: - break; case OP_PUSH_STOP_BT: case OP_POP_STOP_BT: - return NUM_CACHE_OPCODE_FAIL; case OP_LOOK_BEHIND: - /* GET_LENGTH_INC(len, p); break; */ - return NUM_CACHE_OPCODE_FAIL; case OP_PUSH_LOOK_BEHIND_NOT: - // Since optimization assumes a string offset does not back, - // we cannot optimize look-behind opcodes. 
- /* - GET_RELADDR_INC(addr, p); - GET_LENGTH_INC(len, p); - break; - */ - return NUM_CACHE_OPCODE_FAIL; case OP_FAIL_LOOK_BEHIND_NOT: - return NUM_CACHE_OPCODE_FAIL; case OP_PUSH_ABSENT_POS: case OP_ABSENT_END: - break; case OP_ABSENT: - p += SIZE_RELADDR; break; + return NUM_CACHE_OPCODE_FAIL; case OP_CALL: case OP_RETURN: @@ -427,6 +415,7 @@ int count_num_cache_opcode(regex_t* reg) void init_cache_index_table(regex_t* reg, UChar **table) { + UChar** tstart = table; UChar* pbegin; UChar* p = reg->p; UChar* pend = p + reg->used; @@ -472,11 +461,14 @@ void init_cache_index_table(regex_t* reg, UChar **table) case OP_CCLASS: case OP_CCLASS_NOT: p += SIZE_BITSET; break; - case OP_CCLASS_MB: case OP_CCLASS_MB_NOT: + GET_LENGTH_INC(len, p); p += len; break; case OP_CCLASS_MIX: case OP_CCLASS_MIX_NOT: - GET_LENGTH_INC(len, p); p += len; break; + p += SIZE_BITSET; + GET_LENGTH_INC(len, p); + p += len; + break; case OP_ANYCHAR: case OP_ANYCHAR_ML: @@ -564,33 +556,17 @@ void init_cache_index_table(regex_t* reg, UChar **table) case OP_PUSH_POS: case OP_POP_POS: - break; case OP_PUSH_POS_NOT: - p += SIZE_RELADDR; break; case OP_FAIL_POS: - break; case OP_PUSH_STOP_BT: case OP_POP_STOP_BT: - return; case OP_LOOK_BEHIND: - /* GET_LENGTH_INC(len, p); break; */ - return; case OP_PUSH_LOOK_BEHIND_NOT: - // Since optimization assumes a string offset does not back, - // we cannot optimize look-behind opcodes. 
- /* - GET_RELADDR_INC(addr, p); - GET_LENGTH_INC(len, p); - break; - */ - return; case OP_FAIL_LOOK_BEHIND_NOT: - return; case OP_PUSH_ABSENT_POS: case OP_ABSENT_END: - break; case OP_ABSENT: - p += SIZE_RELADDR; break; + return; case OP_CALL: case OP_RETURN: @@ -1096,13 +1072,19 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define DO_CACHE_MATCH_OPT(enable,p,num_cache_table,table,pos,match_cache) do {\ if (enable) {\ int cache_index = find_cache_index_table((table), (num_cache_table), (p));\ - int key = (num_cache_table) * (pos) + cache_index;\ - int index = key >> 3;\ - int mask = 1 << (key & 7);\ - if ((match_cache)[index] & mask) {\ - goto fail;\ + if (cache_index >= 0) {\ + int key = (num_cache_table) * (pos) + cache_index;\ + int index = key >> 3;\ + int mask = 1 << (key & 7);\ + if ((match_cache)[index] & mask) {\ + /*fprintf(stderr, "Use cache (pos: %d, p: %p, pc: %d, cache index: %d, key: %d, index: %d, mask: %d)\n", pos, p, (int)(p - pstart), cache_index, key, index, mask);*/\ + goto fail;\ + }\ + /*fprintf(stderr, "Add cache (pos: %d, p: %p, pc: %d, cache index: %d, key: %d, index: %d, mask: %d)\n", pos, p, (int)(p - pstart), cache_index, key, index, mask);*/\ + (match_cache)[index] |= mask;\ + } else {\ + /*fprintf(stderr, "Miss cache (pos: %d, p: %p, pc: %d, cache index: %d)\n", pos, p, (int)(p - pstart), cache_index);*/\ }\ - (match_cache)[index] |= mask;\ }\ } while (0) From d84edce689b8634d9a627a2b73434b9115dd5c72 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Tue, 4 Oct 2022 12:28:55 +0900 Subject: [PATCH 045/104] Increment num_fail on OP_POP too --- regexec.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/regexec.c b/regexec.c index e098cc734a273e..16338716479d99 100644 --- a/regexec.c +++ b/regexec.c @@ -1077,13 +1077,13 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, int index = key >> 3;\ int mask = 1 << (key & 7);\ if ((match_cache)[index] 
& mask) {\ - /*fprintf(stderr, "Use cache (pos: %d, p: %p, pc: %d, cache index: %d, key: %d, index: %d, mask: %d)\n", pos, p, (int)(p - pstart), cache_index, key, index, mask);*/\ + /* fprintf(stderr, "Use cache (pos: %d, p: %p, pc: %d, cache index: %d, key: %d, index: %d, mask: %d)\n", pos, p, (int)(p - pstart), cache_index, key, index, mask); */\ goto fail;\ }\ - /*fprintf(stderr, "Add cache (pos: %d, p: %p, pc: %d, cache index: %d, key: %d, index: %d, mask: %d)\n", pos, p, (int)(p - pstart), cache_index, key, index, mask);*/\ + /* fprintf(stderr, "Add cache (pos: %d, p: %p, pc: %d, cache index: %d, key: %d, index: %d, mask: %d)\n", pos, p, (int)(p - pstart), cache_index, key, index, mask); */\ (match_cache)[index] |= mask;\ } else {\ - /*fprintf(stderr, "Miss cache (pos: %d, p: %p, pc: %d, cache index: %d)\n", pos, p, (int)(p - pstart), cache_index);*/\ + /* fprintf(stderr, "Miss cache (pos: %d, p: %p, pc: %d, cache index: %d)\n", pos, p, (int)(p - pstart), cache_index); */\ }\ }\ } while (0) @@ -3291,6 +3291,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_POP) MOP_IN(OP_POP); STACK_POP_ONE; + /* We need to increment num_fail here, for invoking a cache optimization correctly, */ + /* because Onigmo makes a loop, which is pairwise disjoint to the following set, as atomic. 
*/ + msa->num_fail++; MOP_OUT; JUMP; @@ -3582,11 +3585,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, pkeep = stk->u.state.pkeep; #ifdef USE_CACHE_MATCH_OPT - if (++msa->num_fail == (int)(end - str) + 1 && msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { + if (++msa->num_fail >= (int)(end - str) + 1 && msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { msa->enable_cache_match_opt = 1; if (msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { msa->num_cache_opcode = count_num_cache_opcode(reg); } + // fprintf(stderr, "num_cache_opcode: %d\n", msa->num_cache_opcode); if (msa->num_cache_opcode == NUM_CACHE_OPCODE_FAIL || msa->num_cache_opcode == 0) { msa->enable_cache_match_opt = 0; goto fail_match_cache_opt; @@ -3599,6 +3603,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } init_cache_index_table(reg, table); msa->cache_index_table = table; + /* + fprintf(stderr, "table = {%p", table); + for (int i = 1; i < msa->num_cache_opcode; i++) { + fprintf(stderr, ", %p", table+i); + } + fprintf(stderr, "}\n"); + */ } // TODO: check arithemetic overflow. int match_cache_size8 = msa->num_cache_opcode * ((int)(end - str) + 1); From aefb7e5fa5f60d6ad9a6cf56bbf9ac950cee302e Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Tue, 4 Oct 2022 14:36:09 +0900 Subject: [PATCH 046/104] Add static declaration to new functions --- regexec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/regexec.c b/regexec.c index 16338716479d99..07e5ed28c6aec3 100644 --- a/regexec.c +++ b/regexec.c @@ -235,7 +235,7 @@ onig_get_capture_tree(OnigRegion* region) /* count number of jump-like opcodes for allocation of cache memory. */ /* return -1 if we cannot optimize the regex matching by using cache. 
*/ -int count_num_cache_opcode(regex_t* reg) +static int count_num_cache_opcode(regex_t* reg) { int num = 0; UChar* pbegin; @@ -413,7 +413,7 @@ int count_num_cache_opcode(regex_t* reg) return num; } -void init_cache_index_table(regex_t* reg, UChar **table) +static void init_cache_index_table(regex_t* reg, UChar **table) { UChar** tstart = table; UChar* pbegin; @@ -590,7 +590,7 @@ void init_cache_index_table(regex_t* reg, UChar **table) } } -int find_cache_index_table(UChar** table, int num_cache_table, UChar* p) +static int find_cache_index_table(UChar** table, int num_cache_table, UChar* p) { int l = 0, r = num_cache_table - 1, m; From cbabba9c82f3401a67d470ee6ef9213cb6161f57 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Thu, 6 Oct 2022 17:11:33 +0900 Subject: [PATCH 047/104] Add index to the latest NULL_CHECK_STACK for fast matching --- regexec.c | 21 +++++++++++++++++---- regint.h | 1 + 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/regexec.c b/regexec.c index 07e5ed28c6aec3..f54efea774a04f 100644 --- a/regexec.c +++ b/regexec.c @@ -978,6 +978,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define STACK_PUSH_TYPE(stack_type) do {\ STACK_ENSURE(1);\ stk->type = (stack_type);\ + stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\ STACK_INC;\ } while(0) @@ -1047,6 +1048,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, # define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\ STACK_ENSURE(1);\ stk->type = (stack_type);\ + stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\ stk->u.state.pcode = (pat);\ stk->u.state.pstr = (s);\ stk->u.state.pstr_prev = (sprev);\ @@ -1056,6 +1058,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, # define STACK_PUSH_ENSURED(stack_type,pat) do {\ stk->type = (stack_type);\ + stk->null_check = stk == stk_base ? 
0 : (stk-1)->null_check;\ stk->u.state.pcode = (pat);\ STACK_INC;\ } while(0) @@ -1091,6 +1094,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define STACK_PUSH_REPEAT(id, pat) do {\ STACK_ENSURE(1);\ stk->type = STK_REPEAT;\ + stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\ stk->u.repeat.num = (id);\ stk->u.repeat.pcode = (pat);\ stk->u.repeat.count = 0;\ @@ -1100,6 +1104,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define STACK_PUSH_REPEAT_INC(sindex) do {\ STACK_ENSURE(1);\ stk->type = STK_REPEAT_INC;\ + stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\ stk->u.repeat_inc.si = (sindex);\ STACK_INC;\ } while(0) @@ -1107,6 +1112,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define STACK_PUSH_MEM_START(mnum, s) do {\ STACK_ENSURE(1);\ stk->type = STK_MEM_START;\ + stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\ stk->u.mem.num = (mnum);\ stk->u.mem.pstr = (s);\ stk->u.mem.start = mem_start_stk[mnum];\ @@ -1119,6 +1125,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define STACK_PUSH_MEM_END(mnum, s) do {\ STACK_ENSURE(1);\ stk->type = STK_MEM_END;\ + stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\ stk->u.mem.num = (mnum);\ stk->u.mem.pstr = (s);\ stk->u.mem.start = mem_start_stk[mnum];\ @@ -1130,6 +1137,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define STACK_PUSH_MEM_END_MARK(mnum) do {\ STACK_ENSURE(1);\ stk->type = STK_MEM_END_MARK;\ + stk->null_check = stk == stk_base ? 
0 : (stk-1)->null_check;\ stk->u.mem.num = (mnum);\ STACK_INC;\ } while(0) @@ -1171,6 +1179,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ STACK_ENSURE(1);\ stk->type = STK_NULL_CHECK_START;\ + stk->null_check = (OnigStackIndex)(stk - stk_base);\ stk->u.null_check.num = (cnum);\ stk->u.null_check.pstr = (s);\ STACK_INC;\ @@ -1179,6 +1188,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define STACK_PUSH_NULL_CHECK_END(cnum) do {\ STACK_ENSURE(1);\ stk->type = STK_NULL_CHECK_END;\ + stk->null_check = (OnigStackIndex)(stk - stk_base);\ stk->u.null_check.num = (cnum);\ STACK_INC;\ } while(0) @@ -1186,6 +1196,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define STACK_PUSH_CALL_FRAME(pat) do {\ STACK_ENSURE(1);\ stk->type = STK_CALL_FRAME;\ + stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\ stk->u.call_frame.ret_addr = (pat);\ STACK_INC;\ } while(0) @@ -1193,12 +1204,14 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define STACK_PUSH_RETURN do {\ STACK_ENSURE(1);\ stk->type = STK_RETURN;\ + stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\ STACK_INC;\ } while(0) #define STACK_PUSH_ABSENT_POS(start, end) do {\ STACK_ENSURE(1);\ stk->type = STK_ABSENT_POS;\ + stk->null_check = stk == stk_base ? 
0 : (stk-1)->null_check;\ stk->u.absent_pos.abs_pstr = (start);\ stk->u.absent_pos.end_pstr = (end);\ STACK_INC;\ @@ -1362,7 +1375,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, } while(0) #define STACK_NULL_CHECK(isnull,id,s) do {\ - OnigStackType* k = stk;\ + OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \ @@ -1377,7 +1390,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define STACK_NULL_CHECK_REC(isnull,id,s) do {\ int level = 0;\ - OnigStackType* k = stk;\ + OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \ @@ -1397,7 +1410,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, } while(0) #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\ - OnigStackType* k = stk;\ + OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \ @@ -1437,7 +1450,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\ int level = 0;\ - OnigStackType* k = stk;\ + OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \ diff --git a/regint.h b/regint.h index c310946886dbae..9642389936e9e3 100644 --- a/regint.h +++ b/regint.h @@ -828,6 +828,7 @@ typedef intptr_t OnigStackIndex; typedef struct _OnigStackType { unsigned int type; + OnigStackIndex null_check; union { struct { UChar *pcode; /* byte code position */ From 70f8e9efda017420b1a9503763824b87b748d3d5 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Wed, 12 Oct 2022 16:51:00 +0900 Subject: [PATCH 048/104] Enable optimization for ANYCHAR_STAR opcodes --- regexec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/regexec.c b/regexec.c index f54efea774a04f..eb5ccb98087c48 100644 --- a/regexec.c +++ 
b/regexec.c @@ -2579,6 +2579,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR); while (DATA_ENSURE_CHECK1) { + DO_CACHE_MATCH_OPT(msa->enable_cache_match_opt, pbegin, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); STACK_PUSH_ALT(p, s, sprev, pkeep); n = enclen(encode, s, end); DATA_ENSURE(n); @@ -2591,6 +2592,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR); while (DATA_ENSURE_CHECK1) { + DO_CACHE_MATCH_OPT(msa->enable_cache_match_opt, pbegin, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); STACK_PUSH_ALT(p, s, sprev, pkeep); n = enclen(encode, s, end); if (n > 1) { @@ -2609,6 +2611,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { if (*p == *s) { + DO_CACHE_MATCH_OPT(msa->enable_cache_match_opt, pbegin, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); STACK_PUSH_ALT(p + 1, s, sprev, pkeep); } n = enclen(encode, s, end); @@ -2624,6 +2627,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { if (*p == *s) { + DO_CACHE_MATCH_OPT(msa->enable_cache_match_opt, pbegin, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); STACK_PUSH_ALT(p + 1, s, sprev, pkeep); } n = enclen(encode, s, end); From 1134fa40ee65be5b74c05e38a29ef4ac353d5763 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Wed, 12 Oct 2022 17:51:44 +0900 Subject: [PATCH 049/104] Enable optimization for PUSH_IF/OR opcodes --- regexec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/regexec.c b/regexec.c index eb5ccb98087c48..0b2840acdb032f 100644 --- a/regexec.c +++ b/regexec.c @@ -3319,6 +3319,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, 
GET_RELADDR_INC(addr, p); if (*p == *s && DATA_ENSURE_CHECK1) { p++; + DO_CACHE_MATCH_OPT(msa->enable_cache_match_opt, pbegin, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); MOP_OUT; JUMP; @@ -3332,6 +3333,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_RELADDR_INC(addr, p); if (*p == *s) { p++; + DO_CACHE_MATCH_OPT(msa->enable_cache_match_opt, pbegin, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); MOP_OUT; JUMP; From 3c797319810ca86f1f3b0e556f6ac04d56b39c76 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Wed, 19 Oct 2022 16:28:30 +0900 Subject: [PATCH 050/104] Fix to compile when USE_CACHE_MATCH_OPT is disabled --- regexec.c | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/regexec.c b/regexec.c index 0b2840acdb032f..5555b5c7267cf1 100644 --- a/regexec.c +++ b/regexec.c @@ -787,6 +787,23 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from) #define STK_MASK_TO_VOID_TARGET 0x10ff #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ +#ifdef USE_CACHE_MATCH_OPT +#define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa) do {\ + (msa).enable_cache_match_opt = 0;\ + (msa).num_fail = 0;\ + (msa).num_cache_opcode = NUM_CACHE_OPCODE_UNINIT;\ + (msa).cache_index_table = (UChar **)0;\ + (msa).match_cache = (uint8_t *)0;\ +} while(0) +#define MATCH_ARG_FREE_CACHE_MATCH_OPT(msa) do {\ + if ((msa).cache_index_table) xfree((msa).cache_index_table);\ + if ((msa).match_cache) xfree((msa).match_cache);\ +} while(0) +#else +#define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa) +#define MATCH_ARG_FREE_CACHE_MATCH_OPT(msa) +#endif + #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE # define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\ (msa).stack_p = (void* )0;\ @@ -797,11 +814,7 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from) 
(msa).best_len = ONIG_MISMATCH;\ (msa).counter = 0;\ (msa).end_time = 0;\ - (msa).enable_cache_match_opt = 0;\ - (msa).num_fail = 0;\ - (msa).num_cache_opcode = NUM_CACHE_OPCODE_UNINIT;\ - (msa).cache_index_table = (UChar **)0;\ - (msa).match_cache = (uint8_t *)0;\ + MATCH_ARG_INIT_CACHE_MATCH_OPT(msa);\ } while(0) #else # define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\ @@ -812,6 +825,7 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from) (msa).gpos = (arg_gpos);\ (msa).counter = 0;\ (msa).end_time = 0;\ + MATCH_ARG_INIT_CACHE_MATCH_OPT(msa);\ } while(0) #endif @@ -850,12 +864,12 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from) if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ if ((msa).state_check_buff) xfree((msa).state_check_buff);\ }\ + MATCH_ARG_FREE_CACHE_MATCH_OPT(msa);\ } while(0) #else /* USE_COMBINATION_EXPLOSION_CHECK */ # define MATCH_ARG_FREE(msa) do {\ if ((msa).stack_p) xfree((msa).stack_p);\ - if ((msa).cache_index_table) xfree((msa).cache_index_table);\ - if ((msa).match_cache) xfree((msa).match_cache);\ + MATCH_ARG_FREE_CACHE_MATCH_OPT(msa);\ } while (0) #endif /* USE_COMBINATION_EXPLOSION_CHECK */ @@ -1072,6 +1086,8 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \ STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep) +#ifdef USE_CACHE_MATCH_OPT + #define DO_CACHE_MATCH_OPT(enable,p,num_cache_table,table,pos,match_cache) do {\ if (enable) {\ int cache_index = find_cache_index_table((table), (num_cache_table), (p));\ @@ -1091,6 +1107,10 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, }\ } while (0) +#else +#define DO_CACHE_MATCH_OPT(enable,p,num_cache_table,table,pos,match_cache) +#endif /* USE_CACHE_MATCH_OPT */ + #define STACK_PUSH_REPEAT(id, pat) do {\ STACK_ENSURE(1);\ stk->type = STK_REPEAT;\ @@ -3310,7 +3330,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* 
end, STACK_POP_ONE; /* We need to increment num_fail here, for invoking a cache optimization correctly, */ /* because Onigmo makes a loop, which is pairwise disjoint to the following set, as atomic. */ +#ifdef USE_CACHE_MATCH_OPT msa->num_fail++; +#endif MOP_OUT; JUMP; From ea3d9893bf4d6c9b6016d5f7fe5a6cf820376e53 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Wed, 19 Oct 2022 17:28:26 +0900 Subject: [PATCH 051/104] Reduce warnings --- regexec.c | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/regexec.c b/regexec.c index 5555b5c7267cf1..402e116bbd9ed1 100644 --- a/regexec.c +++ b/regexec.c @@ -238,15 +238,12 @@ onig_get_capture_tree(OnigRegion* region) static int count_num_cache_opcode(regex_t* reg) { int num = 0; - UChar* pbegin; UChar* p = reg->p; UChar* pend = p + reg->used; LengthType len; - RelAddrType addr; OnigEncoding enc = reg->enc; while (p < pend) { - pbegin = p; switch (*p++) { case OP_FINISH: case OP_END: @@ -350,10 +347,10 @@ static int count_num_cache_opcode(regex_t* reg) case OP_FAIL: break; case OP_JUMP: - GET_RELADDR_INC(addr, p); + p += SIZE_RELADDR; break; case OP_PUSH: - GET_RELADDR_INC(addr, p); + p += SIZE_RELADDR; num++; break; case OP_POP: @@ -415,12 +412,10 @@ static int count_num_cache_opcode(regex_t* reg) static void init_cache_index_table(regex_t* reg, UChar **table) { - UChar** tstart = table; UChar* pbegin; UChar* p = reg->p; UChar* pend = p + reg->used; LengthType len; - RelAddrType addr; OnigEncoding enc = reg->enc; while (p < pend) { @@ -529,10 +524,10 @@ static void init_cache_index_table(regex_t* reg, UChar **table) case OP_FAIL: break; case OP_JUMP: - GET_RELADDR_INC(addr, p); + p += SIZE_RELADDR; break; case OP_PUSH: - GET_RELADDR_INC(addr, p); + p += SIZE_RELADDR; *table++ = pbegin; break; case OP_POP: @@ -793,7 +788,7 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from) (msa).num_fail = 0;\ (msa).num_cache_opcode = NUM_CACHE_OPCODE_UNINIT;\ 
(msa).cache_index_table = (UChar **)0;\ - (msa).match_cache = (uint8_t *)0;\ + (msa).match_cache = (uint8_t *)0;\ } while(0) #define MATCH_ARG_FREE_CACHE_MATCH_OPT(msa) do {\ if ((msa).cache_index_table) xfree((msa).cache_index_table);\ @@ -1092,17 +1087,13 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, if (enable) {\ int cache_index = find_cache_index_table((table), (num_cache_table), (p));\ if (cache_index >= 0) {\ - int key = (num_cache_table) * (pos) + cache_index;\ + int key = (num_cache_table) * (int)(pos) + cache_index;\ int index = key >> 3;\ int mask = 1 << (key & 7);\ if ((match_cache)[index] & mask) {\ - /* fprintf(stderr, "Use cache (pos: %d, p: %p, pc: %d, cache index: %d, key: %d, index: %d, mask: %d)\n", pos, p, (int)(p - pstart), cache_index, key, index, mask); */\ goto fail;\ }\ - /* fprintf(stderr, "Add cache (pos: %d, p: %p, pc: %d, cache index: %d, key: %d, index: %d, mask: %d)\n", pos, p, (int)(p - pstart), cache_index, key, index, mask); */\ (match_cache)[index] |= mask;\ - } else {\ - /* fprintf(stderr, "Miss cache (pos: %d, p: %p, pc: %d, cache index: %d)\n", pos, p, (int)(p - pstart), cache_index); */\ }\ }\ } while (0) @@ -1897,9 +1888,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, unsigned char* state_check_buff = msa->state_check_buff; int num_comb_exp_check = reg->num_comb_exp_check; #endif -#ifdef USE_CACHE_MATCH_OPT - UChar *pstart = reg->p; -#endif #if USE_TOKEN_THREADED_VM # define OP_OFFSET 1 @@ -3631,7 +3619,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { msa->num_cache_opcode = count_num_cache_opcode(reg); } - // fprintf(stderr, "num_cache_opcode: %d\n", msa->num_cache_opcode); if (msa->num_cache_opcode == NUM_CACHE_OPCODE_FAIL || msa->num_cache_opcode == 0) { msa->enable_cache_match_opt = 0; goto fail_match_cache_opt; @@ -3644,13 +3631,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } 
init_cache_index_table(reg, table); msa->cache_index_table = table; - /* - fprintf(stderr, "table = {%p", table); - for (int i = 1; i < msa->num_cache_opcode; i++) { - fprintf(stderr, ", %p", table+i); - } - fprintf(stderr, "}\n"); - */ } // TODO: check arithemetic overflow. int match_cache_size8 = msa->num_cache_opcode * ((int)(end - str) + 1); From f25bb291b42a45d23cfc8658720c62e1f3a7390f Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Thu, 20 Oct 2022 16:52:23 +0900 Subject: [PATCH 052/104] Support OP_REPEAT and OP_REPEAT_INC --- include/ruby/onigmo.h | 2 + regexec.c | 218 ++++++++++++++++++++++++++++++++++-------- regint.h | 19 +++- 3 files changed, 196 insertions(+), 43 deletions(-) diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h index d71dfb80fb03a0..703f38f5907153 100644 --- a/include/ruby/onigmo.h +++ b/include/ruby/onigmo.h @@ -744,6 +744,8 @@ typedef struct { typedef struct { int lower; int upper; + int base_num; + int inner_num; } OnigRepeatRange; typedef void (*OnigWarnFunc)(const char* s); diff --git a/regexec.c b/regexec.c index 402e116bbd9ed1..4baf8d32b31ba6 100644 --- a/regexec.c +++ b/regexec.c @@ -235,12 +235,15 @@ onig_get_capture_tree(OnigRegion* region) /* count number of jump-like opcodes for allocation of cache memory. */ /* return -1 if we cannot optimize the regex matching by using cache. 
*/ -static int count_num_cache_opcode(regex_t* reg) +static int count_num_cache_opcode(regex_t* reg, int* table_size) { int num = 0; UChar* p = reg->p; UChar* pend = p + reg->used; LengthType len; + MemNumType mem; + MemNumType current_mem = -1; + int current_mem_num = 0; OnigEncoding enc = reg->enc; while (p < pend) { @@ -295,10 +298,10 @@ static int count_num_cache_opcode(regex_t* reg) break; case OP_ANYCHAR_STAR: case OP_ANYCHAR_ML_STAR: - num++; break; + num++; *table_size += 1; break; case OP_ANYCHAR_STAR_PEEK_NEXT: case OP_ANYCHAR_ML_STAR_PEEK_NEXT: - p++; num++; break; + p++; num++; *table_size += 1; break; case OP_WORD: case OP_NOT_WORD: @@ -352,19 +355,54 @@ static int count_num_cache_opcode(regex_t* reg) case OP_PUSH: p += SIZE_RELADDR; num++; + *table_size += 1; break; case OP_POP: break; case OP_PUSH_OR_JUMP_EXACT1: case OP_PUSH_IF_PEEK_NEXT: - p += SIZE_RELADDR + 1; num++; break; + p += SIZE_RELADDR + 1; num++; *table_size += 1; break; case OP_REPEAT: case OP_REPEAT_NG: + if (current_mem != -1) { + // A nested OP_REPEAT is not yet supported. + return NUM_CACHE_OPCODE_FAIL; + } + GET_MEMNUM_INC(mem, p); + p += SIZE_RELADDR; + if (reg->repeat_range[mem].lower == 0) { + num++; + *table_size += 1; + } + reg->repeat_range[mem].base_num = num; + current_mem = mem; + current_mem_num = num; + break; case OP_REPEAT_INC: case OP_REPEAT_INC_NG: + GET_MEMNUM_INC(mem, p); + //fprintf(stderr, "OP_REPEAT %d\n", mem); + if (mem != current_mem) { + // A lone or invalid OP_REPEAT_INC is found. + return NUM_CACHE_OPCODE_FAIL; + } + { + int inner_num = num - current_mem_num; + OnigRepeatRange *repeat_range = &reg->repeat_range[mem]; + repeat_range->inner_num = inner_num; + num -= inner_num; + num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 
1 : repeat_range->upper - repeat_range->lower); + //fprintf(stderr, "lower %d < upper %d\n", repeat_range->lower, repeat_range->upper); + if (repeat_range->lower < repeat_range->upper) { + *table_size += 1; + } + current_mem = -1; + current_mem_num = 0; + } + break; case OP_REPEAT_INC_SG: case OP_REPEAT_INC_NG_SG: - // TODO: support OP_REPEAT opcodes. + // TODO: Support nested OP_REPEAT. return NUM_CACHE_OPCODE_FAIL; case OP_NULL_CHECK_START: case OP_NULL_CHECK_END: @@ -410,12 +448,16 @@ static int count_num_cache_opcode(regex_t* reg) return num; } -static void init_cache_index_table(regex_t* reg, UChar **table) +static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table) { UChar* pbegin; UChar* p = reg->p; UChar* pend = p + reg->used; LengthType len; + MemNumType mem; + MemNumType current_mem = -1; + int num = 0; + int current_mem_num = 0; OnigEncoding enc = reg->enc; while (p < pend) { @@ -470,11 +512,20 @@ static void init_cache_index_table(regex_t* reg, UChar **table) break; case OP_ANYCHAR_STAR: case OP_ANYCHAR_ML_STAR: - *table++ = pbegin; break; + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = current_mem; + num++; + table++; + break; case OP_ANYCHAR_STAR_PEEK_NEXT: case OP_ANYCHAR_ML_STAR_PEEK_NEXT: p++; - *table++ = pbegin; + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = current_mem; + num++; + table++; break; case OP_WORD: @@ -528,17 +579,55 @@ static void init_cache_index_table(regex_t* reg, UChar **table) break; case OP_PUSH: p += SIZE_RELADDR; - *table++ = pbegin; + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = current_mem; + num++; + table++; break; case OP_POP: break; case OP_PUSH_OR_JUMP_EXACT1: case OP_PUSH_IF_PEEK_NEXT: - p += SIZE_RELADDR + 1; *table++ = pbegin; break; + p += SIZE_RELADDR + 1; + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = current_mem; + num++; + table++; + break; case 
OP_REPEAT: case OP_REPEAT_NG: + GET_MEMNUM_INC(mem, p); + p += SIZE_RELADDR; + if (reg->repeat_range[mem].lower == 0) { + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = mem; + num++; + table++; + } + current_mem = mem; + current_mem_num = num; + break; case OP_REPEAT_INC: case OP_REPEAT_INC_NG: + GET_MEMNUM_INC(mem, p); + { + int inner_num = num - current_mem_num; + OnigRepeatRange *repeat_range = &reg->repeat_range[mem]; + if (repeat_range->lower < repeat_range->upper) { + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = mem; + table++; + } + num -= inner_num; + num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower); + current_mem = -1; + current_mem_num = 0; + } + break; case OP_REPEAT_INC_SG: case OP_REPEAT_INC_NG_SG: // TODO: support OP_REPEAT opcodes. @@ -584,20 +673,6 @@ static void init_cache_index_table(regex_t* reg, UChar **table) } } } - -static int find_cache_index_table(UChar** table, int num_cache_table, UChar* p) -{ - int l = 0, r = num_cache_table - 1, m; - - while (l <= r) { - m = (l + r) / 2; - if (table[m] == p) return m; - if (table[m] < p) l = m + 1; - else r = m - 1; - } - - return -1; -} #endif /* USE_MATCH_CACHE */ extern void @@ -787,7 +862,7 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from) (msa).enable_cache_match_opt = 0;\ (msa).num_fail = 0;\ (msa).num_cache_opcode = NUM_CACHE_OPCODE_UNINIT;\ - (msa).cache_index_table = (UChar **)0;\ + (msa).cache_index_table = (OnigCacheIndex *)0;\ (msa).match_cache = (uint8_t *)0;\ } while(0) #define MATCH_ARG_FREE_CACHE_MATCH_OPT(msa) do {\ @@ -1083,23 +1158,70 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #ifdef USE_CACHE_MATCH_OPT -#define DO_CACHE_MATCH_OPT(enable,p,num_cache_table,table,pos,match_cache) do {\ +#define 
DO_CACHE_MATCH_OPT(reg,stk,repeat_stk,enable,p,num_cache_table,num_cache_size,table,pos,match_cache) do {\ if (enable) {\ - int cache_index = find_cache_index_table((table), (num_cache_table), (p));\ + int cache_index = find_cache_index_table((reg), (stk), (repeat_stk), (table), (num_cache_table), (p));\ if (cache_index >= 0) {\ - int key = (num_cache_table) * (int)(pos) + cache_index;\ + int key = (num_cache_size) * (int)(pos) + cache_index;\ int index = key >> 3;\ int mask = 1 << (key & 7);\ if ((match_cache)[index] & mask) {\ + /*fprintf(stderr, "Use cache (p = %p, cache_index = %d, pos = %d, key = %d)\n", p, cache_index, pos, key);*/\ goto fail;\ }\ + /*fprintf(stderr, "Add cache (p = %p, cache_index = %d, pos = %d, key = %d)\n", p, cache_index, pos, key);*/\ (match_cache)[index] |= mask;\ }\ }\ } while (0) + +static int find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackIndex *repeat_stk, OnigCacheIndex* table, int num_cache_table, UChar* p) +{ + int l = 0, r = num_cache_table - 1, m; + OnigCacheIndex* item; + OnigRepeatRange* range; + OnigStackType *stkp; + int count = 0; + int is_inc = *p == OP_REPEAT_INC || *p == OP_REPEAT_INC_NG; + + while (l <= r) { + m = (l + r) / 2; + if (table[m].addr == p) break; + if (table[m].addr < p) l = m + 1; + else r = m - 1; + } + + if (!(0 <= m && m < num_cache_table && table[m].addr == p)) { + return -1; + } + + item = &table[m]; + //fprintf(stderr, "m = %d, outer_repeat = %d, num = %d\n", item->outer_repeat, item->num); + if (item->outer_repeat == -1) { + return item->num; + } + + range = &reg->repeat_range[item->outer_repeat]; + //fprintf(stderr, "inner_num = %d, lower = %d, upper = %d\n", range->inner_num, range->lower, range->upper); + + stkp = &stk[repeat_stk[item->outer_repeat]]; + count = is_inc ?
stkp->u.repeat.count - 1 : stkp->u.repeat.count; + //fprintf(stderr, "count = %d\n", count); + + if (count < range->lower) { + return range->base_num + range->inner_num * count + item->num; + } + + if (range->upper == 0x7fffffff) { + return range->base_num + range->inner_num * range->lower + (is_inc ? 0 : 1) + item->num; + } + + return range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (count - range->lower) + item->num; +} + #else -#define DO_CACHE_MATCH_OPT(enable,p,num_cache_table,table,pos,match_cache) +#define DO_CACHE_MATCH_OPT(reg,stk,repeat_stk,enable,p,num_cache_table,num_cache_size,table,pos,match_cache) #endif /* USE_CACHE_MATCH_OPT */ #define STACK_PUSH_REPEAT(id, pat) do {\ @@ -2587,7 +2709,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR); while (DATA_ENSURE_CHECK1) { - DO_CACHE_MATCH_OPT(msa->enable_cache_match_opt, pbegin, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); STACK_PUSH_ALT(p, s, sprev, pkeep); n = enclen(encode, s, end); DATA_ENSURE(n); @@ -2600,7 +2722,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR); while (DATA_ENSURE_CHECK1) { - DO_CACHE_MATCH_OPT(msa->enable_cache_match_opt, pbegin, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); STACK_PUSH_ALT(p, s, sprev, pkeep); n = enclen(encode, s, end); if (n > 1) { @@ -2619,7 +2741,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); while 
(DATA_ENSURE_CHECK1) { if (*p == *s) { - DO_CACHE_MATCH_OPT(msa->enable_cache_match_opt, pbegin, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); STACK_PUSH_ALT(p + 1, s, sprev, pkeep); } n = enclen(encode, s, end); @@ -2635,7 +2757,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { if (*p == *s) { - DO_CACHE_MATCH_OPT(msa->enable_cache_match_opt, pbegin, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); STACK_PUSH_ALT(p + 1, s, sprev, pkeep); } n = enclen(encode, s, end); @@ -3275,7 +3397,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_PUSH) MOP_IN(OP_PUSH); GET_RELADDR_INC(addr, p); - DO_CACHE_MATCH_OPT(msa->enable_cache_match_opt, pbegin, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); MOP_OUT; JUMP; @@ -3329,7 +3451,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_RELADDR_INC(addr, p); if (*p == *s && DATA_ENSURE_CHECK1) { p++; - DO_CACHE_MATCH_OPT(msa->enable_cache_match_opt, pbegin, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); STACK_PUSH_ALT(p 
+ addr, s, sprev, pkeep); MOP_OUT; JUMP; @@ -3343,7 +3465,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_RELADDR_INC(addr, p); if (*p == *s) { p++; - DO_CACHE_MATCH_OPT(msa->enable_cache_match_opt, pbegin, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); MOP_OUT; JUMP; @@ -3362,6 +3484,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_REPEAT(mem, p); if (reg->repeat_range[mem].lower == 0) { + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); } } @@ -3378,6 +3501,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_REPEAT(mem, p); if (reg->repeat_range[mem].lower == 0) { + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); STACK_PUSH_ALT(p, s, sprev, pkeep); p += addr; } @@ -3396,6 +3520,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, /* end of repeat. Nothing to do. */ } else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { + if (*pbegin == OP_REPEAT_INC) { + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); + } STACK_PUSH_ALT(p, s, sprev, pkeep); p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. 
*/ } @@ -3426,6 +3553,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, UChar* pcode = stkp->u.repeat.pcode; STACK_PUSH_REPEAT_INC(si); + if (*pbegin == OP_REPEAT_INC_NG) { + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); + } STACK_PUSH_ALT(pcode, s, sprev, pkeep); } else { @@ -3615,22 +3745,24 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #ifdef USE_CACHE_MATCH_OPT if (++msa->num_fail >= (int)(end - str) + 1 && msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { + int table_size = 0; msa->enable_cache_match_opt = 1; if (msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { - msa->num_cache_opcode = count_num_cache_opcode(reg); + msa->num_cache_opcode = count_num_cache_opcode(reg, &table_size); } if (msa->num_cache_opcode == NUM_CACHE_OPCODE_FAIL || msa->num_cache_opcode == 0) { msa->enable_cache_match_opt = 0; goto fail_match_cache_opt; } if (msa->cache_index_table == NULL) { - UChar **table = xmalloc(msa->num_cache_opcode * sizeof(UChar*)); + OnigCacheIndex *table = (OnigCacheIndex *)xmalloc(table_size * sizeof(OnigCacheIndex)); if (table == NULL) { msa->enable_cache_match_opt = 0; goto fail_match_cache_opt; } init_cache_index_table(reg, table); msa->cache_index_table = table; + msa->num_cache_table = table_size; } // TODO: check arithemetic overflow. 
int match_cache_size8 = msa->num_cache_opcode * ((int)(end - str) + 1); @@ -3641,6 +3773,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail_match_cache_opt; } xmemset(msa->match_cache, 0, match_cache_size * sizeof(uint8_t)); + + /* + fprintf(stderr, "total_cache = %d\n", msa->num_cache_opcode); + fprintf(stderr, "table_size = %d\n", msa->num_cache_table); + fprintf(stderr, "table = {\n"); + for (int i = 0; i < msa->num_cache_table; i++) { + fprintf(stderr, " {%p, %d, %d},\n", msa->cache_index_table[i].addr, msa->cache_index_table[i].num, msa->cache_index_table[i].outer_repeat); + } + fprintf(stderr, "}\n"); + */ } fail_match_cache_opt: #endif diff --git a/regint.h b/regint.h index 9642389936e9e3..12b5d5c70a5544 100644 --- a/regint.h +++ b/regint.h @@ -872,6 +872,14 @@ typedef struct _OnigStackType { } u; } OnigStackType; +#ifdef USE_CACHE_MATCH_OPT +typedef struct { + UChar *addr; + int num; + int outer_repeat; +} OnigCacheIndex; +#endif + typedef struct { void* stack_p; size_t stack_n; @@ -895,11 +903,12 @@ typedef struct { uint64_t end_time; #endif #ifdef USE_CACHE_MATCH_OPT - int num_fail; - int enable_cache_match_opt; - int num_cache_opcode; - UChar** cache_index_table; /* array of pointer to p (regex program) */ - uint8_t *match_cache; + int num_fail; + int enable_cache_match_opt; + int num_cache_opcode; + int num_cache_table; + OnigCacheIndex *cache_index_table; + uint8_t *match_cache; #endif } OnigMatchArg; From 37613fea1657b1a0732501657275bc03e8e0ebc4 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Thu, 27 Oct 2022 22:39:56 +0900 Subject: [PATCH 053/104] Clear cache on OP_NULL_CHECK_END_MEMST --- regexec.c | 112 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 95 insertions(+), 17 deletions(-) diff --git a/regexec.c b/regexec.c index 4baf8d32b31ba6..fd308d1883ef33 100644 --- a/regexec.c +++ b/regexec.c @@ -1175,7 +1175,6 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, }\ } while 
(0) - static int find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackIndex *repeat_stk, OnigCacheIndex* table, int num_cache_table, UChar* p) { int l = 0, r = num_cache_table - 1, m; @@ -1197,17 +1196,14 @@ static int find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackInd } item = &table[m]; - //fprintf(stderr, "m = %d, outer_repeat = %d, num = %d\n", item->outer_repeat, item->num); if (item->outer_repeat == -1) { return item->num; } range = &reg->repeat_range[item->outer_repeat]; - //fprintf(stderr, "inner_num = %d, lower = %d, upper = %d\n", range->inner_num, range->lower, range->upper); stkp = &stk[repeat_stk[item->outer_repeat]]; count = is_inc ? stkp->u.repeat.count - 1 : stkp->u.repeat.count; - //fprintf(stderr, "count = %d\n", count); if (count < range->lower) { return range->base_num + range->inner_num * count + item->num; @@ -1220,6 +1216,64 @@ static int find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackInd return range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (count - range->lower) + item->num; } +static void reset_match_cache(regex_t* reg, UChar* pbegin, UChar* pend, int pos, uint8_t* match_cache, OnigCacheIndex *table, int num_cache_size, int num_cache_table) { + int l = 0, r = num_cache_table - 1, m1, m2; + int is_inc = *pend == OP_REPEAT_INC || *pend == OP_REPEAT_INC_NG; + OnigCacheIndex *item1, *item2; + int k1, k2; + + while (l <= r) { + m1 = (l + r) / 2; + if (table[m1].addr == pbegin) break; + if (table[m1].addr < pbegin) l = m1 + 1; + else r = m1 - 1; + } + + l = 0, r = num_cache_table - 1; + while (l <= r) { + m2 = (l + r) / 2; + if (table[m2].addr == pend) break; + if (table[m2].addr < pend) l = m2 + 1; + else r = m2 - 1; + } + + if (table[m1].addr < pbegin && m1 + 1 < num_cache_table) m1++; + if (table[m2].addr > pend && m2 - 1 > 0) m2--; + + item1 = &table[m1]; + item2 = &table[m2]; + + if (item1->outer_repeat < 0) k1 = item1->num; + else k1 =
reg->repeat_range[item1->outer_repeat].base_num + item1->num; + + if (item2->outer_repeat < 0) k2 = item2->num; + else { + OnigRepeatRange *range = &reg->repeat_range[item2->outer_repeat]; + if (range->upper == 0x7fffffff) k2 = range->base_num + range->inner_num * range->lower + (is_inc ? 0 : 1) + item2->num; + else k2 = range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (range->upper - range->lower - (is_inc ? 1 : 0)) + item2->num; + } + + int base = pos * num_cache_size; + k1 += base; + k2 += base; + + if ((k1 >> 3) == (k2 >> 3)) { + match_cache[k1 >> 3] &= ((1 << (8 - (k2 & 7) - 1)) - 1 << ((k2 & 7) + 1)) | ((1 << (k1 & 7)) - 1); + } else { + int i = k1 >> 3; + if (k1 & 7) { + match_cache[k1 >> 3] &= (1 << ((k1 & 7) - 1)) - 1; + i++; + } + if (i < (k2 >> 3)) { + xmemset(&match_cache[i], 0, (k2 >> 3) - i); + if (k2 & 7) { + match_cache[k2 >> 3] &= ((1 << (8 - (k2 & 7) - 1)) - 1 << ((k2 & 7) + 1)); + } + } + } +} + #else #define DO_CACHE_MATCH_OPT(reg,stk,repeat_stk,enable,p,num_cache_table,num_cache_size,table,pos,match_cache) #endif /* USE_CACHE_MATCH_OPT */ @@ -1542,7 +1596,7 @@ static int find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackInd }\ } while(0) -#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\ +#define STACK_NULL_CHECK_MEMST(isnull,ischange,id,s,reg) do {\ OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\ while (1) {\ k--;\ @@ -1559,14 +1613,14 @@ static int find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackInd while (k < stk) {\ if (k->type == STK_MEM_START) {\ if (k->u.mem.end == INVALID_STACK_INDEX) {\ - (isnull) = 0; break;\ + (isnull) = 0; (ischange) = 1; break;\ }\ if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ else\ endp = (UChar* )k->u.mem.end;\ if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ - (isnull) = 0; break;\ + (isnull) = 0; (ischange) = 1; break;\ }\ else if (endp != s) {\ (isnull) = -1; /* empty, but position changed
*/ \ @@ -2709,7 +2763,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR); while (DATA_ENSURE_CHECK1) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); STACK_PUSH_ALT(p, s, sprev, pkeep); n = enclen(encode, s, end); DATA_ENSURE(n); @@ -2722,7 +2776,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR); while (DATA_ENSURE_CHECK1) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); STACK_PUSH_ALT(p, s, sprev, pkeep); n = enclen(encode, s, end); if (n > 1) { @@ -2757,7 +2811,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { if (*p == *s) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); STACK_PUSH_ALT(p + 1, s, sprev, pkeep); } n = enclen(encode, s, end); @@ -3344,9 +3398,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST); { int 
isnull; + int ischanged = 0; // set 1 when a loop is empty but memory status is changed. GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); + STACK_NULL_CHECK_MEMST(isnull, ischanged, mem, s, reg); if (isnull) { # ifdef ONIG_DEBUG_MATCH fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n", @@ -3355,6 +3410,29 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (isnull == -1) goto fail; goto null_check_found; } +# ifdef USE_CACHE_MATCH_OPT + if (ischanged && msa->enable_cache_match_opt) { + RelAddrType rel; + OnigUChar *addr; + int mem; + UChar* tmp = p; + switch (*tmp++) { + case OP_JUMP: + case OP_PUSH: + GET_RELADDR_INC(rel, tmp); + addr = tmp + rel; + break; + case OP_REPEAT_INC: + case OP_REPEAT_INC_NG: + GET_MEMNUM_INC(mem, tmp); + addr = STACK_AT(repeat_stk[mem])->u.repeat.pcode; + break; + default: + goto unexpected_bytecode_error; + } + reset_match_cache(reg, addr, pbegin, (int)(s - str), msa->match_cache, msa->cache_index_table, msa->num_cache_table ,msa->num_cache_opcode); + } +# endif } MOP_OUT; JUMP; @@ -3397,7 +3475,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_PUSH) MOP_IN(OP_PUSH); GET_RELADDR_INC(addr, p); - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); MOP_OUT; JUMP; @@ -3451,7 +3529,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_RELADDR_INC(addr, p); if (*p == *s && DATA_ENSURE_CHECK1) { p++; - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); + 
DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); MOP_OUT; JUMP; @@ -3465,7 +3543,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_RELADDR_INC(addr, p); if (*p == *s) { p++; - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); MOP_OUT; JUMP; @@ -3501,7 +3579,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_REPEAT(mem, p); if (reg->repeat_range[mem].lower == 0) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); STACK_PUSH_ALT(p, s, sprev, pkeep); p += addr; } @@ -3521,7 +3599,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { if (*pbegin == OP_REPEAT_INC) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); } STACK_PUSH_ALT(p, s, sprev, pkeep); p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. 
*/ @@ -3554,7 +3632,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_REPEAT_INC(si); if (*pbegin == OP_REPEAT_INC_NG) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); } STACK_PUSH_ALT(pcode, s, sprev, pkeep); } From ff2998a86c5f4238462715eaf6b6bfd70519349b Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Sun, 6 Nov 2022 21:01:10 +0900 Subject: [PATCH 054/104] Remove debug printf --- regexec.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/regexec.c b/regexec.c index fd308d1883ef33..0bd4c8a96c9756 100644 --- a/regexec.c +++ b/regexec.c @@ -1166,10 +1166,8 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, int index = key >> 3;\ int mask = 1 << (key & 7);\ if ((match_cache)[index] & mask) {\ - /*fprintf(stderr, "Use cache (p = %p, cache_index = %d, pos = %d, key = %d)\n", p, cache_index, pos, key);*/\ goto fail;\ }\ - /*fprintf(stderr, "Add cache (p = %p, cache_index = %d, pos = %d, key = %d)\n", p, cache_index, pos, key);*/\ (match_cache)[index] |= mask;\ }\ }\ @@ -3851,16 +3849,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail_match_cache_opt; } xmemset(msa->match_cache, 0, match_cache_size * sizeof(uint8_t)); - - /* - fprintf(stderr, "total_cache = %d\n", msa->num_cache_opcode); - fprintf(stderr, "table_size = %d\n", msa->num_cache_table); - fprintf(stderr, "table = {\n"); - for (int i = 0; i < msa->num_cache_table; i++) { - fprintf(stderr, " {%p, %d, %d},\n", msa->cache_index_table[i].addr, msa->cache_index_table[i].num, msa->cache_index_table[i].outer_repeat); - } - fprintf(stderr, "}\n"); - */ } fail_match_cache_opt: #endif From 
80d145fa52e9077dc7812a94f3fc41208fc58bd7 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Mon, 7 Nov 2022 09:03:12 +0900 Subject: [PATCH 055/104] Fix and add regexp tests --- test/ruby/test_regexp.rb | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index 3d3cdbb46e2054..5daf67a4365d0a 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -1589,7 +1589,7 @@ def test_s_timeout t = Time.now assert_raise_with_message(Regexp::TimeoutError, "regexp match timeout") do # A typical ReDoS case - /^(a*)*$/ =~ "a" * 1000000 + "x" + /^(a*)*\1$/ =~ "a" * 1000000 + "x" end t = Time.now - t @@ -1631,7 +1631,7 @@ def per_instance_redos_test(global_timeout, per_instance_timeout, expected_timeo Regexp.timeout = global_timeout - re = Regexp.new("^a*b?a*$", timeout: per_instance_timeout) + re = Regexp.new("^(a*)\\1b?a*$", timeout: per_instance_timeout) assert_equal(per_instance_timeout, re.timeout) t = Time.now @@ -1673,4 +1673,24 @@ def test_timeout_corner_cases assert_raise(ArgumentError) { Regexp.new("foo", timeout: -1) } end; end + + def test_cache_optimization_exponential + assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}") + begin; + timeout = EnvUtil.apply_timeout_scale(0.2) + Regexp.timeout = timeout + + assert_nil(/^(a*)*$/ =~ "a" * 1000000 + "x") + end; + end + + def test_cache_optimization_square + assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}") + begin; + timeout = EnvUtil.apply_timeout_scale(0.2) + Regexp.timeout = timeout + + assert_nil(/^a*b?a*$/ =~ "a" * 1000000 + "x") + end; + end end From 71c76d1bfad90c63bcd74fcd2447e52189de8c49 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Mon, 7 Nov 2022 11:52:38 +0900 Subject: [PATCH 056/104] Update timeout seconds --- test/ruby/test_regexp.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index 5daf67a4365d0a..f11d1fb616372a 
100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -1677,7 +1677,7 @@ def test_timeout_corner_cases def test_cache_optimization_exponential assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}") begin; - timeout = EnvUtil.apply_timeout_scale(0.2) + timeout = EnvUtil.apply_timeout_scale(2) Regexp.timeout = timeout assert_nil(/^(a*)*$/ =~ "a" * 1000000 + "x") From 0a14825f731afc468481569d332027abdd58cd4e Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Tue, 8 Nov 2022 11:58:47 +0900 Subject: [PATCH 057/104] Update timeout seconds for square test --- test/ruby/test_regexp.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index f11d1fb616372a..ee4c2cef5b22cb 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -1687,7 +1687,7 @@ def test_cache_optimization_exponential def test_cache_optimization_square assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}") begin; - timeout = EnvUtil.apply_timeout_scale(0.2) + timeout = EnvUtil.apply_timeout_scale(2) Regexp.timeout = timeout assert_nil(/^a*b?a*$/ =~ "a" * 1000000 + "x") From 22294731a82642ac63ed6708e0015361d05d6677 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Tue, 8 Nov 2022 15:05:52 +0900 Subject: [PATCH 058/104] Refactor field names --- include/ruby/onigmo.h | 5 +- regexec.c | 303 ++++++++++++++++++++---------------------- regint.h | 23 ++-- 3 files changed, 162 insertions(+), 169 deletions(-) diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h index 703f38f5907153..40cbedd4df6a68 100644 --- a/include/ruby/onigmo.h +++ b/include/ruby/onigmo.h @@ -744,8 +744,9 @@ typedef struct { typedef struct { int lower; int upper; - int base_num; - int inner_num; + /* These fields are for cache optimization. 
*/ + int base_point; + int inner_point; } OnigRepeatRange; typedef void (*OnigWarnFunc)(const char* s); diff --git a/regexec.c b/regexec.c index 0bd4c8a96c9756..b9f8411faf13ae 100644 --- a/regexec.c +++ b/regexec.c @@ -233,19 +233,17 @@ onig_get_capture_tree(OnigRegion* region) #ifdef USE_CACHE_MATCH_OPT -/* count number of jump-like opcodes for allocation of cache memory. */ -/* return -1 if we cannot optimize the regex matching by using cache. */ -static int count_num_cache_opcode(regex_t* reg, int* table_size) +static int count_num_cache_index(regex_t* reg) { - int num = 0; - UChar* p = reg->p; - UChar* pend = p + reg->used; - LengthType len; - MemNumType mem; - MemNumType current_mem = -1; - int current_mem_num = 0; + UChar* p = reg->p; + UChar* pend = p + reg->used; + LengthType len; + MemNumType mem; OnigEncoding enc = reg->enc; + int num_cache_index = 0; + MemNumType current_repeat = NO_OUTER_REPEAT; + while (p < pend) { switch (*p++) { case OP_FINISH: @@ -298,10 +296,10 @@ static int count_num_cache_opcode(regex_t* reg, int* table_size) break; case OP_ANYCHAR_STAR: case OP_ANYCHAR_ML_STAR: - num++; *table_size += 1; break; + num_cache_index++; break; case OP_ANYCHAR_STAR_PEEK_NEXT: case OP_ANYCHAR_ML_STAR_PEEK_NEXT: - p++; num++; *table_size += 1; break; + p++; num_cache_index++; break; case OP_WORD: case OP_NOT_WORD: @@ -334,7 +332,7 @@ static int count_num_cache_opcode(regex_t* reg, int* table_size) case OP_BACKREF_MULTI: case OP_BACKREF_MULTI_IC: case OP_BACKREF_WITH_LEVEL: - return NUM_CACHE_OPCODE_FAIL; + return NUM_CACHE_INDEX_FAIL; case OP_MEMORY_START: case OP_MEMORY_START_PUSH: @@ -354,56 +352,45 @@ static int count_num_cache_opcode(regex_t* reg, int* table_size) break; case OP_PUSH: p += SIZE_RELADDR; - num++; - *table_size += 1; + num_cache_index++; break; case OP_POP: break; case OP_PUSH_OR_JUMP_EXACT1: case OP_PUSH_IF_PEEK_NEXT: - p += SIZE_RELADDR + 1; num++; *table_size += 1; break; + p += SIZE_RELADDR + 1; num_cache_index++; break; case 
OP_REPEAT: case OP_REPEAT_NG: - if (current_mem != -1) { + if (current_repeat != NO_OUTER_REPEAT) { // A nested OP_REPEAT is not yet supported. - return NUM_CACHE_OPCODE_FAIL; + return NUM_CACHE_INDEX_FAIL; } GET_MEMNUM_INC(mem, p); p += SIZE_RELADDR; if (reg->repeat_range[mem].lower == 0) { - num++; - *table_size += 1; + num_cache_index++; } - reg->repeat_range[mem].base_num = num; - current_mem = mem; - current_mem_num = num; + current_repeat = mem; break; case OP_REPEAT_INC: case OP_REPEAT_INC_NG: GET_MEMNUM_INC(mem, p); - //fprintf(stderr, "OP_REPEAT %d\n", mem); - if (mem != current_mem) { + if (mem != current_repeat) { // A lone or invalid OP_REPEAT_INC is found. - return NUM_CACHE_OPCODE_FAIL; + return NUM_CACHE_INDEX_FAIL; } { - int inner_num = num - current_mem_num; OnigRepeatRange *repeat_range = &reg->repeat_range[mem]; - repeat_range->inner_num = inner_num; - num -= inner_num; - num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower); - //fprintf(stderr, "lower %d < upper %d\n", repeat_range->lower, repeat_range->upper); if (repeat_range->lower < repeat_range->upper) { - *table_size += 1; + num_cache_index++; } - current_mem = -1; - current_mem_num = 0; + current_repeat = NO_OUTER_REPEAT; } break; case OP_REPEAT_INC_SG: case OP_REPEAT_INC_NG_SG: // TODO: Support nested OP_REPEAT.
- return NUM_CACHE_OPCODE_FAIL; + return NUM_CACHE_INDEX_FAIL; case OP_NULL_CHECK_START: case OP_NULL_CHECK_END: case OP_NULL_CHECK_END_MEMST: @@ -422,21 +409,21 @@ static int count_num_cache_opcode(regex_t* reg, int* table_size) case OP_PUSH_ABSENT_POS: case OP_ABSENT_END: case OP_ABSENT: - return NUM_CACHE_OPCODE_FAIL; + return NUM_CACHE_INDEX_FAIL; case OP_CALL: case OP_RETURN: - return NUM_CACHE_OPCODE_FAIL; + return NUM_CACHE_INDEX_FAIL; case OP_CONDITION: - return NUM_CACHE_OPCODE_FAIL; + return NUM_CACHE_INDEX_FAIL; case OP_STATE_CHECK_PUSH: case OP_STATE_CHECK_PUSH_OR_JUMP: case OP_STATE_CHECK: case OP_STATE_CHECK_ANYCHAR_STAR: case OP_STATE_CHECK_ANYCHAR_ML_STAR: - return NUM_CACHE_OPCODE_FAIL; + return NUM_CACHE_INDEX_FAIL; case OP_SET_OPTION_PUSH: case OP_SET_OPTION: @@ -445,21 +432,22 @@ static int count_num_cache_opcode(regex_t* reg, int* table_size) } } - return num; + return num_cache_index; } -static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table) +static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) { - UChar* pbegin; - UChar* p = reg->p; - UChar* pend = p + reg->used; - LengthType len; - MemNumType mem; - MemNumType current_mem = -1; - int num = 0; - int current_mem_num = 0; + UChar* pbegin; + UChar* p = reg->p; + UChar* pend = p + reg->used; + LengthType len; + MemNumType mem; OnigEncoding enc = reg->enc; + int num_cache_point = 0; + MemNumType current_repeat = -1; + int current_repeat_base_point = 0; + while (p < pend) { pbegin = p; switch (*p++) { @@ -512,20 +500,20 @@ static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table) break; case OP_ANYCHAR_STAR: case OP_ANYCHAR_ML_STAR: - table->addr = pbegin; - table->num = num - current_mem_num; - table->outer_repeat = current_mem; - num++; - table++; + cache_index->addr = pbegin; + cache_index->point = num_cache_point - current_repeat_base_point; + cache_index->outer_repeat = current_repeat; + num_cache_point++; + cache_index++; break; case 
OP_ANYCHAR_STAR_PEEK_NEXT: case OP_ANYCHAR_ML_STAR_PEEK_NEXT: p++; - table->addr = pbegin; - table->num = num - current_mem_num; - table->outer_repeat = current_mem; - num++; - table++; + cache_index->addr = pbegin; + cache_index->point = num_cache_point - current_repeat_base_point; + cache_index->outer_repeat = current_repeat; + num_cache_point++; + cache_index++; break; case OP_WORD: @@ -559,7 +547,7 @@ static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table) case OP_BACKREF_MULTI: case OP_BACKREF_MULTI_IC: case OP_BACKREF_WITH_LEVEL: - return; + return NUM_CACHE_INDEX_FAIL; case OP_MEMORY_START: case OP_MEMORY_START_PUSH: @@ -579,59 +567,61 @@ static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table) break; case OP_PUSH: p += SIZE_RELADDR; - table->addr = pbegin; - table->num = num - current_mem_num; - table->outer_repeat = current_mem; - num++; - table++; + cache_index->addr = pbegin; + cache_index->point = num_cache_point - current_repeat_base_point; + cache_index->outer_repeat = current_repeat; + num_cache_point++; + cache_index++; break; case OP_POP: break; case OP_PUSH_OR_JUMP_EXACT1: case OP_PUSH_IF_PEEK_NEXT: p += SIZE_RELADDR + 1; - table->addr = pbegin; - table->num = num - current_mem_num; - table->outer_repeat = current_mem; - num++; - table++; + cache_index->addr = pbegin; + cache_index->point = num_cache_point - current_repeat_base_point; + cache_index->outer_repeat = current_repeat; + num_cache_point++; + cache_index++; break; case OP_REPEAT: case OP_REPEAT_NG: GET_MEMNUM_INC(mem, p); p += SIZE_RELADDR; if (reg->repeat_range[mem].lower == 0) { - table->addr = pbegin; - table->num = num - current_mem_num; - table->outer_repeat = mem; - num++; - table++; + cache_index->addr = pbegin; + cache_index->point = num_cache_point - current_repeat_base_point; + cache_index->outer_repeat = current_repeat; + num_cache_point++; + cache_index++; } - current_mem = mem; - current_mem_num = num; + reg->repeat_range[mem].base_point = 
num_cache_point; + current_repeat = mem; + current_repeat_base_point = num_cache_point; break; case OP_REPEAT_INC: case OP_REPEAT_INC_NG: - GET_MEMNUM_INC(mem, p); + GET_MEMNUM_INC(mem, p); { - int inner_num = num - current_mem_num; + int inner_point = num_cache_point - current_repeat_base_point; OnigRepeatRange *repeat_range = ®->repeat_range[mem]; if (repeat_range->lower < repeat_range->upper) { - table->addr = pbegin; - table->num = num - current_mem_num; - table->outer_repeat = mem; - table++; + cache_index->addr = pbegin; + cache_index->point = num_cache_point - current_repeat_base_point; + cache_index->outer_repeat = mem; + cache_index++; } - num -= inner_num; - num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower); - current_mem = -1; - current_mem_num = 0; + repeat_range->inner_point = inner_point; + num_cache_point -= inner_point; + num_cache_point += inner_point * repeat_range->lower + (inner_point + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower); + current_repeat = NO_OUTER_REPEAT; + current_repeat_base_point = 0; } break; case OP_REPEAT_INC_SG: case OP_REPEAT_INC_NG_SG: // TODO: support OP_REPEAT opcodes. 
- return; + return NUM_CACHE_INDEX_FAIL; case OP_NULL_CHECK_START: case OP_NULL_CHECK_END: case OP_NULL_CHECK_END_MEMST: @@ -650,21 +640,21 @@ static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table) case OP_PUSH_ABSENT_POS: case OP_ABSENT_END: case OP_ABSENT: - return; + return NUM_CACHE_INDEX_FAIL; case OP_CALL: case OP_RETURN: - return; + return NUM_CACHE_INDEX_FAIL; case OP_CONDITION: - return; + return NUM_CACHE_INDEX_FAIL; case OP_STATE_CHECK_PUSH: case OP_STATE_CHECK_PUSH_OR_JUMP: case OP_STATE_CHECK: case OP_STATE_CHECK_ANYCHAR_STAR: case OP_STATE_CHECK_ANYCHAR_ML_STAR: - return; + return NUM_CACHE_INDEX_FAIL; case OP_SET_OPTION_PUSH: case OP_SET_OPTION: @@ -672,6 +662,8 @@ static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table) break; } } + + return num_cache_point; } #endif /* USE_MATCH_CACHE */ @@ -859,15 +851,16 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from) #ifdef USE_CACHE_MATCH_OPT #define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa) do {\ - (msa).enable_cache_match_opt = 0;\ + (msa).enable_cache_opt = 0;\ (msa).num_fail = 0;\ - (msa).num_cache_opcode = NUM_CACHE_OPCODE_UNINIT;\ - (msa).cache_index_table = (OnigCacheIndex *)0;\ - (msa).match_cache = (uint8_t *)0;\ + (msa).num_cache_index = NUM_CACHE_INDEX_UNINIT;\ + (msa).num_cache_point = 0;\ + (msa).cache_index = (OnigCacheIndex *)0;\ + (msa).cache = (uint8_t *)0;\ } while(0) #define MATCH_ARG_FREE_CACHE_MATCH_OPT(msa) do {\ - if ((msa).cache_index_table) xfree((msa).cache_index_table);\ - if ((msa).match_cache) xfree((msa).match_cache);\ + if ((msa).cache_index) xfree((msa).cache_index);\ + if ((msa).cache) xfree((msa).cache);\ } while(0) #else #define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa) @@ -1160,9 +1153,9 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define DO_CACHE_MATCH_OPT(reg,stk,repeat_stk,enable,p,num_cache_table,num_cache_size,table,pos,match_cache) do {\ if (enable) {\ - int cache_index = find_cache_index_table((reg), (stk), 
(repeat_stk), (table), (num_cache_table), (p));\ - if (cache_index >= 0) {\ - int key = (num_cache_size) * (int)(pos) + cache_index;\ + int point = find_cache_index((reg), (stk), (repeat_stk), (table), (num_cache_table), (p));\ + if (point >= 0) {\ + int key = (num_cache_size) * (int)(pos) + point;\ int index = key >> 3;\ int mask = 1 << (key & 7);\ if ((match_cache)[index] & mask) {\ @@ -1173,7 +1166,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, }\ } while (0) -static int find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackIndex *repeat_stk, OnigCacheIndex* table, int num_cache_table, UChar* p) +static int find_cache_index(regex_t* reg, OnigStackType *stk, OnigStackIndex *repeat_stk, OnigCacheIndex* table, int num_cache_table, UChar* p) { int l = 0, r = num_cache_table - 1, m; OnigCacheIndex* item; @@ -1195,7 +1188,7 @@ static int find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackInd item = &table[m]; if (item->outer_repeat == -1) { - return item->num; + return item->point; } range = ®->repeat_range[item->outer_repeat]; @@ -1204,21 +1197,21 @@ static int find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackInd count = is_inc ? stkp->u.repeat.count - 1 : stkp->u.repeat.count; if (count < range->lower) { - return range->base_num + range->inner_num * count + item->num; + return range->base_point + range->inner_point * count + item->point; } if (range->upper == 0x7fffffff) { - return range->base_num + range->inner_num * range->lower + (is_inc ? 0 : 1) + item->num; + return range->base_point + range->inner_point * range->lower + (is_inc ? 
0 : 1) + item->point; } - return range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (count - range->lower) + item->num; + return range->base_point + range->inner_point * range->lower + (range->inner_point + 1) * (count - range->lower) + item->point; } static void reset_match_cache(regex_t* reg, UChar* pbegin, UChar* pend, int pos, uint8_t* match_cache, OnigCacheIndex *table, int num_cache_size, int num_cache_table) { int l = 0, r = num_cache_table - 1, m1, m2; int is_inc = *pend == OP_REPEAT_INC || *pend == OP_REPEAT_INC_NG; OnigCacheIndex *item1, *item2; - int k1, k2; + int p1, p2; while (l <= r) { m1 = (l + r) / 2; @@ -1241,32 +1234,32 @@ static void reset_match_cache(regex_t* reg, UChar* pbegin, UChar* pend, int pos, item1 = &table[m1]; item2 = &table[m2]; - if (item1->outer_repeat < 0) k1 = item1->num; - else k1 = reg->repeat_range[item1->outer_repeat].base_num + item1->num; + if (item1->outer_repeat < 0) p1 = item1->point; + else p1 = reg->repeat_range[item1->outer_repeat].base_point + item1->point; - if (item2->outer_repeat < 0) k2 = item2->num; + if (item2->outer_repeat < 0) p2 = item2->point; else { OnigRepeatRange *range = ®->repeat_range[item2->outer_repeat]; - if (range->upper == 0x7fffffff) k2 = range->base_num + range->inner_num * range->lower + (is_inc ? 0 : 1) + item2->num; - else k2 = range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (range->upper - range->lower - (is_inc ? 1 : 0)) + item2->num; + if (range->upper == 0x7fffffff) p2 = range->base_point + range->inner_point * range->lower + (is_inc ? 0 : 1) + item2->point; + else p2 = range->base_point + range->inner_point * range->lower + (range->inner_point + 1) * (range->upper - range->lower - (is_inc ? 
1 : 0)) + item2->point; } int base = pos * num_cache_size; - k1 += base; - k2 += base; + p1 += base; + p2 += base; - if ((k1 >> 3) == (k2 >> 3)) { - match_cache[k1 >> 3] &= ((1 << (8 - (k2 & 7) - 1)) - 1 << ((k2 & 7) + 1)) | ((1 << (k1 & 7)) - 1); + if ((p1 >> 3) == (p2 >> 3)) { + match_cache[p1 >> 3] &= ((1 << (8 - (p2 & 7) - 1)) - 1 << ((p2 & 7) + 1)) | ((1 << (p1 & 7)) - 1); } else { - int i = k1 >> 3; - if (k1 & 7) { - match_cache[k1 >> 3] &= (1 << ((k1 & 7) - 1)) - 1; + int i = p1 >> 3; + if (p1 & 7) { + match_cache[p1 >> 3] &= (1 << ((p1 & 7) - 1)) - 1; i++; } - if (i < (k2 >> 3)) { - xmemset(&match_cache[i], 0, (k2 >> 3) - i); - if (k2 & 7) { - match_cache[k2 >> 3] &= ((1 << (8 - (k2 & 7) - 1)) - 1 << ((k2 & 7) + 1)); + if (i < (p2 >> 3)) { + xmemset(&match_cache[i], 0, (p2 >> 3) - i); + if (p2 & 7) { + match_cache[p2 >> 3] &= ((1 << (8 - (p2 & 7) - 1)) - 1 << ((p2 & 7) + 1)); } } } @@ -2761,7 +2754,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR); while (DATA_ENSURE_CHECK1) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); STACK_PUSH_ALT(p, s, sprev, pkeep); n = enclen(encode, s, end); DATA_ENSURE(n); @@ -2774,7 +2767,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR); while (DATA_ENSURE_CHECK1) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); 
STACK_PUSH_ALT(p, s, sprev, pkeep); n = enclen(encode, s, end); if (n > 1) { @@ -2793,7 +2786,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { if (*p == *s) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); STACK_PUSH_ALT(p + 1, s, sprev, pkeep); } n = enclen(encode, s, end); @@ -2809,7 +2802,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { if (*p == *s) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); STACK_PUSH_ALT(p + 1, s, sprev, pkeep); } n = enclen(encode, s, end); @@ -3396,7 +3389,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST); { int isnull; - int ischanged = 0; // set 1 when a loop is empty but memory status is changed. + int ischanged = 0; // set 1 when a loop is null but memory status is changed. 
GET_MEMNUM_INC(mem, p); /* mem: null check id */ STACK_NULL_CHECK_MEMST(isnull, ischanged, mem, s, reg); @@ -3409,7 +3402,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto null_check_found; } # ifdef USE_CACHE_MATCH_OPT - if (ischanged && msa->enable_cache_match_opt) { + if (ischanged && msa->enable_cache_opt) { RelAddrType rel; OnigUChar *addr; int mem; @@ -3428,7 +3421,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, default: goto unexpected_bytecode_error; } - reset_match_cache(reg, addr, pbegin, (int)(s - str), msa->match_cache, msa->cache_index_table, msa->num_cache_table ,msa->num_cache_opcode); + reset_match_cache(reg, addr, pbegin, (int)(s - str), msa->cache, msa->cache_index, msa->num_cache_index ,msa->num_cache_point); } # endif } @@ -3473,7 +3466,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_PUSH) MOP_IN(OP_PUSH); GET_RELADDR_INC(addr, p); - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); MOP_OUT; JUMP; @@ -3527,7 +3520,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_RELADDR_INC(addr, p); if (*p == *s && DATA_ENSURE_CHECK1) { p++; - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); MOP_OUT; JUMP; @@ -3541,7 +3534,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_RELADDR_INC(addr, p); if (*p == *s) 
{ p++; - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); MOP_OUT; JUMP; @@ -3560,7 +3553,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_REPEAT(mem, p); if (reg->repeat_range[mem].lower == 0) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); } } @@ -3577,7 +3570,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_REPEAT(mem, p); if (reg->repeat_range[mem].lower == 0) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); STACK_PUSH_ALT(p, s, sprev, pkeep); p += addr; } @@ -3597,7 +3590,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { if (*pbegin == OP_REPEAT_INC) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s 
- str, msa->cache); } STACK_PUSH_ALT(p, s, sprev, pkeep); p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */ @@ -3630,7 +3623,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_REPEAT_INC(si); if (*pbegin == OP_REPEAT_INC_NG) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); } STACK_PUSH_ALT(pcode, s, sprev, pkeep); } @@ -3820,35 +3813,33 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, pkeep = stk->u.state.pkeep; #ifdef USE_CACHE_MATCH_OPT - if (++msa->num_fail >= (int)(end - str) + 1 && msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { - int table_size = 0; - msa->enable_cache_match_opt = 1; - if (msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { - msa->num_cache_opcode = count_num_cache_opcode(reg, &table_size); + if (++msa->num_fail >= (int)(end - str) + 1 && msa->num_cache_index == NUM_CACHE_INDEX_UNINIT) { + msa->enable_cache_opt = 1; + if (msa->num_cache_index == NUM_CACHE_INDEX_UNINIT) { + msa->num_cache_index = count_num_cache_index(reg); } - if (msa->num_cache_opcode == NUM_CACHE_OPCODE_FAIL || msa->num_cache_opcode == 0) { - msa->enable_cache_match_opt = 0; + if (msa->num_cache_index == NUM_CACHE_INDEX_FAIL || msa->num_cache_index == 0) { + msa->enable_cache_opt = 0; goto fail_match_cache_opt; } - if (msa->cache_index_table == NULL) { - OnigCacheIndex *table = (OnigCacheIndex *)xmalloc(table_size * sizeof(OnigCacheIndex)); + if (msa->cache_index == NULL) { + OnigCacheIndex *table = (OnigCacheIndex *)xmalloc(msa->num_cache_index * sizeof(OnigCacheIndex)); if (table == NULL) { - msa->enable_cache_match_opt = 0; + msa->enable_cache_opt = 0; goto fail_match_cache_opt; } - init_cache_index_table(reg, table); - 
msa->cache_index_table = table; - msa->num_cache_table = table_size; + msa->num_cache_point = init_cache_index(reg, table); + msa->cache_index = table; } // TODO: check arithemetic overflow. - int match_cache_size8 = msa->num_cache_opcode * ((int)(end - str) + 1); - int match_cache_size = (match_cache_size8 >> 3) + (match_cache_size8 & 7 ? 1 : 0); - msa->match_cache = (uint8_t*)xmalloc(match_cache_size * sizeof(uint8_t)); - if (msa->match_cache == NULL) { - msa->enable_cache_match_opt = 0; + int cache_size8 = msa->num_cache_point * ((int)(end - str) + 1); + int cache_size = (cache_size8 >> 3) + (cache_size8 & 7 ? 1 : 0); + msa->cache = (uint8_t*)xmalloc(cache_size * sizeof(uint8_t)); + if (msa->cache == NULL) { + msa->enable_cache_opt = 0; goto fail_match_cache_opt; } - xmemset(msa->match_cache, 0, match_cache_size * sizeof(uint8_t)); + xmemset(msa->cache, 0, cache_size * sizeof(uint8_t)); } fail_match_cache_opt: #endif diff --git a/regint.h b/regint.h index 12b5d5c70a5544..e1e48c09513a1d 100644 --- a/regint.h +++ b/regint.h @@ -45,8 +45,8 @@ #define USE_CACHE_MATCH_OPT #ifdef USE_CACHE_MATCH_OPT -# define NUM_CACHE_OPCODE_FAIL -1 -# define NUM_CACHE_OPCODE_UNINIT -2 +# define NUM_CACHE_INDEX_FAIL -1 +# define NUM_CACHE_INDEX_UNINIT -2 #endif #if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \ @@ -874,10 +874,11 @@ typedef struct _OnigStackType { #ifdef USE_CACHE_MATCH_OPT typedef struct { - UChar *addr; - int num; - int outer_repeat; + UChar* addr; /* pointer to corresponding opcode. 
*/ + int point; /* cache point number (in outer repeat if `outer_repeat != -1`) */ + int outer_repeat; /* outer repeat index number */ } OnigCacheIndex; +#define NO_OUTER_REPEAT -1 #endif typedef struct { @@ -903,12 +904,12 @@ typedef struct { uint64_t end_time; #endif #ifdef USE_CACHE_MATCH_OPT - int num_fail; - int enable_cache_match_opt; - int num_cache_opcode; - int num_cache_table; - OnigCacheIndex *cache_index_table; - uint8_t *match_cache; + int num_fail; /* counter of failure (backtrack) number for switching cache optimization. */ + int num_cache_point; /* number of cache point in program */ + int num_cache_index; /* size of cache index array */ + int enable_cache_opt; /* whether cache optimization is enabled */ + OnigCacheIndex* cache_index; /* cache index array for computing cache point number */ + uint8_t* cache; /* bit array for cache optimization */ #endif } OnigMatchArg; From a1c1fc558a0ee791e91a66cae5c9515679890339 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Tue, 8 Nov 2022 15:13:27 +0900 Subject: [PATCH 059/104] Revert "Refactor field names" This reverts commit 1e6673d6bbd2adbf555d82c7c0906ceb148ed6ee. --- include/ruby/onigmo.h | 5 +- regexec.c | 303 ++++++++++++++++++++++-------------------- regint.h | 23 ++-- 3 files changed, 169 insertions(+), 162 deletions(-) diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h index 40cbedd4df6a68..703f38f5907153 100644 --- a/include/ruby/onigmo.h +++ b/include/ruby/onigmo.h @@ -744,9 +744,8 @@ typedef struct { typedef struct { int lower; int upper; - /* These fields are for cache optimization. 
*/ - int base_point; - int inner_point; + int base_num; + int inner_num; } OnigRepeatRange; typedef void (*OnigWarnFunc)(const char* s); diff --git a/regexec.c b/regexec.c index b9f8411faf13ae..0bd4c8a96c9756 100644 --- a/regexec.c +++ b/regexec.c @@ -233,17 +233,19 @@ onig_get_capture_tree(OnigRegion* region) #ifdef USE_CACHE_MATCH_OPT -static int count_num_cache_index(regex_t* reg) +/* count number of jump-like opcodes for allocation of cache memory. */ +/* return -1 if we cannot optimize the regex matching by using cache. */ +static int count_num_cache_opcode(regex_t* reg, int* table_size) { - UChar* p = reg->p; - UChar* pend = p + reg->used; - LengthType len; - MemNumType mem; + int num = 0; + UChar* p = reg->p; + UChar* pend = p + reg->used; + LengthType len; + MemNumType mem; + MemNumType current_mem = -1; + int current_mem_num = 0; OnigEncoding enc = reg->enc; - int num_cache_index = 0; - MemNumType current_repeat = NO_OUTER_REPEAT; - while (p < pend) { switch (*p++) { case OP_FINISH: @@ -296,10 +298,10 @@ static int count_num_cache_index(regex_t* reg) break; case OP_ANYCHAR_STAR: case OP_ANYCHAR_ML_STAR: - num_cache_index++; break; + num++; *table_size += 1; break; case OP_ANYCHAR_STAR_PEEK_NEXT: case OP_ANYCHAR_ML_STAR_PEEK_NEXT: - p++; num_cache_index++; break; + p++; num++; *table_size += 1; break; case OP_WORD: case OP_NOT_WORD: @@ -332,7 +334,7 @@ static int count_num_cache_index(regex_t* reg) case OP_BACKREF_MULTI: case OP_BACKREF_MULTI_IC: case OP_BACKREF_WITH_LEVEL: - return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; case OP_MEMORY_START: case OP_MEMORY_START_PUSH: @@ -352,45 +354,56 @@ static int count_num_cache_index(regex_t* reg) break; case OP_PUSH: p += SIZE_RELADDR; - num_cache_index++; + num++; + *table_size += 1; break; case OP_POP: break; case OP_PUSH_OR_JUMP_EXACT1: case OP_PUSH_IF_PEEK_NEXT: - p += SIZE_RELADDR + 1; num_cache_index++; break; + p += SIZE_RELADDR + 1; num++; *table_size += 1; break; case OP_REPEAT: case 
OP_REPEAT_NG: - if (current_repeat != NO_OUTER_REPEAT) { + if (current_mem != -1) { // A nested OP_REPEAT is not yet supported. - return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; } GET_MEMNUM_INC(mem, p); p += SIZE_RELADDR; if (reg->repeat_range[mem].lower == 0) { - num_cache_index++; + num++; + *table_size += 1; } - current_repeat = mem; + reg->repeat_range[mem].base_num = num; + current_mem = mem; + current_mem_num = num; break; case OP_REPEAT_INC: case OP_REPEAT_INC_NG: GET_MEMNUM_INC(mem, p); - if (mem != current_repeat) { + //fprintf(stderr, "OP_REPEAT %d\n", mem); + if (mem != current_mem) { // A lone or invalid OP_REPEAT_INC is found. - return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; } { + int inner_num = num - current_mem_num; OnigRepeatRange *repeat_range = ®->repeat_range[mem]; + repeat_range->inner_num = inner_num; + num -= inner_num; + num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower); + //fprintf(stderr, "lower %d < upper %d\n", repeat_range->lower, repeat_range->upper); if (repeat_range->lower < repeat_range->upper) { - num_cache_index++; + *table_size += 1; } - current_repeat = NO_OUTER_REPEAT; + current_mem = -1; + current_mem_num = 0; } break; case OP_REPEAT_INC_SG: case OP_REPEAT_INC_NG_SG: // TODO: Support nested OP_REPEAT. 
- return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; case OP_NULL_CHECK_START: case OP_NULL_CHECK_END: case OP_NULL_CHECK_END_MEMST: @@ -409,21 +422,21 @@ static int count_num_cache_index(regex_t* reg) case OP_PUSH_ABSENT_POS: case OP_ABSENT_END: case OP_ABSENT: - return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; case OP_CALL: case OP_RETURN: - return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; case OP_CONDITION: - return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; case OP_STATE_CHECK_PUSH: case OP_STATE_CHECK_PUSH_OR_JUMP: case OP_STATE_CHECK: case OP_STATE_CHECK_ANYCHAR_STAR: case OP_STATE_CHECK_ANYCHAR_ML_STAR: - return NUM_CACHE_INDEX_FAIL; + return NUM_CACHE_OPCODE_FAIL; case OP_SET_OPTION_PUSH: case OP_SET_OPTION: @@ -432,22 +445,21 @@ static int count_num_cache_index(regex_t* reg) } } - return num_cache_index; + return num; } -static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) +static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table) { - UChar* pbegin; - UChar* p = reg->p; - UChar* pend = p + reg->used; - LengthType len; - MemNumType mem; + UChar* pbegin; + UChar* p = reg->p; + UChar* pend = p + reg->used; + LengthType len; + MemNumType mem; + MemNumType current_mem = -1; + int num = 0; + int current_mem_num = 0; OnigEncoding enc = reg->enc; - int num_cache_point = 0; - MemNumType current_repeat = -1; - int current_repeat_base_point = 0; - while (p < pend) { pbegin = p; switch (*p++) { @@ -500,20 +512,20 @@ static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) break; case OP_ANYCHAR_STAR: case OP_ANYCHAR_ML_STAR: - cache_index->addr = pbegin; - cache_index->point = num_cache_point - current_repeat_base_point; - cache_index->outer_repeat = current_repeat; - num_cache_point++; - cache_index++; + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = current_mem; + num++; + table++; break; case OP_ANYCHAR_STAR_PEEK_NEXT: case 
OP_ANYCHAR_ML_STAR_PEEK_NEXT: p++; - cache_index->addr = pbegin; - cache_index->point = num_cache_point - current_repeat_base_point; - cache_index->outer_repeat = current_repeat; - num_cache_point++; - cache_index++; + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = current_mem; + num++; + table++; break; case OP_WORD: @@ -547,7 +559,7 @@ static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) case OP_BACKREF_MULTI: case OP_BACKREF_MULTI_IC: case OP_BACKREF_WITH_LEVEL: - return NUM_CACHE_INDEX_FAIL; + return; case OP_MEMORY_START: case OP_MEMORY_START_PUSH: @@ -567,61 +579,59 @@ static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) break; case OP_PUSH: p += SIZE_RELADDR; - cache_index->addr = pbegin; - cache_index->point = num_cache_point - current_repeat_base_point; - cache_index->outer_repeat = current_repeat; - num_cache_point++; - cache_index++; + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = current_mem; + num++; + table++; break; case OP_POP: break; case OP_PUSH_OR_JUMP_EXACT1: case OP_PUSH_IF_PEEK_NEXT: p += SIZE_RELADDR + 1; - cache_index->addr = pbegin; - cache_index->point = num_cache_point - current_repeat_base_point; - cache_index->outer_repeat = current_repeat; - num_cache_point++; - cache_index++; + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = current_mem; + num++; + table++; break; case OP_REPEAT: case OP_REPEAT_NG: GET_MEMNUM_INC(mem, p); p += SIZE_RELADDR; if (reg->repeat_range[mem].lower == 0) { - cache_index->addr = pbegin; - cache_index->point = num_cache_point - current_repeat_base_point; - cache_index->outer_repeat = current_repeat; - num_cache_point++; - cache_index++; + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = mem; + num++; + table++; } - reg->repeat_range[mem].base_point = num_cache_point; - current_repeat = mem; - current_repeat_base_point = 
num_cache_point; + current_mem = mem; + current_mem_num = num; break; case OP_REPEAT_INC: case OP_REPEAT_INC_NG: - GET_MEMNUM_INC(mem, p); + GET_MEMNUM_INC(mem, p); { - int inner_point = num_cache_point - current_repeat_base_point; + int inner_num = num - current_mem_num; OnigRepeatRange *repeat_range = ®->repeat_range[mem]; if (repeat_range->lower < repeat_range->upper) { - cache_index->addr = pbegin; - cache_index->point = num_cache_point - current_repeat_base_point; - cache_index->outer_repeat = mem; - cache_index++; + table->addr = pbegin; + table->num = num - current_mem_num; + table->outer_repeat = mem; + table++; } - repeat_range->inner_point = inner_point; - num_cache_point -= inner_point; - num_cache_point += inner_point * repeat_range->lower + (inner_point + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower); - current_repeat = NO_OUTER_REPEAT; - current_repeat_base_point = 0; + num -= inner_num; + num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower); + current_mem = -1; + current_mem_num = 0; } break; case OP_REPEAT_INC_SG: case OP_REPEAT_INC_NG_SG: // TODO: support OP_REPEAT opcodes. 
- return NUM_CACHE_INDEX_FAIL; + return; case OP_NULL_CHECK_START: case OP_NULL_CHECK_END: case OP_NULL_CHECK_END_MEMST: @@ -640,21 +650,21 @@ static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) case OP_PUSH_ABSENT_POS: case OP_ABSENT_END: case OP_ABSENT: - return NUM_CACHE_INDEX_FAIL; + return; case OP_CALL: case OP_RETURN: - return NUM_CACHE_INDEX_FAIL; + return; case OP_CONDITION: - return NUM_CACHE_INDEX_FAIL; + return; case OP_STATE_CHECK_PUSH: case OP_STATE_CHECK_PUSH_OR_JUMP: case OP_STATE_CHECK: case OP_STATE_CHECK_ANYCHAR_STAR: case OP_STATE_CHECK_ANYCHAR_ML_STAR: - return NUM_CACHE_INDEX_FAIL; + return; case OP_SET_OPTION_PUSH: case OP_SET_OPTION: @@ -662,8 +672,6 @@ static int init_cache_index(regex_t* reg, OnigCacheIndex *cache_index) break; } } - - return num_cache_point; } #endif /* USE_MATCH_CACHE */ @@ -851,16 +859,15 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from) #ifdef USE_CACHE_MATCH_OPT #define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa) do {\ - (msa).enable_cache_opt = 0;\ + (msa).enable_cache_match_opt = 0;\ (msa).num_fail = 0;\ - (msa).num_cache_index = NUM_CACHE_INDEX_UNINIT;\ - (msa).num_cache_point = 0;\ - (msa).cache_index = (OnigCacheIndex *)0;\ - (msa).cache = (uint8_t *)0;\ + (msa).num_cache_opcode = NUM_CACHE_OPCODE_UNINIT;\ + (msa).cache_index_table = (OnigCacheIndex *)0;\ + (msa).match_cache = (uint8_t *)0;\ } while(0) #define MATCH_ARG_FREE_CACHE_MATCH_OPT(msa) do {\ - if ((msa).cache_index) xfree((msa).cache_index);\ - if ((msa).cache) xfree((msa).cache);\ + if ((msa).cache_index_table) xfree((msa).cache_index_table);\ + if ((msa).match_cache) xfree((msa).match_cache);\ } while(0) #else #define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa) @@ -1153,9 +1160,9 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define DO_CACHE_MATCH_OPT(reg,stk,repeat_stk,enable,p,num_cache_table,num_cache_size,table,pos,match_cache) do {\ if (enable) {\ - int point = find_cache_index((reg), (stk), (repeat_stk), 
(table), (num_cache_table), (p));\ - if (point >= 0) {\ - int key = (num_cache_size) * (int)(pos) + point;\ + int cache_index = find_cache_index_table((reg), (stk), (repeat_stk), (table), (num_cache_table), (p));\ + if (cache_index >= 0) {\ + int key = (num_cache_size) * (int)(pos) + cache_index;\ int index = key >> 3;\ int mask = 1 << (key & 7);\ if ((match_cache)[index] & mask) {\ @@ -1166,7 +1173,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, }\ } while (0) -static int find_cache_index(regex_t* reg, OnigStackType *stk, OnigStackIndex *repeat_stk, OnigCacheIndex* table, int num_cache_table, UChar* p) +static int find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackIndex *repeat_stk, OnigCacheIndex* table, int num_cache_table, UChar* p) { int l = 0, r = num_cache_table - 1, m; OnigCacheIndex* item; @@ -1188,7 +1195,7 @@ static int find_cache_index(regex_t* reg, OnigStackType *stk, OnigStackIndex *re item = &table[m]; if (item->outer_repeat == -1) { - return item->point; + return item->num; } range = ®->repeat_range[item->outer_repeat]; @@ -1197,21 +1204,21 @@ static int find_cache_index(regex_t* reg, OnigStackType *stk, OnigStackIndex *re count = is_inc ? stkp->u.repeat.count - 1 : stkp->u.repeat.count; if (count < range->lower) { - return range->base_point + range->inner_point * count + item->point; + return range->base_num + range->inner_num * count + item->num; } if (range->upper == 0x7fffffff) { - return range->base_point + range->inner_point * range->lower + (is_inc ? 0 : 1) + item->point; + return range->base_num + range->inner_num * range->lower + (is_inc ? 
0 : 1) + item->num; } - return range->base_point + range->inner_point * range->lower + (range->inner_point + 1) * (count - range->lower) + item->point; + return range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (count - range->lower) + item->num; } static void reset_match_cache(regex_t* reg, UChar* pbegin, UChar* pend, int pos, uint8_t* match_cache, OnigCacheIndex *table, int num_cache_size, int num_cache_table) { int l = 0, r = num_cache_table - 1, m1, m2; int is_inc = *pend == OP_REPEAT_INC || *pend == OP_REPEAT_INC_NG; OnigCacheIndex *item1, *item2; - int p1, p2; + int k1, k2; while (l <= r) { m1 = (l + r) / 2; @@ -1234,32 +1241,32 @@ static void reset_match_cache(regex_t* reg, UChar* pbegin, UChar* pend, int pos, item1 = &table[m1]; item2 = &table[m2]; - if (item1->outer_repeat < 0) p1 = item1->point; - else p1 = reg->repeat_range[item1->outer_repeat].base_point + item1->point; + if (item1->outer_repeat < 0) k1 = item1->num; + else k1 = reg->repeat_range[item1->outer_repeat].base_num + item1->num; - if (item2->outer_repeat < 0) p2 = item2->point; + if (item2->outer_repeat < 0) k2 = item2->num; else { OnigRepeatRange *range = &reg->repeat_range[item2->outer_repeat]; - if (range->upper == 0x7fffffff) p2 = range->base_point + range->inner_point * range->lower + (is_inc ? 0 : 1) + item2->point; - else p2 = range->base_point + range->inner_point * range->lower + (range->inner_point + 1) * (range->upper - range->lower - (is_inc ? 1 : 0)) + item2->point; + if (range->upper == 0x7fffffff) k2 = range->base_num + range->inner_num * range->lower + (is_inc ? 0 : 1) + item2->num; + else k2 = range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (range->upper - range->lower - (is_inc ? 
1 : 0)) + item2->num; } int base = pos * num_cache_size; - p1 += base; - p2 += base; + k1 += base; + k2 += base; - if ((p1 >> 3) == (p2 >> 3)) { - match_cache[p1 >> 3] &= ((1 << (8 - (p2 & 7) - 1)) - 1 << ((p2 & 7) + 1)) | ((1 << (p1 & 7)) - 1); + if ((k1 >> 3) == (k2 >> 3)) { + match_cache[k1 >> 3] &= ((1 << (8 - (k2 & 7) - 1)) - 1 << ((k2 & 7) + 1)) | ((1 << (k1 & 7)) - 1); } else { - int i = p1 >> 3; - if (p1 & 7) { - match_cache[p1 >> 3] &= (1 << ((p1 & 7) - 1)) - 1; + int i = k1 >> 3; + if (k1 & 7) { + match_cache[k1 >> 3] &= (1 << ((k1 & 7) - 1)) - 1; i++; } - if (i < (p2 >> 3)) { - xmemset(&match_cache[i], 0, (p2 >> 3) - i); - if (p2 & 7) { - match_cache[p2 >> 3] &= ((1 << (8 - (p2 & 7) - 1)) - 1 << ((p2 & 7) + 1)); + if (i < (k2 >> 3)) { + xmemset(&match_cache[i], 0, (k2 >> 3) - i); + if (k2 & 7) { + match_cache[k2 >> 3] &= ((1 << (8 - (k2 & 7) - 1)) - 1 << ((k2 & 7) + 1)); } } } @@ -2754,7 +2761,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR); while (DATA_ENSURE_CHECK1) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); STACK_PUSH_ALT(p, s, sprev, pkeep); n = enclen(encode, s, end); DATA_ENSURE(n); @@ -2767,7 +2774,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR); while (DATA_ENSURE_CHECK1) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); 
STACK_PUSH_ALT(p, s, sprev, pkeep); n = enclen(encode, s, end); if (n > 1) { @@ -2786,7 +2793,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { if (*p == *s) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); STACK_PUSH_ALT(p + 1, s, sprev, pkeep); } n = enclen(encode, s, end); @@ -2802,7 +2809,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { if (*p == *s) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); STACK_PUSH_ALT(p + 1, s, sprev, pkeep); } n = enclen(encode, s, end); @@ -3389,7 +3396,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST); { int isnull; - int ischanged = 0; // set 1 when a loop is null but memory status is changed. + int ischanged = 0; // set 1 when a loop is empty but memory status is changed. 
GET_MEMNUM_INC(mem, p); /* mem: null check id */ STACK_NULL_CHECK_MEMST(isnull, ischanged, mem, s, reg); @@ -3402,7 +3409,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto null_check_found; } # ifdef USE_CACHE_MATCH_OPT - if (ischanged && msa->enable_cache_opt) { + if (ischanged && msa->enable_cache_match_opt) { RelAddrType rel; OnigUChar *addr; int mem; @@ -3421,7 +3428,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, default: goto unexpected_bytecode_error; } - reset_match_cache(reg, addr, pbegin, (int)(s - str), msa->cache, msa->cache_index, msa->num_cache_index ,msa->num_cache_point); + reset_match_cache(reg, addr, pbegin, (int)(s - str), msa->match_cache, msa->cache_index_table, msa->num_cache_table ,msa->num_cache_opcode); } # endif } @@ -3466,7 +3473,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_PUSH) MOP_IN(OP_PUSH); GET_RELADDR_INC(addr, p); - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); MOP_OUT; JUMP; @@ -3520,7 +3527,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_RELADDR_INC(addr, p); if (*p == *s && DATA_ENSURE_CHECK1) { p++; - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); MOP_OUT; JUMP; @@ -3534,7 +3541,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_RELADDR_INC(addr, p); if (*p == *s) 
{ p++; - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); MOP_OUT; JUMP; @@ -3553,7 +3560,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_REPEAT(mem, p); if (reg->repeat_range[mem].lower == 0) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache); STACK_PUSH_ALT(p + addr, s, sprev, pkeep); } } @@ -3570,7 +3577,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_REPEAT(mem, p); if (reg->repeat_range[mem].lower == 0) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); STACK_PUSH_ALT(p, s, sprev, pkeep); p += addr; } @@ -3590,7 +3597,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { if (*pbegin == OP_REPEAT_INC) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, 
msa->match_cache); } STACK_PUSH_ALT(p, s, sprev, pkeep); p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */ @@ -3623,7 +3630,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_REPEAT_INC(si); if (*pbegin == OP_REPEAT_INC_NG) { - DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_opt, pbegin, msa->num_cache_index, msa->num_cache_point, msa->cache_index, s - str, msa->cache); + DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache); } STACK_PUSH_ALT(pcode, s, sprev, pkeep); } @@ -3813,33 +3820,35 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, pkeep = stk->u.state.pkeep; #ifdef USE_CACHE_MATCH_OPT - if (++msa->num_fail >= (int)(end - str) + 1 && msa->num_cache_index == NUM_CACHE_INDEX_UNINIT) { - msa->enable_cache_opt = 1; - if (msa->num_cache_index == NUM_CACHE_INDEX_UNINIT) { - msa->num_cache_index = count_num_cache_index(reg); + if (++msa->num_fail >= (int)(end - str) + 1 && msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { + int table_size = 0; + msa->enable_cache_match_opt = 1; + if (msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { + msa->num_cache_opcode = count_num_cache_opcode(reg, &table_size); } - if (msa->num_cache_index == NUM_CACHE_INDEX_FAIL || msa->num_cache_index == 0) { - msa->enable_cache_opt = 0; + if (msa->num_cache_opcode == NUM_CACHE_OPCODE_FAIL || msa->num_cache_opcode == 0) { + msa->enable_cache_match_opt = 0; goto fail_match_cache_opt; } - if (msa->cache_index == NULL) { - OnigCacheIndex *table = (OnigCacheIndex *)xmalloc(msa->num_cache_index * sizeof(OnigCacheIndex)); + if (msa->cache_index_table == NULL) { + OnigCacheIndex *table = (OnigCacheIndex *)xmalloc(table_size * sizeof(OnigCacheIndex)); if (table == NULL) { - msa->enable_cache_opt = 0; + msa->enable_cache_match_opt = 0; goto fail_match_cache_opt; } - msa->num_cache_point = 
init_cache_index(reg, table); - msa->cache_index = table; + init_cache_index_table(reg, table); + msa->cache_index_table = table; + msa->num_cache_table = table_size; } // TODO: check arithemetic overflow. - int cache_size8 = msa->num_cache_point * ((int)(end - str) + 1); - int cache_size = (cache_size8 >> 3) + (cache_size8 & 7 ? 1 : 0); - msa->cache = (uint8_t*)xmalloc(cache_size * sizeof(uint8_t)); - if (msa->cache == NULL) { - msa->enable_cache_opt = 0; + int match_cache_size8 = msa->num_cache_opcode * ((int)(end - str) + 1); + int match_cache_size = (match_cache_size8 >> 3) + (match_cache_size8 & 7 ? 1 : 0); + msa->match_cache = (uint8_t*)xmalloc(match_cache_size * sizeof(uint8_t)); + if (msa->match_cache == NULL) { + msa->enable_cache_match_opt = 0; goto fail_match_cache_opt; } - xmemset(msa->cache, 0, cache_size * sizeof(uint8_t)); + xmemset(msa->match_cache, 0, match_cache_size * sizeof(uint8_t)); } fail_match_cache_opt: #endif diff --git a/regint.h b/regint.h index e1e48c09513a1d..12b5d5c70a5544 100644 --- a/regint.h +++ b/regint.h @@ -45,8 +45,8 @@ #define USE_CACHE_MATCH_OPT #ifdef USE_CACHE_MATCH_OPT -# define NUM_CACHE_INDEX_FAIL -1 -# define NUM_CACHE_INDEX_UNINIT -2 +# define NUM_CACHE_OPCODE_FAIL -1 +# define NUM_CACHE_OPCODE_UNINIT -2 #endif #if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \ @@ -874,11 +874,10 @@ typedef struct _OnigStackType { #ifdef USE_CACHE_MATCH_OPT typedef struct { - UChar* addr; /* pointer to corresponding opcode. */ - int point; /* cache point number (in outer repeat if `outer_repeat != -1`) */ - int outer_repeat; /* outer repeat index number */ + UChar *addr; + int num; + int outer_repeat; } OnigCacheIndex; -#define NO_OUTER_REPEAT -1 #endif typedef struct { @@ -904,12 +903,12 @@ typedef struct { uint64_t end_time; #endif #ifdef USE_CACHE_MATCH_OPT - int num_fail; /* counter of failure (backtrack) number for switching cache optimization. 
*/ - int num_cache_point; /* number of cache point in program */ - int num_cache_index; /* size of cache index array */ - int enable_cache_opt; /* whether cache optimization is enabled */ - OnigCacheIndex* cache_index; /* cache index array for computing cache point number */ - uint8_t* cache; /* bit array for cache optimization */ + int num_fail; + int enable_cache_match_opt; + int num_cache_opcode; + int num_cache_table; + OnigCacheIndex *cache_index_table; + uint8_t *match_cache; #endif } OnigMatchArg; From ff5dba831910c91e293220b652be868e9cfdc8e1 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Tue, 8 Nov 2022 18:09:57 +0900 Subject: [PATCH 060/104] Return ONIGERR_MEMORY if it fails to allocate memory for cache_match_opt --- regexec.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/regexec.c b/regexec.c index 0bd4c8a96c9756..acf03f2501c36d 100644 --- a/regexec.c +++ b/regexec.c @@ -3833,20 +3833,22 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (msa->cache_index_table == NULL) { OnigCacheIndex *table = (OnigCacheIndex *)xmalloc(table_size * sizeof(OnigCacheIndex)); if (table == NULL) { - msa->enable_cache_match_opt = 0; - goto fail_match_cache_opt; + return ONIGERR_MEMORY; } init_cache_index_table(reg, table); msa->cache_index_table = table; msa->num_cache_table = table_size; } - // TODO: check arithemetic overflow. - int match_cache_size8 = msa->num_cache_opcode * ((int)(end - str) + 1); - int match_cache_size = (match_cache_size8 >> 3) + (match_cache_size8 & 7 ? 1 : 0); + size_t len = (end - str) + 1; + size_t match_cache_size8 = (size_t)msa->num_cache_opcode * len; + /* overflow check */ + if (match_cache_size8 / len != (size_t)msa->num_cache_opcode) { + return ONIGERR_MEMORY; + } + size_t match_cache_size = (match_cache_size8 >> 3) + (match_cache_size8 & 7 ? 
1 : 0); msa->match_cache = (uint8_t*)xmalloc(match_cache_size * sizeof(uint8_t)); if (msa->match_cache == NULL) { - msa->enable_cache_match_opt = 0; - goto fail_match_cache_opt; + return ONIGERR_MEMORY; } xmemset(msa->match_cache, 0, match_cache_size * sizeof(uint8_t)); } From 537286d0bb5021afe188cfba6100772bb0285e06 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Tue, 8 Nov 2022 18:13:00 +0900 Subject: [PATCH 061/104] Prevent GCC warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` regexec.c: In function ‘reset_match_cache’: regexec.c:1259:56: warning: suggest parentheses around ‘-’ inside ‘<<’ [-Wparentheses] 1259 | match_cache[k1 >> 3] &= ((1 << (8 - (k2 & 7) - 1)) - 1 << ((k2 & 7) + 1)) | ((1 << (k1 & 7)) - 1); | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~ regexec.c:1269:60: warning: suggest parentheses around ‘-’ inside ‘<<’ [-Wparentheses] 1269 | match_cache[k2 >> 3] &= ((1 << (8 - (k2 & 7) - 1)) - 1 << ((k2 & 7) + 1)); | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~ regexec.c: In function ‘find_cache_index_table’: regexec.c:1192:11: warning: ‘m’ may be used uninitialized [-Wmaybe-uninitialized] 1192 | if (!(0 <= m && m < num_cache_table && table[m].addr == p)) { | ~~^~~~ regexec.c: In function ‘match_at’: regexec.c:1238:12: warning: ‘m1’ is used uninitialized [-Wuninitialized] 1238 | if (table[m1].addr < pbegin && m1 + 1 < num_cache_table) m1++; | ^ regexec.c:1218:39: note: ‘m1’ was declared here 1218 | int l = 0, r = num_cache_table - 1, m1, m2; | ^~ regexec.c:1239:12: warning: ‘m2’ is used uninitialized [-Wuninitialized] 1239 | if (table[m2].addr > pend && m2 - 1 > 0) m2--; | ^ regexec.c:1218:43: note: ‘m2’ was declared here 1218 | int l = 0, r = num_cache_table - 1, m1, m2; | ^~ ``` --- regexec.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/regexec.c b/regexec.c index acf03f2501c36d..94d03241a53304 100644 --- a/regexec.c +++ b/regexec.c @@ -1175,7 +1175,7 @@ stack_double(OnigStackType** arg_stk_base, 
OnigStackType** arg_stk_end, static int find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackIndex *repeat_stk, OnigCacheIndex* table, int num_cache_table, UChar* p) { - int l = 0, r = num_cache_table - 1, m; + int l = 0, r = num_cache_table - 1, m = 0; OnigCacheIndex* item; OnigRepeatRange* range; OnigStackType *stkp; @@ -1215,7 +1215,7 @@ static int find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackInd } static void reset_match_cache(regex_t* reg, UChar* pbegin, UChar* pend, int pos, uint8_t* match_cache, OnigCacheIndex *table, int num_cache_size, int num_cache_table) { - int l = 0, r = num_cache_table - 1, m1, m2; + int l = 0, r = num_cache_table - 1, m1 = 0, m2 = 0; int is_inc = *pend == OP_REPEAT_INC || *pend == OP_REPEAT_INC_NG; OnigCacheIndex *item1, *item2; int k1, k2; @@ -1256,7 +1256,7 @@ static void reset_match_cache(regex_t* reg, UChar* pbegin, UChar* pend, int pos, k2 += base; if ((k1 >> 3) == (k2 >> 3)) { - match_cache[k1 >> 3] &= ((1 << (8 - (k2 & 7) - 1)) - 1 << ((k2 & 7) + 1)) | ((1 << (k1 & 7)) - 1); + match_cache[k1 >> 3] &= (((1 << (8 - (k2 & 7) - 1)) - 1) << ((k2 & 7) + 1)) | ((1 << (k1 & 7)) - 1); } else { int i = k1 >> 3; if (k1 & 7) { @@ -1266,7 +1266,7 @@ static void reset_match_cache(regex_t* reg, UChar* pbegin, UChar* pend, int pos, if (i < (k2 >> 3)) { xmemset(&match_cache[i], 0, (k2 >> 3) - i); if (k2 & 7) { - match_cache[k2 >> 3] &= ((1 << (8 - (k2 & 7) - 1)) - 1 << ((k2 & 7) + 1)); + match_cache[k2 >> 3] &= (((1 << (8 - (k2 & 7) - 1)) - 1) << ((k2 & 7) + 1)); } } } From 14845ab4ffccc317a08629f4503c87ee97621c0b Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Tue, 8 Nov 2022 23:36:14 +0900 Subject: [PATCH 062/104] Ensure that the table size for CACHE_MATCH fits with int Currently, the keys for CACHE_MATCH are handled as an `int` type. So we should make sure the table size are smaller than the range of `int`. 
--- regexec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regexec.c b/regexec.c index 94d03241a53304..e8fffccd8b661a 100644 --- a/regexec.c +++ b/regexec.c @@ -3842,7 +3842,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, size_t len = (end - str) + 1; size_t match_cache_size8 = (size_t)msa->num_cache_opcode * len; /* overflow check */ - if (match_cache_size8 / len != (size_t)msa->num_cache_opcode) { + if (match_cache_size8 >= INT_MAX_LIMIT) { return ONIGERR_MEMORY; } size_t match_cache_size = (match_cache_size8 >> 3) + (match_cache_size8 & 7 ? 1 : 0); From d868f4ca31339095991e162e010fcda0f2d7bd39 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Wed, 9 Nov 2022 00:37:46 +0900 Subject: [PATCH 063/104] Check for integer overflow in the allocation of match_cache table --- regexec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/regexec.c b/regexec.c index e8fffccd8b661a..febcb03f640b14 100644 --- a/regexec.c +++ b/regexec.c @@ -3842,6 +3842,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, size_t len = (end - str) + 1; size_t match_cache_size8 = (size_t)msa->num_cache_opcode * len; /* overflow check */ + if (match_cache_size8 / len != (size_t)msa->num_cache_opcode) { + return ONIGERR_MEMORY; + } + /* Currently, int is used for the key of match_cache */ if (match_cache_size8 >= INT_MAX_LIMIT) { return ONIGERR_MEMORY; } From 36ff0521c1e81f83bf050d428815ae7413960136 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Wed, 9 Nov 2022 12:19:52 +0900 Subject: [PATCH 064/104] Use long instead of int --- regexec.c | 30 +++++++++++++++--------------- regint.h | 15 ++++++++------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/regexec.c b/regexec.c index febcb03f640b14..cfef254c2e66d4 100644 --- a/regexec.c +++ b/regexec.c @@ -235,15 +235,15 @@ onig_get_capture_tree(OnigRegion* region) /* count number of jump-like opcodes for allocation of cache memory. 
*/ /* return -1 if we cannot optimize the regex matching by using cache. */ -static int count_num_cache_opcode(regex_t* reg, int* table_size) +static long count_num_cache_opcode(regex_t* reg, long* table_size) { - int num = 0; + long num = 0; UChar* p = reg->p; UChar* pend = p + reg->used; LengthType len; MemNumType mem; MemNumType current_mem = -1; - int current_mem_num = 0; + long current_mem_num = 0; OnigEncoding enc = reg->enc; while (p < pend) { @@ -456,8 +456,8 @@ static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table) LengthType len; MemNumType mem; MemNumType current_mem = -1; - int num = 0; - int current_mem_num = 0; + long num = 0; + long current_mem_num = 0; OnigEncoding enc = reg->enc; while (p < pend) { @@ -1162,9 +1162,9 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, if (enable) {\ int cache_index = find_cache_index_table((reg), (stk), (repeat_stk), (table), (num_cache_table), (p));\ if (cache_index >= 0) {\ - int key = (num_cache_size) * (int)(pos) + cache_index;\ - int index = key >> 3;\ - int mask = 1 << (key & 7);\ + long key = (num_cache_size) * (long)(pos) + cache_index;\ + long index = key >> 3;\ + long mask = 1 << (key & 7);\ if ((match_cache)[index] & mask) {\ goto fail;\ }\ @@ -1214,11 +1214,11 @@ static int find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackInd return range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (count - range->lower) + item->num; } -static void reset_match_cache(regex_t* reg, UChar* pbegin, UChar* pend, int pos, uint8_t* match_cache, OnigCacheIndex *table, int num_cache_size, int num_cache_table) { - int l = 0, r = num_cache_table - 1, m1 = 0, m2 = 0; +static void reset_match_cache(regex_t* reg, UChar* pbegin, UChar* pend, long pos, uint8_t* match_cache, OnigCacheIndex *table, long num_cache_size, long num_cache_table) { + long l = 0, r = num_cache_table - 1, m1 = 0, m2 = 0; int is_inc = *pend == OP_REPEAT_INC || *pend == 
OP_REPEAT_INC_NG; OnigCacheIndex *item1, *item2; - int k1, k2; + long k1, k2; while (l <= r) { m1 = (l + r) / 2; @@ -3428,7 +3428,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, default: goto unexpected_bytecode_error; } - reset_match_cache(reg, addr, pbegin, (int)(s - str), msa->match_cache, msa->cache_index_table, msa->num_cache_table ,msa->num_cache_opcode); + reset_match_cache(reg, addr, pbegin, (long)(s - str), msa->match_cache, msa->cache_index_table, msa->num_cache_table ,msa->num_cache_opcode); } # endif } @@ -3820,8 +3820,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, pkeep = stk->u.state.pkeep; #ifdef USE_CACHE_MATCH_OPT - if (++msa->num_fail >= (int)(end - str) + 1 && msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { - int table_size = 0; + if (++msa->num_fail >= (long)(end - str) + 1 && msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { + long table_size = 0; msa->enable_cache_match_opt = 1; if (msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) { msa->num_cache_opcode = count_num_cache_opcode(reg, &table_size); @@ -3846,7 +3846,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, return ONIGERR_MEMORY; } /* Currently, int is used for the key of match_cache */ - if (match_cache_size8 >= INT_MAX_LIMIT) { + if (match_cache_size8 >= LONG_MAX_LIMIT) { return ONIGERR_MEMORY; } size_t match_cache_size = (match_cache_size8 >> 3) + (match_cache_size8 & 7 ? 
1 : 0); diff --git a/regint.h b/regint.h index 12b5d5c70a5544..75073e6377a48e 100644 --- a/regint.h +++ b/regint.h @@ -387,6 +387,7 @@ typedef unsigned int BitStatusType; #define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1) +#define LONG_MAX_LIMIT ((1UL << (SIZEOF_LONG * 8 - 1)) - 1) #define DIGITVAL(code) ((code) - '0') #define ODIGITVAL(code) DIGITVAL(code) @@ -875,7 +876,7 @@ typedef struct _OnigStackType { #ifdef USE_CACHE_MATCH_OPT typedef struct { UChar *addr; - int num; + long num; int outer_repeat; } OnigCacheIndex; #endif @@ -903,12 +904,12 @@ typedef struct { uint64_t end_time; #endif #ifdef USE_CACHE_MATCH_OPT - int num_fail; - int enable_cache_match_opt; - int num_cache_opcode; - int num_cache_table; - OnigCacheIndex *cache_index_table; - uint8_t *match_cache; + long num_fail; + int enable_cache_match_opt; + long num_cache_opcode; + long num_cache_table; + OnigCacheIndex* cache_index_table; + uint8_t* match_cache; #endif } OnigMatchArg; From 1dc4128e92ed6e7af8556abdfb657c135f4e13b7 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Wed, 9 Nov 2022 16:22:42 +0900 Subject: [PATCH 065/104] Reduce warnings --- include/ruby/onigmo.h | 4 ++-- regexec.c | 18 ++++++++---------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h index 703f38f5907153..348c4ec08f5f95 100644 --- a/include/ruby/onigmo.h +++ b/include/ruby/onigmo.h @@ -744,8 +744,8 @@ typedef struct { typedef struct { int lower; int upper; - int base_num; - int inner_num; + long base_num; + long inner_num; } OnigRepeatRange; typedef void (*OnigWarnFunc)(const char* s); diff --git a/regexec.c b/regexec.c index cfef254c2e66d4..75ffb86c4510dc 100644 --- a/regexec.c +++ b/regexec.c @@ -381,18 +381,16 @@ static long count_num_cache_opcode(regex_t* reg, long* table_size) case OP_REPEAT_INC: case OP_REPEAT_INC_NG: GET_MEMNUM_INC(mem, p); - //fprintf(stderr, "OP_REPEAT %d\n", mem); if (mem != current_mem) { // A lone or invalid OP_REPEAT_INC 
is found. return NUM_CACHE_OPCODE_FAIL; } { - int inner_num = num - current_mem_num; + long inner_num = num - current_mem_num; OnigRepeatRange *repeat_range = &reg->repeat_range[mem]; repeat_range->inner_num = inner_num; num -= inner_num; num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower); - //fprintf(stderr, "lower %d < upper %d\n", repeat_range->lower, repeat_range->upper); if (repeat_range->lower < repeat_range->upper) { *table_size += 1; } @@ -614,7 +612,7 @@ static void init_cache_index_table(regex_t* reg, OnigCacheIndex *table) case OP_REPEAT_INC_NG: GET_MEMNUM_INC(mem, p); { - int inner_num = num - current_mem_num; + long inner_num = num - current_mem_num; OnigRepeatRange *repeat_range = &reg->repeat_range[mem]; if (repeat_range->lower < repeat_range->upper) { table->addr = pbegin; @@ -1160,7 +1158,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define DO_CACHE_MATCH_OPT(reg,stk,repeat_stk,enable,p,num_cache_table,num_cache_size,table,pos,match_cache) do {\ if (enable) {\ - int cache_index = find_cache_index_table((reg), (stk), (repeat_stk), (table), (num_cache_table), (p));\ + long cache_index = find_cache_index_table((reg), (stk), (repeat_stk), (table), (num_cache_table), (p));\ if (cache_index >= 0) {\ long key = (num_cache_size) * (long)(pos) + cache_index;\ long index = key >> 3;\ @@ -1173,9 +1171,9 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, }\ } while (0) -static int find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackIndex *repeat_stk, OnigCacheIndex* table, int num_cache_table, UChar* p) +static long find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackIndex *repeat_stk, OnigCacheIndex* table, long num_cache_table, UChar* p) { - int l = 0, r = num_cache_table - 1, m = 0; + long l = 0, r = num_cache_table - 1, m = 0; OnigCacheIndex* item; OnigRepeatRange* range; OnigStackType 
*stkp; @@ -1218,7 +1216,7 @@ static void reset_match_cache(regex_t* reg, UChar* pbegin, UChar* pend, long pos long l = 0, r = num_cache_table - 1, m1 = 0, m2 = 0; int is_inc = *pend == OP_REPEAT_INC || *pend == OP_REPEAT_INC_NG; OnigCacheIndex *item1, *item2; - long k1, k2; + long k1, k2, base; while (l <= r) { m1 = (l + r) / 2; @@ -1251,14 +1249,14 @@ static void reset_match_cache(regex_t* reg, UChar* pbegin, UChar* pend, long pos else k2 = range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (range->upper - range->lower - (is_inc ? 1 : 0)) + item2->num; } - int base = pos * num_cache_size; + base = pos * num_cache_size; k1 += base; k2 += base; if ((k1 >> 3) == (k2 >> 3)) { match_cache[k1 >> 3] &= (((1 << (8 - (k2 & 7) - 1)) - 1) << ((k2 & 7) + 1)) | ((1 << (k1 & 7)) - 1); } else { - int i = k1 >> 3; + long i = k1 >> 3; if (k1 & 7) { match_cache[k1 >> 3] &= (1 << ((k1 & 7) - 1)) - 1; i++; From 0e1e1b19804b7ea704ec43c4fb23082f7e74da3b Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Wed, 9 Nov 2022 10:15:39 -0600 Subject: [PATCH 066/104] [DOC] Enhanced RDoc for IO (#6669) --- doc/io_streams.rdoc | 517 ----------------------------------------- file.c | 2 +- io.c | 548 +++++++++++++++++++++++++++++++++++--------- 3 files changed, 437 insertions(+), 630 deletions(-) delete mode 100644 doc/io_streams.rdoc diff --git a/doc/io_streams.rdoc b/doc/io_streams.rdoc deleted file mode 100644 index c8ce9991cfcce9..00000000000000 --- a/doc/io_streams.rdoc +++ /dev/null @@ -1,517 +0,0 @@ -== \IO Streams - -This page describes: - -- {Stream classes}[rdoc-ref:io_streams.rdoc@Stream+Classes]. -- {Pre-existing streams}[rdoc-ref:io_streams.rdoc@Pre-Existing+Streams]. -- {User-created streams}[rdoc-ref:io_streams.rdoc@User-Created+Streams]. -- {Basic \IO}[rdoc-ref:io_streams.rdoc@Basic+IO], including: - - - {Position}[rdoc-ref:io_streams.rdoc@Position]. - - {Open and closed streams}[rdoc-ref:io_streams.rdoc@Open+and+Closed+Streams]. 
- - {End-of-stream}[rdoc-ref:io_streams.rdoc@End-of-Stream]. - -- {Line \IO}[rdoc-ref:io_streams.rdoc@Line+IO], including: - - - {Line separator}[rdoc-ref:io_streams.rdoc@Line+Separator]. - - {Line limit}[rdoc-ref:io_streams.rdoc@Line+Limit]. - - {Line number}[rdoc-ref:io_streams.rdoc@Line+Number]. - - {Line options}[rdoc-ref:io_streams.rdoc@Line+Options]. - -- {Character \IO}[rdoc-ref:io_streams.rdoc@Character+IO]. -- {Byte \IO}[rdoc-ref:io_streams.rdoc@Byte+IO]. -- {Codepoint \IO}[rdoc-ref:io_streams.rdoc@Codepoint+IO]. - -=== Stream Classes - -Ruby supports processing data as \IO streams; -that is, as data that may be read, re-read, written, re-written, -and traversed via iteration. - -Core classes with such support include: - -- IO, and its derived class File. -- {StringIO}[rdoc-ref:StringIO]: for processing a string. -- {ARGF}[rdoc-ref:ARGF]: for processing files cited on the command line. - -Except as noted, the instance methods described on this page -are available in classes \ARGF, \File, \IO, and \StringIO. -A few, also noted, are available in class \Kernel. - -=== Pre-Existing Streams - -Pre-existing streams that are referenced by constants include: - -- $stdin: read-only instance of \IO. -- $stdout: write-only instance of \IO. -- $stderr: read-only instance of \IO. -- \ARGF: read-only instance of \ARGF. - -=== User-Created Streams - -You can create streams: - -- \File: - - - File.new: returns a new \File object; - the file should be closed when no longer needed. - - File.open: passes a new \File object to given the block; - the file is automatically closed on block exit. - -- \IO: - - - IO.new: returns a new \IO object for the given integer file descriptor; - the \IO object should be closed when no longer needed. - - IO.open: passes a new \IO object to the given block; - the \IO object is automatically closed on block exit. - - IO.popen: returns a new \IO object that is connected to the $stdin - and $stdout of a newly-launched subprocess. 
- - Kernel#open: returns a new \IO object connected to a given source: - stream, file, or subprocess; - the \IO object should be closed when no longer needed. - -- \StringIO: - - - StringIO.new: returns a new \StringIO object; - the \StringIO object should be closed when no longer needed. - - StringIO.open: passes a new \StringIO object to the given block; - the \StringIO object is automatically closed on block exit. - -(You cannot create an \ARGF object, but one already exists.) - -=== About the Examples - -Many examples here use these variables: - - :include: doc/examples/files.rdoc - -=== Basic \IO - -You can perform basic stream \IO with these methods: - -- IO#read: Returns all remaining or the next _n_ bytes read from the stream, - for a given _n_: - - f = File.new('t.txt') - f.read # => "First line\nSecond line\n\nFourth line\nFifth line\n" - f.rewind - f.read(30) # => "First line\r\nSecond line\r\n\r\nFou" - f.read(30) # => "rth line\r\nFifth line\r\n" - f.read(30) # => nil - f.close - -- IO#write: Writes one or more given strings to the stream: - - $stdout.write('Hello', ', ', 'World!', "\n") # => 14 - $stdout.write('foo', :bar, 2, "\n") - - Output: - - Hello, World! - foobar2 - -==== Position - -An \IO stream has a nonnegative integer _position_, -which is the byte offset at which the next read or write is to occur. -A new stream has position zero (and line number zero); -method +rewind+ resets the position (and line number) to zero. 
- -The relevant methods: - -- IO#tell (aliased as +#pos+): - Returns the current position (in bytes) in the stream: - - f = File.new('t.txt') - f.tell # => 0 - f.gets # => "First line\n" - f.tell # => 12 - f.close - -- IO#pos=: Sets the position of the stream (in bytes): - - f = File.new('t.txt') - f.tell # => 0 - f.pos = 20 # => 20 - f.tell # => 20 - f.close - -- IO#seek: Sets the position of the stream to a given integer +offset+ - (in bytes), with respect to a given constant +whence+, which is one of: - - - +:CUR+ or IO::SEEK_CUR: - Repositions the stream to its current position plus the given +offset+: - - f = File.new('t.txt') - f.tell # => 0 - f.seek(20, :CUR) # => 0 - f.tell # => 20 - f.seek(-10, :CUR) # => 0 - f.tell # => 10 - f.close - - - +:END+ or IO::SEEK_END: - Repositions the stream to its end plus the given +offset+: - - f = File.new('t.txt') - f.tell # => 0 - f.seek(0, :END) # => 0 # Repositions to stream end. - f.tell # => 52 - f.seek(-20, :END) # => 0 - f.tell # => 32 - f.seek(-40, :END) # => 0 - f.tell # => 12 - f.close - - - +:SET+ or IO:SEEK_SET: - Repositions the stream to the given +offset+: - - f = File.new('t.txt') - f.tell # => 0 - f.seek(20, :SET) # => 0 - f.tell # => 20 - f.seek(40, :SET) # => 0 - f.tell # => 40 - f.close - -- IO#rewind: Positions the stream to the beginning (also resetting the line number): - - f = File.new('t.txt') - f.tell # => 0 - f.gets # => "First line\n" - f.tell # => 12 - f.rewind # => 0 - f.tell # => 0 - f.lineno # => 0 - f.close - -==== Open and Closed Streams - -A new \IO stream may be open for reading, open for writing, or both. - -A stream is automatically closed when claimed by the garbage collector. - -Attempted reading or writing on a closed stream raises an exception. - -- IO#close: Closes the stream for both reading and writing. -- IO#close_read: Closes the stream for reading; not in ARGF. -- IO#close_write: Closes the stream for writing; not in ARGF. -- IO#closed?: Returns whether the stream is closed. 
- -==== End-of-Stream - -You can query whether a stream is positioned at its end using -method IO#eof? (also aliased as +#eof+). - -You can reposition to end-of-stream by reading all stream content: - - f = File.new('t.txt') - f.eof? # => false - f.read # => "First line\nSecond line\n\nFourth line\nFifth line\n" - f.eof? # => true - -Or by using method IO#seek: - - f = File.new('t.txt') - f.eof? # => false - f.seek(0, :END) - f.eof? # => true - -=== Line \IO - -You can read an \IO stream line-by-line using these methods: - -- IO#each_line: Passes each line to the block: - - f = File.new('t.txt') - f.each_line {|line| p line } - - Output: - - "First line\n" - "Second line\n" - "\n" - "Fourth line\n" - "Fifth line\n" - - The reading may begin mid-line: - - f = File.new('t.txt') - f.pos = 27 - f.each_line {|line| p line } - - Output: - - "rth line\n" - "Fifth line\n" - -- IO#gets (also in Kernel): Returns the next line (which may begin mid-line): - - f = File.new('t.txt') - f.gets # => "First line\n" - f.gets # => "Second line\n" - f.pos = 27 - f.gets # => "rth line\n" - f.readlines # => ["Fifth line\n"] - f.gets # => nil - -- IO#readline (also in Kernel; not in StringIO): - Like #gets, but raises an exception at end-of-stream. - -- IO#readlines (also in Kernel): Returns all remaining lines in an array; - may begin mid-line: - - f = File.new('t.txt') - f.pos = 19 - f.readlines # => ["ine\n", "\n", "Fourth line\n", "Fifth line\n"] - f.readlines # => [] - -Each of these reader methods may be called with: - -- An optional line separator, +sep+. -- An optional line-size limit, +limit+. -- Both +sep+ and +limit+. 
- -You can write to an \IO stream line-by-line using this method: - -- IO#puts (also in Kernel; not in \StringIO): Writes objects to the stream: - - f = File.new('t.tmp', 'w') - f.puts('foo', :bar, 1, 2.0, Complex(3, 0)) - f.flush - File.read('t.tmp') # => "foo\nbar\n1\n2.0\n3+0i\n" - -==== Line Separator - -The default line separator is the given by the global variable $/, -whose value is by default "\n". -The line to be read next is all data from the current position -to the next line separator: - - f = File.new('t.txt') - f.gets # => "First line\n" - f.gets # => "Second line\n" - f.gets # => "\n" - f.gets # => "Fourth line\n" - f.gets # => "Fifth line\n" - f.close - -You can specify a different line separator: - - f = File.new('t.txt') - f.gets('l') # => "First l" - f.gets('li') # => "ine\nSecond li" - f.gets('lin') # => "ne\n\nFourth lin" - f.gets # => "e\n" - f.close - -There are two special line separators: - -- +nil+: The entire stream is read into a single string: - - f = File.new('t.txt') - f.gets(nil) # => "First line\nSecond line\n\nFourth line\nFifth line\n" - f.close - -- '' (the empty string): The next "paragraph" is read - (paragraphs being separated by two consecutive line separators): - - f = File.new('t.txt') - f.gets('') # => "First line\nSecond line\n\n" - f.gets('') # => "Fourth line\nFifth line\n" - f.close - -==== Line Limit - -The line to be read may be further defined by an optional integer argument +limit+, -which specifies that the number of bytes returned may not be (much) longer -than the given +limit+; -a multi-byte character will not be split, and so a line may be slightly longer -than the given limit. - -If +limit+ is not given, the line is determined only by +sep+. - - # Text with 1-byte characters. - File.new('t.txt') {|f| f.gets(1) } # => "F" - File.new('t.txt') {|f| f.gets(2) } # => "Fi" - File.new('t.txt') {|f| f.gets(3) } # => "Fir" - File.new('t.txt') {|f| f.gets(4) } # => "Firs" - # No more than one line. 
- File.new('t.txt') {|f| f.gets(10) } # => "First line" - File.new('t.txt') {|f| f.gets(11) } # => "First line\n" - File.new('t.txt') {|f| f.gets(12) } # => "First line\n" - - # Text with 2-byte characters, which will not be split. - File.new('r.rus') {|f| f.gets(1).size } # => 1 - File.new('r.rus') {|f| f.gets(2).size } # => 1 - File.new('r.rus') {|f| f.gets(3).size } # => 2 - File.new('r.rus') {|f| f.gets(4).size } # => 2 - -==== Line Separator and Line Limit - -With arguments +sep+ and +limit+ given, -combines the two behaviors: - -- Returns the next line as determined by line separator +sep+. -- But returns no more bytes than are allowed by the limit. - -Example: - - File.new('t.txt') {|f| f.gets('li', 20) } # => "First li" - File.new('t.txt') {|f| f.gets('li', 2) } # => "Fi" - -==== Line Number - -A readable \IO stream has a _line_ _number_, -which is the non-negative integer line number -in the stream where the next read will occur. - -The line number is the number of lines read by certain line-oriented methods -(IO.foreach, IO#each_line, IO#gets, IO#readline, and IO#readlines) -according to the given (or default) line separator +sep+. - -A new stream is initially has line number zero (and position zero); -method +rewind+ resets the line number (and position) to zero. - -\Method IO#lineno returns the line number. - -Reading lines from a stream usually changes its line number: - - f = File.new('t.txt', 'r') - f.lineno # => 0 - f.readline # => "This is line one.\n" - f.lineno # => 1 - f.readline # => "This is the second line.\n" - f.lineno # => 2 - f.readline # => "Here's the third line.\n" - f.lineno # => 3 - f.eof? 
# => true - f.close - -Iterating over lines in a stream usually changes its line number: - - File.open('t.txt') do |f| - f.each_line do |line| - p "position=#{f.pos} eof?=#{f.eof?} lineno=#{f.lineno}" - end - end - -Output: - - "position=11 eof?=false lineno=1" - "position=23 eof?=false lineno=2" - "position=24 eof?=false lineno=3" - "position=36 eof?=false lineno=4" - "position=47 eof?=true lineno=5" - -==== Line Options - -A number of \IO methods accept optional keyword arguments -that determine how lines in a stream are to be treated: - -- +:chomp+: If +true+, line separators are omitted; default is +false+. - -=== Character \IO - -You can process an \IO stream character-by-character using these methods: - -- IO#getc: Reads and returns the next character from the stream: - - f = File.new('t.rus') - f.getc # => "т" - f.getc # => "е" - f.getc # => "с" - f.getc # => "т" - f.getc # => nil - -- IO#readchar (not in \StringIO): - Like #getc, but raises an exception at end-of-stream: - - f.readchar # Raises EOFError. 
- -- IO#ungetc (not in \ARGF): - Pushes back ("unshifts") a character or integer onto the stream: - - path = 't.tmp' - File.write(path, 'foo') - File.open(path) do |f| - f.ungetc('т') - f.read # => "тfoo" - end - -- IO#putc (also in Kernel): Writes a character to the stream: - - File.open('t.tmp', 'w') do |f| - f.putc('т') - f.putc('е') - f.putc('с') - f.putc('т') - end - File.read('t.tmp') # => "тест" - -- IO#each_char: Reads each remaining character in the stream, - passing the character to the given block: - - File.open('t.rus') do |f| - f.pos = 4 - f.each_char {|c| p c } - end - - Output: - - "с" - "т" - -=== Byte \IO - -You can process an \IO stream byte-by-byte using these methods: - -- IO#getbyte: Returns the next 8-bit byte as an integer in range 0..255: - - File.read('t.dat') - # => "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94" - File.read('t.dat') - # => "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94" - f = File.new('t.dat') - f.getbyte # => 254 - f.getbyte # => 255 - f.seek(-2, :END) - f.getbyte # => 153 - f.getbyte # => 148 - f.getbyte # => nil - -- IO#readbyte (not in \StringIO): - Like #getbyte, but raises an exception if at end-of-stream: - - f.readbyte # Raises EOFError. 
- -- IO#ungetbyte (not in \ARGF): - Pushes back ("unshifts") a byte back onto the stream: - - f.ungetbyte(0) - f.ungetbyte(01) - f.read # => "\u0001\u0000" - -- IO#each_byte: Reads each remaining byte in the stream, - passing the byte to the given block: - - f.seek(-4, :END) - f.each_byte {|b| p b } - - Output: - - 153 - 147 - 153 - 148 - -=== Codepoint \IO - -You can process an \IO stream codepoint-by-codepoint using method -+#each_codepoint+: - - a = [] - File.open('t.rus') do |f| - f.each_codepoint {|c| a << c } - end - a # => [1090, 1077, 1089, 1090] diff --git a/file.c b/file.c index efd15ea8a16af8..93f5898ccde0b8 100644 --- a/file.c +++ b/file.c @@ -7147,7 +7147,7 @@ const char ruby_null_device[] = * strings read are converted from external to internal encoding, * and strings written are converted from internal to external encoding. * For further details about transcoding input and output, - * see {Encodings}[rdoc-ref:io_streams.rdoc@Encodings]. + * see {Encodings}[rdoc-ref:encodings.rdoc@Encodings]. 
* * If the external encoding is 'BOM|UTF-8', 'BOM|UTF-16LE' * or 'BOM|UTF16-BE', diff --git a/io.c b/io.c index 62b25c2ebbcaa6..63a96bf2b80a68 100644 --- a/io.c +++ b/io.c @@ -2373,7 +2373,7 @@ rb_io_flush(VALUE io) * tell -> integer * * Returns the current position (in bytes) in +self+ - * (see {Position}[rdoc-ref:io_streams.rdoc@Position]): + * (see {Position}[rdoc-ref:IO@Position]): * * f = File.open('t.txt') * f.tell # => 0 @@ -2439,7 +2439,7 @@ interpret_seek_whence(VALUE vwhence) * seek(offset, whence = IO::SEEK_SET) -> 0 * * Seeks to the position given by integer +offset+ - * (see {Position}[rdoc-ref:io_streams.rdoc@Position]) + * (see {Position}[rdoc-ref:IO@Position]) * and constant +whence+, which is one of: * * - +:CUR+ or IO::SEEK_CUR: @@ -2499,7 +2499,7 @@ rb_io_seek_m(int argc, VALUE *argv, VALUE io) * pos = new_position -> new_position * * Seeks to the given +new_position+ (in bytes); - * see {Position}[rdoc-ref:io_streams.rdoc@Position]: + * see {Position}[rdoc-ref:IO@Position]: * * f = File.open('t.txt') * f.tell # => 0 @@ -2533,8 +2533,8 @@ static void clear_readconv(rb_io_t *fptr); * * Repositions the stream to its beginning, * setting both the position and the line number to zero; - * see {Position}[rdoc-ref:io_streams.rdoc@Position] - * and {Line Number}[rdoc-ref:io_streams.rdoc@Line+Number]: + * see {Position}[rdoc-ref:IO@Position] + * and {Line Number}[rdoc-ref:IO@Line+Number]: * * f = File.open('t.txt') * f.tell # => 0 @@ -2624,7 +2624,7 @@ io_fillbuf(rb_io_t *fptr) * eof -> true or false * * Returns +true+ if the stream is positioned at its end, +false+ otherwise; - * see {Position}[rdoc-ref:io_streams.rdoc@Position]: + * see {Position}[rdoc-ref:IO@Position]: * * f = File.open('t.txt') * f.eof # => false @@ -3640,10 +3640,11 @@ io_write_nonblock(rb_execution_context_t *ec, VALUE io, VALUE str, VALUE ex) /* * call-seq: - * read(maxlen = nil) -> string or nil - * read(maxlen = nil, out_string) -> out_string or nil + * read(maxlen = nil, 
out_string = nil) -> new_string, out_string, or nil * - * Reads bytes from the stream (in binary mode): + * Reads bytes from the stream (in binary mode); + * the stream must be opened for reading + * (see {Access Modes}[rdoc-ref:File@Access+Modes]): * * - If +maxlen+ is +nil+, reads all bytes. * - Otherwise reads +maxlen+ bytes, if available. @@ -4192,13 +4193,13 @@ rb_io_gets_internal(VALUE io) /* * call-seq: - * gets(sep = $/, **line_opts) -> string or nil - * gets(limit, **line_opts) -> string or nil - * gets(sep, limit, **line_opts) -> string or nil + * gets(sep = $/, chomp: false) -> string or nil + * gets(limit, chomp: false) -> string or nil + * gets(sep, limit, chomp: false) -> string or nil * * Reads and returns a line from the stream; * assigns the return value to $_. - * See {Line IO}[rdoc-ref:io_streams.rdoc@Line+IO]. + * See {Line IO}[rdoc-ref:IO@Line+IO]. * * With no arguments given, returns the next line * as determined by line separator $/, or +nil+ if none: @@ -4215,7 +4216,7 @@ rb_io_gets_internal(VALUE io) * With only string argument +sep+ given, * returns the next line as determined by line separator +sep+, * or +nil+ if none; - * see {Line Separator}[rdoc-ref:io_streams.rdoc@Line+Separator]: + * see {Line Separator}[rdoc-ref:IO@Line+Separator]: * * f = File.new('t.txt') * f.gets('l') # => "First l" @@ -4236,7 +4237,7 @@ rb_io_gets_internal(VALUE io) * * With only integer argument +limit+ given, * limits the number of bytes in the line; - * see {Line Limit}[rdoc-ref:io_streams.rdoc@Line+Limit]: + * see {Line Limit}[rdoc-ref:IO@Line+Limit]: * * # No more than one line. * File.open('t.txt') {|f| f.gets(10) } # => "First line" @@ -4250,8 +4251,8 @@ rb_io_gets_internal(VALUE io) * or +nil+ if none. * - But returns no more bytes than are allowed by the limit.
* - * For all forms above, optional keyword arguments +line_opts+ specify - * {Line Options}[rdoc-ref:io_streams.rdoc@Line+Options]: + * Optional keyword argument +chomp+ specifies whether line separators + * are to be omitted: * * f = File.open('t.txt') * # Chomp the lines. @@ -4280,8 +4281,8 @@ rb_io_gets_m(int argc, VALUE *argv, VALUE io) * call-seq: * lineno -> integer * - * Returns the current line number for the stream. - * See {Line Number}[rdoc-ref:io_streams.rdoc@Line+Number]. + * Returns the current line number for the stream; + * see {Line Number}[rdoc-ref:IO@Line+Number]. * */ @@ -4299,8 +4300,8 @@ rb_io_lineno(VALUE io) * call-seq: * lineno = integer -> integer * - * Sets and returns the line number for the stream. - * See {Line Number}[rdoc-ref:io_streams.rdoc@Line+Number]. + * Sets and returns the line number for the stream; + * see {Line Number}[rdoc-ref:IO@Line+Number]. * */ @@ -4317,12 +4318,14 @@ rb_io_set_lineno(VALUE io, VALUE lineno) /* * call-seq: - * readline(sep = $/, **line_opts) -> string - * readline(limit, **line_opts) -> string - * readline(sep, limit, **line_opts) -> string + * readline(sep = $/, chomp: false) -> string + * readline(limit, chomp: false) -> string + * readline(sep, limit, chomp: false) -> string * - * Reads a line as with IO#gets, but raises EOFError if already at end-of-file. + * Reads a line as with IO#gets, but raises EOFError if already at end-of-stream. * + * Optional keyword argument +chomp+ specifies whether line separators + * are to be omitted. 
*/ static VALUE @@ -4340,13 +4343,13 @@ static VALUE io_readlines(const struct getline_arg *arg, VALUE io); /* * call-seq: - * readlines(sep = $/, **line_opts) -> array - * readlines(limit, **line_opts) -> array - * readlines(sep, limit, **line_opts) -> array + * readlines(sep = $/, chomp: false) -> array + * readlines(limit, chomp: false) -> array + * readlines(sep, limit, chomp: false) -> array * * Reads and returns all remaining line from the stream; * does not modify $_. - * See {Line IO}[rdoc-ref:io_streams.rdoc@Line+IO]. + * See {Line IO}[rdoc-ref:IO@Line+IO]. * * With no arguments given, returns lines * as determined by line separator $/, or +nil+ if none: @@ -4360,7 +4363,7 @@ static VALUE io_readlines(const struct getline_arg *arg, VALUE io); * With only string argument +sep+ given, * returns lines as determined by line separator +sep+, * or +nil+ if none; - * see {Line Separator}[rdoc-ref:io_streams.rdoc@Line+Separator]: + * see {Line Separator}[rdoc-ref:IO@Line+Separator]: * * f = File.new('t.txt') * f.readlines('li') @@ -4381,7 +4384,7 @@ static VALUE io_readlines(const struct getline_arg *arg, VALUE io); * * With only integer argument +limit+ given, * limits the number of bytes in each line; - * see {Line Limit}[rdoc-ref:io_streams.rdoc@Line+Limit]: + * see {Line Limit}[rdoc-ref:IO@Line+Limit]: * * f = File.new('t.txt') * f.readlines(8) @@ -4394,8 +4397,8 @@ static VALUE io_readlines(const struct getline_arg *arg, VALUE io); * - Returns lines as determined by line separator +sep+. * - But returns no more bytes in a line than are allowed by the limit. 
* - * For all forms above, optional keyword arguments +line_opts+ specify - * {Line Options}[rdoc-ref:io_streams.rdoc@Line+Options]: + * Optional keyword argument +chomp+ specifies whether line separators + * are to be omitted: * * f = File.new('t.txt') * f.readlines(chomp: true) @@ -4429,15 +4432,15 @@ io_readlines(const struct getline_arg *arg, VALUE io) /* * call-seq: - * each_line(sep = $/, **line_opts) {|line| ... } -> self - * each_line(limit, **line_opts) {|line| ... } -> self - * each_line(sep, limit, **line_opts) {|line| ... } -> self + * each_line(sep = $/, chomp: false) {|line| ... } -> self + * each_line(limit, chomp: false) {|line| ... } -> self + * each_line(sep, limit, chomp: false) {|line| ... } -> self * each_line -> enumerator * * Calls the block with each remaining line read from the stream; - * does nothing if already at end-of-file; * returns +self+. - * See {Line IO}[rdoc-ref:io_streams.rdoc@Line+IO]. + * Does nothing if already at end-of-stream; + * see {Line IO}[rdoc-ref:IO@Line+IO]. * * With no arguments given, reads lines * as determined by line separator $/: @@ -4457,7 +4460,7 @@ io_readlines(const struct getline_arg *arg, VALUE io) * * With only string argument +sep+ given, * reads lines as determined by line separator +sep+; - * see {Line Separator}[rdoc-ref:io_streams.rdoc@Line+Separator]: + * see {Line Separator}[rdoc-ref:IO@Line+Separator]: * * f = File.new('t.txt') * f.each_line('li') {|line| p line } @@ -4493,7 +4496,7 @@ io_readlines(const struct getline_arg *arg, VALUE io) * * With only integer argument +limit+ given, * limits the number of bytes in each line; - * see {Line Limit}[rdoc-ref:io_streams.rdoc@Line+Limit]: + * see {Line Limit}[rdoc-ref:IO@Line+Limit]: * * f = File.new('t.txt') * f.each_line(8) {|line| p line } @@ -4517,8 +4520,8 @@ io_readlines(const struct getline_arg *arg, VALUE io) * - Calls with the next line as determined by line separator +sep+. * - But returns no more bytes than are allowed by the limit.
* - * For all forms above, optional keyword arguments +line_opts+ specify - * {Line Options}[rdoc-ref:io_streams.rdoc@Line+Options]: + * Optional keyword argument +chomp+ specifies whether line separators + * are to be omitted: * * f = File.new('t.txt') * f.each_line(chomp: true) {|line| p line } @@ -4560,7 +4563,7 @@ rb_io_each_line(int argc, VALUE *argv, VALUE io) * each_byte -> enumerator * * Calls the given block with each byte (0..255) in the stream; returns +self+. - * See {Byte IO}[rdoc-ref:io_streams.rdoc@Byte+IO]. + * See {Byte IO}[rdoc-ref:IO@Byte+IO]. * * f = File.new('t.rus') * a = [] @@ -4708,7 +4711,7 @@ io_getc(rb_io_t *fptr, rb_encoding *enc) * each_char -> enumerator * * Calls the given block with each character in the stream; returns +self+. - * See {Character IO}[rdoc-ref:io_streams.rdoc@Character+IO]. + * See {Character IO}[rdoc-ref:IO@Character+IO]. * * f = File.new('t.rus') * a = [] @@ -4869,8 +4872,8 @@ rb_io_each_codepoint(VALUE io) * getc -> character or nil * * Reads and returns the next 1-character string from the stream; - * returns +nil+ if already at end-of-file. - * See {Character IO}[rdoc-ref:io_streams.rdoc@Character+IO]. + * returns +nil+ if already at end-of-stream. + * See {Character IO}[rdoc-ref:IO@Character+IO]. * * f = File.open('t.txt') * f.getc # => "F" @@ -4902,8 +4905,8 @@ rb_io_getc(VALUE io) * readchar -> string * * Reads and returns the next 1-character string from the stream; - * raises EOFError if already at end-of-file. - * See {Character IO}[rdoc-ref:io_streams.rdoc@Character+IO]. + * raises EOFError if already at end-of-stream. + * See {Character IO}[rdoc-ref:IO@Character+IO]. * * f = File.open('t.txt') * f.readchar # => "F" @@ -4932,8 +4935,8 @@ rb_io_readchar(VALUE io) * getbyte -> integer or nil * * Reads and returns the next byte (in range 0..255) from the stream; - * returns +nil+ if already at end-of-file. - * See {Byte IO}[rdoc-ref:io_streams.rdoc@Byte+IO]. + * returns +nil+ if already at end-of-stream. 
+ * See {Byte IO}[rdoc-ref:IO@Byte+IO]. * * f = File.open('t.txt') * f.getbyte # => 70 @@ -4943,7 +4946,6 @@ rb_io_readchar(VALUE io) * f.close * * Related: IO#readbyte (may raise EOFError). - * */ VALUE @@ -4977,8 +4979,8 @@ rb_io_getbyte(VALUE io) * readbyte -> integer * * Reads and returns the next byte (in range 0..255) from the stream; - * raises EOFError if already at end-of-file. - * See {Byte IO}[rdoc-ref:io_streams.rdoc@Byte+IO]. + * raises EOFError if already at end-of-stream. + * See {Byte IO}[rdoc-ref:IO@Byte+IO]. * * f = File.open('t.txt') * f.readbyte # => 70 @@ -5009,7 +5011,7 @@ rb_io_readbyte(VALUE io) * * Pushes back ("unshifts") the given data onto the stream's buffer, * placing the data so that it is next to be read; returns +nil+. - * See {Byte IO}[rdoc-ref:io_streams.rdoc@Byte+IO]. + * See {Byte IO}[rdoc-ref:IO@Byte+IO]. * * Note that: * @@ -5070,7 +5072,7 @@ rb_io_ungetbyte(VALUE io, VALUE b) * * Pushes back ("unshifts") the given data onto the stream's buffer, * placing the data so that it is next to be read; returns +nil+. - * See {Character IO}[rdoc-ref:io_streams.rdoc@Character+IO]. + * See {Character IO}[rdoc-ref:IO@Character+IO]. * * Note that: * @@ -5155,8 +5157,10 @@ rb_io_ungetc(VALUE io, VALUE c) * Returns +true+ if the stream is associated with a terminal device (tty), * +false+ otherwise: * - * File.new('t.txt').isatty #=> false - * File.new('/dev/tty').isatty #=> true + * (f = File.new('t.txt')).isatty #=> false + * f.close + * (f = File.new('/dev/tty')).isatty #=> true + * f.close * * IO#tty? is an alias for IO#isatty. * @@ -5639,6 +5643,7 @@ rb_io_close(VALUE io) * * Closes the stream for both reading and writing * if open for either or both; returns +nil+. + * See {Open and Closed Streams}[rdoc-ref:IO@Open+and+Closed+Streams]. * * If the stream is open for writing, flushes any buffered writes * to the operating system before closing. @@ -5710,7 +5715,8 @@ io_close(VALUE io) * closed?
-> true or false * * Returns +true+ if the stream is closed for both reading and writing, - * +false+ otherwise: + * +false+ otherwise. + * See {Open and Closed Streams}[rdoc-ref:IO@Open+and+Closed+Streams]. * * IO.popen('ruby', 'r+') do |pipe| * puts pipe.closed? @@ -5726,8 +5732,6 @@ io_close(VALUE io) * false * true * - * See also {Open and Closed Streams}[rdoc-ref:io_streams.rdoc@Open+and+Closed+Streams]. - * * Related: IO#close_read, IO#close_write, IO#close. */ @@ -5757,6 +5761,7 @@ rb_io_closed(VALUE io) * * Closes the stream for reading if open for reading; * returns +nil+. + * See {Open and Closed Streams}[rdoc-ref:IO@Open+and+Closed+Streams]. * * If the stream was opened by IO.popen and is also closed for writing, * sets global variable $? (child exit status). @@ -5779,8 +5784,6 @@ rb_io_closed(VALUE io) * pid 14748 exit 0 * true * - * See also {Open and Closed Streams}[rdoc-ref:io_streams.rdoc@Open+and+Closed+Streams]. - * * Related: IO#close, IO#close_write, IO#closed?. */ @@ -5830,7 +5833,8 @@ rb_io_close_read(VALUE io) * close_write -> nil * * Closes the stream for writing if open for writing; - * returns +nil+: + * returns +nil+. + * See {Open and Closed Streams}[rdoc-ref:IO@Open+and+Closed+Streams]. * * Flushes any buffered writes to the operating system before closing. * @@ -5853,8 +5857,6 @@ rb_io_close_read(VALUE io) * pid 15044 exit 0 * true * - * See also {Open and Closed Streams}[rdoc-ref:io_streams.rdoc@Open+and+Closed+Streams]. - * * Related: IO#close, IO#close_read, IO#closed?. */ @@ -6066,7 +6068,7 @@ pread_internal_call(VALUE arg) * * - Reads at the given +offset+ (in bytes). * - Disregards, and does not modify, the stream's position - * (see {Position}[rdoc-ref:io_streams.rdoc@Position]). + * (see {Position}[rdoc-ref:IO@Position]). * - Bypasses any user space buffering in the stream. 
* * Because this method does not disturb the stream's state @@ -6142,7 +6144,7 @@ internal_pwrite_func(void *ptr) * * - Writes at the given +offset+ (in bytes). * - Disregards, and does not modify, the stream's position - * (see {Position}[rdoc-ref:io_streams.rdoc@Position]). + * (see {Position}[rdoc-ref:IO@Position]). * - Bypasses any user space buffering in the stream. * * Because this method does not disturb the stream's state @@ -8577,7 +8579,7 @@ deprecated_str_setter(VALUE val, ID id, VALUE *var) * Writes the given objects to the stream; returns +nil+. * Appends the output record separator $OUTPUT_RECORD_SEPARATOR * ($\\), if it is not +nil+. - * See {Line IO}[rdoc-ref:io_streams.rdoc@Line+IO]. + * See {Line IO}[rdoc-ref:IO@Line+IO]. * * With argument +objects+ given, for each object: * @@ -8715,7 +8717,7 @@ rb_f_print(int argc, const VALUE *argv, VALUE _) * putc(object) -> object * * Writes a character to the stream. - * See {Character IO}[rdoc-ref:io_streams.rdoc@Character+IO]. + * See {Character IO}[rdoc-ref:IO@Character+IO]. * * If +object+ is numeric, converts to integer if necessary, * then writes the character whose code is the @@ -8819,7 +8821,7 @@ io_puts_ary(VALUE ary, VALUE out, int recur) * returns +nil+.\ * Writes a newline after each that does not already end with a newline sequence. * If called without arguments, writes a newline. - * See {Line IO}[rdoc-ref:io_streams.rdoc@Line+IO]. + * See {Line IO}[rdoc-ref:IO@Line+IO]. * * Note that each added newline is the character "\n", * not the output record separator ($\\). 
@@ -9424,31 +9426,32 @@ rb_io_set_encoding_by_bom(VALUE io) * * Argument +path+ must be a valid file path: * - * File.new('/etc/fstab') - * File.new('t.txt') + * f = File.new('/etc/fstab') + * f.close + * f = File.new('t.txt') + * f.close * * Optional argument +mode+ (defaults to 'r') must specify a valid mode; * see {Access Modes}[rdoc-ref:File@Access+Modes]: * - * File.new('t.tmp', 'w') - * File.new('t.tmp', File::RDONLY) + * f = File.new('t.tmp', 'w') + * f.close + * f = File.new('t.tmp', File::RDONLY) + * f.close * * Optional argument +perm+ (defaults to 0666) must specify valid permissions * see {File Permissions}[rdoc-ref:File@File+Permissions]: * - * File.new('t.tmp', File::CREAT, 0644) - * File.new('t.tmp', File::CREAT, 0444) + * f = File.new('t.tmp', File::CREAT, 0644) + * f.close + * f = File.new('t.tmp', File::CREAT, 0444) + * f.close * * Optional keyword arguments +opts+ specify: * * - {Open Options}[rdoc-ref:IO@Open+Options]. * - {Encoding options}[rdoc-ref:encodings.rdoc@Encoding+Options]. * - * Examples: - * - * File.new('t.tmp', autoclose: true) - * File.new('t.tmp', internal_encoding: nil) - * */ static VALUE @@ -10231,9 +10234,9 @@ static VALUE argf_readline(int, VALUE *, VALUE); /* * call-seq: - * readline(sep = $/, **line_opts) -> string - * readline(limit, **line_opts) -> string - * readline(sep, limit, **line_opts) -> string + * readline(sep = $/, chomp: false) -> string + * readline(limit, chomp: false) -> string + * readline(sep, limit, chomp: false) -> string * * Equivalent to method Kernel#gets, except that it raises an exception * if called at end-of-stream: @@ -10242,6 +10245,8 @@ static VALUE argf_readline(int, VALUE *, VALUE); * ["First line\n", "Second line\n", "\n", "Fourth line\n", "Fifth line\n"] * in `readline': end of file reached (EOFError) * + * Optional keyword argument +chomp+ specifies whether line separators + * are to be omitted. 
*/ static VALUE @@ -10290,18 +10295,18 @@ static VALUE argf_readlines(int, VALUE *, VALUE); /* * call-seq: - * readlines(sep = $/, **line_opts) -> array - * readlines(limit, **line_opts) -> array - * readlines(sep, limit, **line_opts) -> array + * readlines(sep = $/, chomp: false, **enc_opts) -> array + * readlines(limit, chomp: false, **enc_opts) -> array + * readlines(sep, limit, chomp: false, **enc_opts) -> array * * Returns an array containing the lines returned by calling - * Kernel#gets until the end-of-file is reached; - * (see {Line IO}[rdoc-ref:io_streams.rdoc@Line+IO]). + * Kernel#gets until the end-of-stream is reached; + * (see {Line IO}[rdoc-ref:IO@Line+IO]). * * With only string argument +sep+ given, * returns the remaining lines as determined by line separator +sep+, * or +nil+ if none; - * see {Line Separator}[rdoc-ref:io_streams.rdoc@Line+Separator]: + * see {Line Separator}[rdoc-ref:IO@Line+Separator]: * * # Default separator. * $ cat t.txt | ruby -e "p readlines" @@ -10321,7 +10326,7 @@ static VALUE argf_readlines(int, VALUE *, VALUE); * * With only integer argument +limit+ given, * limits the number of bytes in the line; - * see {Line Limit}[rdoc-ref:io_streams.rdoc@Line+Limit]: + * see {Line Limit}[rdoc-ref:IO@Line+Limit]: * * $cat t.txt | ruby -e "p readlines 10" * ["First line", "\n", "Second lin", "e\n", "\n", "Fourth lin", "e\n", "Fifth line", "\n"] @@ -10333,18 +10338,17 @@ static VALUE argf_readlines(int, VALUE *, VALUE); * ["First line\n", "Second line\n", "\n", "Fourth line\n", "Fifth line\n"] * * With arguments +sep+ and +limit+ given, combines the two behaviors; - * see {Line Separator and Line Limit}[rdoc-ref:io_streams.rdoc@Line+Separator+and+Line+Limit]. + * see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]. * - * For all forms above, optional keyword arguments specify: - * - * - {Line Options}[rdoc-ref:io_streams.rdoc@Line+Options]. - * - {Encoding options}[rdoc-ref:encodings.rdoc@Encoding+Options]. 
- * - * Examples: + * Optional keyword argument +chomp+ specifies whether line separators + * are to be omitted: * * $ cat t.txt | ruby -e "p readlines(chomp: true)" * ["First line", "Second line", "", "Fourth line", "Fifth line"] * + * Optional keyword arguments +enc_opts+ specify encoding options; + * see {Encoding options}[rdoc-ref:encodings.rdoc@Encoding+Options]. + * */ static VALUE @@ -11828,7 +11832,7 @@ io_s_foreach(VALUE v) * For both forms, command and path, the remaining arguments are the same. * * With argument +sep+ given, parses lines as determined by that line separator - * (see {Line Separator}[rdoc-ref:io_streams.rdoc@Line+Separator]): + * (see {Line Separator}[rdoc-ref:IO@Line+Separator]): * * File.foreach('t.txt', 'li') {|line| p line } * @@ -11851,7 +11855,7 @@ io_s_foreach(VALUE v) * * With argument +limit+ given, parses lines as determined by the default * line separator and the given line-length limit - * (see {Line Limit}[rdoc-ref:io_streams.rdoc@Line+Limit]): + * (see {Line Limit}[rdoc-ref:IO@Line+Limit]): * * File.foreach('t.txt', 7) {|line| p line } * @@ -11870,13 +11874,13 @@ io_s_foreach(VALUE v) * With arguments +sep+ and +limit+ given, * parses lines as determined by the given * line separator and the given line-length limit - * (see {Line Separator and Line Limit}[rdoc-ref:io_streams.rdoc@Line+Separator+and+Line+Limit]): + * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]): * * Optional keyword arguments +opts+ specify: * * - {Open Options}[rdoc-ref:IO@Open+Options]. * - {Encoding options}[rdoc-ref:encodings.rdoc@Encoding+Options]. - * - {Line Options}[rdoc-ref:io_streams.rdoc@Line+Options]. + * - {Line Options}[rdoc-ref:IO@Line+Options]. * * Returns an Enumerator if no block is given. * @@ -11946,7 +11950,7 @@ io_s_readlines(VALUE v) * For both forms, command and path, the remaining arguments are the same. 
* * With argument +sep+ given, parses lines as determined by that line separator - * (see {Line Separator}[rdoc-ref:io_streams.rdoc@Line+Separator]): + * (see {Line Separator}[rdoc-ref:IO@Line+Separator]): * * # Ordinary separator. * IO.readlines('t.txt', 'li') @@ -11960,7 +11964,7 @@ io_s_readlines(VALUE v) * * With argument +limit+ given, parses lines as determined by the default * line separator and the given line-length limit - * (see {Line Limit}[rdoc-ref:io_streams.rdoc@Line+Limit]): + * (see {Line Limit}[rdoc-ref:IO@Line+Limit]): * * IO.readlines('t.txt', 7) * # => ["First l", "ine\n", "Second ", "line\n", "\n", "Third l", "ine\n", "Fourth ", "line\n"] @@ -11968,13 +11972,13 @@ io_s_readlines(VALUE v) * With arguments +sep+ and +limit+ given, * parses lines as determined by the given * line separator and the given line-length limit - * (see {Line Separator and Line Limit}[rdoc-ref:io_streams.rdoc@Line+Separator+and+Line+Limit]): + * (see {Line Separator and Line Limit}[rdoc-ref:IO@Line+Separator+and+Line+Limit]): * * Optional keyword arguments +opts+ specify: * * - {Open Options}[rdoc-ref:IO@Open+Options]. * - {Encoding options}[rdoc-ref:encodings.rdoc@Encoding+Options]. - * - {Line Options}[rdoc-ref:io_streams.rdoc@Line+Options]. + * - {Line Options}[rdoc-ref:IO@Line+Options]. * */ @@ -14687,10 +14691,10 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * * - A position, which determines where in the stream the next * read or write is to occur; - * see {Position}[rdoc-ref:io_streams.rdoc@Position]. + * see {Position}[rdoc-ref:IO@Position]. * - A line number, which is a special, line-oriented, "position" * (different from the position mentioned above); - * see {Line Number}[rdoc-ref:io_streams.rdoc@Line+Number]. + * see {Line Number}[rdoc-ref:IO@Line+Number]. 
* * == Extension io/console * @@ -14725,6 +14729,326 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * Also available are the options offered in String#encode, * which may control conversion between external internal encoding. * + * == Basic \IO + * + * You can perform basic stream \IO with these methods, + * which typically operate on multi-byte strings: + * + * - IO#read: Reads and returns some or all of the remaining bytes from the stream. + * - IO#write: Writes zero or more strings to the stream; + * each given object that is not already a string is converted via +to_s+. + * + * === Position + * + * An \IO stream has a nonnegative integer _position_, + * which is the byte offset at which the next read or write is to occur. + * A new stream has position zero (and line number zero); + * method +rewind+ resets the position (and line number) to zero. + * + * The relevant methods: + * + * - IO#tell (aliased as +#pos+): Returns the current position (in bytes) in the stream. + * - IO#pos=: Sets the position of the stream to a given integer +new_position+ (in bytes). + * - IO#seek: Sets the position of the stream to a given integer +offset+ (in bytes), + * relative to a given position +whence+ + * (indicating the beginning, end, or current position). + * - IO#rewind: Positions the stream at the beginning (also resetting the line number). + * + * === Open and Closed Streams + * + * A new \IO stream may be open for reading, open for writing, or both. + * + * A stream is automatically closed when claimed by the garbage collector. + * + * Attempted reading or writing on a closed stream raises an exception. + * + * The relevant methods: + * + * - IO#close: Closes the stream for both reading and writing. + * - IO#close_read: Closes the stream for reading. + * - IO#close_write: Closes the stream for writing. + * - IO#closed?: Returns whether the stream is closed. + * + * === End-of-Stream + * + * You can query whether a stream is positioned at its end: + * + * - IO#eof? 
(also aliased as +#eof+): Returns whether the stream is at end-of-stream. + * + * You can reposition to end-of-stream by using method IO#seek: + * + * f = File.new('t.txt') + * f.eof? # => false + * f.seek(0, :END) + * f.eof? # => true + * f.close + * + * Or by reading all stream content (which is slower than using IO#seek): + * + * f.rewind + * f.eof? # => false + * f.read # => "First line\nSecond line\n\nFourth line\nFifth line\n" + * f.eof? # => true + * + * == Line \IO + * + * You can read an \IO stream line-by-line using these methods: + * + * - IO#each_line: Reads each remaining line, passing it to the given block. + * - IO#gets: Returns the next line. + * - IO#readline: Like #gets, but raises an exception at end-of-stream. + * - IO#readlines: Returns all remaining lines in an array. + * + * Each of these reader methods accepts: + * + * - An optional line separator, +sep+; + * see {Line Separator}[rdoc-ref:IO@Line+Separator]. + * - An optional line-size limit, +limit+; + * see {Line Limit}[rdoc-ref:IO@Line+Limit]. + * + * For each of these reader methods, reading may begin mid-line, + * depending on the stream's position; + * see {Position}[rdoc-ref:IO@Position]: + * + * f = File.new('t.txt') + * f.pos = 27 + * f.each_line {|line| p line } + * f.close + * + * Output: + * + * "rth line\n" + * "Fifth line\n" + * + * You can write to an \IO stream line-by-line using this method: + * + * - IO#puts: Writes objects to the stream. + * + * === Line Separator + * + * Each of these methods uses a line separator, + * which is the string that delimits lines: + * + * - IO.foreach. + * - IO.readlines. + * - IO#each_line. + * - IO#gets. + * - IO#readline. + * - IO#readlines. + * + * The default line separator is given by the global variable $/, + * whose value is by default "\n". 
+ * The line to be read next is all data from the current position + * to the next line separator: + * + * f = File.new('t.txt') + * f.gets # => "First line\n" + * f.gets # => "Second line\n" + * f.gets # => "\n" + * f.gets # => "Fourth line\n" + * f.gets # => "Fifth line\n" + * f.close + * + * You can specify a different line separator: + * + * f = File.new('t.txt') + * f.gets('l') # => "First l" + * f.gets('li') # => "ine\nSecond li" + * f.gets('lin') # => "ne\n\nFourth lin" + * f.gets # => "e\n" + * f.close + * + * There are two special line separators: + * + * - +nil+: The entire stream is read into a single string: + * + * f = File.new('t.txt') + * f.gets(nil) # => "First line\nSecond line\n\nFourth line\nFifth line\n" + * f.close + * + * - '' (the empty string): The next "paragraph" is read + * (paragraphs being separated by two consecutive line separators): + * + * f = File.new('t.txt') + * f.gets('') # => "First line\nSecond line\n\n" + * f.gets('') # => "Fourth line\nFifth line\n" + * f.close + * + * === Line Limit + * + * Each of these methods uses a line limit, + * which specifies that the number of bytes returned may not be (much) longer + * than the given +limit+; + * + * - IO.foreach. + * - IO.readlines. + * - IO#each_line. + * - IO#gets. + * - IO#readline. + * - IO#readlines. + * + * A multi-byte character will not be split, and so a line may be slightly longer + * than the given limit. + * + * If +limit+ is not given, the line is determined only by +sep+. + * + * # Text with 1-byte characters. + * File.open('t.txt') {|f| f.gets(1) } # => "F" + * File.open('t.txt') {|f| f.gets(2) } # => "Fi" + * File.open('t.txt') {|f| f.gets(3) } # => "Fir" + * File.open('t.txt') {|f| f.gets(4) } # => "Firs" + * # No more than one line. 
+ * File.open('t.txt') {|f| f.gets(10) } # => "First line" + * File.open('t.txt') {|f| f.gets(11) } # => "First line\n" + * File.open('t.txt') {|f| f.gets(12) } # => "First line\n" + * + * # Text with 2-byte characters, which will not be split. + * File.open('t.rus') {|f| f.gets(1).size } # => 1 + * File.open('t.rus') {|f| f.gets(2).size } # => 1 + * File.open('t.rus') {|f| f.gets(3).size } # => 2 + * File.open('t.rus') {|f| f.gets(4).size } # => 2 + * + * === Line Separator and Line Limit + * + * With arguments +sep+ and +limit+ given, + * combines the two behaviors: + * + * - Returns the next line as determined by line separator +sep+. + * - But returns no more bytes than are allowed by the limit. + * + * Example: + * + * File.open('t.txt') {|f| f.gets('li', 20) } # => "First li" + * File.open('t.txt') {|f| f.gets('li', 2) } # => "Fi" + * + * === Line Number + * + * A readable \IO stream has a non-negative integer line number. + * + * The relevant methods: + * + * - IO#lineno: Returns the line number. + * - IO#lineno=: Resets and returns the line number. + * + * Unless modified by a call to method IO#lineno=, + * the line number is the number of lines read + * by certain line-oriented methods, + * according to the given line separator +sep+: + * + * - IO.foreach: Increments the line number on each call to the block. + * - IO#each_line: Increments the line number on each call to the block. + * - IO#gets: Increments the line number. + * - IO#readline: Increments the line number. + * - IO#readlines: Increments the line number for each line read. 
+ * + * A new stream initially has line number zero (and position zero); + * method +rewind+ resets the line number (and position) to zero: + * + * f = File.new('t.txt') + * f.lineno # => 0 + * f.gets # => "First line\n" + * f.lineno # => 1 + * f.rewind + * f.lineno # => 0 + * f.close + * + * Reading lines from a stream usually changes its line number: + * + * f = File.new('t.txt', 'r') + * f.lineno # => 0 + * f.readline # => "This is line one.\n" + * f.lineno # => 1 + * f.readline # => "This is the second line.\n" + * f.lineno # => 2 + * f.readline # => "Here's the third line.\n" + * f.lineno # => 3 + * f.eof? # => true + * f.close + * + * Iterating over lines in a stream usually changes its line number: + * + * File.open('t.txt') do |f| + * f.each_line do |line| + * p "position=#{f.pos} eof?=#{f.eof?} lineno=#{f.lineno}" + * end + * end + * + * Output: + * + * "position=11 eof?=false lineno=1" + * "position=23 eof?=false lineno=2" + * "position=24 eof?=false lineno=3" + * "position=36 eof?=false lineno=4" + * "position=47 eof?=true lineno=5" + * + * Unlike the stream's {position}[rdoc-ref:IO@Position], + * the line number does not affect where the next read or write will occur: + * + * f = File.new('t.txt') + * f.lineno = 1000 + * f.lineno # => 1000 + * f.gets # => "First line\n" + * f.lineno # => 1001 + * f.close + * + * Associated with the line number is the global variable $.: + * + * - When a stream is opened, $. is not set; + * its value is left over from previous activity in the process: + * + * $. = 41 + * f = File.new('t.txt') + * $. = 41 + * # => 41 + * f.close + * + * - When a stream is read, $. is set to the line number for that stream: + * + * f0 = File.new('t.txt') + * f1 = File.new('t.dat') + * f0.readlines # => ["First line\n", "Second line\n", "\n", "Fourth line\n", "Fifth line\n"] + * $. # => 5 + * f1.readlines # => ["\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94"] + * $. 
# => 1 + * f0.close + * f1.close + * + * - Methods IO#rewind and IO#seek do not affect $.: + * + * f = File.new('t.txt') + * f.readlines # => ["First line\n", "Second line\n", "\n", "Fourth line\n", "Fifth line\n"] + * $. # => 5 + * f.rewind + * f.seek(0, :SET) + * $. # => 5 + * f.close + * + * == Character \IO + * + * You can process an \IO stream character-by-character using these methods: + * + * - IO#getc: Reads and returns the next character from the stream. + * - IO#readchar: Like #getc, but raises an exception at end-of-stream. + * - IO#ungetc: Pushes back ("unshifts") a character or integer onto the stream. + * - IO#putc: Writes a character to the stream. + * - IO#each_char: Reads each remaining character in the stream, + * passing the character to the given block. + * == Byte \IO + * + * You can process an \IO stream byte-by-byte using these methods: + * + * - IO#getbyte: Returns the next 8-bit byte as an integer in range 0..255. + * - IO#readbyte: Like #getbyte, but raises an exception if at end-of-stream. + * - IO#ungetbyte: Pushes back ("unshifts") a byte back onto the stream. + * - IO#each_byte: Reads each remaining byte in the stream, + * passing the byte to the given block. + * + * == Codepoint \IO + * + * You can process an \IO stream codepoint-by-codepoint: + * + * - IO#each_codepoint: Reads each remaining codepoint, passing it to the given block. + * * == What's Here * * First, what's elsewhere. \Class \IO: @@ -14772,11 +15096,11 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * - #read_nonblock: the next _n_ bytes read from +self+ for a given _n_, * in non-block mode. * - #readbyte: Returns the next byte read from +self+; - * same as #getbyte, but raises an exception on end-of-file. + * same as #getbyte, but raises an exception on end-of-stream. * - #readchar: Returns the next character read from +self+; - * same as #getc, but raises an exception on end-of-file. + * same as #getc, but raises an exception on end-of-stream. 
* - #readline: Returns the next line read from +self+; - * same as #getline, but raises an exception of end-of-file. + * same as #gets, but raises an exception at end-of-stream. * - #readlines: Returns an array of all lines read read from +self+. * - #readpartial: Returns up to the given number of bytes from +self+. * @@ -14836,7 +15160,7 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * - #binmode?: Returns whether +self+ is in binary mode. * - #close_on_exec?: Returns the close-on-exec flag for +self+. * - #closed?: Returns whether +self+ is closed. - * - #eof? (aliased as #eof): Returns whether +self+ is at end-of-file. + * - #eof? (aliased as #eof): Returns whether +self+ is at end-of-stream. * - #external_encoding: Returns the external encoding object for +self+. * - #fileno (aliased as #to_i): Returns the integer file descriptor for +self+ * - #internal_encoding: Returns the internal encoding object for +self+. From 9ab978b7183d92cf1451be5dce7c7cc72222a39a Mon Sep 17 00:00:00 2001 From: "Eileen M. Uchitelle" Date: Wed, 9 Nov 2022 17:09:16 -0500 Subject: [PATCH 067/104] Fix exit locations dump (#6703) While I was working on my RubyConf talk for tracing yjit exit locations I realized that there were exits from the dump code included in the stats data. For example I saw 224 interp leave exits for a simple script that should have had 1 or 2. I realized that the dump code needs to be called _after_ the stats are generated, otherwise the dump code will be counted in the stats exits. I've added a `_dump_locations` method to the `at_exit` for stats generation to ensure that it runs last. I've updated the documentation to add a note that if you call `dump_exit_locations` directly, your stats will include the dump code exits as well. 
--- yjit.rb | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/yjit.rb b/yjit.rb index 5a00dcca575045..21f2eea4def39c 100644 --- a/yjit.rb +++ b/yjit.rb @@ -112,13 +112,25 @@ def self.exit_locations # # Usage: # + # If `--yjit-exit-locations` is passed, a file named + # "yjit_exit_locations.dump" will automatically be generated. + # + # If you want to collect traces manually, call `dump_exit_locations` + # directly. + # + # Note that calling this in a script will generate stats after the + # dump is created, so the stats data may include exits from the + # dump itself. + # # In a script call: # - # RubyVM::YJIT.dump_exit_locations("my_file.dump") + # at_exit do + # RubyVM::YJIT.dump_exit_locations("my_file.dump") + # end # # Then run the file with the following options: # - # ruby --yjit --yjit-stats --yjit-trace-exits test.rb + # ruby --yjit --yjit-trace-exits test.rb # # Once the code is done running, use Stackprof to read the dump file. # See Stackprof documentation for options. @@ -196,12 +208,24 @@ def self.simulate_oom! # Avoid calling a method here to not interfere with compilation tests if Primitive.rb_yjit_stats_enabled_p - at_exit { _print_stats } + at_exit do + _print_stats + _dump_locations + end end class << self private + def _dump_locations + return unless trace_exit_locations_enabled? + + filename = "yjit_exit_locations.dump" + dump_exit_locations(filename) + + $stderr.puts("YJIT exit locations dumped to `#{filename}`.") + end + # Format and print out counters def _print_stats stats = runtime_stats From 5a4d9f91c4678a4b821fc9cc3be77058b1235bf9 Mon Sep 17 00:00:00 2001 From: yui-knk Date: Thu, 10 Nov 2022 08:06:18 +0900 Subject: [PATCH 068/104] Update required Bison version in building_ruby.md [ci skip] See [Feature #19068] for the context. 
--- doc/contributing/building_ruby.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/contributing/building_ruby.md b/doc/contributing/building_ruby.md index 322ffbb0644206..33ed732a2bc458 100644 --- a/doc/contributing/building_ruby.md +++ b/doc/contributing/building_ruby.md @@ -6,7 +6,7 @@ * C compiler * autoconf - 2.67 or later - * bison - 2.0 or later + * bison - 3.0 or later * gperf - 3.0.3 or later * ruby - 2.7 or later From b7504af8fc73b92ae13b4cf3a65d2417d219ecb4 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 9 Nov 2022 22:04:28 +0900 Subject: [PATCH 069/104] Preprocess for older bison is no longer needed --- common.mk | 2 -- ext/ripper/depend | 2 -- tool/pure_parser.rb | 24 ------------------------ 3 files changed, 28 deletions(-) delete mode 100755 tool/pure_parser.rb diff --git a/common.mk b/common.mk index 88934378f4bf35..81f99b14583045 100644 --- a/common.mk +++ b/common.mk @@ -956,8 +956,6 @@ PHONY: {$(srcdir)}.y.c: $(ECHO) generating $@ $(Q)$(BASERUBY) $(tooldir)/id2token.rb $(SRC_FILE) > parse.tmp.y - $(Q)$(BASERUBY) $(tooldir)/pure_parser.rb parse.tmp.y $(YACC) - $(Q)$(RM) parse.tmp.y.bak $(Q)$(YACC) -d $(YFLAGS) -o y.tab.c parse.tmp.y $(Q)$(RM) parse.tmp.y $(Q)sed -f $(tooldir)/ytab.sed -e "/^#/s|parse\.tmp\.[iy]|$(SRC_FILE)|" -e "/^#/s!y\.tab\.c!$@!" 
y.tab.c > $@.new diff --git a/ext/ripper/depend b/ext/ripper/depend index 15c557a8efa052..c77e4e1b7a2d57 100644 --- a/ext/ripper/depend +++ b/ext/ripper/depend @@ -20,8 +20,6 @@ static: check ripper.y: $(srcdir)/tools/preproc.rb $(srcdir)/tools/dsl.rb $(top_srcdir)/parse.y $(top_srcdir)/defs/id.def $(ECHO) extracting $@ from $(top_srcdir)/parse.y $(Q) $(RUBY) $(top_srcdir)/tool/id2token.rb $(top_srcdir)/parse.y > ripper.tmp.y - $(Q) $(RUBY) $(top_srcdir)/tool/pure_parser.rb ripper.tmp.y $(BISON) - $(Q) $(RM) ripper.tmp.y.bak $(Q) $(RUBY) $(srcdir)/tools/preproc.rb ripper.tmp.y --output=$@ $(Q) $(RM) ripper.tmp.y diff --git a/tool/pure_parser.rb b/tool/pure_parser.rb deleted file mode 100755 index 21c87cc5d61b80..00000000000000 --- a/tool/pure_parser.rb +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/ruby -pi.bak -BEGIN { - # pathological setting - ENV['LANG'] = ENV['LC_MESSAGES'] = ENV['LC_ALL'] = 'C' - - require_relative 'lib/colorize' - - colorize = Colorize.new - file = ARGV.shift - begin - version = IO.popen(ARGV+%w[--version], "rb", &:read) - rescue Errno::ENOENT - abort "Failed to run `#{colorize.fail ARGV.join(' ')}'; You may have to install it." 
- end - unless /\Abison .* (\d+)\.\d+/ =~ version - puts colorize.fail("not bison") - exit - end - exit if $1.to_i >= 3 - ARGV.clear - ARGV.push(file) -} -$_.sub!(/^%define\s+api\.pure/, '%pure-parser') -$_.sub!(/^%define\s+.*/, '') From 0ae5de1a5ddcca407dd2ba30ca5d5f3d7c3ea4bc Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 10 Nov 2022 09:11:20 +0900 Subject: [PATCH 070/104] Adjust indents [ci skip] --- insns.def | 2 +- variable.c | 15 ++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/insns.def b/insns.def index cde327e430b7cb..9f5ee7095a766d 100644 --- a/insns.def +++ b/insns.def @@ -697,7 +697,7 @@ defined { val = Qnil; if (vm_defined(ec, GET_CFP(), op_type, obj, v)) { - val = pushval; + val = pushval; } } diff --git a/variable.c b/variable.c index fd0c30b9b53315..c9c4be1c4355ba 100644 --- a/variable.c +++ b/variable.c @@ -1183,7 +1183,8 @@ rb_ivar_lookup(VALUE obj, ID id, VALUE undef) shape_id = ivtbl->shape_id; #endif ivar_list = ivtbl->ivptr; - } else { + } + else { return undef; } break; @@ -2051,7 +2052,8 @@ autoload_data(VALUE mod, ID id) if (RB_TYPE_P(mod, T_ICLASS)) { if (FL_TEST_RAW(mod, RICLASS_IS_ORIGIN)) { return 0; - } else { + } + else { mod = RBASIC(mod)->klass; } } @@ -2262,7 +2264,8 @@ autoload_table_lookup_or_create(VALUE module) VALUE autoload_table_value = rb_ivar_lookup(module, autoload, 0); if (autoload_table_value) { return check_autoload_table(autoload_table_value); - } else { + } + else { autoload_table_value = TypedData_Wrap_Struct(0, &autoload_table_type, 0); rb_class_ivar_set(module, autoload, autoload_table_value); return (DATA_PTR(autoload_table_value) = st_init_numtable()); @@ -3492,7 +3495,8 @@ cvar_lookup_at(VALUE klass, ID id, st_data_t *v) if (RB_TYPE_P(klass, T_ICLASS)) { if (FL_TEST_RAW(klass, RICLASS_IS_ORIGIN)) { return 0; - } else { + } + else { // check the original module klass = RBASIC(klass)->klass; } @@ -3879,7 +3883,8 @@ rb_class_ivar_set(VALUE obj, ID key, VALUE value) 
RCLASS_IVPTR(obj)[idx] = value; RB_OBJ_WRITTEN(obj, Qundef, value); - } else { + } + else { // Creating and setting a new instance variable // Move to a shape which fits the new ivar From 6eaed20e145c1ab4c9c4ccf669d0ebea934f4d36 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 8 Nov 2022 09:06:21 +0900 Subject: [PATCH 071/104] Add version to the interface of Random extensions --- ext/-test-/random/bad_version.c | 132 ++++++++++++++++++++++++++ ext/-test-/random/depend | 160 ++++++++++++++++++++++++++++++++ include/ruby/random.h | 33 +++++++ random.c | 9 ++ test/-ext-/test_random.rb | 26 +++++- 5 files changed, 356 insertions(+), 4 deletions(-) create mode 100644 ext/-test-/random/bad_version.c diff --git a/ext/-test-/random/bad_version.c b/ext/-test-/random/bad_version.c new file mode 100644 index 00000000000000..b6bf2ac5af18a7 --- /dev/null +++ b/ext/-test-/random/bad_version.c @@ -0,0 +1,132 @@ +#include "ruby/random.h" + +#if RUBY_RANDOM_INTERFACE_VERSION_MAJOR < RUBY_RANDOM_INTERFACE_VERSION_MAJOR_MAX +# define DEFINE_VERSION_MAX 1 +#else +# define DEFINE_VERSION_MAX 0 +#endif + +NORETURN(static void must_not_reach(void)); +static void +must_not_reach(void) +{ + rb_raise(rb_eTypeError, "must not reach"); +} + +NORETURN(static void bad_version_init(rb_random_t *, const uint32_t *, size_t)); +static void +bad_version_init(rb_random_t *rnd, const uint32_t *buf, size_t len) +{ + must_not_reach(); +} + +NORETURN(static void bad_version_get_bytes(rb_random_t *, void *, size_t)); +static void +bad_version_get_bytes(rb_random_t *rnd, void *p, size_t n) +{ + must_not_reach(); +} + +NORETURN(static uint32_t bad_version_get_int32(rb_random_t *)); +static uint32_t +bad_version_get_int32(rb_random_t *rnd) +{ + must_not_reach(); + UNREACHABLE_RETURN(0); +} + +static VALUE +bad_version_alloc(VALUE klass, const rb_data_type_t *type) +{ + rb_random_t *rnd; + VALUE obj = TypedData_Make_Struct(klass, rb_random_t, type, rnd); + rb_random_base_init(rnd); + return obj; +} 
+ +/* version 0 */ +static const rb_random_interface_t random_version_zero_if; + +static rb_random_data_type_t version_zero_type = { + "random/version_zero", + { + rb_random_mark, + RUBY_TYPED_DEFAULT_FREE, + }, + RB_RANDOM_PARENT, + (void *)&random_version_zero_if, + RUBY_TYPED_FREE_IMMEDIATELY +}; + +static VALUE +version_zero_alloc(VALUE klass) +{ + return bad_version_alloc(klass, &version_zero_type); +} + +static void +init_version_zero(VALUE mod, VALUE base) +{ + VALUE c = rb_define_class_under(mod, "VersionZero", base); + rb_define_alloc_func(c, version_zero_alloc); + RB_RANDOM_DATA_INIT_PARENT(version_zero_type); +} + +#if DEFINE_VERSION_MAX +/* version max */ +static const rb_random_interface_t random_version_max_if; +static rb_random_data_type_t version_max_type = { + "random/version_max", + { + rb_random_mark, + RUBY_TYPED_DEFAULT_FREE, + }, + RB_RANDOM_PARENT, + (void *)&random_version_max_if, + RUBY_TYPED_FREE_IMMEDIATELY +}; + +static VALUE +version_max_alloc(VALUE klass) +{ + return bad_version_alloc(klass, &version_max_type); +} + +static void +init_version_max(VALUE mod, VALUE base) +{ + VALUE c = rb_define_class_under(mod, "VersionMax", base); + rb_define_alloc_func(c, version_max_alloc); + RB_RANDOM_DATA_INIT_PARENT(version_max_type); +} +#else /* !DEFINE_VERSION_MAX: no-op stub, since Init_random_bad_version calls it unconditionally */ +static void +init_version_max(VALUE mod, VALUE base) +{ +} +#endif + +void +Init_random_bad_version(VALUE mod, VALUE base) +{ + init_version_zero(mod, base); + init_version_max(mod, base); +} + +#undef RUBY_RANDOM_INTERFACE_VERSION_MAJOR + +#define RUBY_RANDOM_INTERFACE_VERSION_MAJOR 0 +static const rb_random_interface_t random_version_zero_if = { + 0, + RB_RANDOM_INTERFACE_DEFINE(bad_version) +}; +#undef RUBY_RANDOM_INTERFACE_VERSION_MAJOR + +#if DEFINE_VERSION_MAX +#define RUBY_RANDOM_INTERFACE_VERSION_MAJOR RUBY_RANDOM_INTERFACE_VERSION_MAJOR_MAX +static const rb_random_interface_t random_version_max_if = { + 0, + RB_RANDOM_INTERFACE_DEFINE(bad_version) +}; +#undef RUBY_RANDOM_INTERFACE_VERSION_MAJOR +#endif diff 
--git a/ext/-test-/random/depend b/ext/-test-/random/depend index 602526cf7b76b9..f2cbf7fc144a07 100644 --- a/ext/-test-/random/depend +++ b/ext/-test-/random/depend @@ -1,4 +1,164 @@ # AUTOGENERATED DEPENDENCIES START +bad_version.o: $(RUBY_EXTCONF_H) +bad_version.o: $(arch_hdrdir)/ruby/config.h +bad_version.o: $(hdrdir)/ruby/assert.h +bad_version.o: $(hdrdir)/ruby/backward.h +bad_version.o: $(hdrdir)/ruby/backward/2/assume.h +bad_version.o: $(hdrdir)/ruby/backward/2/attributes.h +bad_version.o: $(hdrdir)/ruby/backward/2/bool.h +bad_version.o: $(hdrdir)/ruby/backward/2/inttypes.h +bad_version.o: $(hdrdir)/ruby/backward/2/limits.h +bad_version.o: $(hdrdir)/ruby/backward/2/long_long.h +bad_version.o: $(hdrdir)/ruby/backward/2/stdalign.h +bad_version.o: $(hdrdir)/ruby/backward/2/stdarg.h +bad_version.o: $(hdrdir)/ruby/defines.h +bad_version.o: $(hdrdir)/ruby/intern.h +bad_version.o: $(hdrdir)/ruby/internal/abi.h +bad_version.o: $(hdrdir)/ruby/internal/anyargs.h +bad_version.o: $(hdrdir)/ruby/internal/arithmetic.h +bad_version.o: $(hdrdir)/ruby/internal/arithmetic/char.h +bad_version.o: $(hdrdir)/ruby/internal/arithmetic/double.h +bad_version.o: $(hdrdir)/ruby/internal/arithmetic/fixnum.h +bad_version.o: $(hdrdir)/ruby/internal/arithmetic/gid_t.h +bad_version.o: $(hdrdir)/ruby/internal/arithmetic/int.h +bad_version.o: $(hdrdir)/ruby/internal/arithmetic/intptr_t.h +bad_version.o: $(hdrdir)/ruby/internal/arithmetic/long.h +bad_version.o: $(hdrdir)/ruby/internal/arithmetic/long_long.h +bad_version.o: $(hdrdir)/ruby/internal/arithmetic/mode_t.h +bad_version.o: $(hdrdir)/ruby/internal/arithmetic/off_t.h +bad_version.o: $(hdrdir)/ruby/internal/arithmetic/pid_t.h +bad_version.o: $(hdrdir)/ruby/internal/arithmetic/short.h +bad_version.o: $(hdrdir)/ruby/internal/arithmetic/size_t.h +bad_version.o: $(hdrdir)/ruby/internal/arithmetic/st_data_t.h +bad_version.o: $(hdrdir)/ruby/internal/arithmetic/uid_t.h +bad_version.o: $(hdrdir)/ruby/internal/assume.h +bad_version.o: 
$(hdrdir)/ruby/internal/attr/alloc_size.h +bad_version.o: $(hdrdir)/ruby/internal/attr/artificial.h +bad_version.o: $(hdrdir)/ruby/internal/attr/cold.h +bad_version.o: $(hdrdir)/ruby/internal/attr/const.h +bad_version.o: $(hdrdir)/ruby/internal/attr/constexpr.h +bad_version.o: $(hdrdir)/ruby/internal/attr/deprecated.h +bad_version.o: $(hdrdir)/ruby/internal/attr/diagnose_if.h +bad_version.o: $(hdrdir)/ruby/internal/attr/enum_extensibility.h +bad_version.o: $(hdrdir)/ruby/internal/attr/error.h +bad_version.o: $(hdrdir)/ruby/internal/attr/flag_enum.h +bad_version.o: $(hdrdir)/ruby/internal/attr/forceinline.h +bad_version.o: $(hdrdir)/ruby/internal/attr/format.h +bad_version.o: $(hdrdir)/ruby/internal/attr/maybe_unused.h +bad_version.o: $(hdrdir)/ruby/internal/attr/noalias.h +bad_version.o: $(hdrdir)/ruby/internal/attr/nodiscard.h +bad_version.o: $(hdrdir)/ruby/internal/attr/noexcept.h +bad_version.o: $(hdrdir)/ruby/internal/attr/noinline.h +bad_version.o: $(hdrdir)/ruby/internal/attr/nonnull.h +bad_version.o: $(hdrdir)/ruby/internal/attr/noreturn.h +bad_version.o: $(hdrdir)/ruby/internal/attr/pure.h +bad_version.o: $(hdrdir)/ruby/internal/attr/restrict.h +bad_version.o: $(hdrdir)/ruby/internal/attr/returns_nonnull.h +bad_version.o: $(hdrdir)/ruby/internal/attr/warning.h +bad_version.o: $(hdrdir)/ruby/internal/attr/weakref.h +bad_version.o: $(hdrdir)/ruby/internal/cast.h +bad_version.o: $(hdrdir)/ruby/internal/compiler_is.h +bad_version.o: $(hdrdir)/ruby/internal/compiler_is/apple.h +bad_version.o: $(hdrdir)/ruby/internal/compiler_is/clang.h +bad_version.o: $(hdrdir)/ruby/internal/compiler_is/gcc.h +bad_version.o: $(hdrdir)/ruby/internal/compiler_is/intel.h +bad_version.o: $(hdrdir)/ruby/internal/compiler_is/msvc.h +bad_version.o: $(hdrdir)/ruby/internal/compiler_is/sunpro.h +bad_version.o: $(hdrdir)/ruby/internal/compiler_since.h +bad_version.o: $(hdrdir)/ruby/internal/config.h +bad_version.o: $(hdrdir)/ruby/internal/constant_p.h +bad_version.o: 
$(hdrdir)/ruby/internal/core.h +bad_version.o: $(hdrdir)/ruby/internal/core/rarray.h +bad_version.o: $(hdrdir)/ruby/internal/core/rbasic.h +bad_version.o: $(hdrdir)/ruby/internal/core/rbignum.h +bad_version.o: $(hdrdir)/ruby/internal/core/rclass.h +bad_version.o: $(hdrdir)/ruby/internal/core/rdata.h +bad_version.o: $(hdrdir)/ruby/internal/core/rfile.h +bad_version.o: $(hdrdir)/ruby/internal/core/rhash.h +bad_version.o: $(hdrdir)/ruby/internal/core/robject.h +bad_version.o: $(hdrdir)/ruby/internal/core/rregexp.h +bad_version.o: $(hdrdir)/ruby/internal/core/rstring.h +bad_version.o: $(hdrdir)/ruby/internal/core/rstruct.h +bad_version.o: $(hdrdir)/ruby/internal/core/rtypeddata.h +bad_version.o: $(hdrdir)/ruby/internal/ctype.h +bad_version.o: $(hdrdir)/ruby/internal/dllexport.h +bad_version.o: $(hdrdir)/ruby/internal/dosish.h +bad_version.o: $(hdrdir)/ruby/internal/error.h +bad_version.o: $(hdrdir)/ruby/internal/eval.h +bad_version.o: $(hdrdir)/ruby/internal/event.h +bad_version.o: $(hdrdir)/ruby/internal/fl_type.h +bad_version.o: $(hdrdir)/ruby/internal/gc.h +bad_version.o: $(hdrdir)/ruby/internal/glob.h +bad_version.o: $(hdrdir)/ruby/internal/globals.h +bad_version.o: $(hdrdir)/ruby/internal/has/attribute.h +bad_version.o: $(hdrdir)/ruby/internal/has/builtin.h +bad_version.o: $(hdrdir)/ruby/internal/has/c_attribute.h +bad_version.o: $(hdrdir)/ruby/internal/has/cpp_attribute.h +bad_version.o: $(hdrdir)/ruby/internal/has/declspec_attribute.h +bad_version.o: $(hdrdir)/ruby/internal/has/extension.h +bad_version.o: $(hdrdir)/ruby/internal/has/feature.h +bad_version.o: $(hdrdir)/ruby/internal/has/warning.h +bad_version.o: $(hdrdir)/ruby/internal/intern/array.h +bad_version.o: $(hdrdir)/ruby/internal/intern/bignum.h +bad_version.o: $(hdrdir)/ruby/internal/intern/class.h +bad_version.o: $(hdrdir)/ruby/internal/intern/compar.h +bad_version.o: $(hdrdir)/ruby/internal/intern/complex.h +bad_version.o: $(hdrdir)/ruby/internal/intern/cont.h +bad_version.o: 
$(hdrdir)/ruby/internal/intern/dir.h +bad_version.o: $(hdrdir)/ruby/internal/intern/enum.h +bad_version.o: $(hdrdir)/ruby/internal/intern/enumerator.h +bad_version.o: $(hdrdir)/ruby/internal/intern/error.h +bad_version.o: $(hdrdir)/ruby/internal/intern/eval.h +bad_version.o: $(hdrdir)/ruby/internal/intern/file.h +bad_version.o: $(hdrdir)/ruby/internal/intern/gc.h +bad_version.o: $(hdrdir)/ruby/internal/intern/hash.h +bad_version.o: $(hdrdir)/ruby/internal/intern/io.h +bad_version.o: $(hdrdir)/ruby/internal/intern/load.h +bad_version.o: $(hdrdir)/ruby/internal/intern/marshal.h +bad_version.o: $(hdrdir)/ruby/internal/intern/numeric.h +bad_version.o: $(hdrdir)/ruby/internal/intern/object.h +bad_version.o: $(hdrdir)/ruby/internal/intern/parse.h +bad_version.o: $(hdrdir)/ruby/internal/intern/proc.h +bad_version.o: $(hdrdir)/ruby/internal/intern/process.h +bad_version.o: $(hdrdir)/ruby/internal/intern/random.h +bad_version.o: $(hdrdir)/ruby/internal/intern/range.h +bad_version.o: $(hdrdir)/ruby/internal/intern/rational.h +bad_version.o: $(hdrdir)/ruby/internal/intern/re.h +bad_version.o: $(hdrdir)/ruby/internal/intern/ruby.h +bad_version.o: $(hdrdir)/ruby/internal/intern/select.h +bad_version.o: $(hdrdir)/ruby/internal/intern/select/largesize.h +bad_version.o: $(hdrdir)/ruby/internal/intern/signal.h +bad_version.o: $(hdrdir)/ruby/internal/intern/sprintf.h +bad_version.o: $(hdrdir)/ruby/internal/intern/string.h +bad_version.o: $(hdrdir)/ruby/internal/intern/struct.h +bad_version.o: $(hdrdir)/ruby/internal/intern/thread.h +bad_version.o: $(hdrdir)/ruby/internal/intern/time.h +bad_version.o: $(hdrdir)/ruby/internal/intern/variable.h +bad_version.o: $(hdrdir)/ruby/internal/intern/vm.h +bad_version.o: $(hdrdir)/ruby/internal/interpreter.h +bad_version.o: $(hdrdir)/ruby/internal/iterator.h +bad_version.o: $(hdrdir)/ruby/internal/memory.h +bad_version.o: $(hdrdir)/ruby/internal/method.h +bad_version.o: $(hdrdir)/ruby/internal/module.h +bad_version.o: 
$(hdrdir)/ruby/internal/newobj.h +bad_version.o: $(hdrdir)/ruby/internal/rgengc.h +bad_version.o: $(hdrdir)/ruby/internal/scan_args.h +bad_version.o: $(hdrdir)/ruby/internal/special_consts.h +bad_version.o: $(hdrdir)/ruby/internal/static_assert.h +bad_version.o: $(hdrdir)/ruby/internal/stdalign.h +bad_version.o: $(hdrdir)/ruby/internal/stdbool.h +bad_version.o: $(hdrdir)/ruby/internal/symbol.h +bad_version.o: $(hdrdir)/ruby/internal/value.h +bad_version.o: $(hdrdir)/ruby/internal/value_type.h +bad_version.o: $(hdrdir)/ruby/internal/variable.h +bad_version.o: $(hdrdir)/ruby/internal/warning_push.h +bad_version.o: $(hdrdir)/ruby/internal/xmalloc.h +bad_version.o: $(hdrdir)/ruby/missing.h +bad_version.o: $(hdrdir)/ruby/random.h +bad_version.o: $(hdrdir)/ruby/ruby.h +bad_version.o: $(hdrdir)/ruby/st.h +bad_version.o: $(hdrdir)/ruby/subst.h +bad_version.o: bad_version.c init.o: $(RUBY_EXTCONF_H) init.o: $(arch_hdrdir)/ruby/config.h init.o: $(hdrdir)/ruby.h diff --git a/include/ruby/random.h b/include/ruby/random.h index e3176dbe6bcc85..989445f2f91a93 100644 --- a/include/ruby/random.h +++ b/include/ruby/random.h @@ -16,6 +16,26 @@ #include "ruby/ruby.h" +/* + * version + * 0: before versioning; deprecated + * 1: added version and flags + */ +#define RUBY_RANDOM_INTERFACE_VERSION_MAJOR 1 +#define RUBY_RANDOM_INTERFACE_VERSION_MINOR 0 + +#define RUBY_RANDOM_PASTE_VERSION_SUFFIX(x, y, z) x##_##y##_##z +#define RUBY_RANDOM_WITH_VERSION_SUFFIX(name, major, minor) \ + RUBY_RANDOM_PASTE_VERSION_SUFFIX(name, major, minor) +#define rb_random_data_type \ + RUBY_RANDOM_WITH_VERSION_SUFFIX(rb_random_data_type, \ + RUBY_RANDOM_INTERFACE_VERSION_MAJOR, \ + RUBY_RANDOM_INTERFACE_VERSION_MINOR) +#define RUBY_RANDOM_INTERFACE_VERSION_INITIALIZER \ + {RUBY_RANDOM_INTERFACE_VERSION_MAJOR, RUBY_RANDOM_INTERFACE_VERSION_MINOR} +#define RUBY_RANDOM_INTERFACE_VERSION_MAJOR_MAX 0xff +#define RUBY_RANDOM_INTERFACE_VERSION_MINOR_MAX 0xff + RBIMPL_SYMBOL_EXPORT_BEGIN() /** @@ -84,6 +104,18 @@ 
typedef struct { /** Number of bits of seed numbers. */ size_t default_seed_bits; + /** + * Major/minor versions of this interface + */ + struct { + uint8_t major, minor; + } version; + + /** + * Reserved flags + */ + uint16_t flags; + /** Initialiser function. */ rb_random_init_func *init; @@ -161,6 +193,7 @@ typedef struct { * ``` */ #define RB_RANDOM_INTERFACE_DEFINE(prefix) \ + RUBY_RANDOM_INTERFACE_VERSION_INITIALIZER, 0, \ prefix##_init, \ prefix##_get_int32, \ prefix##_get_bytes diff --git a/random.c b/random.c index f8879cfb88a836..100a54f4e43500 100644 --- a/random.c +++ b/random.c @@ -400,6 +400,15 @@ random_init(int argc, VALUE *argv, VALUE obj) rb_raise(rb_eTypeError, "undefined random interface: %s", RTYPEDDATA_TYPE(obj)->wrap_struct_name); } + + unsigned int major = rng->version.major; + unsigned int minor = rng->version.minor; + if (major != RUBY_RANDOM_INTERFACE_VERSION_MAJOR) { + rb_raise(rb_eTypeError, "Random interface version " + STRINGIZE(RUBY_RANDOM_INTERFACE_VERSION_MAJOR) "." 
+ STRINGIZE(RUBY_RANDOM_INTERFACE_VERSION_MINOR) " " + "expected: %d.%d", major, minor); + } argc = rb_check_arity(argc, 0, 1); rb_check_frozen(obj); if (argc == 0) { diff --git a/test/-ext-/test_random.rb b/test/-ext-/test_random.rb index 838e5d2f14211e..e5cebcc871a5e8 100644 --- a/test/-ext-/test_random.rb +++ b/test/-ext-/test_random.rb @@ -1,11 +1,13 @@ require 'test/unit' module TestRandomExt + def setup + super + assert_nothing_raised(LoadError) {require '-test-/random'} + end + class TestLoop < Test::Unit::TestCase - def setup - super - assert_nothing_raised(LoadError) {require '-test-/random'} - end + include TestRandomExt def test_bytes rnd = Bug::Random::Loop.new(1) @@ -24,4 +26,20 @@ def test_real assert_equal(1.00, Bug::Random::Loop.new(4<<14).rand) end end + + class TestVersionZero < Test::Unit::TestCase + include TestRandomExt + + def test_bad_version + assert_raise(TypeError) {Bug::Random::VersionZero.new} + end + end + + class TestVersionMax < Test::Unit::TestCase + include TestRandomExt + + def test_bad_version + assert_raise(TypeError) {Bug::Random::VersionMax.new} + end + end end From b7e8876704648cee6866591ac1aca7a54faff742 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 9 Nov 2022 19:36:37 +0900 Subject: [PATCH 072/104] [Bug #19100] Add `init_int32` function to `rb_random_interface_t` Distinguish initialization by single word from initialization by array. 
--- ext/-test-/random/bad_version.c | 3 +++ ext/-test-/random/loop.c | 1 + include/ruby/random.h | 30 ++++++++++++++++++++++++++---- random.c | 21 +++++++++++++-------- test/ruby/test_rand.rb | 8 ++++++++ 5 files changed, 51 insertions(+), 12 deletions(-) diff --git a/ext/-test-/random/bad_version.c b/ext/-test-/random/bad_version.c index b6bf2ac5af18a7..dae63a6d190171 100644 --- a/ext/-test-/random/bad_version.c +++ b/ext/-test-/random/bad_version.c @@ -20,6 +20,9 @@ bad_version_init(rb_random_t *rnd, const uint32_t *buf, size_t len) must_not_reach(); } +NORETURN(static void bad_version_init_int32(rb_random_t *, uint32_t)); +RB_RANDOM_DEFINE_INIT_INT32_FUNC(bad_version) + NORETURN(static void bad_version_get_bytes(rb_random_t *, void *, size_t)); static void bad_version_get_bytes(rb_random_t *rnd, void *p, size_t n) diff --git a/ext/-test-/random/loop.c b/ext/-test-/random/loop.c index 05720964031a74..805c8e9122999f 100644 --- a/ext/-test-/random/loop.c +++ b/ext/-test-/random/loop.c @@ -13,6 +13,7 @@ static const rb_random_interface_t random_loop_if = { RB_RANDOM_INTERFACE_DEFINE_WITH_REAL(loop) }; +RB_RANDOM_DEFINE_INIT_INT32_FUNC(loop); static size_t random_loop_memsize(const void *ptr) { diff --git a/include/ruby/random.h b/include/ruby/random.h index 989445f2f91a93..39bdb6f3e3950a 100644 --- a/include/ruby/random.h +++ b/include/ruby/random.h @@ -19,7 +19,7 @@ /* * version * 0: before versioning; deprecated - * 1: added version and flags + * 1: added version, flags and init_int32 function */ #define RUBY_RANDOM_INTERFACE_VERSION_MAJOR 1 #define RUBY_RANDOM_INTERFACE_VERSION_MINOR 0 @@ -66,6 +66,17 @@ RBIMPL_ATTR_NONNULL(()) */ typedef void rb_random_init_func(rb_random_t *rng, const uint32_t *buf, size_t len); +RBIMPL_ATTR_NONNULL(()) +/** + * This is the type of functions called when your random object is initialised. + * Passed data is the seed integer. + * + * @param[out] rng Your random struct to fill in. + * @param[in] data Seed, single word.
+ * @post `rng` is initialised using the passed seeds. + */ +typedef void rb_random_init_int32_func(rb_random_t *rng, uint32_t data); + RBIMPL_ATTR_NONNULL(()) /** * This is the type of functions called from your object's `#rand` method. @@ -116,9 +127,12 @@ typedef struct { */ uint16_t flags; - /** Initialiser function. */ + /** Function to initialize from uint32_t array. */ rb_random_init_func *init; + /** Function to initialize from single uint32_t. */ + rb_random_init_int32_func *init_int32; + /** Function to obtain a random integer. */ rb_random_get_int32_func *get_int32; @@ -162,11 +176,12 @@ typedef struct { } rb_random_interface_t; /** - * This utility macro defines 3 functions named prefix_init, prefix_get_int32, - * prefix_get_bytes. + * This utility macro defines 4 functions named prefix_init, prefix_init_int32, + * prefix_get_int32, prefix_get_bytes. */ #define RB_RANDOM_INTERFACE_DECLARE(prefix) \ static void prefix##_init(rb_random_t *, const uint32_t *, size_t); \ + static void prefix##_init_int32(rb_random_t *, uint32_t); \ static unsigned int prefix##_get_int32(rb_random_t *); \ static void prefix##_get_bytes(rb_random_t *, void *, size_t) @@ -195,6 +210,7 @@ typedef struct { #define RB_RANDOM_INTERFACE_DEFINE(prefix) \ RUBY_RANDOM_INTERFACE_VERSION_INITIALIZER, 0, \ prefix##_init, \ + prefix##_init_int32, \ prefix##_get_int32, \ prefix##_get_bytes @@ -206,6 +222,12 @@ typedef struct { RB_RANDOM_INTERFACE_DEFINE(prefix), \ prefix##_get_real +#define RB_RANDOM_DEFINE_INIT_INT32_FUNC(prefix) \ + static void prefix##_init_int32(rb_random_t *rnd, uint32_t data) \ + { \ + prefix##_init(rnd, &data, 1); \ + } + #if defined _WIN32 && !defined __CYGWIN__ typedef rb_data_type_t rb_random_data_type_t; # define RB_RANDOM_PARENT 0 diff --git a/random.c b/random.c index 100a54f4e43500..759e15335ae9b6 100644 --- a/random.c +++ b/random.c @@ -371,11 +371,14 @@ rand_init(const rb_random_interface_t *rng, rb_random_t *rnd, VALUE seed) 
INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_NATIVE_BYTE_ORDER); if (sign < 0) sign = -sign; - if (len > 1) { + if (len <= 1) { + rng->init_int32(rnd, len ? buf[0] : 0); + } + else { if (sign != 2 && buf[len-1] == 1) /* remove leading-zero-guard */ len--; + rng->init(rnd, buf, len); } - rng->init(rnd, buf, len); explicit_bzero(buf, len * sizeof(*buf)); ALLOCV_END(buf0); return seed; @@ -891,16 +894,18 @@ rand_mt_load(VALUE obj, VALUE dump) return obj; } +static void +rand_mt_init_int32(rb_random_t *rnd, uint32_t data) +{ + struct MT *mt = &((rb_random_mt_t *)rnd)->mt; + init_genrand(mt, data); +} + static void rand_mt_init(rb_random_t *rnd, const uint32_t *buf, size_t len) { struct MT *mt = &((rb_random_mt_t *)rnd)->mt; - if (len <= 1) { - init_genrand(mt, len ? buf[0] : 0); - } - else { - init_by_array(mt, buf, (int)len); - } + init_by_array(mt, buf, (int)len); } static unsigned int diff --git a/test/ruby/test_rand.rb b/test/ruby/test_rand.rb index f066433f6a168d..a4beffd689e7b3 100644 --- a/test/ruby/test_rand.rb +++ b/test/ruby/test_rand.rb @@ -336,6 +336,14 @@ def test_seed } end + def test_seed_leading_zero_guard + guard = 1<<32 + range = 0...(1<<32) + all_assertions_foreach(nil, 0, 1, 2) do |i| + assert_not_equal(Random.new(i).rand(range), Random.new(i+guard).rand(range)) + end + end + def test_marshal bug3656 = '[ruby-core:31622]' assert_raise(TypeError, bug3656) { From 68bd1d685579f9ae96f1c8160cf1fd9820e508da Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Thu, 10 Nov 2022 09:07:20 -0500 Subject: [PATCH 073/104] Fix compiler issues in test on C99 Fixes the following issue when compiling using C99: ext/-test-/rb_call_super_kw/rb_call_super_kw.c ext/-test-/random/loop.c:16:39: error: extra ';' outside of a function [-Werror,-Wextra-semi] RB_RANDOM_DEFINE_INIT_INT32_FUNC(loop); --- ext/-test-/random/loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/-test-/random/loop.c b/ext/-test-/random/loop.c index 805c8e9122999f..b789ab1d0177d4 100644 
--- a/ext/-test-/random/loop.c +++ b/ext/-test-/random/loop.c @@ -13,7 +13,7 @@ static const rb_random_interface_t random_loop_if = { RB_RANDOM_INTERFACE_DEFINE_WITH_REAL(loop) }; -RB_RANDOM_DEFINE_INIT_INT32_FUNC(loop); +RB_RANDOM_DEFINE_INIT_INT32_FUNC(loop) static size_t random_loop_memsize(const void *ptr) { From 199b59f065ce6f1c13b8424f35a70c513523211b Mon Sep 17 00:00:00 2001 From: Jemma Issroff Date: Wed, 9 Nov 2022 17:04:35 -0500 Subject: [PATCH 074/104] Fix bug in array pack with shared strings If string literals are long and they become shared, we need to make them independent before we can write to them. [Bug #19116] --- pack.c | 1 + test/ruby/test_array.rb | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/pack.c b/pack.c index 2817491b77453d..294d7dfa350ff7 100644 --- a/pack.c +++ b/pack.c @@ -217,6 +217,7 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer) else { if (!RB_TYPE_P(buffer, T_STRING)) rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer)); + rb_str_modify(buffer); res = buffer; } diff --git a/test/ruby/test_array.rb b/test/ruby/test_array.rb index 20e6ee79178632..f58f8a27789eba 100644 --- a/test/ruby/test_array.rb +++ b/test/ruby/test_array.rb @@ -1294,6 +1294,12 @@ def test_pack =end end + def test_pack_with_buffer + n = [ 65, 66, 67 ] + str = "a" * 100 + assert_equal("aaaABC", n.pack("@3ccc", buffer: str.dup), "[Bug #19116]") + end + def test_pop a = @cls[ 'cat', 'dog' ] assert_equal('dog', a.pop) From 9986697b621e5345177a1c395489dcc9fab8602b Mon Sep 17 00:00:00 2001 From: Jemma Issroff Date: Tue, 8 Nov 2022 15:35:31 -0500 Subject: [PATCH 075/104] Omit test on IO Buffers which relies on String being embedded There is currently a bug ([#19084]) in how extended strings work in IO Buffers. Object Shapes changes will make the string in this test extended on 32 bit machines. 
Since this behavior is currently broken (unrelated to object shapes) on 32 bit machines, this test will then fail. We preemptively omit it so that this commit can be reverted once the bug is fixed. --- test/ruby/test_io_buffer.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/ruby/test_io_buffer.rb b/test/ruby/test_io_buffer.rb index 2204c3db09a97e..70070e93c93022 100644 --- a/test/ruby/test_io_buffer.rb +++ b/test/ruby/test_io_buffer.rb @@ -330,6 +330,10 @@ def test_invalidation end def test_read + # There is currently a bug in IO::Buffer [#19084] which affects extended + # strings. On 32 bit machines, the example below becomes extended, so + # we omit this test until the bug is fixed. + omit if GC::INTERNAL_CONSTANTS[:SIZE_POOL_COUNT] == 1 io = Tempfile.new io.write("Hello World") io.seek(0) @@ -339,7 +343,7 @@ def test_read assert_equal "Hello", buffer.get_string(0, 5) ensure - io.close! + io.close! if io end def test_write From 5246f4027ec574e77809845e1b1f7822cc2a5cef Mon Sep 17 00:00:00 2001 From: Jemma Issroff Date: Tue, 8 Nov 2022 15:35:31 -0500 Subject: [PATCH 076/104] Transition shape when object's capacity changes This commit adds a `capacity` field to shapes, and adds shape transitions whenever an object's capacity changes. Objects which are allocated out of a bigger size pool will also make a transition from the root shape to the shape with the correct capacity for their size pool when they are allocated. This commit will allow us to remove numiv from objects completely, and will also mean we can guarantee that if two objects share shapes, their IVs are in the same positions (an embedded and extended object cannot share shapes). This will enable us to implement ivar sets in YJIT using object shapes.
Co-Authored-By: Aaron Patterson --- class.c | 4 +- common.mk | 10 ++ ext/-test-/rational/depend | 1 + ext/pty/depend | 1 + ext/ripper/depend | 1 + ext/socket/depend | 15 ++ gc.c | 67 +++++++-- gc.h | 2 + gc.rb | 10 -- include/ruby/internal/core/robject.h | 10 ++ inits.c | 1 + internal/class.h | 4 +- internal/gc.h | 5 +- internal/variable.h | 3 +- lib/mjit/compiler.rb | 18 ++- mjit_c.rb | 26 ++++ object.c | 77 +++++++--- shape.c | 203 +++++++++++++++++++++------ shape.h | 28 +++- test/-ext-/string/test_cstr.rb | 6 +- test/objspace/test_objspace.rb | 26 ++-- test/ruby/test_gc_compact.rb | 13 +- test/ruby/test_shapes.rb | 19 +-- variable.c | 61 +++++--- vm.c | 11 -- vm_core.h | 1 - vm_insnhelper.c | 52 +++---- yjit/src/cruby_bindings.inc.rs | 9 +- 28 files changed, 482 insertions(+), 202 deletions(-) diff --git a/class.c b/class.c index d181fb0b2ee635..85663ada50ab79 100644 --- a/class.c +++ b/class.c @@ -197,7 +197,7 @@ class_alloc(VALUE flags, VALUE klass) { size_t alloc_size = sizeof(struct RClass); -#if USE_RVARGC +#if RCLASS_EXT_EMBEDDED alloc_size += sizeof(rb_classext_t); #endif @@ -206,7 +206,7 @@ class_alloc(VALUE flags, VALUE klass) if (RGENGC_WB_PROTECTED_CLASS) flags |= FL_WB_PROTECTED; RVARGC_NEWOBJ_OF(obj, struct RClass, klass, flags, alloc_size); -#if USE_RVARGC +#if RCLASS_EXT_EMBEDDED memset(RCLASS_EXT(obj), 0, sizeof(rb_classext_t)); #else obj->ptr = ZALLOC(rb_classext_t); diff --git a/common.mk b/common.mk index 81f99b14583045..99cda1a98ade45 100644 --- a/common.mk +++ b/common.mk @@ -6038,6 +6038,7 @@ enumerator.$(OBJEXT): {$(VPATH)}missing.h enumerator.$(OBJEXT): {$(VPATH)}onigmo.h enumerator.$(OBJEXT): {$(VPATH)}oniguruma.h enumerator.$(OBJEXT): {$(VPATH)}ruby_assert.h +enumerator.$(OBJEXT): {$(VPATH)}shape.h enumerator.$(OBJEXT): {$(VPATH)}st.h enumerator.$(OBJEXT): {$(VPATH)}subst.h error.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h @@ -9376,6 +9377,7 @@ memory_view.$(OBJEXT): {$(VPATH)}internal/xmalloc.h memory_view.$(OBJEXT): 
{$(VPATH)}memory_view.c memory_view.$(OBJEXT): {$(VPATH)}memory_view.h memory_view.$(OBJEXT): {$(VPATH)}missing.h +memory_view.$(OBJEXT): {$(VPATH)}shape.h memory_view.$(OBJEXT): {$(VPATH)}st.h memory_view.$(OBJEXT): {$(VPATH)}subst.h memory_view.$(OBJEXT): {$(VPATH)}util.h @@ -10629,6 +10631,7 @@ object.$(OBJEXT): {$(VPATH)}shape.h object.$(OBJEXT): {$(VPATH)}st.h object.$(OBJEXT): {$(VPATH)}subst.h object.$(OBJEXT): {$(VPATH)}util.h +object.$(OBJEXT): {$(VPATH)}variable.h pack.$(OBJEXT): $(hdrdir)/ruby/ruby.h pack.$(OBJEXT): $(top_srcdir)/internal/array.h pack.$(OBJEXT): $(top_srcdir)/internal/bits.h @@ -10810,6 +10813,7 @@ pack.$(OBJEXT): {$(VPATH)}onigmo.h pack.$(OBJEXT): {$(VPATH)}oniguruma.h pack.$(OBJEXT): {$(VPATH)}pack.c pack.$(OBJEXT): {$(VPATH)}pack.rbinc +pack.$(OBJEXT): {$(VPATH)}shape.h pack.$(OBJEXT): {$(VPATH)}st.h pack.$(OBJEXT): {$(VPATH)}subst.h pack.$(OBJEXT): {$(VPATH)}util.h @@ -11022,6 +11026,7 @@ parse.$(OBJEXT): {$(VPATH)}ractor.h parse.$(OBJEXT): {$(VPATH)}regenc.h parse.$(OBJEXT): {$(VPATH)}regex.h parse.$(OBJEXT): {$(VPATH)}ruby_assert.h +parse.$(OBJEXT): {$(VPATH)}shape.h parse.$(OBJEXT): {$(VPATH)}st.h parse.$(OBJEXT): {$(VPATH)}subst.h parse.$(OBJEXT): {$(VPATH)}symbol.h @@ -11853,6 +11858,7 @@ random.$(OBJEXT): {$(VPATH)}ractor.h random.$(OBJEXT): {$(VPATH)}random.c random.$(OBJEXT): {$(VPATH)}random.h random.$(OBJEXT): {$(VPATH)}ruby_atomic.h +random.$(OBJEXT): {$(VPATH)}shape.h random.$(OBJEXT): {$(VPATH)}siphash.c random.$(OBJEXT): {$(VPATH)}siphash.h random.$(OBJEXT): {$(VPATH)}st.h @@ -12045,6 +12051,7 @@ range.$(OBJEXT): {$(VPATH)}missing.h range.$(OBJEXT): {$(VPATH)}onigmo.h range.$(OBJEXT): {$(VPATH)}oniguruma.h range.$(OBJEXT): {$(VPATH)}range.c +range.$(OBJEXT): {$(VPATH)}shape.h range.$(OBJEXT): {$(VPATH)}st.h range.$(OBJEXT): {$(VPATH)}subst.h rational.$(OBJEXT): $(hdrdir)/ruby/ruby.h @@ -14024,6 +14031,7 @@ shape.$(OBJEXT): {$(VPATH)}constant.h shape.$(OBJEXT): {$(VPATH)}debug_counter.h shape.$(OBJEXT): 
{$(VPATH)}defines.h shape.$(OBJEXT): {$(VPATH)}encoding.h +shape.$(OBJEXT): {$(VPATH)}gc.h shape.$(OBJEXT): {$(VPATH)}id.h shape.$(OBJEXT): {$(VPATH)}id_table.h shape.$(OBJEXT): {$(VPATH)}intern.h @@ -16007,6 +16015,7 @@ time.$(OBJEXT): {$(VPATH)}missing.h time.$(OBJEXT): {$(VPATH)}onigmo.h time.$(OBJEXT): {$(VPATH)}oniguruma.h time.$(OBJEXT): {$(VPATH)}ruby_assert.h +time.$(OBJEXT): {$(VPATH)}shape.h time.$(OBJEXT): {$(VPATH)}st.h time.$(OBJEXT): {$(VPATH)}subst.h time.$(OBJEXT): {$(VPATH)}time.c @@ -16371,6 +16380,7 @@ transient_heap.$(OBJEXT): {$(VPATH)}internal/warning_push.h transient_heap.$(OBJEXT): {$(VPATH)}internal/xmalloc.h transient_heap.$(OBJEXT): {$(VPATH)}missing.h transient_heap.$(OBJEXT): {$(VPATH)}ruby_assert.h +transient_heap.$(OBJEXT): {$(VPATH)}shape.h transient_heap.$(OBJEXT): {$(VPATH)}st.h transient_heap.$(OBJEXT): {$(VPATH)}subst.h transient_heap.$(OBJEXT): {$(VPATH)}transient_heap.c diff --git a/ext/-test-/rational/depend b/ext/-test-/rational/depend index 8729695886a1d4..ce977821b88974 100644 --- a/ext/-test-/rational/depend +++ b/ext/-test-/rational/depend @@ -174,5 +174,6 @@ rat.o: $(top_srcdir)/internal/static_assert.h rat.o: $(top_srcdir)/internal/vm.h rat.o: $(top_srcdir)/internal/warnings.h rat.o: $(top_srcdir)/ruby_assert.h +rat.o: $(top_srcdir)/shape.h rat.o: rat.c # AUTOGENERATED DEPENDENCIES END diff --git a/ext/pty/depend b/ext/pty/depend index c43d3dcf9a7083..f251caae3f14d1 100644 --- a/ext/pty/depend +++ b/ext/pty/depend @@ -181,5 +181,6 @@ pty.o: $(top_srcdir)/internal/process.h pty.o: $(top_srcdir)/internal/signal.h pty.o: $(top_srcdir)/internal/static_assert.h pty.o: $(top_srcdir)/internal/warnings.h +pty.o: $(top_srcdir)/shape.h pty.o: pty.c # AUTOGENERATED DEPENDENCIES END diff --git a/ext/ripper/depend b/ext/ripper/depend index c77e4e1b7a2d57..85520b032e9dcb 100644 --- a/ext/ripper/depend +++ b/ext/ripper/depend @@ -252,6 +252,7 @@ ripper.o: $(top_srcdir)/internal/warnings.h ripper.o: $(top_srcdir)/node.h ripper.o: 
$(top_srcdir)/regenc.h ripper.o: $(top_srcdir)/ruby_assert.h +ripper.o: $(top_srcdir)/shape.h ripper.o: $(top_srcdir)/symbol.h ripper.o: ../../probes.h ripper.o: eventids2.c diff --git a/ext/socket/depend b/ext/socket/depend index ffe2fce844296b..28c5540cd64f3d 100644 --- a/ext/socket/depend +++ b/ext/socket/depend @@ -197,6 +197,7 @@ ancdata.o: $(top_srcdir)/internal/string.h ancdata.o: $(top_srcdir)/internal/thread.h ancdata.o: $(top_srcdir)/internal/vm.h ancdata.o: $(top_srcdir)/internal/warnings.h +ancdata.o: $(top_srcdir)/shape.h ancdata.o: ancdata.c ancdata.o: constdefs.h ancdata.o: rubysocket.h @@ -388,6 +389,7 @@ basicsocket.o: $(top_srcdir)/internal/string.h basicsocket.o: $(top_srcdir)/internal/thread.h basicsocket.o: $(top_srcdir)/internal/vm.h basicsocket.o: $(top_srcdir)/internal/warnings.h +basicsocket.o: $(top_srcdir)/shape.h basicsocket.o: basicsocket.c basicsocket.o: constdefs.h basicsocket.o: rubysocket.h @@ -579,6 +581,7 @@ constants.o: $(top_srcdir)/internal/string.h constants.o: $(top_srcdir)/internal/thread.h constants.o: $(top_srcdir)/internal/vm.h constants.o: $(top_srcdir)/internal/warnings.h +constants.o: $(top_srcdir)/shape.h constants.o: constants.c constants.o: constdefs.c constants.o: constdefs.h @@ -771,6 +774,7 @@ ifaddr.o: $(top_srcdir)/internal/string.h ifaddr.o: $(top_srcdir)/internal/thread.h ifaddr.o: $(top_srcdir)/internal/vm.h ifaddr.o: $(top_srcdir)/internal/warnings.h +ifaddr.o: $(top_srcdir)/shape.h ifaddr.o: constdefs.h ifaddr.o: ifaddr.c ifaddr.o: rubysocket.h @@ -962,6 +966,7 @@ init.o: $(top_srcdir)/internal/string.h init.o: $(top_srcdir)/internal/thread.h init.o: $(top_srcdir)/internal/vm.h init.o: $(top_srcdir)/internal/warnings.h +init.o: $(top_srcdir)/shape.h init.o: constdefs.h init.o: init.c init.o: rubysocket.h @@ -1153,6 +1158,7 @@ ipsocket.o: $(top_srcdir)/internal/string.h ipsocket.o: $(top_srcdir)/internal/thread.h ipsocket.o: $(top_srcdir)/internal/vm.h ipsocket.o: $(top_srcdir)/internal/warnings.h 
+ipsocket.o: $(top_srcdir)/shape.h ipsocket.o: constdefs.h ipsocket.o: ipsocket.c ipsocket.o: rubysocket.h @@ -1344,6 +1350,7 @@ option.o: $(top_srcdir)/internal/string.h option.o: $(top_srcdir)/internal/thread.h option.o: $(top_srcdir)/internal/vm.h option.o: $(top_srcdir)/internal/warnings.h +option.o: $(top_srcdir)/shape.h option.o: constdefs.h option.o: option.c option.o: rubysocket.h @@ -1535,6 +1542,7 @@ raddrinfo.o: $(top_srcdir)/internal/string.h raddrinfo.o: $(top_srcdir)/internal/thread.h raddrinfo.o: $(top_srcdir)/internal/vm.h raddrinfo.o: $(top_srcdir)/internal/warnings.h +raddrinfo.o: $(top_srcdir)/shape.h raddrinfo.o: constdefs.h raddrinfo.o: raddrinfo.c raddrinfo.o: rubysocket.h @@ -1726,6 +1734,7 @@ socket.o: $(top_srcdir)/internal/string.h socket.o: $(top_srcdir)/internal/thread.h socket.o: $(top_srcdir)/internal/vm.h socket.o: $(top_srcdir)/internal/warnings.h +socket.o: $(top_srcdir)/shape.h socket.o: constdefs.h socket.o: rubysocket.h socket.o: socket.c @@ -1917,6 +1926,7 @@ sockssocket.o: $(top_srcdir)/internal/string.h sockssocket.o: $(top_srcdir)/internal/thread.h sockssocket.o: $(top_srcdir)/internal/vm.h sockssocket.o: $(top_srcdir)/internal/warnings.h +sockssocket.o: $(top_srcdir)/shape.h sockssocket.o: constdefs.h sockssocket.o: rubysocket.h sockssocket.o: sockport.h @@ -2108,6 +2118,7 @@ tcpserver.o: $(top_srcdir)/internal/string.h tcpserver.o: $(top_srcdir)/internal/thread.h tcpserver.o: $(top_srcdir)/internal/vm.h tcpserver.o: $(top_srcdir)/internal/warnings.h +tcpserver.o: $(top_srcdir)/shape.h tcpserver.o: constdefs.h tcpserver.o: rubysocket.h tcpserver.o: sockport.h @@ -2299,6 +2310,7 @@ tcpsocket.o: $(top_srcdir)/internal/string.h tcpsocket.o: $(top_srcdir)/internal/thread.h tcpsocket.o: $(top_srcdir)/internal/vm.h tcpsocket.o: $(top_srcdir)/internal/warnings.h +tcpsocket.o: $(top_srcdir)/shape.h tcpsocket.o: constdefs.h tcpsocket.o: rubysocket.h tcpsocket.o: sockport.h @@ -2490,6 +2502,7 @@ udpsocket.o: 
$(top_srcdir)/internal/string.h udpsocket.o: $(top_srcdir)/internal/thread.h udpsocket.o: $(top_srcdir)/internal/vm.h udpsocket.o: $(top_srcdir)/internal/warnings.h +udpsocket.o: $(top_srcdir)/shape.h udpsocket.o: constdefs.h udpsocket.o: rubysocket.h udpsocket.o: sockport.h @@ -2681,6 +2694,7 @@ unixserver.o: $(top_srcdir)/internal/string.h unixserver.o: $(top_srcdir)/internal/thread.h unixserver.o: $(top_srcdir)/internal/vm.h unixserver.o: $(top_srcdir)/internal/warnings.h +unixserver.o: $(top_srcdir)/shape.h unixserver.o: constdefs.h unixserver.o: rubysocket.h unixserver.o: sockport.h @@ -2872,6 +2886,7 @@ unixsocket.o: $(top_srcdir)/internal/string.h unixsocket.o: $(top_srcdir)/internal/thread.h unixsocket.o: $(top_srcdir)/internal/vm.h unixsocket.o: $(top_srcdir)/internal/warnings.h +unixsocket.o: $(top_srcdir)/shape.h unixsocket.o: constdefs.h unixsocket.o: rubysocket.h unixsocket.o: sockport.h diff --git a/gc.c b/gc.c index 4adf86bf77e1af..84f3b8f206665e 100644 --- a/gc.c +++ b/gc.c @@ -138,6 +138,7 @@ #include "ractor_core.h" #include "builtin.h" +#include "shape.h" #define rb_setjmp(env) RUBY_SETJMP(env) #define rb_jmp_buf rb_jmpbuf_t @@ -2593,6 +2594,12 @@ size_pool_slot_size(unsigned char pool_id) return slot_size; } +size_t +rb_size_pool_slot_size(unsigned char pool_id) +{ + return size_pool_slot_size(pool_id); +} + bool rb_gc_size_allocatable_p(size_t size) { @@ -2797,6 +2804,9 @@ newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t * } obj = newobj_alloc(objspace, cr, size_pool_idx, true); +#if SHAPE_IN_BASIC_FLAGS + flags |= (VALUE)(size_pool_idx) << SHAPE_FLAG_SHIFT; +#endif newobj_init(klass, flags, wb_protected, objspace, obj); gc_event_hook_prep(objspace, RUBY_INTERNAL_EVENT_NEWOBJ, obj, newobj_fill(obj, 0, 0, 0)); @@ -2848,6 +2858,9 @@ newobj_of0(VALUE klass, VALUE flags, int wb_protected, rb_ractor_t *cr, size_t a gc_event_hook_available_p(objspace)) && wb_protected) { obj = newobj_alloc(objspace, cr, size_pool_idx, 
false); +#if SHAPE_IN_BASIC_FLAGS + flags |= (VALUE)size_pool_idx << SHAPE_FLAG_SHIFT; +#endif newobj_init(klass, flags, wb_protected, objspace, obj); } else { @@ -2916,10 +2929,10 @@ rb_class_instance_allocate_internal(VALUE klass, VALUE flags, bool wb_protected) GC_ASSERT((flags & RUBY_T_MASK) == T_OBJECT); GC_ASSERT(flags & ROBJECT_EMBED); - uint32_t index_tbl_num_entries = RCLASS_EXT(klass)->max_iv_count; - size_t size; #if USE_RVARGC + uint32_t index_tbl_num_entries = RCLASS_EXT(klass)->max_iv_count; + size = rb_obj_embedded_size(index_tbl_num_entries); if (!rb_gc_size_allocatable_p(size)) { size = sizeof(struct RObject); @@ -2932,7 +2945,7 @@ rb_class_instance_allocate_internal(VALUE klass, VALUE flags, bool wb_protected) #if USE_RVARGC uint32_t capa = (uint32_t)((rb_gc_obj_slot_size(obj) - offsetof(struct RObject, as.ary)) / sizeof(VALUE)); - ROBJECT(obj)->numiv = capa; + ROBJECT_SET_NUMIV(obj, capa); #endif #if RUBY_DEBUG @@ -3454,7 +3467,7 @@ obj_free(rb_objspace_t *objspace, VALUE obj) xfree(RCLASS_SUPERCLASSES(obj)); } -#if !USE_RVARGC +#if SIZE_POOL_COUNT == 1 if (RCLASS_EXT(obj)) xfree(RCLASS_EXT(obj)); #endif @@ -4869,7 +4882,7 @@ obj_memsize_of(VALUE obj, int use_all_types) if (FL_TEST_RAW(obj, RCLASS_SUPERCLASSES_INCLUDE_SELF)) { size += (RCLASS_SUPERCLASS_DEPTH(obj) + 1) * sizeof(VALUE); } -#if !USE_RVARGC +#if SIZE_POOL_COUNT == 1 size += sizeof(rb_classext_t); #endif } @@ -6054,6 +6067,7 @@ invalidate_moved_plane(rb_objspace_t *objspace, struct heap_page *page, uintptr_ gc_move(objspace, object, forwarding_object, GET_HEAP_PAGE(object)->slot_size, page->slot_size); /* forwarding_object is now our actual object, and "object" * is the free slot for the original page */ + struct heap_page *orig_page = GET_HEAP_PAGE(object); orig_page->free_slots++; heap_page_add_freeobj(objspace, orig_page, object); @@ -8387,6 +8401,7 @@ static rb_size_pool_t * gc_compact_destination_pool(rb_objspace_t *objspace, rb_size_pool_t *src_pool, VALUE src) { size_t 
obj_size; + size_t idx = 0; switch (BUILTIN_TYPE(src)) { case T_ARRAY: @@ -8406,17 +8421,16 @@ gc_compact_destination_pool(rb_objspace_t *objspace, rb_size_pool_t *src_pool, V } if (rb_gc_size_allocatable_p(obj_size)){ - return &size_pools[size_pool_idx_for_size(obj_size)]; - } - else { - return &size_pools[0]; + idx = size_pool_idx_for_size(obj_size); } + return &size_pools[idx]; } static bool gc_compact_move(rb_objspace_t *objspace, rb_heap_t *heap, rb_size_pool_t *size_pool, VALUE src) { GC_ASSERT(BUILTIN_TYPE(src) != T_MOVED); + rb_heap_t *dheap = SIZE_POOL_EDEN_HEAP(gc_compact_destination_pool(objspace, size_pool, src)); if (gc_compact_heap_cursors_met_p(dheap)) { @@ -10003,9 +10017,10 @@ static void gc_ref_update_object(rb_objspace_t *objspace, VALUE v) { VALUE *ptr = ROBJECT_IVPTR(v); - uint32_t numiv = ROBJECT_NUMIV(v); #if USE_RVARGC + uint32_t numiv = ROBJECT_NUMIV(v); + size_t slot_size = rb_gc_obj_slot_size(v); size_t embed_size = rb_obj_embedded_size(numiv); if (slot_size >= embed_size && !RB_FL_TEST_RAW(v, ROBJECT_EMBED)) { @@ -10019,9 +10034,17 @@ gc_ref_update_object(rb_objspace_t *objspace, VALUE v) xfree(ptr); } ptr = ROBJECT(v)->as.ary; - - uint32_t capa = (uint32_t)((slot_size - offsetof(struct RObject, as.ary)) / sizeof(VALUE)); - ROBJECT(v)->numiv = capa; + size_t size_pool_shape_id = size_pool_idx_for_size(embed_size); + rb_shape_t * initial_shape = rb_shape_get_shape_by_id((shape_id_t)size_pool_shape_id); + rb_shape_t * new_shape = rb_shape_rebuild_shape(initial_shape, rb_shape_get_shape(v)); + rb_shape_set_shape(v, new_shape); + ROBJECT_SET_NUMIV(v, new_shape->capacity); +#if RUBY_DEBUG + if(RB_TYPE_P(v, T_OBJECT) && ROBJECT_IV_CAPACITY(v) != ROBJECT_NUMIV(v)) { + fprintf(stderr, "shape capa: %d, v capa: %d\n", ROBJECT_IV_CAPACITY(v), ROBJECT_NUMIV(v)); + } +#endif + RUBY_ASSERT(!RB_TYPE_P(v, T_OBJECT) || ROBJECT_IV_CAPACITY(v) == ROBJECT_NUMIV(v)); } #endif @@ -14293,6 +14316,22 @@ rb_gcdebug_remove_stress_to_class(int argc, VALUE *argv, 
VALUE self) */ #include "gc.rbinc" +/* + * call-seq: + * GC.using_rvargc? -> true or false + * + * Returns true if using experimental feature Variable Width Allocation, false + * otherwise. + */ +static VALUE +gc_using_rvargc_p(VALUE mod) +{ +#if USE_RVARGC + return Qtrue; +#else + return Qfalse; +#endif +} void Init_GC(void) @@ -14371,6 +14410,8 @@ Init_GC(void) rb_define_singleton_method(rb_mGC, "malloc_allocations", gc_malloc_allocations, 0); #endif + rb_define_singleton_method(rb_mGC, "using_rvargc?", gc_using_rvargc_p, 0); + if (GC_COMPACTION_SUPPORTED) { rb_define_singleton_method(rb_mGC, "compact", gc_compact, 0); rb_define_singleton_method(rb_mGC, "auto_compact", gc_get_auto_compact, 0); diff --git a/gc.h b/gc.h index e1ce802095b98c..b2356444c62fa7 100644 --- a/gc.h +++ b/gc.h @@ -120,6 +120,8 @@ VALUE rb_gc_disable_no_rest(void); struct rb_thread_struct; +size_t rb_size_pool_slot_size(unsigned char pool_id); + RUBY_SYMBOL_EXPORT_BEGIN /* exports for objspace module */ diff --git a/gc.rb b/gc.rb index 9144a9660362e5..8af36e0cb8fa65 100644 --- a/gc.rb +++ b/gc.rb @@ -252,16 +252,6 @@ def self.verify_compaction_references(toward: nil, double_heap: false, expand_he end end - # call-seq: - # GC.using_rvargc? -> true or false - # - # Returns true if using experimental feature Variable Width Allocation, false - # otherwise. - def self.using_rvargc? 
# :nodoc: - GC::INTERNAL_CONSTANTS[:SIZE_POOL_COUNT] > 1 - end - - # call-seq: # GC.measure_total_time = true/false # diff --git a/include/ruby/internal/core/robject.h b/include/ruby/internal/core/robject.h index bec0b45fd4e566..e0514d7dd22ffb 100644 --- a/include/ruby/internal/core/robject.h +++ b/include/ruby/internal/core/robject.h @@ -192,6 +192,16 @@ ROBJECT_NUMIV(VALUE obj) #endif } +static inline void +ROBJECT_SET_NUMIV(VALUE obj, uint32_t capacity) +{ +#if USE_RVARGC + ROBJECT(obj)->numiv = capacity; +#else + ROBJECT(obj)->as.heap.numiv = capacity; +#endif +} + RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() /** diff --git a/inits.c b/inits.c index d1204c1324f73b..3c2b11c851df43 100644 --- a/inits.c +++ b/inits.c @@ -20,6 +20,7 @@ static void Init_builtin_prelude(void); void rb_call_inits(void) { + CALL(default_shapes); CALL(Thread_Mutex); #if USE_TRANSIENT_HEAP CALL(TransientHeap); diff --git a/internal/class.h b/internal/class.h index 784d508e20d194..80807256345572 100644 --- a/internal/class.h +++ b/internal/class.h @@ -62,7 +62,7 @@ struct RClass { struct RBasic basic; VALUE super; struct rb_id_table *m_tbl; -#if !USE_RVARGC +#if SIZE_POOL_COUNT == 1 struct rb_classext_struct *ptr; #endif }; @@ -70,7 +70,7 @@ struct RClass { typedef struct rb_subclass_entry rb_subclass_entry_t; typedef struct rb_classext_struct rb_classext_t; -#if USE_RVARGC +#if RCLASS_EXT_EMBEDDED # define RCLASS_EXT(c) ((rb_classext_t *)((char *)(c) + sizeof(struct RClass))) #else # define RCLASS_EXT(c) (RCLASS(c)->ptr) diff --git a/internal/gc.h b/internal/gc.h index 84b7f9fa3e9112..5b2b9e8f706ee4 100644 --- a/internal/gc.h +++ b/internal/gc.h @@ -14,6 +14,7 @@ #include "internal/compilers.h" /* for __has_attribute */ #include "ruby/ruby.h" /* for rb_event_flag_t */ +#include "shape.h" struct rb_execution_context_struct; /* in vm_core.h */ struct rb_objspace; /* in vm_core.h */ @@ -67,12 +68,14 @@ struct rb_objspace; /* in vm_core.h */ rb_obj_write((VALUE)(a), 
UNALIGNED_MEMBER_ACCESS((VALUE *)(slot)), \ (VALUE)(b), __FILE__, __LINE__) -#if USE_RVARGC +#if USE_RVARGC && SHAPE_IN_BASIC_FLAGS # define SIZE_POOL_COUNT 5 #else # define SIZE_POOL_COUNT 1 #endif +#define RCLASS_EXT_EMBEDDED (SIZE_POOL_COUNT > 1) + typedef struct ractor_newobj_size_pool_cache { struct RVALUE *freelist; struct heap_page *using_page; diff --git a/internal/variable.h b/internal/variable.h index 734884a5f6baa1..553e87c4a8b325 100644 --- a/internal/variable.h +++ b/internal/variable.h @@ -13,6 +13,7 @@ #include "constant.h" /* for rb_const_entry_t */ #include "ruby/internal/stdbool.h" /* for bool */ #include "ruby/ruby.h" /* for VALUE */ +#include "shape.h" /* for rb_shape_t */ /* global variable */ @@ -53,7 +54,7 @@ VALUE rb_gvar_get(ID); VALUE rb_gvar_set(ID, VALUE); VALUE rb_gvar_defined(ID); void rb_const_warn_if_deprecated(const rb_const_entry_t *, VALUE, ID); -void rb_init_iv_list(VALUE obj); +rb_shape_t * rb_grow_iv_list(VALUE obj); void rb_ensure_iv_list_size(VALUE obj, uint32_t len, uint32_t newsize); struct gen_ivtbl * rb_ensure_generic_iv_list_size(VALUE obj, uint32_t newsize); MJIT_SYMBOL_EXPORT_END diff --git a/lib/mjit/compiler.rb b/lib/mjit/compiler.rb index 55fcee6b877fcf..575ae6f84c518c 100644 --- a/lib/mjit/compiler.rb +++ b/lib/mjit/compiler.rb @@ -353,10 +353,20 @@ def compile_ivar(insn_name, stack_size, pos, status, operands, body) ic_copy = (status.is_entries + (C.iseq_inline_storage_entry.new(operands[1]) - body.is_entries)).iv_cache dest_shape_id = ic_copy.value >> C.SHAPE_FLAG_SHIFT attr_index = ic_copy.value & ((1 << C.SHAPE_FLAG_SHIFT) - 1) + + capa = nil source_shape_id = if dest_shape_id == C.INVALID_SHAPE_ID dest_shape_id else - C.rb_shape_get_shape_by_id(dest_shape_id).parent_id + parent_id = C.rb_shape_get_shape_by_id(dest_shape_id).parent_id + parent = C.rb_shape_get_shape_by_id(parent_id) + + if parent.type == C.SHAPE_CAPACITY_CHANGE + capa = parent.capacity + parent.parent_id + else + parent_id + end end src = +'' 
@@ -374,9 +384,9 @@ def compile_ivar(insn_name, stack_size, pos, status, operands, body) src << " const shape_id_t dest_shape_id = (shape_id_t)#{dest_shape_id};\n" src << " if (source_shape_id == ROBJECT_SHAPE_ID(obj) && \n" src << " dest_shape_id != ROBJECT_SHAPE_ID(obj)) {\n" - src << " if (UNLIKELY(index >= ROBJECT_NUMIV(obj))) {\n" - src << " rb_init_iv_list(obj);\n" - src << " }\n" + # Conditionally generate a capacity change if there is one + # between the destination and the parent IV set + src << " rb_ensure_iv_list_size(obj, ROBJECT_NUMIV(obj), #{capa});\n" if capa src << " ROBJECT_SET_SHAPE_ID(obj, dest_shape_id);\n" src << " VALUE *ptr = ROBJECT_IVPTR(obj);\n" src << " RB_OBJ_WRITE(obj, &ptr[index], stack[#{stack_size - 1}]);\n" diff --git a/mjit_c.rb b/mjit_c.rb index 7684755b6be454..533c97285dd203 100644 --- a/mjit_c.rb +++ b/mjit_c.rb @@ -13,6 +13,30 @@ def SHAPE_FLAG_SHIFT Primitive.cexpr! 'UINT2NUM(SHAPE_FLAG_SHIFT)' end + def SHAPE_ROOT + Primitive.cexpr! 'UINT2NUM(SHAPE_ROOT)' + end + + def SHAPE_IVAR + Primitive.cexpr! 'UINT2NUM(SHAPE_IVAR)' + end + + def SHAPE_FROZEN + Primitive.cexpr! 'UINT2NUM(SHAPE_FROZEN)' + end + + def SHAPE_CAPACITY_CHANGE + Primitive.cexpr! 'UINT2NUM(SHAPE_CAPACITY_CHANGE)' + end + + def SHAPE_IVAR_UNDEF + Primitive.cexpr! 'UINT2NUM(SHAPE_IVAR_UNDEF)' + end + + def SHAPE_INITIAL_CAPACITY + Primitive.cexpr! 'UINT2NUM(SHAPE_INITIAL_CAPACITY)' + end + def ROBJECT_EMBED_LEN_MAX Primitive.cexpr! 
'INT2NUM(RBIMPL_EMBED_LEN_MAX_OF(VALUE))' end @@ -598,7 +622,9 @@ def C.rb_shape edges: [CType::Pointer.new { self.rb_id_table }, Primitive.cexpr!("OFFSETOF((*((struct rb_shape *)NULL)), edges)")], edge_name: [self.ID, Primitive.cexpr!("OFFSETOF((*((struct rb_shape *)NULL)), edge_name)")], next_iv_index: [self.attr_index_t, Primitive.cexpr!("OFFSETOF((*((struct rb_shape *)NULL)), next_iv_index)")], + capacity: [CType::Immediate.parse("uint32_t"), Primitive.cexpr!("OFFSETOF((*((struct rb_shape *)NULL)), capacity)")], type: [CType::Immediate.parse("uint8_t"), Primitive.cexpr!("OFFSETOF((*((struct rb_shape *)NULL)), type)")], + size_pool_index: [CType::Immediate.parse("uint8_t"), Primitive.cexpr!("OFFSETOF((*((struct rb_shape *)NULL)), size_pool_index)")], parent_id: [self.shape_id_t, Primitive.cexpr!("OFFSETOF((*((struct rb_shape *)NULL)), parent_id)")], ) end diff --git a/object.c b/object.c index aa337ea2cebd9f..9a06500b6bb6ca 100644 --- a/object.c +++ b/object.c @@ -33,6 +33,7 @@ #include "internal/string.h" #include "internal/symbol.h" #include "internal/variable.h" +#include "variable.h" #include "probes.h" #include "ruby/encoding.h" #include "ruby/st.h" @@ -268,21 +269,64 @@ rb_obj_singleton_class(VALUE obj) MJIT_FUNC_EXPORTED void rb_obj_copy_ivar(VALUE dest, VALUE obj) { - uint32_t dest_len = ROBJECT_NUMIV(dest); - uint32_t src_len = ROBJECT_NUMIV(obj); + RUBY_ASSERT(!RB_TYPE_P(obj, T_CLASS) && !RB_TYPE_P(obj, T_MODULE)); - if (dest_len < src_len) { - rb_ensure_iv_list_size(dest, dest_len, src_len); - RUBY_ASSERT(!(RBASIC(dest)->flags & ROBJECT_EMBED)); + RUBY_ASSERT(BUILTIN_TYPE(dest) == BUILTIN_TYPE(obj)); + uint32_t src_num_ivs = RBASIC_IV_COUNT(obj); + rb_shape_t * src_shape = rb_shape_get_shape(obj); + rb_shape_t * shape_to_set_on_dest = src_shape; + VALUE * src_buf; + VALUE * dest_buf; + + if (!src_num_ivs) { + return; } - else { - RUBY_ASSERT((RBASIC(dest)->flags & ROBJECT_EMBED)); + + // The copy should be mutable, so we don't want the frozen shape + 
if (rb_shape_frozen_shape_p(src_shape)) { + shape_to_set_on_dest = rb_shape_get_shape_by_id(src_shape->parent_id); + } + + src_buf = ROBJECT_IVPTR(obj); + dest_buf = ROBJECT_IVPTR(dest); + + rb_shape_t * initial_shape = rb_shape_get_shape(dest); + + if (initial_shape->size_pool_index != src_shape->size_pool_index) { + RUBY_ASSERT(initial_shape->parent_id == ROOT_SHAPE_ID || initial_shape->type == SHAPE_ROOT); + + shape_to_set_on_dest = rb_shape_rebuild_shape(initial_shape, src_shape); } - VALUE * dest_buf = ROBJECT_IVPTR(dest); - VALUE * src_buf = ROBJECT_IVPTR(obj); + RUBY_ASSERT(src_num_ivs <= shape_to_set_on_dest->capacity); + if (initial_shape->capacity < shape_to_set_on_dest->capacity) { + rb_ensure_iv_list_size(dest, initial_shape->capacity, shape_to_set_on_dest->capacity); + dest_buf = ROBJECT_IVPTR(dest); + + rb_shape_t * initial_shape = rb_shape_get_shape(dest); + + if (initial_shape->size_pool_index != src_shape->size_pool_index) { + RUBY_ASSERT(initial_shape->parent_id == ROOT_SHAPE_ID || initial_shape->type == SHAPE_ROOT); + + shape_to_set_on_dest = rb_shape_rebuild_shape(initial_shape, src_shape); + } - MEMCPY(dest_buf, src_buf, VALUE, ROBJECT_IV_COUNT(obj)); + RUBY_ASSERT(src_num_ivs <= shape_to_set_on_dest->capacity); + if (initial_shape->capacity < shape_to_set_on_dest->capacity) { + rb_ensure_iv_list_size(dest, initial_shape->capacity, shape_to_set_on_dest->capacity); + dest_buf = ROBJECT_IVPTR(dest); + } + } + + MEMCPY(dest_buf, src_buf, VALUE, src_num_ivs); + + // Fire write barriers + for (uint32_t i = 0; i < src_num_ivs; i++) { + RB_OBJ_WRITTEN(dest, Qundef, dest_buf[i]); + } + + rb_shape_set_shape(dest, shape_to_set_on_dest); + RUBY_ASSERT(!RB_TYPE_P(obj, T_OBJECT) || ROBJECT_IV_CAPACITY(dest) == ROBJECT_NUMIV(dest)); } static void @@ -301,19 +345,6 @@ init_copy(VALUE dest, VALUE obj) if (RB_TYPE_P(obj, T_OBJECT)) { rb_obj_copy_ivar(dest, obj); } - - if (!RB_TYPE_P(obj, T_CLASS) && !RB_TYPE_P(obj, T_MODULE)) { - rb_shape_t *shape_to_set = 
rb_shape_get_shape(obj); - - // If the object is frozen, the "dup"'d object will *not* be frozen, - // so we need to copy the frozen shape's parent to the new object. - if (rb_shape_frozen_shape_p(shape_to_set)) { - shape_to_set = rb_shape_get_shape_by_id(shape_to_set->parent_id); - } - - // shape ids are different - rb_shape_set_shape(dest, shape_to_set); - } } static VALUE immutable_obj_clone(VALUE obj, VALUE kwfreeze); diff --git a/shape.c b/shape.c index 1de89d3f8f2b42..e19667ae2c17b3 100644 --- a/shape.c +++ b/shape.c @@ -1,15 +1,19 @@ #include "vm_core.h" #include "vm_sync.h" #include "shape.h" +#include "gc.h" #include "internal/class.h" #include "internal/symbol.h" #include "internal/variable.h" #include +static ID id_frozen; +static ID size_pool_edge_names[SIZE_POOL_COUNT]; + /* * Shape getters */ -static rb_shape_t* +rb_shape_t * rb_shape_get_root_shape(void) { return GET_VM()->root_shape; @@ -21,12 +25,6 @@ rb_shape_id(rb_shape_t * shape) return (shape_id_t)(shape - GET_VM()->shape_list); } -static rb_shape_t* -rb_shape_get_frozen_root_shape(void) -{ - return GET_VM()->frozen_root_shape; -} - bool rb_shape_root_shape_p(rb_shape_t* shape) { @@ -68,7 +66,7 @@ shape_id_t rb_shape_get_shape_id(VALUE obj) { if (RB_SPECIAL_CONST_P(obj)) { - return FROZEN_ROOT_SHAPE_ID; + return SPECIAL_CONST_SHAPE_ID; } #if SHAPE_IN_BASIC_FLAGS @@ -113,12 +111,9 @@ rb_shape_lookup_id(rb_shape_t* shape, ID id, enum shape_type shape_type) } static rb_shape_t* -get_next_shape_internal(rb_shape_t* shape, ID id, VALUE obj, enum shape_type shape_type) +get_next_shape_internal(rb_shape_t * shape, ID id, enum shape_type shape_type) { rb_shape_t *res = NULL; - - RUBY_ASSERT(SHAPE_FROZEN != (enum shape_type)shape->type || RB_TYPE_P(obj, T_MODULE) || RB_TYPE_P(obj, T_CLASS)); - RB_VM_LOCK_ENTER(); { if (rb_shape_lookup_id(shape, id, shape_type)) { @@ -142,23 +137,18 @@ get_next_shape_internal(rb_shape_t* shape, ID id, VALUE obj, enum shape_type sha rb_shape_t * new_shape = 
rb_shape_alloc(id, shape); new_shape->type = (uint8_t)shape_type; + new_shape->capacity = shape->capacity; switch (shape_type) { case SHAPE_IVAR: - new_shape->next_iv_index = rb_shape_get_shape_by_id(new_shape->parent_id)->next_iv_index + 1; - - // Check if we should update next_iv_index on the object's class - if (BUILTIN_TYPE(obj) == T_OBJECT) { - VALUE klass = rb_obj_class(obj); - if (new_shape->next_iv_index > RCLASS_EXT(klass)->max_iv_count) { - RCLASS_EXT(klass)->max_iv_count = new_shape->next_iv_index; - } - } + new_shape->next_iv_index = shape->next_iv_index + 1; break; + case SHAPE_CAPACITY_CHANGE: case SHAPE_IVAR_UNDEF: case SHAPE_FROZEN: - new_shape->next_iv_index = rb_shape_get_shape_by_id(new_shape->parent_id)->next_iv_index; + new_shape->next_iv_index = shape->next_iv_index; break; + case SHAPE_INITIAL_CAPACITY: case SHAPE_ROOT: rb_bug("Unreachable"); break; @@ -183,7 +173,7 @@ rb_shape_frozen_shape_p(rb_shape_t* shape) void rb_shape_transition_shape_remove_ivar(VALUE obj, ID id, rb_shape_t *shape) { - rb_shape_t* next_shape = get_next_shape_internal(shape, id, obj, SHAPE_IVAR_UNDEF); + rb_shape_t * next_shape = get_next_shape_internal(shape, id, SHAPE_IVAR_UNDEF); if (shape == next_shape) { return; @@ -206,16 +196,11 @@ rb_shape_transition_shape_frozen(VALUE obj) rb_shape_t* next_shape; if (shape == rb_shape_get_root_shape()) { - next_shape = rb_shape_get_frozen_root_shape(); + rb_shape_set_shape_id(obj, SPECIAL_CONST_SHAPE_ID); + return; } - else { - static ID id_frozen; - if (!id_frozen) { - id_frozen = rb_make_internal_id(); - } - next_shape = get_next_shape_internal(shape, (ID)id_frozen, obj, SHAPE_FROZEN); - } + next_shape = get_next_shape_internal(shape, (ID)id_frozen, SHAPE_FROZEN); RUBY_ASSERT(next_shape); rb_shape_set_shape(obj, next_shape); @@ -231,10 +216,39 @@ rb_shape_transition_shape(VALUE obj, ID id, rb_shape_t *shape) rb_shape_set_shape(obj, next_shape); } -rb_shape_t* +/* + * This function is used for assertions where we don't want 
to increment + * max_iv_count + */ +rb_shape_t * +rb_shape_get_next_iv_shape(rb_shape_t* shape, ID id) +{ + return get_next_shape_internal(shape, id, SHAPE_IVAR); +} + +rb_shape_t * rb_shape_get_next(rb_shape_t* shape, VALUE obj, ID id) { - return get_next_shape_internal(shape, id, obj, SHAPE_IVAR); + rb_shape_t * new_shape = rb_shape_get_next_iv_shape(shape, id); + + // Check if we should update max_iv_count on the object's class + if (BUILTIN_TYPE(obj) == T_OBJECT) { + VALUE klass = rb_obj_class(obj); + if (new_shape->next_iv_index > RCLASS_EXT(klass)->max_iv_count) { + RCLASS_EXT(klass)->max_iv_count = new_shape->next_iv_index; + } + } + + return new_shape; +} + +rb_shape_t * +rb_shape_transition_shape_capa(rb_shape_t* shape, uint32_t new_capacity) +{ + ID edge_name = rb_make_temporary_id(new_capacity); + rb_shape_t * new_shape = get_next_shape_internal(shape, edge_name, SHAPE_CAPACITY_CHANGE); + new_shape->capacity = new_capacity; + return new_shape; } bool @@ -250,11 +264,13 @@ rb_shape_get_iv_index(rb_shape_t * shape, ID id, attr_index_t *value) RUBY_ASSERT(shape->next_iv_index > 0); *value = shape->next_iv_index - 1; return true; + case SHAPE_CAPACITY_CHANGE: case SHAPE_IVAR_UNDEF: case SHAPE_ROOT: + case SHAPE_INITIAL_CAPACITY: return false; case SHAPE_FROZEN: - rb_bug("Ivar should not exist on frozen transition\n"); + rb_bug("Ivar should not exist on transition\n"); } } shape = rb_shape_get_shape_by_id(shape->parent_id); @@ -289,10 +305,19 @@ rb_shape_alloc_with_parent_id(ID edge_name, shape_id_t parent_id) return shape; } +rb_shape_t * +rb_shape_alloc_with_size_pool_index(ID edge_name, rb_shape_t * parent, uint8_t size_pool_index) +{ + rb_shape_t * shape = rb_shape_alloc_with_parent_id(edge_name, rb_shape_id(parent)); + shape->size_pool_index = size_pool_index; + return shape; +} + + rb_shape_t * rb_shape_alloc(ID edge_name, rb_shape_t * parent) { - return rb_shape_alloc_with_parent_id(edge_name, rb_shape_id(parent)); + return 
rb_shape_alloc_with_size_pool_index(edge_name, parent, parent->size_pool_index); } MJIT_FUNC_EXPORTED void @@ -307,6 +332,39 @@ rb_shape_flags_mask(void) return SHAPE_FLAG_MASK; } +rb_shape_t * +rb_shape_rebuild_shape(rb_shape_t * initial_shape, rb_shape_t * dest_shape) +{ + rb_shape_t * midway_shape; + + if (dest_shape->type != SHAPE_ROOT) { + midway_shape = rb_shape_rebuild_shape(initial_shape, rb_shape_get_shape_by_id(dest_shape->parent_id)); + } + else { + midway_shape = initial_shape; + } + + switch (dest_shape->type) { + case SHAPE_IVAR: + if (midway_shape->capacity < midway_shape->next_iv_index) { + // There isn't enough room to write this IV, so we need to increase the capacity + midway_shape = rb_shape_transition_shape_capa(midway_shape, midway_shape->capacity * 2); + } + + midway_shape = rb_shape_get_next_iv_shape(midway_shape, dest_shape->edge_name); + break; + case SHAPE_IVAR_UNDEF: + midway_shape = get_next_shape_internal(midway_shape, dest_shape->edge_name, SHAPE_IVAR_UNDEF); + break; + case SHAPE_ROOT: + case SHAPE_FROZEN: + case SHAPE_CAPACITY_CHANGE: + break; + } + + return midway_shape; +} + #if VM_CHECK_MODE > 0 VALUE rb_cShape; @@ -335,6 +393,14 @@ rb_shape_type(VALUE self) return INT2NUM(shape->type); } +static VALUE +rb_shape_capacity(VALUE self) +{ + rb_shape_t * shape; + TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape); + return INT2NUM(shape->capacity); +} + static VALUE rb_shape_parent_id(VALUE self) { @@ -398,11 +464,16 @@ rb_shape_edge_name(VALUE self) rb_shape_t* shape; TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape); - if (shape->edge_name) { - return ID2SYM(shape->edge_name); + if ((shape->edge_name & (ID_INTERNAL)) == ID_INTERNAL) { + return INT2NUM(shape->capacity); } else { - return Qnil; + if (shape->edge_name) { + return ID2SYM(shape->edge_name); + } + else { + return Qnil; + } } } @@ -415,6 +486,15 @@ rb_shape_next_iv_index(VALUE self) return INT2NUM(shape->next_iv_index); } +static VALUE 
+rb_shape_size_pool_index(VALUE self) +{ + rb_shape_t * shape; + TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape); + + return INT2NUM(shape->size_pool_index); +} + static VALUE rb_shape_export_depth(VALUE self) { @@ -454,12 +534,6 @@ rb_shape_root_shape(VALUE self) return rb_shape_t_to_rb_cShape(rb_shape_get_root_shape()); } -static VALUE -rb_shape_frozen_root_shape(VALUE self) -{ - return rb_shape_t_to_rb_cShape(rb_shape_get_frozen_root_shape()); -} - VALUE rb_obj_shape(rb_shape_t* shape); static enum rb_id_table_iterator_result collect_keys_and_values(ID key, VALUE value, void *ref) @@ -518,6 +592,43 @@ rb_shape_find_by_id(VALUE mod, VALUE id) } #endif +void +Init_default_shapes(void) +{ + id_frozen = rb_make_internal_id(); + + // Shapes by size pool + for (int i = 0; i < SIZE_POOL_COUNT; i++) { + size_pool_edge_names[i] = rb_make_internal_id(); + } + + // Root shape + rb_shape_t * root = rb_shape_alloc_with_parent_id(0, INVALID_SHAPE_ID); + root->capacity = (uint32_t)((rb_size_pool_slot_size(0) - offsetof(struct RObject, as.ary)) / sizeof(VALUE)); + root->type = SHAPE_ROOT; + root->size_pool_index = 0; + GET_VM()->root_shape = root; + RUBY_ASSERT(rb_shape_id(GET_VM()->root_shape) == ROOT_SHAPE_ID); + + // Shapes by size pool + for (int i = 1; i < SIZE_POOL_COUNT; i++) { + uint32_t capa = (uint32_t)((rb_size_pool_slot_size(i) - offsetof(struct RObject, as.ary)) / sizeof(VALUE)); + rb_shape_t * new_shape = rb_shape_transition_shape_capa(root, capa); + new_shape->type = SHAPE_INITIAL_CAPACITY; + new_shape->size_pool_index = i; + RUBY_ASSERT(rb_shape_id(new_shape) == (shape_id_t)i); + } + + // Special const shape +#if RUBY_DEBUG + rb_shape_t * special_const_shape = +#endif + get_next_shape_internal(root, (ID)id_frozen, SHAPE_FROZEN); + RUBY_ASSERT(rb_shape_id(special_const_shape) == SPECIAL_CONST_SHAPE_ID); + RUBY_ASSERT(SPECIAL_CONST_SHAPE_ID == (GET_VM()->next_shape_id - 1)); + RUBY_ASSERT(rb_shape_frozen_shape_p(special_const_shape)); +} + void 
Init_shape(void) { @@ -530,21 +641,23 @@ Init_shape(void) rb_define_method(rb_cShape, "edges", rb_shape_edges, 0); rb_define_method(rb_cShape, "edge_name", rb_shape_edge_name, 0); rb_define_method(rb_cShape, "next_iv_index", rb_shape_next_iv_index, 0); + rb_define_method(rb_cShape, "size_pool_index", rb_shape_size_pool_index, 0); rb_define_method(rb_cShape, "depth", rb_shape_export_depth, 0); rb_define_method(rb_cShape, "id", rb_wrapped_shape_id, 0); rb_define_method(rb_cShape, "type", rb_shape_type, 0); + rb_define_method(rb_cShape, "capacity", rb_shape_capacity, 0); rb_define_const(rb_cShape, "SHAPE_ROOT", INT2NUM(SHAPE_ROOT)); rb_define_const(rb_cShape, "SHAPE_IVAR", INT2NUM(SHAPE_IVAR)); rb_define_const(rb_cShape, "SHAPE_IVAR_UNDEF", INT2NUM(SHAPE_IVAR_UNDEF)); rb_define_const(rb_cShape, "SHAPE_FROZEN", INT2NUM(SHAPE_FROZEN)); rb_define_const(rb_cShape, "SHAPE_BITS", INT2NUM(SHAPE_BITS)); rb_define_const(rb_cShape, "SHAPE_FLAG_SHIFT", INT2NUM(SHAPE_FLAG_SHIFT)); + rb_define_const(rb_cShape, "SPECIAL_CONST_SHAPE_ID", INT2NUM(SPECIAL_CONST_SHAPE_ID)); rb_define_singleton_method(rb_cShape, "transition_tree", shape_transition_tree, 0); rb_define_singleton_method(rb_cShape, "find_by_id", rb_shape_find_by_id, 1); rb_define_singleton_method(rb_cShape, "next_shape_id", next_shape_id, 0); rb_define_singleton_method(rb_cShape, "of", rb_shape_debug_shape, 1); rb_define_singleton_method(rb_cShape, "root_shape", rb_shape_root_shape, 0); - rb_define_singleton_method(rb_cShape, "frozen_root_shape", rb_shape_frozen_root_shape, 0); #endif } diff --git a/shape.h b/shape.h index 8e1bf46ec9cd53..a7450cdeea9ab7 100644 --- a/shape.h +++ b/shape.h @@ -40,13 +40,17 @@ typedef uint16_t shape_id_t; # define MAX_SHAPE_ID (SHAPE_MASK - 1) # define INVALID_SHAPE_ID SHAPE_MASK # define ROOT_SHAPE_ID 0x0 -# define FROZEN_ROOT_SHAPE_ID 0x1 +// We use SIZE_POOL_COUNT number of shape IDs for transitions out of different size pools +// The next available shape ID will be the 
SPECIAL_CONST_SHAPE_ID +# define SPECIAL_CONST_SHAPE_ID SIZE_POOL_COUNT struct rb_shape { struct rb_id_table * edges; // id_table from ID (ivar) to next shape ID edge_name; // ID (ivar) for transition from parent to rb_shape attr_index_t next_iv_index; + uint32_t capacity; // Total capacity of the object with this shape uint8_t type; + uint8_t size_pool_index; shape_id_t parent_id; }; @@ -56,7 +60,9 @@ enum shape_type { SHAPE_ROOT, SHAPE_IVAR, SHAPE_FROZEN, + SHAPE_CAPACITY_CHANGE, SHAPE_IVAR_UNDEF, + SHAPE_INITIAL_CAPACITY, }; #if SHAPE_IN_BASIC_FLAGS @@ -124,6 +130,7 @@ static inline shape_id_t RCLASS_SHAPE_ID(VALUE obj) { #endif bool rb_shape_root_shape_p(rb_shape_t* shape); +rb_shape_t * rb_shape_get_root_shape(void); rb_shape_t* rb_shape_get_shape_by_id_without_assertion(shape_id_t shape_id); @@ -135,21 +142,37 @@ rb_shape_t* rb_shape_get_shape(VALUE obj); int rb_shape_frozen_shape_p(rb_shape_t* shape); void rb_shape_transition_shape_frozen(VALUE obj); void rb_shape_transition_shape_remove_ivar(VALUE obj, ID id, rb_shape_t *shape); +rb_shape_t * rb_shape_transition_shape_capa(rb_shape_t * shape, uint32_t new_capacity); void rb_shape_transition_shape(VALUE obj, ID id, rb_shape_t *shape); +rb_shape_t * rb_shape_get_next_iv_shape(rb_shape_t * shape, ID id); rb_shape_t* rb_shape_get_next(rb_shape_t* shape, VALUE obj, ID id); bool rb_shape_get_iv_index(rb_shape_t * shape, ID id, attr_index_t * value); shape_id_t rb_shape_id(rb_shape_t * shape); MJIT_SYMBOL_EXPORT_END +rb_shape_t * rb_shape_rebuild_shape(rb_shape_t * initial_shape, rb_shape_t * dest_shape); + +static inline uint32_t +ROBJECT_IV_CAPACITY(VALUE obj) +{ + RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT); + return rb_shape_get_shape_by_id(ROBJECT_SHAPE_ID(obj))->capacity; +} + static inline uint32_t ROBJECT_IV_COUNT(VALUE obj) { RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT); uint32_t ivc = rb_shape_get_shape_by_id(ROBJECT_SHAPE_ID(obj))->next_iv_index; - RUBY_ASSERT(ivc <= ROBJECT_NUMIV(obj)); return ivc; } +static 
inline uint32_t +RBASIC_IV_COUNT(VALUE obj) +{ + return rb_shape_get_shape_by_id(rb_shape_get_shape_id(obj))->next_iv_index; +} + static inline uint32_t RCLASS_IV_COUNT(VALUE obj) { @@ -159,6 +182,7 @@ RCLASS_IV_COUNT(VALUE obj) } rb_shape_t * rb_shape_alloc(ID edge_name, rb_shape_t * parent); +rb_shape_t * rb_shape_alloc_with_size_pool_index(ID edge_name, rb_shape_t * parent, uint8_t size_pool_index); rb_shape_t * rb_shape_alloc_with_parent_id(ID edge_name, shape_id_t parent_id); bool rb_shape_set_shape_id(VALUE obj, shape_id_t shape_id); diff --git a/test/-ext-/string/test_cstr.rb b/test/-ext-/string/test_cstr.rb index d909781700a78f..efc64119dc22ff 100644 --- a/test/-ext-/string/test_cstr.rb +++ b/test/-ext-/string/test_cstr.rb @@ -43,7 +43,11 @@ def test_frozen end def test_rb_str_new_frozen_embed - str = Bug::String.cstr_noembed("rbconfig.rb") + # "rbconfi" is the smallest "maximum embeddable string". VWA adds + # a capacity field, which removes one pointer capacity for embedded objects, + # so if VWA is enabled, but there is only one size pool, then the + # maximum embeddable capacity on 32 bit machines is 8 bytes. 
+ str = Bug::String.cstr_noembed("rbconfi") str = Bug::String.rb_str_new_frozen(str) assert_equal true, Bug::String.cstr_embedded?(str) end diff --git a/test/objspace/test_objspace.rb b/test/objspace/test_objspace.rb index 5994fadeffd958..2366ec3b610141 100644 --- a/test/objspace/test_objspace.rb +++ b/test/objspace/test_objspace.rb @@ -277,7 +277,7 @@ def test_dump_to_default info = nil ObjectSpace.trace_object_allocations do line = __LINE__ + 1 - str = "hello world" + str = "hello w" info = ObjectSpace.dump(str) end assert_dump_object(info, line) @@ -289,7 +289,7 @@ def test_dump_to_io th = Thread.start {r.read} ObjectSpace.trace_object_allocations do line = __LINE__ + 1 - str = "hello world" + str = "hello w" ObjectSpace.dump(str, output: w) end w.close @@ -301,7 +301,7 @@ def test_dump_to_io def assert_dump_object(info, line) loc = caller_locations(1, 1)[0] assert_match(/"type":"STRING"/, info) - assert_match(/"embedded":true, "bytesize":11, "value":"hello world", "encoding":"UTF-8"/, info) + assert_match(/"embedded":true, "bytesize":7, "value":"hello w", "encoding":"UTF-8"/, info) assert_match(/"file":"#{Regexp.escape __FILE__}", "line":#{line}/, info) assert_match(/"method":"#{loc.base_label}"/, info) JSON.parse(info) if defined?(JSON) @@ -549,17 +549,17 @@ def assert_test_string_entry_correct_in_dump_all(output) # # This test makes assertions on the assignment to `str`, so we look for # the second appearance of /TEST STRING/ in the output - test_string_in_dump_all = output.grep(/TEST STRING/) - assert_equal(test_string_in_dump_all.size, 2) + test_string_in_dump_all = output.grep(/TEST2/) + assert_equal(2, test_string_in_dump_all.size, "number of strings") entry_hash = JSON.parse(test_string_in_dump_all[1]) - assert_equal(entry_hash["bytesize"], 11) - assert_equal(entry_hash["value"], "TEST STRING") - assert_equal(entry_hash["encoding"], "UTF-8") - assert_equal(entry_hash["file"], "-") - assert_equal(entry_hash["line"], 4) - assert_equal(entry_hash["method"], 
"dump_my_heap_please") + assert_equal(5, entry_hash["bytesize"], "bytesize is wrong") + assert_equal("TEST2", entry_hash["value"], "value is wrong") + assert_equal("UTF-8", entry_hash["encoding"], "encoding is wrong") + assert_equal("-", entry_hash["file"], "file is wrong") + assert_equal(4, entry_hash["line"], "line is wrong") + assert_equal("dump_my_heap_please", entry_hash["method"], "method is wrong") assert_not_nil(entry_hash["generation"]) end @@ -571,7 +571,7 @@ def test_dump_all def dump_my_heap_please ObjectSpace.trace_object_allocations_start GC.start - str = "TEST STRING".force_encoding("UTF-8") + str = "TEST2".force_encoding("UTF-8") ObjectSpace.dump_all(output: :stdout) end @@ -586,7 +586,7 @@ def dump_my_heap_please def dump_my_heap_please ObjectSpace.trace_object_allocations_start GC.start - (str = "TEST STRING").force_encoding("UTF-8") + (str = "TEST2").force_encoding("UTF-8") ObjectSpace.dump_all().path end diff --git a/test/ruby/test_gc_compact.rb b/test/ruby/test_gc_compact.rb index 92a2be11748d0c..bae29a316222ea 100644 --- a/test/ruby/test_gc_compact.rb +++ b/test/ruby/test_gc_compact.rb @@ -210,7 +210,7 @@ def obj.tracee end def test_moving_arrays_down_size_pools - omit if !GC.using_rvargc? + omit if GC::INTERNAL_CONSTANTS[:SIZE_POOL_COUNT] == 1 assert_separately([], "#{<<~"begin;"}\n#{<<~"end;"}", timeout: 10, signal: :SEGV) begin; ARY_COUNT = 500 @@ -229,7 +229,8 @@ def test_moving_arrays_down_size_pools end def test_moving_arrays_up_size_pools - omit if !GC.using_rvargc? 
+ omit if GC::INTERNAL_CONSTANTS[:SIZE_POOL_COUNT] == 1 + assert_separately([], "#{<<~"begin;"}\n#{<<~"end;"}", timeout: 10, signal: :SEGV) begin; ARY_COUNT = 500 @@ -250,6 +251,8 @@ def test_moving_arrays_up_size_pools end def test_moving_objects_between_size_pools + omit if GC::INTERNAL_CONSTANTS[:SIZE_POOL_COUNT] == 1 + assert_separately([], "#{<<~"begin;"}\n#{<<~"end;"}", timeout: 10, signal: :SEGV) begin; class Foo @@ -274,7 +277,8 @@ def add_ivars end def test_moving_strings_up_size_pools - omit if !GC.using_rvargc? + omit if GC::INTERNAL_CONSTANTS[:SIZE_POOL_COUNT] == 1 + assert_separately([], "#{<<~"begin;"}\n#{<<~"end;"}", timeout: 10, signal: :SEGV) begin; STR_COUNT = 500 @@ -292,7 +296,8 @@ def test_moving_strings_up_size_pools end def test_moving_strings_down_size_pools - omit if !GC.using_rvargc? + omit if GC::INTERNAL_CONSTANTS[:SIZE_POOL_COUNT] == 1 + assert_separately([], "#{<<~"begin;"}\n#{<<~"end;"}", timeout: 10, signal: :SEGV) begin; STR_COUNT = 500 diff --git a/test/ruby/test_shapes.rb b/test/ruby/test_shapes.rb index 23696acc703b3b..326ff3a4537391 100644 --- a/test/ruby/test_shapes.rb +++ b/test/ruby/test_shapes.rb @@ -86,15 +86,10 @@ def test_iv_index assert_equal(2, bar_shape.next_iv_index) end - def test_new_obj_has_root_shape - assert_shape_equal(RubyVM::Shape.root_shape, RubyVM::Shape.of(Object.new)) - end + class TestObject; end - def test_frozen_new_obj_has_frozen_root_shape - assert_shape_equal( - RubyVM::Shape.frozen_root_shape, - RubyVM::Shape.of(Object.new.freeze) - ) + def test_new_obj_has_root_shape + assert_shape_equal(RubyVM::Shape.root_shape, RubyVM::Shape.of(TestObject.new)) end def test_str_has_root_shape @@ -109,12 +104,12 @@ def test_hash_has_root_shape assert_shape_equal(RubyVM::Shape.root_shape, RubyVM::Shape.of({})) end - def test_true_has_frozen_root_shape - assert_shape_equal(RubyVM::Shape.frozen_root_shape, RubyVM::Shape.of(true)) + def test_true_has_special_const_shape_id + 
assert_equal(RubyVM::Shape::SPECIAL_CONST_SHAPE_ID, RubyVM::Shape.of(true).id) end - def test_nil_has_frozen_root_shape - assert_shape_equal(RubyVM::Shape.frozen_root_shape, RubyVM::Shape.of(nil)) + def test_nil_has_special_const_shape_id + assert_equal(RubyVM::Shape::SPECIAL_CONST_SHAPE_ID, RubyVM::Shape.of(nil).id) end def test_basic_shape_transition diff --git a/variable.c b/variable.c index c9c4be1c4355ba..bdde4d96071a9a 100644 --- a/variable.c +++ b/variable.c @@ -1092,7 +1092,7 @@ rb_generic_shape_id(VALUE obj) shape_id = ivtbl->shape_id; } else if (OBJ_FROZEN(obj)) { - shape_id = FROZEN_ROOT_SHAPE_ID; + shape_id = SPECIAL_CONST_SHAPE_ID; } } RB_VM_LOCK_LEAVE(); @@ -1364,26 +1364,21 @@ rb_obj_transient_heap_evacuate(VALUE obj, int promote) #endif void -rb_ensure_iv_list_size(VALUE obj, uint32_t len, uint32_t newsize) +rb_ensure_iv_list_size(VALUE obj, uint32_t current_capacity, uint32_t new_capacity) { VALUE *ptr = ROBJECT_IVPTR(obj); VALUE *newptr; if (RBASIC(obj)->flags & ROBJECT_EMBED) { - newptr = obj_ivar_heap_alloc(obj, newsize); - MEMCPY(newptr, ptr, VALUE, len); + newptr = obj_ivar_heap_alloc(obj, new_capacity); + MEMCPY(newptr, ptr, VALUE, current_capacity); RB_FL_UNSET_RAW(obj, ROBJECT_EMBED); ROBJECT(obj)->as.heap.ivptr = newptr; } else { - newptr = obj_ivar_heap_realloc(obj, len, newsize); + newptr = obj_ivar_heap_realloc(obj, current_capacity, new_capacity); } - -#if USE_RVARGC - ROBJECT(obj)->numiv = newsize; -#else - ROBJECT(obj)->as.heap.numiv = newsize; -#endif + ROBJECT_SET_NUMIV(obj, new_capacity); } struct gen_ivtbl * @@ -1407,12 +1402,25 @@ rb_ensure_generic_iv_list_size(VALUE obj, uint32_t newsize) } // @note May raise when there are too many instance variables. -void -rb_init_iv_list(VALUE obj) +rb_shape_t * +rb_grow_iv_list(VALUE obj) { - uint32_t newsize = (uint32_t)(rb_shape_get_shape(obj)->next_iv_index * 2.0); uint32_t len = ROBJECT_NUMIV(obj); - rb_ensure_iv_list_size(obj, len, newsize < len ? 
len : newsize); + RUBY_ASSERT(len > 0); + uint32_t newsize = (uint32_t)(len * 2); + rb_ensure_iv_list_size(obj, len, newsize); + rb_shape_t * res; + +#if USE_RVARGC + ROBJECT_SET_NUMIV(obj, newsize); +#else + ROBJECT(obj)->as.heap.numiv = newsize; +#endif + + res = rb_shape_transition_shape_capa(rb_shape_get_shape(obj), newsize); + rb_shape_set_shape(obj, res); + RUBY_ASSERT(!RB_TYPE_P(obj, T_OBJECT) || ROBJECT_IV_CAPACITY(obj) == ROBJECT_NUMIV(obj)); + return res; } static VALUE @@ -1423,9 +1431,10 @@ obj_ivar_set(VALUE obj, ID id, VALUE val) // Get the current shape rb_shape_t * shape = rb_shape_get_shape_by_id(ROBJECT_SHAPE_ID(obj)); + bool found = true; if (!rb_shape_get_iv_index(shape, id, &index)) { - shape = rb_shape_get_next(shape, obj, id); - index = shape->next_iv_index - 1; + index = shape->next_iv_index; + found = false; } uint32_t len = ROBJECT_NUMIV(obj); @@ -1434,12 +1443,16 @@ obj_ivar_set(VALUE obj, ID id, VALUE val) // on this object until the buffer has been allocated, otherwise // GC could read off the end of the buffer. 
if (len <= index) { - uint32_t newsize = (uint32_t)((len + 1) * 1.25); - rb_ensure_iv_list_size(obj, len, newsize); + shape = rb_grow_iv_list(obj); + } + + if (!found) { + shape = rb_shape_get_next(shape, obj, id); + RUBY_ASSERT(index == (shape->next_iv_index - 1)); + rb_shape_set_shape(obj, shape); } RB_OBJ_WRITE(obj, &ROBJECT_IVPTR(obj)[index], val); - rb_shape_set_shape(obj, shape); return val; } @@ -1475,7 +1488,7 @@ rb_shape_set_shape_id(VALUE obj, shape_id_t shape_id) RCLASS_EXT(obj)->shape_id = shape_id; break; default: - if (shape_id != FROZEN_ROOT_SHAPE_ID) { + if (shape_id != SPECIAL_CONST_SHAPE_ID) { struct gen_ivtbl *ivtbl = 0; RB_VM_LOCK_ENTER(); { @@ -1599,8 +1612,10 @@ iterate_over_shapes_with_callback(rb_shape_t *shape, rb_ivar_foreach_callback_fu callback(shape->edge_name, val, itr_data->arg); } return; - case SHAPE_IVAR_UNDEF: + case SHAPE_INITIAL_CAPACITY: + case SHAPE_CAPACITY_CHANGE: case SHAPE_FROZEN: + case SHAPE_IVAR_UNDEF: iterate_over_shapes_with_callback(rb_shape_get_shape_by_id(shape->parent_id), callback, itr_data); return; } @@ -3922,7 +3937,7 @@ rb_iv_tbl_copy(VALUE dst, VALUE src) RUBY_ASSERT(rb_type(dst) == rb_type(src)); RUBY_ASSERT(RB_TYPE_P(dst, T_CLASS) || RB_TYPE_P(dst, T_MODULE)); - RUBY_ASSERT(RCLASS_SHAPE_ID(dst) == ROOT_SHAPE_ID); + RUBY_ASSERT(RCLASS_SHAPE_ID(dst) == ROOT_SHAPE_ID || rb_shape_get_shape_by_id(RCLASS_SHAPE_ID(dst))->type == SHAPE_INITIAL_CAPACITY); RUBY_ASSERT(!RCLASS_IVPTR(dst)); rb_ivar_foreach(src, tbl_copy_i, dst); diff --git a/vm.c b/vm.c index 0077522317fd39..058c361183d970 100644 --- a/vm.c +++ b/vm.c @@ -4044,17 +4044,6 @@ Init_vm_objects(void) if (!vm->shape_list) { rb_memerror(); } - - // Root shape - vm->root_shape = rb_shape_alloc_with_parent_id(0, INVALID_SHAPE_ID); - RUBY_ASSERT(rb_shape_id(vm->root_shape) == ROOT_SHAPE_ID); - - // Frozen root shape - vm->frozen_root_shape = rb_shape_alloc_with_parent_id(rb_make_internal_id(), rb_shape_id(vm->root_shape)); - vm->frozen_root_shape->type = 
(uint8_t)SHAPE_FROZEN; - RUBY_ASSERT(rb_shape_id(vm->frozen_root_shape) == FROZEN_ROOT_SHAPE_ID); - - vm->next_shape_id = 2; } /* Stub for builtin function when not building YJIT units*/ diff --git a/vm_core.h b/vm_core.h index bdfff95bd5caca..579812e37f9613 100644 --- a/vm_core.h +++ b/vm_core.h @@ -691,7 +691,6 @@ typedef struct rb_vm_struct { /* object shapes */ rb_shape_t *shape_list; rb_shape_t *root_shape; - rb_shape_t *frozen_root_shape; shape_id_t next_shape_id; /* load */ diff --git a/vm_insnhelper.c b/vm_insnhelper.c index cff4b9138a6ead..7b243929323f87 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -50,11 +50,6 @@ MJIT_STATIC VALUE ruby_vm_special_exception_copy(VALUE exc) { VALUE e = rb_obj_alloc(rb_class_real(RBASIC_CLASS(exc))); - rb_shape_t * shape = rb_shape_get_shape(exc); - if (rb_shape_frozen_shape_p(shape)) { - shape = rb_shape_get_shape_by_id(shape->parent_id); - } - rb_shape_set_shape(e, shape); rb_obj_copy_ivar(e, exc); return e; } @@ -1310,37 +1305,33 @@ vm_setivar_slowpath(VALUE obj, ID id, VALUE val, const rb_iseq_t *iseq, IVC ic, rb_shape_t* shape = rb_shape_get_shape(obj); shape_id_t next_shape_id = ROBJECT_SHAPE_ID(obj); - rb_shape_t* next_shape = rb_shape_get_next(shape, obj, id); + if (!rb_shape_get_iv_index(shape, id, &index)) { + if (UNLIKELY(shape->next_iv_index >= num_iv)) { + RUBY_ASSERT(shape->next_iv_index == num_iv); - if (shape != next_shape) { - RUBY_ASSERT(next_shape->parent_id == rb_shape_id(shape)); - next_shape_id = rb_shape_id(next_shape); - } + shape = rb_grow_iv_list(obj); + RUBY_ASSERT(shape->type == SHAPE_CAPACITY_CHANGE); + } + + index = shape->next_iv_index; - if (rb_shape_get_iv_index(next_shape, id, &index)) { // based off the hash stored in the transition tree if (index >= MAX_IVARS) { rb_raise(rb_eArgError, "too many instance variables"); } - populate_cache(index, next_shape_id, id, iseq, ic, cc, is_attr); - } - else { - rb_bug("Didn't find instance variable %s\n", rb_id2name(id)); - } - - // Ensure the 
IV buffer is wide enough to store the IV - if (UNLIKELY(index >= num_iv)) { - RUBY_ASSERT(index == num_iv); - rb_init_iv_list(obj); - } + rb_shape_t * next_shape = rb_shape_get_next(shape, obj, id); + RUBY_ASSERT(next_shape->type == SHAPE_IVAR); + RUBY_ASSERT(index == (next_shape->next_iv_index - 1)); + next_shape_id = rb_shape_id(next_shape); - if (shape != next_shape) { rb_shape_set_shape(obj, next_shape); } + + populate_cache(index, next_shape_id, id, iseq, ic, cc, is_attr); + VALUE *ptr = ROBJECT_IVPTR(obj); RB_OBJ_WRITE(obj, &ptr[index], val); RB_DEBUG_COUNTER_INC(ivar_set_ic_miss_iv_hit); - return val; } case T_CLASS: @@ -1450,17 +1441,18 @@ vm_setivar(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t i else if (dest_shape_id != INVALID_SHAPE_ID) { rb_shape_t *dest_shape = rb_shape_get_shape_by_id(dest_shape_id); shape_id_t source_shape_id = dest_shape->parent_id; - if (shape_id == source_shape_id && dest_shape->edge_name == id && dest_shape->type == SHAPE_IVAR) { + + RUBY_ASSERT(dest_shape->type == SHAPE_IVAR || dest_shape->type == SHAPE_IVAR_UNDEF); + + if (shape_id == source_shape_id && dest_shape->edge_name == id) { RUBY_ASSERT(dest_shape_id != INVALID_SHAPE_ID && shape_id != INVALID_SHAPE_ID); - if (UNLIKELY(index >= ROBJECT_NUMIV(obj))) { - rb_init_iv_list(obj); - } + RUBY_ASSERT(ROBJECT_IV_CAPACITY(obj) == ROBJECT_NUMIV(obj)); ROBJECT_SET_SHAPE_ID(obj, dest_shape_id); - RUBY_ASSERT(rb_shape_get_next(rb_shape_get_shape_by_id(source_shape_id), obj, id) == dest_shape); + RUBY_ASSERT(rb_shape_get_next_iv_shape(rb_shape_get_shape_by_id(source_shape_id), id) == dest_shape); + RUBY_ASSERT(ROBJECT_IV_CAPACITY(obj) == ROBJECT_NUMIV(obj)); RUBY_ASSERT(index < ROBJECT_NUMIV(obj)); - } else { break; diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 9ede3030ff796d..d6218385b001cd 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -248,10 +248,9 @@ extern "C" { } pub const 
ROBJECT_EMBED: ruby_robject_flags = 8192; pub type ruby_robject_flags = u32; -pub const ROBJECT_OFFSET_NUMIV: i32 = 16; -pub const ROBJECT_OFFSET_AS_HEAP_IVPTR: i32 = 24; -pub const ROBJECT_OFFSET_AS_HEAP_IV_INDEX_TBL: i32 = 32; -pub const ROBJECT_OFFSET_AS_ARY: i32 = 24; +pub const ROBJECT_OFFSET_AS_HEAP_IVPTR: i32 = 16; +pub const ROBJECT_OFFSET_AS_HEAP_IV_INDEX_TBL: i32 = 24; +pub const ROBJECT_OFFSET_AS_ARY: i32 = 16; extern "C" { pub static mut rb_mKernel: VALUE; } @@ -420,7 +419,9 @@ pub struct rb_shape { pub edges: *mut rb_id_table, pub edge_name: ID, pub next_iv_index: attr_index_t, + pub capacity: u32, pub type_: u8, + pub size_pool_index: u8, pub parent_id: shape_id_t, } pub type rb_shape_t = rb_shape; From c726c48a3dacd9ca1cb0d96fee98890cb74b37d3 Mon Sep 17 00:00:00 2001 From: Jemma Issroff Date: Tue, 8 Nov 2022 14:09:43 -0500 Subject: [PATCH 077/104] Remove numiv from RObject Since object shapes store the capacity of an object, we no longer need the numiv field on RObjects. This gives us one extra slot which we can use to give embedded objects one more instance variable (for a total of 3 ivs). This commit removes the concept of numiv from RObject. 
--- ext/objspace/objspace_dump.c | 2 +- gc.c | 22 +++-------- include/ruby/internal/core/robject.h | 59 +--------------------------- lib/mjit/compiler.rb | 2 +- object.c | 1 - variable.c | 20 +++------- vm_core.h | 2 +- vm_insnhelper.c | 6 +-- yjit/src/codegen.rs | 8 ---- 9 files changed, 16 insertions(+), 106 deletions(-) diff --git a/ext/objspace/objspace_dump.c b/ext/objspace/objspace_dump.c index 7c7cae34887f9c..61fa0f04ac7de9 100644 --- a/ext/objspace/objspace_dump.c +++ b/ext/objspace/objspace_dump.c @@ -515,7 +515,7 @@ dump_object(VALUE obj, struct dump_config *dc) case T_OBJECT: dump_append(dc, ", \"ivars\":"); - dump_append_lu(dc, ROBJECT_NUMIV(obj)); + dump_append_lu(dc, ROBJECT_IV_CAPACITY(obj)); break; case T_FILE: diff --git a/gc.c b/gc.c index 84f3b8f206665e..8b8f0c11f198da 100644 --- a/gc.c +++ b/gc.c @@ -2943,14 +2943,9 @@ rb_class_instance_allocate_internal(VALUE klass, VALUE flags, bool wb_protected) VALUE obj = newobj_of(klass, flags, 0, 0, 0, wb_protected, size); -#if USE_RVARGC - uint32_t capa = (uint32_t)((rb_gc_obj_slot_size(obj) - offsetof(struct RObject, as.ary)) / sizeof(VALUE)); - ROBJECT_SET_NUMIV(obj, capa); -#endif - #if RUBY_DEBUG VALUE *ptr = ROBJECT_IVPTR(obj); - for (size_t i = 0; i < ROBJECT_NUMIV(obj); i++) { + for (size_t i = 0; i < ROBJECT_IV_CAPACITY(obj); i++) { ptr[i] = Qundef; } #endif @@ -4859,7 +4854,7 @@ obj_memsize_of(VALUE obj, int use_all_types) switch (BUILTIN_TYPE(obj)) { case T_OBJECT: if (!(RBASIC(obj)->flags & ROBJECT_EMBED)) { - size += ROBJECT_NUMIV(obj) * sizeof(VALUE); + size += ROBJECT_IV_CAPACITY(obj) * sizeof(VALUE); } break; case T_MODULE: @@ -8409,7 +8404,7 @@ gc_compact_destination_pool(rb_objspace_t *objspace, rb_size_pool_t *src_pool, V break; case T_OBJECT: - obj_size = rb_obj_embedded_size(ROBJECT_NUMIV(src)); + obj_size = rb_obj_embedded_size(ROBJECT_IV_CAPACITY(src)); break; case T_STRING: @@ -10019,7 +10014,7 @@ gc_ref_update_object(rb_objspace_t *objspace, VALUE v) VALUE *ptr = ROBJECT_IVPTR(v); 
#if USE_RVARGC - uint32_t numiv = ROBJECT_NUMIV(v); + uint32_t numiv = ROBJECT_IV_CAPACITY(v); size_t slot_size = rb_gc_obj_slot_size(v); size_t embed_size = rb_obj_embedded_size(numiv); @@ -10038,13 +10033,6 @@ gc_ref_update_object(rb_objspace_t *objspace, VALUE v) rb_shape_t * initial_shape = rb_shape_get_shape_by_id((shape_id_t)size_pool_shape_id); rb_shape_t * new_shape = rb_shape_rebuild_shape(initial_shape, rb_shape_get_shape(v)); rb_shape_set_shape(v, new_shape); - ROBJECT_SET_NUMIV(v, new_shape->capacity); -#if RUBY_DEBUG - if(RB_TYPE_P(v, T_OBJECT) && ROBJECT_IV_CAPACITY(v) != ROBJECT_NUMIV(v)) { - fprintf(stderr, "shape capa: %d, v capa: %d\n", ROBJECT_IV_CAPACITY(v), ROBJECT_NUMIV(v)); - } -#endif - RUBY_ASSERT(!RB_TYPE_P(v, T_OBJECT) || ROBJECT_IV_CAPACITY(v) == ROBJECT_NUMIV(v)); } #endif @@ -13975,7 +13963,7 @@ rb_raw_obj_info_buitin_type(char *const buff, const size_t buff_size, const VALU } case T_OBJECT: { - uint32_t len = ROBJECT_NUMIV(obj); + uint32_t len = ROBJECT_IV_CAPACITY(obj); if (RANY(obj)->as.basic.flags & ROBJECT_EMBED) { APPEND_F("(embed) len:%d", len); diff --git a/include/ruby/internal/core/robject.h b/include/ruby/internal/core/robject.h index e0514d7dd22ffb..f51c5240810cbf 100644 --- a/include/ruby/internal/core/robject.h +++ b/include/ruby/internal/core/robject.h @@ -44,7 +44,7 @@ /** @cond INTERNAL_MACRO */ #define ROBJECT_EMBED_LEN_MAX ROBJECT_EMBED_LEN_MAX #define ROBJECT_EMBED ROBJECT_EMBED -#define ROBJECT_NUMIV ROBJECT_NUMIV +#define ROBJECT_IV_CAPACITY ROBJECT_IV_CAPACITY #define ROBJECT_IVPTR ROBJECT_IVPTR /** @endcond */ @@ -96,14 +96,6 @@ struct RObject { /** Basic part, including flags and class. */ struct RBasic basic; -#if USE_RVARGC - /** - * Number of instance variables. This is per object; objects might - * differ in this field even if they have the identical classes. - */ - uint32_t numiv; -#endif - /** Object's specific fields. */ union { @@ -112,14 +104,6 @@ struct RObject { * this pattern. 
*/ struct { -#if !USE_RVARGC - /** - * Number of instance variables. This is per object; objects might - * differ in this field even if they have the identical classes. - */ - uint32_t numiv; -#endif - /** Pointer to a C array that holds instance variables. */ VALUE *ivptr; @@ -156,52 +140,11 @@ struct RObject { /* Offsets for YJIT */ #ifndef __cplusplus -# if USE_RVARGC -static const int32_t ROBJECT_OFFSET_NUMIV = offsetof(struct RObject, numiv); -# else -static const int32_t ROBJECT_OFFSET_NUMIV = offsetof(struct RObject, as.heap.numiv); -# endif static const int32_t ROBJECT_OFFSET_AS_HEAP_IVPTR = offsetof(struct RObject, as.heap.ivptr); static const int32_t ROBJECT_OFFSET_AS_HEAP_IV_INDEX_TBL = offsetof(struct RObject, as.heap.iv_index_tbl); static const int32_t ROBJECT_OFFSET_AS_ARY = offsetof(struct RObject, as.ary); #endif -RBIMPL_ATTR_PURE_UNLESS_DEBUG() -RBIMPL_ATTR_ARTIFICIAL() -/** - * Queries the number of instance variables. - * - * @param[in] obj Object in question. - * @return Its number of instance variables. - * @pre `obj` must be an instance of ::RObject. 
- */ -static inline uint32_t -ROBJECT_NUMIV(VALUE obj) -{ - RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT); - -#if USE_RVARGC - return ROBJECT(obj)->numiv; -#else - if (RB_FL_ANY_RAW(obj, ROBJECT_EMBED)) { - return ROBJECT_EMBED_LEN_MAX; - } - else { - return ROBJECT(obj)->as.heap.numiv; - } -#endif -} - -static inline void -ROBJECT_SET_NUMIV(VALUE obj, uint32_t capacity) -{ -#if USE_RVARGC - ROBJECT(obj)->numiv = capacity; -#else - ROBJECT(obj)->as.heap.numiv = capacity; -#endif -} - RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_ARTIFICIAL() /** diff --git a/lib/mjit/compiler.rb b/lib/mjit/compiler.rb index 575ae6f84c518c..9e6ea0a1196d48 100644 --- a/lib/mjit/compiler.rb +++ b/lib/mjit/compiler.rb @@ -386,7 +386,7 @@ def compile_ivar(insn_name, stack_size, pos, status, operands, body) src << " dest_shape_id != ROBJECT_SHAPE_ID(obj)) {\n" # Conditionally generate a capacity change if there is one # between the destination and the parent IV set - src << " rb_ensure_iv_list_size(obj, RBOJECT_NUMIV(obj), #{capa});\n" if capa + src << " rb_ensure_iv_list_size(obj, ROBJECT_IV_CAPACITY(obj), #{capa});\n" if capa src << " ROBJECT_SET_SHAPE_ID(obj, dest_shape_id);\n" src << " VALUE *ptr = ROBJECT_IVPTR(obj);\n" src << " RB_OBJ_WRITE(obj, &ptr[index], stack[#{stack_size - 1}]);\n" diff --git a/object.c b/object.c index 9a06500b6bb6ca..f51bd3486b43fe 100644 --- a/object.c +++ b/object.c @@ -326,7 +326,6 @@ rb_obj_copy_ivar(VALUE dest, VALUE obj) } rb_shape_set_shape(dest, shape_to_set_on_dest); - RUBY_ASSERT(!RB_TYPE_P(obj, T_OBJECT) || ROBJECT_IV_CAPACITY(dest) == ROBJECT_NUMIV(dest)); } static void diff --git a/variable.c b/variable.c index bdde4d96071a9a..2fed1e35121687 100644 --- a/variable.c +++ b/variable.c @@ -1346,7 +1346,7 @@ rb_obj_transient_heap_evacuate(VALUE obj, int promote) if (ROBJ_TRANSIENT_P(obj)) { assert(!RB_FL_TEST_RAW(obj, ROBJECT_EMBED)); - uint32_t len = ROBJECT_NUMIV(obj); + uint32_t len = ROBJECT_IV_CAPACITY(obj); const VALUE *old_ptr = ROBJECT_IVPTR(obj); 
VALUE *new_ptr; @@ -1378,7 +1378,6 @@ rb_ensure_iv_list_size(VALUE obj, uint32_t current_capacity, uint32_t new_capaci else { newptr = obj_ivar_heap_realloc(obj, current_capacity, new_capacity); } - ROBJECT_SET_NUMIV(obj, new_capacity); } struct gen_ivtbl * @@ -1405,21 +1404,14 @@ rb_ensure_generic_iv_list_size(VALUE obj, uint32_t newsize) rb_shape_t * rb_grow_iv_list(VALUE obj) { - uint32_t len = ROBJECT_NUMIV(obj); + rb_shape_t * initial_shape = rb_shape_get_shape(obj); + uint32_t len = initial_shape->capacity; RUBY_ASSERT(len > 0); uint32_t newsize = (uint32_t)(len * 2); rb_ensure_iv_list_size(obj, len, newsize); - rb_shape_t * res; -#if USE_RVARGC - ROBJECT_SET_NUMIV(obj, newsize); -#else - ROBJECT(obj)->as.heap.numiv = newsize; -#endif - - res = rb_shape_transition_shape_capa(rb_shape_get_shape(obj), newsize); + rb_shape_t * res = rb_shape_transition_shape_capa(initial_shape, newsize); rb_shape_set_shape(obj, res); - RUBY_ASSERT(!RB_TYPE_P(obj, T_OBJECT) || ROBJECT_IV_CAPACITY(obj) == ROBJECT_NUMIV(obj)); return res; } @@ -1437,12 +1429,10 @@ obj_ivar_set(VALUE obj, ID id, VALUE val) found = false; } - uint32_t len = ROBJECT_NUMIV(obj); - // Reallocating can kick off GC. We can't set the new shape // on this object until the buffer has been allocated, otherwise // GC could read off the end of the buffer. 
- if (len <= index) { + if (shape->capacity <= index) { shape = rb_grow_iv_list(obj); } diff --git a/vm_core.h b/vm_core.h index 579812e37f9613..3364089f79452f 100644 --- a/vm_core.h +++ b/vm_core.h @@ -253,7 +253,7 @@ struct iseq_inline_constant_cache_entry { }; STATIC_ASSERT(sizeof_iseq_inline_constant_cache_entry, (offsetof(struct iseq_inline_constant_cache_entry, ic_cref) + - sizeof(const rb_cref_t *)) <= sizeof(struct RObject)); + sizeof(const rb_cref_t *)) <= RVALUE_SIZE); struct iseq_inline_constant_cache { struct iseq_inline_constant_cache_entry *entry; diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 7b243929323f87..a39b6f87ccc941 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -1301,8 +1301,8 @@ vm_setivar_slowpath(VALUE obj, ID id, VALUE val, const rb_iseq_t *iseq, IVC ic, attr_index_t index; - uint32_t num_iv = ROBJECT_NUMIV(obj); rb_shape_t* shape = rb_shape_get_shape(obj); + uint32_t num_iv = shape->capacity; shape_id_t next_shape_id = ROBJECT_SHAPE_ID(obj); if (!rb_shape_get_iv_index(shape, id, &index)) { @@ -1446,13 +1446,11 @@ vm_setivar(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t i if (shape_id == source_shape_id && dest_shape->edge_name == id) { RUBY_ASSERT(dest_shape_id != INVALID_SHAPE_ID && shape_id != INVALID_SHAPE_ID); - RUBY_ASSERT(ROBJECT_IV_CAPACITY(obj) == ROBJECT_NUMIV(obj)); ROBJECT_SET_SHAPE_ID(obj, dest_shape_id); RUBY_ASSERT(rb_shape_get_next_iv_shape(rb_shape_get_shape_by_id(source_shape_id), id) == dest_shape); - RUBY_ASSERT(ROBJECT_IV_CAPACITY(obj) == ROBJECT_NUMIV(obj)); - RUBY_ASSERT(index < ROBJECT_NUMIV(obj)); + RUBY_ASSERT(index < dest_shape->capacity); } else { break; diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 7a1673c5cb520c..530bcf9edad7c5 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2100,14 +2100,6 @@ fn gen_get_ivar( } else { // Compile time value is *not* embedded. 
- if USE_RVARGC == 0 { - // Check that the extended table is big enough - // Check that the slot is inside the extended table (num_slots > index) - let num_slots = Opnd::mem(32, recv, ROBJECT_OFFSET_NUMIV); - asm.cmp(num_slots, Opnd::UImm(ivar_index as u64)); - asm.jbe(counted_exit!(ocb, side_exit, getivar_idx_out_of_range).into()); - } - // Get a pointer to the extended table let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_IVPTR)); From c771d835934141645db72bb98351c77b378ac17e Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Thu, 10 Nov 2022 11:30:54 -0500 Subject: [PATCH 078/104] Set up EXTSTATIC before checking it The bundle_loader check for darwin checks EXTSTATIC, but previously the setup for the variable comes after the check. I had trouble building using --with-static-linked-ext on darwin before this change. --- configure.ac | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/configure.ac b/configure.ac index 59a6508ad8c785..b71116ace76d01 100644 --- a/configure.ac +++ b/configure.ac @@ -2964,6 +2964,23 @@ STATIC= ]) } +EXTSTATIC= +AC_SUBST(EXTSTATIC)dnl +AC_ARG_WITH(static-linked-ext, + AS_HELP_STRING([--with-static-linked-ext], [link external modules statically]), + [AS_CASE([$withval],[yes],[STATIC=;EXTSTATIC=static],[no],[],[EXTSTATIC="$withval"])]) +AS_CASE([",$EXTSTATIC,"], [,static,|*,enc,*], [ + ENCOBJS='enc/encinit.$(OBJEXT) enc/libenc.$(LIBEXT) enc/libtrans.$(LIBEXT)' + EXTOBJS='ext/extinit.$(OBJEXT)' + AC_DEFINE_UNQUOTED(EXTSTATIC, 1) + AC_SUBST(ENCSTATIC, static) +], [ + ENCOBJS='dmyenc.$(OBJEXT)' + EXTOBJS='dmyext.$(OBJEXT)' +]) +AC_SUBST(ENCOBJS) +AC_SUBST(EXTOBJS) + : "rpath" && { AS_CASE(["$target_os"], [solaris*], [ AS_IF([test "$GCC" = yes], [ @@ -3268,23 +3285,6 @@ AC_ARG_WITH(ext, AC_ARG_WITH(out-ext, AS_HELP_STRING([--with-out-ext=EXTS], [pass to --without-ext option of extmk.rb])) -EXTSTATIC= -AC_SUBST(EXTSTATIC)dnl -AC_ARG_WITH(static-linked-ext, - 
AS_HELP_STRING([--with-static-linked-ext], [link external modules statically]), - [AS_CASE([$withval],[yes],[STATIC=;EXTSTATIC=static],[no],[],[EXTSTATIC="$withval"])]) -AS_CASE([",$EXTSTATIC,"], [,static,|*,enc,*], [ - ENCOBJS='enc/encinit.$(OBJEXT) enc/libenc.$(LIBEXT) enc/libtrans.$(LIBEXT)' - EXTOBJS='ext/extinit.$(OBJEXT)' - AC_DEFINE_UNQUOTED(EXTSTATIC, 1) - AC_SUBST(ENCSTATIC, static) -], [ - ENCOBJS='dmyenc.$(OBJEXT)' - EXTOBJS='dmyext.$(OBJEXT)' -]) -AC_SUBST(ENCOBJS) -AC_SUBST(EXTOBJS) - AC_ARG_WITH(setup, AS_HELP_STRING([--with-setup=SETUP], [use extension libraries setup]), [setup=$withval]) From bab8051d2d20a13f4aa26330a25e72ccec980f7a Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 9 Nov 2022 19:36:37 +0900 Subject: [PATCH 079/104] [Bug #19100] [DOC] Add NEWS about PRNG update and incompatiblity --- NEWS.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/NEWS.md b/NEWS.md index 2b3bbc8390c05f..19f5546fca1645 100644 --- a/NEWS.md +++ b/NEWS.md @@ -291,6 +291,11 @@ The following deprecated methods are removed. * `Kernel#trust`, `Kernel#untrust`, `Kernel#untrusted?` [[Feature #16131]] +### Source code incompatiblity of extension libraries [[Bug #19100]] + +* Extension libraries provide PRNG, subclasses of `Random`, need updates. + See [PRNG update] below for more information. + ## Stdlib compatibility issues * `Psych` no longer bundles libyaml sources. @@ -300,6 +305,16 @@ The following deprecated methods are removed. ## C API updates +### Updated C APIs + +The following APIs are updated. + +* PRNG update + + `rb_random_interface_t` updated and versioned. + Extension libraries which use this interface and built for older versions. + Also `init_int32` function needs to be defined. + ### Removed C APIs The following deprecated APIs are removed. @@ -400,3 +415,4 @@ The following deprecated APIs are removed. 
[Feature #19013]: https://bugs.ruby-lang.org/issues/19013 [Feature #19026]: https://bugs.ruby-lang.org/issues/19026 [Feature #19060]: https://bugs.ruby-lang.org/issues/19060 +[Bug #19100]: https://bugs.ruby-lang.org/issues/19100 From 0de3bc92b4fc3bb9fc0930e98baed37044ed44e1 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Thu, 10 Nov 2022 09:31:11 -0800 Subject: [PATCH 080/104] [ruby/irb] Make $ and @ default aliases (https://github.com/ruby/irb/pull/438) https://github.com/ruby/irb/commit/0613589476 --- lib/irb/init.rb | 5 ++++- test/irb/test_cmd.rb | 2 -- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/irb/init.rb b/lib/irb/init.rb index 09099f88b77346..8c9d473b74d84c 100644 --- a/lib/irb/init.rb +++ b/lib/irb/init.rb @@ -159,7 +159,10 @@ def IRB.init_config(ap_path) @CONF[:AT_EXIT] = [] - @CONF[:COMMAND_ALIASES] = {} + @CONF[:COMMAND_ALIASES] = { + :'$' => :show_source, + :'@' => :whereami, + } end def IRB.set_measure_callback(type = nil, arg = nil, &block) diff --git a/test/irb/test_cmd.rb b/test/irb/test_cmd.rb index f2d8a0299bc068..d233cbb9b5424e 100644 --- a/test/irb/test_cmd.rb +++ b/test/irb/test_cmd.rb @@ -551,7 +551,6 @@ def test_whereami def test_whereami_alias out, err = execute_lines( "@\n", - conf: { COMMAND_ALIASES: { :'@' => :whereami } } ) assert_empty err assert_match(/^From: .+ @ line \d+ :\n/, out) @@ -563,7 +562,6 @@ def test_vars_with_aliases out, err = execute_lines( "@foo\n", "$bar\n", - conf: { COMMAND_ALIASES: { :'$' => :show_source, :'@' => :whereami } } ) assert_empty err assert_match(/"foo"/, out) From 8b3347950e6344474430ed08f5fa19f613883660 Mon Sep 17 00:00:00 2001 From: Jimmy Miller Date: Thu, 10 Nov 2022 12:56:22 -0500 Subject: [PATCH 081/104] Enable --yjit-stats for release builds (#6694) * Enable --yjit-stats for release builds In order for people in the real world to report information about how their application runs with YJIT, we want to expose stats without requiring rebuilding ruby. 
We can do this without overhead, with the exception of count ratio in yjit, since this relies on the interpreter also counting instructions. This change exposes those stats, while not showing ratio in yjit if we are not in a stats build. * Update yjit.rb Co-authored-by: Takashi Kokubun Co-authored-by: Maxime Chevalier-Boisvert --- yjit.rb | 20 +++++++++++++------- yjit/src/codegen.rs | 17 ----------------- yjit/src/stats.rs | 15 +++++++++++---- 3 files changed, 24 insertions(+), 28 deletions(-) diff --git a/yjit.rb b/yjit.rb index 21f2eea4def39c..595faa1bb3afc0 100644 --- a/yjit.rb +++ b/yjit.rb @@ -158,14 +158,17 @@ def self.runtime_stats # Average length of instruction sequences executed by YJIT avg_len_in_yjit = retired_in_yjit.to_f / total_exits - # Proportion of instructions that retire in YJIT - total_insns_count = retired_in_yjit + stats[:vm_insns_count] - yjit_ratio_pct = 100.0 * retired_in_yjit.to_f / total_insns_count + # This only available on yjit stats builds + if stats.key?(:vm_insns_count) + # Proportion of instructions that retire in YJIT + total_insns_count = retired_in_yjit + stats[:vm_insns_count] + yjit_ratio_pct = 100.0 * retired_in_yjit.to_f / total_insns_count + stats[:ratio_in_yjit] = yjit_ratio_pct + end # Make those stats available in RubyVM::YJIT.runtime_stats as well stats[:side_exit_count] = side_exits stats[:total_exit_count] = total_exits - stats[:ratio_in_yjit] = yjit_ratio_pct stats[:avg_len_in_yjit] = avg_len_in_yjit stats @@ -263,13 +266,16 @@ def _print_stats $stderr.puts "freed_page_count: " + ("%10d" % stats[:freed_page_count]) $stderr.puts "code_gc_count: " + ("%10d" % stats[:code_gc_count]) $stderr.puts "num_gc_obj_refs: " + ("%10d" % stats[:num_gc_obj_refs]) - $stderr.puts "side_exit_count: " + ("%10d" % stats[:side_exit_count]) $stderr.puts "total_exit_count: " + ("%10d" % stats[:side_exit_count]) $stderr.puts "total_insns_count: " + ("%10d" % stats[:total_exit_count]) - $stderr.puts "vm_insns_count: " + ("%10d" % 
stats[:vm_insns_count]) + if stats.has_key?(:vm_insns_count) + $stderr.puts "vm_insns_count: " + ("%10d" % stats[:vm_insns_count]) + end $stderr.puts "yjit_insns_count: " + ("%10d" % stats[:exec_instruction]) - $stderr.puts "ratio_in_yjit: " + ("%9.1f" % stats[:ratio_in_yjit]) + "%" + if stats.has_key?(:ratio_in_yjit) + $stderr.puts "ratio_in_yjit: " + ("%9.1f" % stats[:ratio_in_yjit]) + "%" + end $stderr.puts "avg_len_in_yjit: " + ("%10.1f" % stats[:avg_len_in_yjit]) print_sorted_exit_counts(stats, prefix: "exit_") diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 530bcf9edad7c5..b1b854ad7f79f4 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -7,7 +7,6 @@ use crate::core::*; use crate::cruby::*; use crate::invariants::*; use crate::options::*; -#[cfg(feature = "stats")] use crate::stats::*; use crate::utils::*; use CodegenStatus::*; @@ -181,12 +180,6 @@ fn jit_peek_at_block_handler(jit: &JITState, level: u32) -> VALUE { } } -/// Increment a profiling counter with counter_name -#[cfg(not(feature = "stats"))] -macro_rules! gen_counter_incr { - ($asm:tt, $counter_name:ident) => {}; -} -#[cfg(feature = "stats")] macro_rules! gen_counter_incr { ($asm:tt, $counter_name:ident) => { if (get_option!(gen_stats)) { @@ -204,15 +197,6 @@ macro_rules! gen_counter_incr { }; } -/// Increment a counter then take an existing side exit -#[cfg(not(feature = "stats"))] -macro_rules! counted_exit { - ($ocb:tt, $existing_side_exit:tt, $counter_name:ident) => {{ - let _ = $ocb; - $existing_side_exit - }}; -} -#[cfg(feature = "stats")] macro_rules! 
counted_exit { ($ocb:tt, $existing_side_exit:tt, $counter_name:ident) => { // The counter is only incremented when stats are enabled @@ -422,7 +406,6 @@ fn gen_exit(exit_pc: *mut VALUE, ctx: &Context, asm: &mut Assembler) { ); // Accumulate stats about interpreter exits - #[cfg(feature = "stats")] if get_option!(gen_stats) { asm.ccall( rb_yjit_count_side_exit_op as *const u8, diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs index b7bbb4ae3ec3e7..128672a95943ac 100644 --- a/yjit/src/stats.rs +++ b/yjit/src/stats.rs @@ -289,12 +289,12 @@ make_counters! { /// Check if stats generation is enabled #[no_mangle] pub extern "C" fn rb_yjit_stats_enabled_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { - #[cfg(feature = "stats")] + if get_option!(gen_stats) { return Qtrue; + } else { + return Qfalse; } - - return Qfalse; } /// Primitive called in yjit.rb. @@ -404,7 +404,7 @@ fn rb_yjit_gen_stats_dict() -> VALUE { } // If the stats feature is enabled - #[cfg(feature = "stats")] + unsafe { // Indicate that the complete set of stats is available rb_hash_aset(hash, rust_str_to_sym("all_stats"), Qtrue); @@ -415,6 +415,13 @@ fn rb_yjit_gen_stats_dict() -> VALUE { let counter_ptr = get_counter_ptr(counter_name); let counter_val = *counter_ptr; + #[cfg(not(feature = "stats"))] + if counter_name == &"vm_insns_count" { + // If the stats feature is disabled, we don't have vm_insns_count + // so we are going to exlcude the key + continue; + } + // Put counter into hash let key = rust_str_to_sym(counter_name); let value = VALUE::fixnum_from_usize(counter_val as usize); From 354791c248d7e21d5b70ded7c548af954491e247 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Thu, 10 Nov 2022 10:00:15 -0800 Subject: [PATCH 082/104] Remove inconsistency I meant they should be also fixed in https://github.com/ruby/ruby/pull/6694#discussion_r1019445575 --- yjit.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yjit.rb b/yjit.rb index 595faa1bb3afc0..caa3a035d37228 100644 --- 
a/yjit.rb +++ b/yjit.rb @@ -269,11 +269,11 @@ def _print_stats $stderr.puts "side_exit_count: " + ("%10d" % stats[:side_exit_count]) $stderr.puts "total_exit_count: " + ("%10d" % stats[:side_exit_count]) $stderr.puts "total_insns_count: " + ("%10d" % stats[:total_exit_count]) - if stats.has_key?(:vm_insns_count) + if stats.key?(:vm_insns_count) $stderr.puts "vm_insns_count: " + ("%10d" % stats[:vm_insns_count]) end $stderr.puts "yjit_insns_count: " + ("%10d" % stats[:exec_instruction]) - if stats.has_key?(:ratio_in_yjit) + if stats.key?(:ratio_in_yjit) $stderr.puts "ratio_in_yjit: " + ("%9.1f" % stats[:ratio_in_yjit]) + "%" end $stderr.puts "avg_len_in_yjit: " + ("%10.1f" % stats[:avg_len_in_yjit]) From 7ee1cacb84e6b19908ac0e692601447597d40605 Mon Sep 17 00:00:00 2001 From: Jemma Issroff Date: Thu, 10 Nov 2022 11:36:24 -0500 Subject: [PATCH 083/104] Extract `rb_shape_get_parent` helper Extract an `rb_shape_get_parent` method instead of continually calling `rb_shape_get_shape_by_id(shape->parent_id)` --- object.c | 2 +- shape.c | 16 +++++++++++----- shape.h | 1 + variable.c | 4 ++-- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/object.c b/object.c index f51bd3486b43fe..f0117d1a47d992 100644 --- a/object.c +++ b/object.c @@ -284,7 +284,7 @@ rb_obj_copy_ivar(VALUE dest, VALUE obj) // The copy should be mutable, so we don't want the frozen shape if (rb_shape_frozen_shape_p(src_shape)) { - shape_to_set_on_dest = rb_shape_get_shape_by_id(src_shape->parent_id); + shape_to_set_on_dest = rb_shape_get_parent(src_shape); } src_buf = ROBJECT_IVPTR(obj); diff --git a/shape.c b/shape.c index e19667ae2c17b3..b20ac10a14ce81 100644 --- a/shape.c +++ b/shape.c @@ -51,6 +51,12 @@ rb_shape_get_shape_by_id_without_assertion(shape_id_t shape_id) return shape; } +rb_shape_t * +rb_shape_get_parent(rb_shape_t * shape) +{ + return rb_shape_get_shape_by_id(shape->parent_id); +} + #if !SHAPE_IN_BASIC_FLAGS shape_id_t rb_rclass_shape_id(VALUE obj) @@ -105,7 +111,7 @@ 
rb_shape_lookup_id(rb_shape_t* shape, ID id, enum shape_type shape_type) return NULL; } } - shape = rb_shape_get_shape_by_id(shape->parent_id); + shape = rb_shape_get_parent(shape); } return NULL; } @@ -273,7 +279,7 @@ rb_shape_get_iv_index(rb_shape_t * shape, ID id, attr_index_t *value) rb_bug("Ivar should not exist on transition\n"); } } - shape = rb_shape_get_shape_by_id(shape->parent_id); + shape = rb_shape_get_parent(shape); } return false; } @@ -338,7 +344,7 @@ rb_shape_rebuild_shape(rb_shape_t * initial_shape, rb_shape_t * dest_shape) rb_shape_t * midway_shape; if (dest_shape->type != SHAPE_ROOT) { - midway_shape = rb_shape_rebuild_shape(initial_shape, rb_shape_get_shape_by_id(dest_shape->parent_id)); + midway_shape = rb_shape_rebuild_shape(initial_shape, rb_shape_get_parent(dest_shape)); } else { midway_shape = initial_shape; @@ -504,7 +510,7 @@ rb_shape_export_depth(VALUE self) unsigned int depth = 0; while (shape->parent_id != INVALID_SHAPE_ID) { depth++; - shape = rb_shape_get_shape_by_id(shape->parent_id); + shape = rb_shape_get_parent(shape); } return INT2NUM(depth); } @@ -515,7 +521,7 @@ rb_shape_parent(VALUE self) rb_shape_t * shape; TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape); if (shape->parent_id != INVALID_SHAPE_ID) { - return rb_shape_t_to_rb_cShape(rb_shape_get_shape_by_id(shape->parent_id)); + return rb_shape_t_to_rb_cShape(rb_shape_get_parent(shape)); } else { return Qnil; diff --git a/shape.h b/shape.h index a7450cdeea9ab7..417a01369702a8 100644 --- a/shape.h +++ b/shape.h @@ -133,6 +133,7 @@ bool rb_shape_root_shape_p(rb_shape_t* shape); rb_shape_t * rb_shape_get_root_shape(void); rb_shape_t* rb_shape_get_shape_by_id_without_assertion(shape_id_t shape_id); +rb_shape_t * rb_shape_get_parent(rb_shape_t * shape); MJIT_SYMBOL_EXPORT_BEGIN rb_shape_t* rb_shape_get_shape_by_id(shape_id_t shape_id); diff --git a/variable.c b/variable.c index 2fed1e35121687..93abdac454752d 100644 --- a/variable.c +++ b/variable.c @@ -1583,7 
+1583,7 @@ iterate_over_shapes_with_callback(rb_shape_t *shape, rb_ivar_foreach_callback_fu case SHAPE_ROOT: return; case SHAPE_IVAR: - iterate_over_shapes_with_callback(rb_shape_get_shape_by_id(shape->parent_id), callback, itr_data); + iterate_over_shapes_with_callback(rb_shape_get_parent(shape), callback, itr_data); VALUE * iv_list; switch (BUILTIN_TYPE(itr_data->obj)) { case T_OBJECT: @@ -1606,7 +1606,7 @@ iterate_over_shapes_with_callback(rb_shape_t *shape, rb_ivar_foreach_callback_fu case SHAPE_CAPACITY_CHANGE: case SHAPE_FROZEN: case SHAPE_IVAR_UNDEF: - iterate_over_shapes_with_callback(rb_shape_get_shape_by_id(shape->parent_id), callback, itr_data); + iterate_over_shapes_with_callback(rb_shape_get_parent(shape), callback, itr_data); return; } } From d9056328514041c22f73809921118fa7a1478092 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Thu, 10 Nov 2022 13:16:25 -0800 Subject: [PATCH 084/104] Define YJIT_STATS on --enable-yjit=stats (#6710) --- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index b71116ace76d01..ca234e3c900ffc 100644 --- a/configure.ac +++ b/configure.ac @@ -3806,6 +3806,7 @@ AS_CASE(["${YJIT_SUPPORT}"], [stats], [ rb_rust_target_subdir=stats CARGO_BUILD_ARGS='--profile stats --features stats' + AC_DEFINE(YJIT_STATS, 1) ]) AS_IF([test -n "${CARGO_BUILD_ARGS}"], [ From 2b8191bdad7545b71f270d2b25a34cd2b3afa02f Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Thu, 10 Nov 2022 14:12:38 -0800 Subject: [PATCH 085/104] YJIT: Invalidate JIT code only for ISEQ_TRACE_EVENTS (#6695) --- test/ruby/test_yjit.rb | 48 ++++++++++++++++++++++++++++++++++++++++++ vm_trace.c | 10 +++++---- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/test/ruby/test_yjit.rb b/test/ruby/test_yjit.rb index 1a564889af6963..fab8768a7e4692 100644 --- a/test/ruby/test_yjit.rb +++ b/test/ruby/test_yjit.rb @@ -918,6 +918,54 @@ def test_code_gc_with_many_iseqs RUBY end + def test_trace_script_compiled # not 
ISEQ_TRACE_EVENTS + assert_compiles(<<~'RUBY', exits: :any, result: :ok) + @eval_counter = 0 + def eval_script + eval('@eval_counter += 1') + end + + @trace_counter = 0 + trace = TracePoint.new(:script_compiled) do |t| + @trace_counter += 1 + end + + eval_script # JIT without TracePoint + trace.enable + eval_script # call with TracePoint + trace.disable + + return :"eval_#{@eval_counter}" if @eval_counter != 2 + return :"trace_#{@trace_counter}" if @trace_counter != 1 + + :ok + RUBY + end + + def test_trace_b_call # ISEQ_TRACE_EVENTS + assert_compiles(<<~'RUBY', exits: :any, result: :ok) + @call_counter = 0 + def block_call + 1.times { @call_counter += 1 } + end + + @trace_counter = 0 + trace = TracePoint.new(:b_call) do |t| + @trace_counter += 1 + end + + block_call # JIT without TracePoint + trace.enable + block_call # call with TracePoint + trace.disable + + return :"call_#{@call_counter}" if @call_counter != 2 + return :"trace_#{@trace_counter}" if @trace_counter != 1 + + :ok + RUBY + end + private def code_gc_helpers diff --git a/vm_trace.c b/vm_trace.c index 93a8c1a4ed180f..caed71e96f72fb 100644 --- a/vm_trace.c +++ b/vm_trace.c @@ -87,8 +87,9 @@ update_global_event_hook(rb_event_flag_t prev_events, rb_event_flag_t new_events { rb_event_flag_t new_iseq_events = new_events & ISEQ_TRACE_EVENTS; rb_event_flag_t enabled_iseq_events = ruby_vm_event_enabled_global_flags & ISEQ_TRACE_EVENTS; + bool trace_iseq_p = new_iseq_events & ~enabled_iseq_events; - if (new_iseq_events & ~enabled_iseq_events) { + if (trace_iseq_p) { // :class events are triggered only in ISEQ_TYPE_CLASS, but mjit_target_iseq_p ignores such iseqs. // Thus we don't need to cancel JIT-ed code for :class events. 
if (new_iseq_events != RUBY_EVENT_CLASS) { @@ -111,10 +112,11 @@ update_global_event_hook(rb_event_flag_t prev_events, rb_event_flag_t new_events ruby_vm_event_enabled_global_flags |= new_events; rb_objspace_set_event_hook(new_events); - if (new_events & RUBY_EVENT_TRACEPOINT_ALL) { - // Invalidate all code if listening for any TracePoint event. + if (trace_iseq_p) { + // Invalidate all code when ISEQs are modified to use trace_* insns above. // Internal events fire inside C routines so don't need special handling. - // Do this last so other ractors see updated vm events when they wake up. + // Do this after event flags updates so other ractors see updated vm events + // when they wake up. rb_yjit_tracing_invalidate_all(); } } From 51d161980ae0e26602eb17bed5f9cc0beb61f9a3 Mon Sep 17 00:00:00 2001 From: "Thomas E. Enebo" Date: Thu, 10 Nov 2022 13:37:57 -0600 Subject: [PATCH 086/104] [ruby/racc] Make racc test more flexible (for JRuby). JRuby uses these same files for testing racc. The existing logic will not find 'racc' in a JRuby project checkout. This change allows it to work by just assuming 'ruby -S racc' when running tests. This will not change C Ruby's detection when setting up tests (since earlier checks will find racc). 
https://github.com/ruby/racc/commit/b74ce5e018 --- test/racc/case.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/racc/case.rb b/test/racc/case.rb index d917f3a4e4687d..ebc30b8288ae49 100644 --- a/test/racc/case.rb +++ b/test/racc/case.rb @@ -17,6 +17,8 @@ class TestCase < Test::Unit::TestCase TEST_DIR = test_dir racc = File.join(PROJECT_DIR, 'bin', 'racc') racc = File.join(PROJECT_DIR, '..', 'libexec', 'racc') unless File.exist?(racc) + racc = 'racc' unless File.exist?(racc) + RACC = racc ASSET_DIR = File.join(TEST_DIR, 'assets') # test grammars REGRESS_DIR = File.join(TEST_DIR, 'regress') # known-good generated outputs From 06e17d7659f42cd59634931e5c8d9dd211c7b2e6 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Fri, 11 Nov 2022 07:41:08 +0900 Subject: [PATCH 087/104] Run EnvUtil.apply_timeout_scale outside of assert_separately It does not work well in assert_separately --- test/ruby/test_regexp.rb | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index ee4c2cef5b22cb..87ce6987e39e30 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -1580,9 +1580,8 @@ def assert_match_each(re, conds, msg = nil) def test_s_timeout assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}") + timeout = #{ EnvUtil.apply_timeout_scale(0.2).inspect } begin; - timeout = EnvUtil.apply_timeout_scale(0.2) - Regexp.timeout = timeout assert_equal(timeout, Regexp.timeout) @@ -1622,13 +1621,10 @@ def test_s_timeout_corner_cases def per_instance_redos_test(global_timeout, per_instance_timeout, expected_timeout) assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}") - global_timeout = #{ global_timeout.inspect } - per_instance_timeout = #{ per_instance_timeout.inspect } - expected_timeout = #{ expected_timeout.inspect } + global_timeout = #{ EnvUtil.apply_timeout_scale(global_timeout).inspect } + per_instance_timeout = #{ (per_instance_timeout ? 
EnvUtil.apply_timeout_scale(per_instance_timeout) : nil).inspect } + expected_timeout = #{ EnvUtil.apply_timeout_scale(expected_timeout).inspect } begin; - global_timeout = EnvUtil.apply_timeout_scale(global_timeout) - per_instance_timeout = EnvUtil.apply_timeout_scale(per_instance_timeout) - Regexp.timeout = global_timeout re = Regexp.new("^(a*)\\1b?a*$", timeout: per_instance_timeout) @@ -1676,8 +1672,8 @@ def test_timeout_corner_cases def test_cache_optimization_exponential assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}") + timeout = #{ EnvUtil.apply_timeout_scale(2).inspect } begin; - timeout = EnvUtil.apply_timeout_scale(2) Regexp.timeout = timeout assert_nil(/^(a*)*$/ =~ "a" * 1000000 + "x") @@ -1686,8 +1682,8 @@ def test_cache_optimization_exponential def test_cache_optimization_square assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}") + timeout = #{ EnvUtil.apply_timeout_scale(2).inspect } begin; - timeout = EnvUtil.apply_timeout_scale(2) Regexp.timeout = timeout assert_nil(/^a*b?a*$/ =~ "a" * 1000000 + "x") From b5f809c4961bd43b8e77cceebad44e341618998d Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Sat, 29 Oct 2022 01:19:36 +0900 Subject: [PATCH 088/104] [rubygems/rubygems] Load "bundler/setup" in lib/rubygems.rb Ruby interpreter loads some special gems at startup: did_you_mean, error_highlight, and syntax_suggest. These gems are loaded before `bundler/setup` is loaded by `RUBYOPT=-rbundler/setup`. So, the versions of the gems are not controllable by Gemfile. This change will `require "bundler/setup"` in rubygems.rb (i.e., before the special gems are loaded). Now `bundle exec` sets an environment variable `BUNDLER_SETUP`, and rubygems requires the variable if defined. 
See also: https://bugs.ruby-lang.org/issues/19089 https://github.com/rubygems/rubygems/commit/963cb65a2d --- lib/bundler/environment_preserver.rb | 1 + lib/bundler/shared_helpers.rb | 1 + lib/rubygems.rb | 2 ++ spec/bundler/bundler/shared_helpers_spec.rb | 7 +++++++ 4 files changed, 11 insertions(+) diff --git a/lib/bundler/environment_preserver.rb b/lib/bundler/environment_preserver.rb index 0f08e049d8d49d..70967522af29c2 100644 --- a/lib/bundler/environment_preserver.rb +++ b/lib/bundler/environment_preserver.rb @@ -7,6 +7,7 @@ class EnvironmentPreserver BUNDLE_BIN_PATH BUNDLE_GEMFILE BUNDLER_VERSION + BUNDLER_SETUP GEM_HOME GEM_PATH MANPATH diff --git a/lib/bundler/shared_helpers.rb b/lib/bundler/shared_helpers.rb index 899eb68e0a86ce..0a6afe0e5a5beb 100644 --- a/lib/bundler/shared_helpers.rb +++ b/lib/bundler/shared_helpers.rb @@ -284,6 +284,7 @@ def set_bundle_variables Bundler::SharedHelpers.set_env "BUNDLE_BIN_PATH", exe_file Bundler::SharedHelpers.set_env "BUNDLE_GEMFILE", find_gemfile.to_s Bundler::SharedHelpers.set_env "BUNDLER_VERSION", Bundler::VERSION + Bundler::SharedHelpers.set_env "BUNDLER_SETUP", File.expand_path("setup", __dir__) end def set_path diff --git a/lib/rubygems.rb b/lib/rubygems.rb index 915a899f38edfb..43b1c4528871bf 100644 --- a/lib/rubygems.rb +++ b/lib/rubygems.rb @@ -1348,3 +1348,5 @@ def default_gem_load_paths require_relative "rubygems/core_ext/kernel_gem" require_relative "rubygems/core_ext/kernel_require" require_relative "rubygems/core_ext/kernel_warn" + +require ENV["BUNDLER_SETUP"] if ENV["BUNDLER_SETUP"] diff --git a/spec/bundler/bundler/shared_helpers_spec.rb b/spec/bundler/bundler/shared_helpers_spec.rb index 68a24be31c6914..569d6ed9cdf191 100644 --- a/spec/bundler/bundler/shared_helpers_spec.rb +++ b/spec/bundler/bundler/shared_helpers_spec.rb @@ -246,6 +246,13 @@ end end + shared_examples_for "ENV['BUNDLER_SETUP'] gets set correctly" do + it "ensures bundler/setup is set in ENV['BUNDLE_SETUP']" do + 
subject.set_bundle_environment + expect(ENV["BUNDLER_SETUP"]).to eq("#{source_lib_dir}/bundler/setup") + end + end + shared_examples_for "ENV['RUBYLIB'] gets set correctly" do let(:ruby_lib_path) { "stubbed_ruby_lib_dir" } From d5513da01d24fbc4de71975b6a49f0ba3b3be401 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Sat, 29 Oct 2022 03:23:46 +0900 Subject: [PATCH 089/104] [rubygems/rubygems] Update bundler/spec/bundler/shared_helpers_spec.rb https://github.com/rubygems/rubygems/commit/8b1481ba77 Co-authored-by: Nobuyoshi Nakada --- spec/bundler/bundler/shared_helpers_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/bundler/bundler/shared_helpers_spec.rb b/spec/bundler/bundler/shared_helpers_spec.rb index 569d6ed9cdf191..43ae9cdcebf0bf 100644 --- a/spec/bundler/bundler/shared_helpers_spec.rb +++ b/spec/bundler/bundler/shared_helpers_spec.rb @@ -247,7 +247,7 @@ end shared_examples_for "ENV['BUNDLER_SETUP'] gets set correctly" do - it "ensures bundler/setup is set in ENV['BUNDLE_SETUP']" do + it "ensures bundler/setup is set in ENV['BUNDLER_SETUP']" do subject.set_bundle_environment expect(ENV["BUNDLER_SETUP"]).to eq("#{source_lib_dir}/bundler/setup") end From 8fa83fa0b2031ad17f01b5a12b39599398dc6da6 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Thu, 10 Nov 2022 14:55:11 -0800 Subject: [PATCH 090/104] [ruby/irb] Transform ls's --grep/-G option to keyword args (https://github.com/ruby/irb/pull/437) * Transform ls's --grep/-G option to keyword args * Make --grep less flexible * Support -g instead of --grep * Suppress warnings from symbol aliases --- lib/irb.rb | 1 + lib/irb/cmd/ls.rb | 9 +++++++++ test/irb/test_cmd.rb | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/lib/irb.rb b/lib/irb.rb index 57ec9ebaebc9d9..04009664efd47a 100644 --- a/lib/irb.rb +++ b/lib/irb.rb @@ -427,6 +427,7 @@ def initialize(workspace = nil, input_method = nil) @context = Context.new(self, workspace, input_method) 
@context.main.extend ExtendCommandBundle @context.command_aliases.each do |alias_name, cmd_name| + next if @context.symbol_alias(alias_name) @context.main.install_alias_method(alias_name, cmd_name) end @signal_status = :IN_IRB diff --git a/lib/irb/cmd/ls.rb b/lib/irb/cmd/ls.rb index f4a7348bd12ad3..77cf071783dbc1 100644 --- a/lib/irb/cmd/ls.rb +++ b/lib/irb/cmd/ls.rb @@ -9,6 +9,15 @@ module IRB module ExtendCommand class Ls < Nop + def self.transform_args(args) + if match = args&.match(/\A(?.+\s|)(-g|-G)\s+(?[^\s]+)\s*\n\z/) + args = match[:args] + "#{args}#{',' unless args.chomp.empty?} grep: /#{match[:grep]}/" + else + args + end + end + def execute(*arg, grep: nil) o = Output.new(grep: grep) diff --git a/test/irb/test_cmd.rb b/test/irb/test_cmd.rb index d233cbb9b5424e..eafa8be38203bd 100644 --- a/test/irb/test_cmd.rb +++ b/test/irb/test_cmd.rb @@ -480,6 +480,44 @@ def test_ls assert_match(/C.methods:\s+m5\n/m, out) end + def test_ls_grep + pend if RUBY_ENGINE == 'truffleruby' + out, err = execute_lines("ls 42\n") + assert_empty err + assert_match(/times/, out) + assert_match(/polar/, out) + + [ + "ls 42, grep: /times/\n", + "ls 42 -g times\n", + "ls 42 -G times\n", + ].each do |line| + out, err = execute_lines(line) + assert_empty err + assert_match(/times/, out) + assert_not_match(/polar/, out) + end + end + + def test_ls_grep_empty + pend if RUBY_ENGINE == 'truffleruby' + out, err = execute_lines("ls\n") + assert_empty err + assert_match(/whereami/, out) + assert_match(/show_source/, out) + + [ + "ls grep: /whereami/\n", + "ls -g whereami\n", + "ls -G whereami\n", + ].each do |line| + out, err = execute_lines(line) + assert_empty err + assert_match(/whereami/, out) + assert_not_match(/show_source/, out) + end + end + def test_ls_with_no_singleton_class out, err = execute_lines( "ls 42", From 4c554096bfc08939e9eb1fb1773514b62b8b95b5 Mon Sep 17 00:00:00 2001 From: Jun Aruga Date: Thu, 10 Nov 2022 13:24:27 +0100 Subject: [PATCH 091/104] wasm/README.md: Add a note 
about the Ruby built for wasm. [ci skip] The Ruby built for wasm cannot be executed without a WebAssembly runtime. ``` $ ruby-wasm32-wasi/usr/local/bin/ruby -e 'puts "a"' bash: ruby-wasm32-wasi/usr/local/bin/ruby: cannot execute binary file: Exec format error ``` Because the Ruby's file type is different from the one built normally, that is the `/usr/local/ruby-3.2.0-preview2/bin/ruby` below. ``` $ file ruby-wasm32-wasi/usr/local/bin/ruby ruby-wasm32-wasi/usr/local/bin/ruby: WebAssembly (wasm) binary module version 0x1 (MVP) $ file /usr/local/ruby-3.2.0-preview2/bin/ruby /usr/local/ruby-3.2.0-preview2/bin/ruby: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=a37822085e285c0971159982e7642dda88cea606, for GNU/Linux 3.2.0, with debug_info, not stripped ``` --- wasm/README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/wasm/README.md b/wasm/README.md index 050030e5143908..0f9ca1a3d587b5 100644 --- a/wasm/README.md +++ b/wasm/README.md @@ -54,6 +54,16 @@ $ wasmtime ruby-wasm32-wasi/usr/local/bin/ruby --mapdir /::./ruby-wasm32-wasi/ - wasm32-wasi ``` +Note: you cannot run the built ruby without a WebAssembly runtime, because of the difference of the binary file type. + +``` +$ ruby-wasm32-wasi/usr/local/bin/ruby -e 'puts "a"' +bash: ruby-wasm32-wasi/usr/local/bin/ruby: cannot execute binary file: Exec format error + +$ file ruby-wasm32-wasi/usr/local/bin/ruby +ruby-wasm32-wasi/usr/local/bin/ruby: WebAssembly (wasm) binary module version 0x1 (MVP) +``` + ## Current Limitation - No `Thread` support for now.
From adfbee85e07494d42e54b1c616e5fa62a207fb8f Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Fri, 11 Nov 2022 09:05:51 +0900 Subject: [PATCH 092/104] Allow a float error for Regexp.timeout The tests failed on windows https://github.com/ruby/ruby/actions/runs/3440997073/jobs/5740085169#step:18:62 ``` 1) Failure: TestRegexp#test_s_timeout [D:/a/ruby/ruby/src/test/ruby/test_regexp.rb:1586]: <0.30000000000000004> expected but was <0.3>. 2) Failure: TestRegexp#test_timeout_shorter_than_global [D:/a/ruby/ruby/src/test/ruby/test_regexp.rb:1631]: <0.30000000000000004> expected but was <0.3>. ``` --- test/ruby/test_regexp.rb | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index 87ce6987e39e30..3479a9f212049b 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -1583,7 +1583,7 @@ def test_s_timeout timeout = #{ EnvUtil.apply_timeout_scale(0.2).inspect } begin; Regexp.timeout = timeout - assert_equal(timeout, Regexp.timeout) + assert_in_delta(timeout, Regexp.timeout, timeout * 2 * Float::EPSILON) t = Time.now assert_raise_with_message(Regexp::TimeoutError, "regexp match timeout") do @@ -1628,7 +1628,11 @@ def per_instance_redos_test(global_timeout, per_instance_timeout, expected_timeo Regexp.timeout = global_timeout re = Regexp.new("^(a*)\\1b?a*$", timeout: per_instance_timeout) - assert_equal(per_instance_timeout, re.timeout) + if per_instance_timeout + assert_in_delta(per_instance_timeout, re.timeout, per_instance_timeout * 2 * Float::EPSILON) + else + assert_nil(re.timeout) + end t = Time.now assert_raise_with_message(Regexp::TimeoutError, "regexp match timeout") do From ceeefb5870c144ddc069b2c9b8a19dbd4947a947 Mon Sep 17 00:00:00 2001 From: Jason Karns Date: Fri, 28 Oct 2022 15:30:21 -0400 Subject: [PATCH 093/104] [rubygems/rubygems] github source should default to secure protocol Bundler 2 switched to secure https here 
https://github.com/rubygems/rubygems/commit/c2e81f8ff63613871cc8b52653c5e176f8dafde3 Insecure protocols should be avoided to prevent MITM attacks. https://github.com/rubygems/rubygems/commit/758413364a --- lib/rubygems/request_set/gem_dependency_api.rb | 2 +- test/rubygems/test_gem_request_set_gem_dependency_api.rb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/rubygems/request_set/gem_dependency_api.rb b/lib/rubygems/request_set/gem_dependency_api.rb index 693cd2793a986a..ad6e45005bc44a 100644 --- a/lib/rubygems/request_set/gem_dependency_api.rb +++ b/lib/rubygems/request_set/gem_dependency_api.rb @@ -214,7 +214,7 @@ def initialize(set, path) git_source :github do |repo_name| repo_name = "#{repo_name}/#{repo_name}" unless repo_name.include? "/" - "git://github.com/#{repo_name}.git" + "https://github.com/#{repo_name}.git" end git_source :bitbucket do |repo_name| diff --git a/test/rubygems/test_gem_request_set_gem_dependency_api.rb b/test/rubygems/test_gem_request_set_gem_dependency_api.rb index d1411ddc5643c8..5fd2bbb9c2f399 100644 --- a/test/rubygems/test_gem_request_set_gem_dependency_api.rb +++ b/test/rubygems/test_gem_request_set_gem_dependency_api.rb @@ -183,7 +183,7 @@ def test_gem_github assert_equal [dep("a")], @set.dependencies - assert_equal %w[git://github.com/example/repository.git master], + assert_equal %w[https://github.com/example/repository.git master], @git_set.repositories["a"] expected = { "a" => Gem::Requirement.create("!") } @@ -196,7 +196,7 @@ def test_gem_github_expand_path assert_equal [dep("a")], @set.dependencies - assert_equal %w[git://github.com/example/example.git master], + assert_equal %w[https://github.com/example/example.git master], @git_set.repositories["a"] expected = { "a" => Gem::Requirement.create("!") } From c7d043065c058f20ce30c61bb3ce127cb15cc0a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Josef=20=C5=A0im=C3=A1nek?= Date: Sun, 11 Sep 2022 05:40:55 +0200 Subject: [PATCH 094/104] [rubygems/rubygems] 
Add 'call for update' to RubyGems install command. https://github.com/rubygems/rubygems/commit/05811f8248 --- lib/rubygems/commands/install_command.rb | 4 + lib/rubygems/config_file.rb | 12 ++ lib/rubygems/update_suggestion.rb | 70 +++++++++ test/rubygems/helper.rb | 1 + .../test_gem_commands_install_command.rb | 19 +++ test/rubygems/test_gem_update_suggestion.rb | 137 ++++++++++++++++++ 6 files changed, 243 insertions(+) create mode 100644 lib/rubygems/update_suggestion.rb create mode 100644 test/rubygems/test_gem_update_suggestion.rb diff --git a/lib/rubygems/commands/install_command.rb b/lib/rubygems/commands/install_command.rb index 071687c63fe46a..c04c01f2585018 100644 --- a/lib/rubygems/commands/install_command.rb +++ b/lib/rubygems/commands/install_command.rb @@ -5,6 +5,7 @@ require_relative "../local_remote_options" require_relative "../validator" require_relative "../version_option" +require_relative "../update_suggestion" ## # Gem installer command line tool @@ -17,6 +18,7 @@ class Gem::Commands::InstallCommand < Gem::Command include Gem::VersionOption include Gem::LocalRemoteOptions include Gem::InstallUpdateOptions + include Gem::UpdateSuggestion def initialize defaults = Gem::DependencyInstaller::DEFAULT_OPTIONS.merge({ @@ -168,6 +170,8 @@ def execute show_installed + say update_suggestion if eglible_for_update? + terminate_interaction exit_code end diff --git a/lib/rubygems/config_file.rb b/lib/rubygems/config_file.rb index c53e209ae8ace7..b18f4115ccf3bb 100644 --- a/lib/rubygems/config_file.rb +++ b/lib/rubygems/config_file.rb @@ -371,6 +371,18 @@ def backtrace @backtrace || $DEBUG end + # Check config file is writable. Creates empty file if not present to ensure we can write to it. + def config_file_writable? 
+ if File.exist?(config_file_name) + File.writable?(config_file_name) + else + require "fileutils" + FileUtils.mkdir_p File.dirname(config_file_name) + File.open(config_file_name, "w") {} + true + end + end + # The name of the configuration file. def config_file_name @config_file_name || Gem.config_file diff --git a/lib/rubygems/update_suggestion.rb b/lib/rubygems/update_suggestion.rb new file mode 100644 index 00000000000000..71c44af3afeb43 --- /dev/null +++ b/lib/rubygems/update_suggestion.rb @@ -0,0 +1,70 @@ +# frozen_string_literal: true + +## +# Mixin methods for Gem::Command to promote available RubyGems update + +module Gem::UpdateSuggestion + # list taken from https://github.com/watson/ci-info/blob/7a3c30d/index.js#L56-L66 + CI_ENV_VARS = [ + "CI", # Travis CI, CircleCI, Cirrus CI, Gitlab CI, Appveyor, CodeShip, dsari + "CONTINUOUS_INTEGRATION", # Travis CI, Cirrus CI + "BUILD_NUMBER", # Jenkins, TeamCity + "CI_APP_ID", "CI_BUILD_ID", "CI_BUILD_NUMBER", # Applfow + "RUN_ID" # TaskCluster, dsari + ].freeze + + ONE_WEEK = 7 * 24 * 60 * 60 + + ## + # Message to promote available RubyGems update with related gem update command. + + def update_suggestion + <<-MESSAGE + +A new release of RubyGems is available: #{Gem.rubygems_version} → #{Gem.latest_rubygems_version}! +Run `gem update --system #{Gem.latest_rubygems_version}` to update your installation. + + MESSAGE + end + + ## + # Determines if current environment is eglible for update suggestion. + + def eglible_for_update? + # explicit opt-out + return false if Gem.configuration[:prevent_update_suggestion] + return false if ENV["RUBYGEMS_PREVENT_UPDATE_SUGGESTION"] + + # focus only on human usage of final RubyGems releases + return false unless Gem.ui.tty? + return false if Gem.rubygems_version.prerelease? + return false if Gem.disable_system_update_message + return false if ci? 
+ + # check makes sense only when we can store of last try + # otherwise we will not be able to prevent annoying update message + # on each command call + return unless Gem.configuration.config_file_writable? + + # load time of last check, ensure the difference is enough to repeat the suggestion + check_time = Time.now.to_i + last_update_check = Gem.configuration[:last_update_check] || 0 + return false if (check_time - last_update_check) < ONE_WEEK + + # compare current and latest version, this is the part where + # latest rubygems spec is fetched from remote + (Gem.rubygems_version < Gem.latest_rubygems_version).tap do |eglible| + if eglible + # store the time of last successful check into config file + Gem.configuration[:last_update_check] = check_time + Gem.configuration.write + end + end + rescue # don't block install command on any problem + false + end + + def ci? + CI_ENV_VARS.any? {|var| ENV.include?(var) } + end +end diff --git a/test/rubygems/helper.rb b/test/rubygems/helper.rb index ae89d669fe8939..43423dc101e005 100644 --- a/test/rubygems/helper.rb +++ b/test/rubygems/helper.rb @@ -309,6 +309,7 @@ def setup ENV["XDG_DATA_HOME"] = nil ENV["SOURCE_DATE_EPOCH"] = nil ENV["BUNDLER_VERSION"] = nil + ENV["RUBYGEMS_PREVENT_UPDATE_SUGGESTION"] = "true" @current_dir = Dir.pwd @fetcher = nil diff --git a/test/rubygems/test_gem_commands_install_command.rb b/test/rubygems/test_gem_commands_install_command.rb index 7a58bcd7cb916d..14bddec485eb77 100644 --- a/test/rubygems/test_gem_commands_install_command.rb +++ b/test/rubygems/test_gem_commands_install_command.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true require_relative "helper" +require_relative "test_gem_update_suggestion" require "rubygems/commands/install_command" require "rubygems/request_set" require "rubygems/rdoc" @@ -1550,4 +1551,22 @@ def test_explain_platform_ruby_ignore_dependencies assert_equal " a-3", out.shift assert_empty out end + + def test_suggest_update_if_enabled + 
TestUpdateSuggestion.with_eglible_environment(cmd: @cmd) do + spec_fetcher do |fetcher| + fetcher.gem "a", 2 + end + + @cmd.options[:args] = %w[a] + + use_ui @ui do + assert_raise Gem::MockGemUi::SystemExitException, @ui.error do + @cmd.execute + end + end + + assert_includes @ui.output, "A new release of RubyGems is available: 1.2.3 → 2.0.0!" + end + end end diff --git a/test/rubygems/test_gem_update_suggestion.rb b/test/rubygems/test_gem_update_suggestion.rb new file mode 100644 index 00000000000000..aefebf41cffbc0 --- /dev/null +++ b/test/rubygems/test_gem_update_suggestion.rb @@ -0,0 +1,137 @@ +# frozen_string_literal: true +require_relative "helper" +require "rubygems/command" +require "rubygems/update_suggestion" + +class TestUpdateSuggestion < Gem::TestCase + def setup + super + + @cmd = Gem::Command.new "dummy", "dummy" + @cmd.extend Gem::UpdateSuggestion + end + + def with_eglible_environment(**params) + self.class.with_eglible_environment(**params) do + yield + end + end + + def self.with_eglible_environment( + tty: true, + rubygems_version: Gem::Version.new("1.2.3"), + latest_rubygems_version: Gem::Version.new("2.0.0"), + ci: false, + cmd: + ) + original_config, Gem.configuration[:prevent_update_suggestion] = Gem.configuration[:prevent_update_suggestion], nil + original_env, ENV["RUBYGEMS_PREVENT_UPDATE_SUGGESTION"] = ENV["RUBYGEMS_PREVENT_UPDATE_SUGGESTION"], nil + original_disable, Gem.disable_system_update_message = Gem.disable_system_update_message, nil + Gem.configuration[:last_update_check] = nil + + Gem.ui.stub :tty?, tty do + Gem.stub :rubygems_version, rubygems_version do + Gem.stub :latest_rubygems_version, latest_rubygems_version do + cmd.stub :ci?, ci do + yield + end + end + end + end + ensure + Gem.configuration[:prevent_update_suggestion] = original_config + ENV["RUBYGEMS_PREVENT_UPDATE_SUGGESTION"] = original_env + Gem.disable_system_update_message = original_disable + end + + def test_update_suggestion + Gem.stub :rubygems_version, 
Gem::Version.new("1.2.3") do + Gem.stub :latest_rubygems_version, Gem::Version.new("2.0.0") do + assert_equal @cmd.update_suggestion, <<~SUGGESTION + + A new release of RubyGems is available: 1.2.3 → 2.0.0! + Run `gem update --system 2.0.0` to update your installation. + + SUGGESTION + end + end + end + + def test_eglible_for_update + with_eglible_environment(cmd: @cmd) do + Time.stub :now, 123456789 do + assert @cmd.eglible_for_update? + assert_equal Gem.configuration[:last_update_check], 123456789 + + # test last check is written to config file + assert File.read(Gem.configuration.config_file_name).match("last_update_check: 123456789") + end + end + end + + def test_eglible_for_update_prevent_config + with_eglible_environment(cmd: @cmd) do + begin + original_config, Gem.configuration[:prevent_update_suggestion] = Gem.configuration[:prevent_update_suggestion], true + refute @cmd.eglible_for_update? + ensure + Gem.configuration[:prevent_update_suggestion] = original_config + end + end + end + + def test_eglible_for_update_prevent_env + with_eglible_environment(cmd: @cmd) do + begin + original_env, ENV["RUBYGEMS_PREVENT_UPDATE_SUGGESTION"] = ENV["RUBYGEMS_PREVENT_UPDATE_SUGGESTION"], "yes" + refute @cmd.eglible_for_update? + ensure + ENV["RUBYGEMS_PREVENT_UPDATE_SUGGESTION"] = original_env + end + end + end + + def test_eglible_for_update_non_tty + with_eglible_environment(tty: false, cmd: @cmd) do + refute @cmd.eglible_for_update? + end + end + + def test_eglible_for_update_for_prerelease + with_eglible_environment(rubygems_version: Gem::Version.new("1.0.0-rc1"), cmd: @cmd) do + refute @cmd.eglible_for_update? + end + end + + def test_eglible_for_update_disabled_update + with_eglible_environment(cmd: @cmd) do + begin + original_disable, Gem.disable_system_update_message = Gem.disable_system_update_message, "disabled" + refute @cmd.eglible_for_update? 
+ ensure + Gem.disable_system_update_message = original_disable + end + end + end + + def test_eglible_for_update_on_ci + with_eglible_environment(ci: true, cmd: @cmd) do + refute @cmd.eglible_for_update? + end + end + + def test_eglible_for_update_unwrittable_config + with_eglible_environment(ci: true, cmd: @cmd) do + Gem.configuration.stub :config_file_writable?, false do + refute @cmd.eglible_for_update? + end + end + end + + def test_eglible_for_update_notification_delay + with_eglible_environment(cmd: @cmd) do + Gem.configuration[:last_update_check] = Time.now.to_i + refute @cmd.eglible_for_update? + end + end +end From 7ce0f81fbbd9c2b35e9fe35f5ef040626e284a88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Josef=20=C5=A0im=C3=A1nek?= Date: Sat, 29 Oct 2022 02:47:14 +0200 Subject: [PATCH 095/104] [rubygems/rubygems] Use file in XDG_STATE_HOME directory to store last update check timestamp. https://github.com/rubygems/rubygems/commit/0fbc4ace8a --- lib/rubygems/config_file.rb | 33 +++++++++++++++++---- lib/rubygems/defaults.rb | 14 +++++++++ lib/rubygems/update_suggestion.rb | 18 +++++------ test/rubygems/helper.rb | 3 ++ test/rubygems/test_gem_update_suggestion.rb | 10 +++---- 5 files changed, 57 insertions(+), 21 deletions(-) diff --git a/lib/rubygems/config_file.rb b/lib/rubygems/config_file.rb index b18f4115ccf3bb..f2abc7f2af575f 100644 --- a/lib/rubygems/config_file.rb +++ b/lib/rubygems/config_file.rb @@ -371,16 +371,18 @@ def backtrace @backtrace || $DEBUG end - # Check config file is writable. Creates empty file if not present to ensure we can write to it. - def config_file_writable? - if File.exist?(config_file_name) - File.writable?(config_file_name) + # Check state file is writable. Creates empty file if not present to ensure we can write to it. + def state_file_writable? 
+ if File.exist?(state_file_name) + File.writable?(state_file_name) else require "fileutils" - FileUtils.mkdir_p File.dirname(config_file_name) - File.open(config_file_name, "w") {} + FileUtils.mkdir_p File.dirname(state_file_name) + File.open(state_file_name, "w") {} true end + rescue Errno::EACCES + false end # The name of the configuration file. @@ -388,6 +390,25 @@ def config_file_name @config_file_name || Gem.config_file end + # The name of the state file. + def state_file_name + @state_file_name || Gem.state_file + end + + # Reads time of last update check from state file + def last_update_check + if File.readable?(state_file_name) + File.read(state_file_name).to_i + else + 0 + end + end + + # Writes time of last update check to state file + def last_update_check=(timestamp) + File.write(state_file_name, timestamp.to_s) if state_file_writable? + end + # Delegates to @hash def each(&block) hash = @hash.dup diff --git a/lib/rubygems/defaults.rb b/lib/rubygems/defaults.rb index e12c13cb46d4f6..8daff0bc30a723 100644 --- a/lib/rubygems/defaults.rb +++ b/lib/rubygems/defaults.rb @@ -133,6 +133,13 @@ def self.config_file @config_file ||= find_config_file.tap(&Gem::UNTAINT) end + ## + # The path to standard location of the user's state file. + + def self.state_file + @state_file ||= File.join(Gem.state_home, "gem", "last_update_check").tap(&Gem::UNTAINT) + end + ## # The path to standard location of the user's cache directory. @@ -147,6 +154,13 @@ def self.data_home @data_home ||= (ENV["XDG_DATA_HOME"] || File.join(Gem.user_home, ".local", "share")) end + ## + # The path to standard location of the user's state directory. + + def self.state_home + @state_home ||= (ENV["XDG_STATE_HOME"] || File.join(Gem.user_home, ".local", "state")) + end + ## # How String Gem paths should be split. Overridable for esoteric platforms. 
diff --git a/lib/rubygems/update_suggestion.rb b/lib/rubygems/update_suggestion.rb index 71c44af3afeb43..d9ac517e49d6d0 100644 --- a/lib/rubygems/update_suggestion.rb +++ b/lib/rubygems/update_suggestion.rb @@ -41,24 +41,22 @@ def eglible_for_update? return false if Gem.disable_system_update_message return false if ci? - # check makes sense only when we can store of last try - # otherwise we will not be able to prevent annoying update message + # check makes sense only when we can store timestamp of last try + # otherwise we will not be able to prevent "annoying" update message # on each command call - return unless Gem.configuration.config_file_writable? + return unless Gem.configuration.state_file_writable? # load time of last check, ensure the difference is enough to repeat the suggestion check_time = Time.now.to_i - last_update_check = Gem.configuration[:last_update_check] || 0 + last_update_check = Gem.configuration.last_update_check return false if (check_time - last_update_check) < ONE_WEEK # compare current and latest version, this is the part where # latest rubygems spec is fetched from remote - (Gem.rubygems_version < Gem.latest_rubygems_version).tap do |eglible| - if eglible - # store the time of last successful check into config file - Gem.configuration[:last_update_check] = check_time - Gem.configuration.write - end + if (Gem.rubygems_version < Gem.latest_rubygems_version) + # store the time of last successful check into state file + Gem.configuration.last_update_check = check_time + return true end rescue # don't block install command on any problem false diff --git a/test/rubygems/helper.rb b/test/rubygems/helper.rb index 43423dc101e005..8d94f82d8e0454 100644 --- a/test/rubygems/helper.rb +++ b/test/rubygems/helper.rb @@ -307,6 +307,7 @@ def setup ENV["XDG_CACHE_HOME"] = nil ENV["XDG_CONFIG_HOME"] = nil ENV["XDG_DATA_HOME"] = nil + ENV["XDG_STATE_HOME"] = nil ENV["SOURCE_DATE_EPOCH"] = nil ENV["BUNDLER_VERSION"] = nil 
ENV["RUBYGEMS_PREVENT_UPDATE_SUGGESTION"] = "true" @@ -327,6 +328,7 @@ def setup @gemhome = File.join @tempdir, "gemhome" @userhome = File.join @tempdir, "userhome" + @statehome = File.join @tempdir, "statehome" ENV["GEM_SPEC_CACHE"] = File.join @tempdir, "spec_cache" @orig_ruby = if ENV["RUBY"] @@ -361,6 +363,7 @@ def setup Gem.instance_variable_set :@user_home, nil Gem.instance_variable_set :@config_home, nil Gem.instance_variable_set :@data_home, nil + Gem.instance_variable_set :@state_home, @statehome Gem.instance_variable_set :@gemdeps, nil Gem.instance_variable_set :@env_requirements_by_name, nil Gem.send :remove_instance_variable, :@ruby_version if diff --git a/test/rubygems/test_gem_update_suggestion.rb b/test/rubygems/test_gem_update_suggestion.rb index aefebf41cffbc0..ba54059ee7e816 100644 --- a/test/rubygems/test_gem_update_suggestion.rb +++ b/test/rubygems/test_gem_update_suggestion.rb @@ -27,7 +27,7 @@ def self.with_eglible_environment( original_config, Gem.configuration[:prevent_update_suggestion] = Gem.configuration[:prevent_update_suggestion], nil original_env, ENV["RUBYGEMS_PREVENT_UPDATE_SUGGESTION"] = ENV["RUBYGEMS_PREVENT_UPDATE_SUGGESTION"], nil original_disable, Gem.disable_system_update_message = Gem.disable_system_update_message, nil - Gem.configuration[:last_update_check] = nil + Gem.configuration.last_update_check = 0 Gem.ui.stub :tty?, tty do Gem.stub :rubygems_version, rubygems_version do @@ -61,10 +61,10 @@ def test_eglible_for_update with_eglible_environment(cmd: @cmd) do Time.stub :now, 123456789 do assert @cmd.eglible_for_update? 
- assert_equal Gem.configuration[:last_update_check], 123456789 + assert_equal Gem.configuration.last_update_check, 123456789 # test last check is written to config file - assert File.read(Gem.configuration.config_file_name).match("last_update_check: 123456789") + assert File.read(Gem.configuration.state_file_name).match("123456789") end end end @@ -122,7 +122,7 @@ def test_eglible_for_update_on_ci def test_eglible_for_update_unwrittable_config with_eglible_environment(ci: true, cmd: @cmd) do - Gem.configuration.stub :config_file_writable?, false do + Gem.configuration.stub :state_file_writable?, false do refute @cmd.eglible_for_update? end end @@ -130,7 +130,7 @@ def test_eglible_for_update_unwrittable_config def test_eglible_for_update_notification_delay with_eglible_environment(cmd: @cmd) do - Gem.configuration[:last_update_check] = Time.now.to_i + Gem.configuration.last_update_check = Time.now.to_i refute @cmd.eglible_for_update? end end From de159c5a855dd53bfd9ce284c9099306724560a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Josef=20=C5=A0im=C3=A1nek?= Date: Fri, 4 Nov 2022 22:56:31 +0100 Subject: [PATCH 096/104] [rubygems/rubygems] Store last check even when upgrade is not available and fix test. https://github.com/rubygems/rubygems/commit/bcffc2b0a5 --- lib/rubygems/config_file.rb | 2 +- lib/rubygems/update_suggestion.rb | 5 +- test/rubygems/test_gem_update_suggestion.rb | 75 ++++++++++++++++++++- 3 files changed, 77 insertions(+), 5 deletions(-) diff --git a/lib/rubygems/config_file.rb b/lib/rubygems/config_file.rb index f2abc7f2af575f..4aa8b4d33ac50a 100644 --- a/lib/rubygems/config_file.rb +++ b/lib/rubygems/config_file.rb @@ -392,7 +392,7 @@ def config_file_name # The name of the state file. 
def state_file_name - @state_file_name || Gem.state_file + Gem.state_file end # Reads time of last update check from state file diff --git a/lib/rubygems/update_suggestion.rb b/lib/rubygems/update_suggestion.rb index d9ac517e49d6d0..c2e81b2374389e 100644 --- a/lib/rubygems/update_suggestion.rb +++ b/lib/rubygems/update_suggestion.rb @@ -53,10 +53,11 @@ def eglible_for_update? # compare current and latest version, this is the part where # latest rubygems spec is fetched from remote - if (Gem.rubygems_version < Gem.latest_rubygems_version) + (Gem.rubygems_version < Gem.latest_rubygems_version).tap do |eglible| # store the time of last successful check into state file Gem.configuration.last_update_check = check_time - return true + + return eglible end rescue # don't block install command on any problem false diff --git a/test/rubygems/test_gem_update_suggestion.rb b/test/rubygems/test_gem_update_suggestion.rb index ba54059ee7e816..520a69ac69015f 100644 --- a/test/rubygems/test_gem_update_suggestion.rb +++ b/test/rubygems/test_gem_update_suggestion.rb @@ -9,6 +9,9 @@ def setup @cmd = Gem::Command.new "dummy", "dummy" @cmd.extend Gem::UpdateSuggestion + @start_time = 1_000_000 + @minute = 60 * 60 + @week = 7 * 24 * @minute end def with_eglible_environment(**params) @@ -22,12 +25,13 @@ def self.with_eglible_environment( rubygems_version: Gem::Version.new("1.2.3"), latest_rubygems_version: Gem::Version.new("2.0.0"), ci: false, + reset_last_update_check: true, cmd: ) original_config, Gem.configuration[:prevent_update_suggestion] = Gem.configuration[:prevent_update_suggestion], nil original_env, ENV["RUBYGEMS_PREVENT_UPDATE_SUGGESTION"] = ENV["RUBYGEMS_PREVENT_UPDATE_SUGGESTION"], nil original_disable, Gem.disable_system_update_message = Gem.disable_system_update_message, nil - Gem.configuration.last_update_check = 0 + Gem.configuration.last_update_check = 0 if reset_last_update_check Gem.ui.stub :tty?, tty do Gem.stub :rubygems_version, rubygems_version do @@ -69,6 +73,73 
@@ def test_eglible_for_update end end + def test_eglible_for_update_is_not_annoying_when_new_version_is_released + current_version = Gem::Version.new("1.2.0") + latest_version = current_version + + # checking for first time, it is not eglible since new version + # is not released yet and stored + with_eglible_environment(cmd: @cmd, rubygems_version: current_version, latest_rubygems_version: latest_version) do + Time.stub :now, @start_time do + refute @cmd.eglible_for_update? + assert_equal Gem.configuration.last_update_check, @start_time + end + end + + # checking next week, it is not eglible since new version + # is not released yet and timestamp is stored + with_eglible_environment( + cmd: @cmd, + rubygems_version: current_version, + latest_rubygems_version: latest_version, + reset_last_update_check: false + ) do + Time.stub :now, @start_time + @week do + refute @cmd.eglible_for_update? + assert_equal Gem.configuration.last_update_check, @start_time + @week + end + end + + # pretend new version is released + latest_version = Gem::Version.new("1.3.0") + + # checking later same next week, it is not eglible even new version + # is released and timestamp is not stored + with_eglible_environment( + cmd: @cmd, + rubygems_version: current_version, + latest_rubygems_version: latest_version, + reset_last_update_check: false + ) do + Time.stub :now, @start_time + @week + @minute do + refute @cmd.eglible_for_update? + assert_equal Gem.configuration.last_update_check, @start_time + @week + end + end + end + + def test_eglible_for_update_is_not_annoying_when_not_upgraded + with_eglible_environment(cmd: @cmd) do + # checking for first time, it is eglible and stored + Time.stub :now, @start_time do + assert @cmd.eglible_for_update? + assert_equal Gem.configuration.last_update_check, @start_time + end + + # checking minute later is not eglible and not stored + Time.stub :now, @start_time + @minute do + refute @cmd.eglible_for_update? 
+ assert_equal Gem.configuration.last_update_check, @start_time + end + + # checking week later is eglible again and stored + Time.stub :now, @start_time + @week do + assert @cmd.eglible_for_update? + assert_equal Gem.configuration.last_update_check, @start_time + @week + end + end + end + def test_eglible_for_update_prevent_config with_eglible_environment(cmd: @cmd) do begin @@ -121,7 +192,7 @@ def test_eglible_for_update_on_ci end def test_eglible_for_update_unwrittable_config - with_eglible_environment(ci: true, cmd: @cmd) do + with_eglible_environment(cmd: @cmd) do Gem.configuration.stub :state_file_writable?, false do refute @cmd.eglible_for_update? end From 28611be6ee84ba8eb19e667a70ae129833b98b8b Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 11 Nov 2022 15:05:59 +0900 Subject: [PATCH 097/104] Merge RubyGems/Bundler master from ee2f8398324af4bc1b95f7565ce2fda98126e026 --- lib/bundler/definition.rb | 2 +- lib/bundler/lazy_specification.rb | 4 +-- lib/rubygems/ext/cargo_builder.rb | 4 +++ spec/bundler/commands/update_spec.rb | 6 ++++ .../install/gemfile/specific_platform_spec.rb | 32 +++++++++++++++++++ .../compact_index_precompiled_before.rb | 25 +++++++++++++++ .../custom_name/Cargo.lock | 8 ++--- .../custom_name/Cargo.toml | 2 +- .../rust_ruby_example/Cargo.lock | 8 ++--- .../rust_ruby_example/Cargo.toml | 2 +- 10 files changed, 79 insertions(+), 14 deletions(-) create mode 100644 spec/bundler/support/artifice/compact_index_precompiled_before.rb diff --git a/lib/bundler/definition.rb b/lib/bundler/definition.rb index 95be7a7e273930..3836841f3fabbf 100644 --- a/lib/bundler/definition.rb +++ b/lib/bundler/definition.rb @@ -357,7 +357,7 @@ def ensure_equivalent_gemfile_and_lockfile(explicit_flag = false) "bundle config unset deployment" end msg << "\n\nIf this is a development machine, remove the #{Bundler.default_gemfile} " \ - "freeze \nby running `#{suggested_command}`." + "freeze \nby running `#{suggested_command}`." 
if suggested_command end added = [] diff --git a/lib/bundler/lazy_specification.rb b/lib/bundler/lazy_specification.rb index f5fe2e64aec527..949e8264baf831 100644 --- a/lib/bundler/lazy_specification.rb +++ b/lib/bundler/lazy_specification.rb @@ -79,9 +79,7 @@ def materialize_for_installation candidates = if source.is_a?(Source::Path) || !ruby_platform_materializes_to_ruby_platform? target_platform = ruby_platform_materializes_to_ruby_platform? ? platform : local_platform - source.specs.search(Dependency.new(name, version)).select do |spec| - MatchPlatform.platforms_match?(spec.platform, target_platform) - end + GemHelpers.select_best_platform_match(source.specs.search(Dependency.new(name, version)), target_platform) else source.specs.search(self) end diff --git a/lib/rubygems/ext/cargo_builder.rb b/lib/rubygems/ext/cargo_builder.rb index e33b07a8a23356..24c1d3ae6e0667 100644 --- a/lib/rubygems/ext/cargo_builder.rb +++ b/lib/rubygems/ext/cargo_builder.rb @@ -37,6 +37,7 @@ def build_crate(dest_path, results, args, cargo_dir) def build_env build_env = rb_config_env build_env["RUBY_STATIC"] = "true" if ruby_static? && ENV.key?("RUBY_STATIC") + build_env["RUSTFLAGS"] = "#{ENV["RUSTFLAGS"]} --cfg=rb_sys_gem".strip build_env end @@ -92,6 +93,9 @@ def platform_specific_rustc_args(dest_dir, flags = []) # run on one that isn't the missing libraries will cause the extension # to fail on start. flags += ["-C", "link-arg=-static-libgcc"] + elsif darwin_target? + # Ventura does not always have this flag enabled + flags += ["-C", "link-arg=-Wl,-undefined,dynamic_lookup"] end flags diff --git a/spec/bundler/commands/update_spec.rb b/spec/bundler/commands/update_spec.rb index 11ff49bf89a3da..1ad5f76466e886 100644 --- a/spec/bundler/commands/update_spec.rb +++ b/spec/bundler/commands/update_spec.rb @@ -613,6 +613,12 @@ expect(err).to match(/You are trying to install in deployment mode after changing.your Gemfile/m). 
and match(/freeze \nby running `bundle config unset deployment`./m) end + + it "should not suggest any command to unfreeze bundler if frozen is set through ENV" do + bundle "update", :all => true, :raise_on_error => false, :env => { "BUNDLE_FROZEN" => "true" } + expect(err).to match(/You are trying to install in deployment mode after changing.your Gemfile/m) + expect(err).not_to match(/by running/) + end end describe "with --source option" do diff --git a/spec/bundler/install/gemfile/specific_platform_spec.rb b/spec/bundler/install/gemfile/specific_platform_spec.rb index 699672f357e087..98efec396cce58 100644 --- a/spec/bundler/install/gemfile/specific_platform_spec.rb +++ b/spec/bundler/install/gemfile/specific_platform_spec.rb @@ -148,6 +148,38 @@ expect(out).to include("Using libv8 8.4.255.0 (universal-darwin)") end + it "chooses platform specific gems even when resolving upon materialization and the API returns more specific plaforms first" do + build_repo4 do + build_gem("grpc", "1.50.0") + build_gem("grpc", "1.50.0") {|s| s.platform = "universal-darwin" } + end + + gemfile <<-G + source "https://localgemserver.test" + gem "grpc" + G + + # simulate lockfile created with old bundler, which only locks for ruby platform + lockfile <<-L + GEM + remote: https://localgemserver.test/ + specs: + grpc (1.50.0) + + PLATFORMS + ruby + + DEPENDENCIES + grpc + + BUNDLED WITH + #{Bundler::VERSION} + L + + bundle "install --verbose", :artifice => "compact_index_precompiled_before", :env => { "BUNDLER_SPEC_GEM_REPO" => gem_repo4.to_s } + expect(out).to include("Installing grpc 1.50.0 (universal-darwin)") + end + it "caches the universal-darwin gem when --all-platforms is passed and properly picks it up on further bundler invocations" do setup_multiplatform_gem gemfile(google_protobuf) diff --git a/spec/bundler/support/artifice/compact_index_precompiled_before.rb b/spec/bundler/support/artifice/compact_index_precompiled_before.rb new file mode 100644 index 
00000000000000..9f310e653b2293 --- /dev/null +++ b/spec/bundler/support/artifice/compact_index_precompiled_before.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require_relative "compact_index" + +Artifice.deactivate + +class CompactIndexPrecompiledBefore < CompactIndexAPI + get "/info/:name" do + etag_response do + gem = gems.find {|g| g.name == params[:name] } + move_ruby_variant_to_the_end(CompactIndex.info(gem ? gem.versions : [])) + end + end + + private + + def move_ruby_variant_to_the_end(response) + lines = response.split("\n") + ruby = lines.find {|line| /\A\d+\.\d+\.\d* \|/.match(line) } + lines.delete(ruby) + lines.push(ruby).join("\n") + end +end + +Artifice.activate_with(CompactIndexPrecompiledBefore) diff --git a/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.lock b/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.lock index aa975b1cd02d05..08c97618fd1e0b 100644 --- a/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.lock +++ b/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.lock @@ -160,18 +160,18 @@ dependencies = [ [[package]] name = "rb-sys" -version = "0.9.35" +version = "0.9.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d2bde30824a18f2e68cd1c8004cec16656764c6efc385bc1c7fb4c904b276a5" +checksum = "5ba942b6777ea18ded013b267023a9c98994557e6539e43740de9e75084cb124" dependencies = [ "rb-sys-build", ] [[package]] name = "rb-sys-build" -version = "0.9.35" +version = "0.9.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ff5d3ba92624df9c66bf0d1f0251d96284f08ac9773b7723d370e3f225c1d38" +checksum = "d35109e1a11ef8d1a988db242ab2ba2e80170f9f5a28f88ab30184a2cea8e09b" dependencies = [ "bindgen", "linkify", diff --git a/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.toml b/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.toml index 6673f784645b9a..8175b5ae2e86ff 100644 --- 
a/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.toml +++ b/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.toml @@ -7,4 +7,4 @@ edition = "2021" crate-type = ["cdylib"] [dependencies] -rb-sys = { version = "0.9.35", features = ["gem"] } +rb-sys = "0.9.37" diff --git a/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.lock b/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.lock index e7e91de57674cc..eb71f255700498 100644 --- a/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.lock +++ b/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.lock @@ -153,18 +153,18 @@ dependencies = [ [[package]] name = "rb-sys" -version = "0.9.35" +version = "0.9.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d2bde30824a18f2e68cd1c8004cec16656764c6efc385bc1c7fb4c904b276a5" +checksum = "5ba942b6777ea18ded013b267023a9c98994557e6539e43740de9e75084cb124" dependencies = [ "rb-sys-build", ] [[package]] name = "rb-sys-build" -version = "0.9.35" +version = "0.9.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ff5d3ba92624df9c66bf0d1f0251d96284f08ac9773b7723d370e3f225c1d38" +checksum = "d35109e1a11ef8d1a988db242ab2ba2e80170f9f5a28f88ab30184a2cea8e09b" dependencies = [ "bindgen", "linkify", diff --git a/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.toml b/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.toml index 814afe10f72f2d..766346757d204b 100644 --- a/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.toml +++ b/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.toml @@ -7,4 +7,4 @@ edition = "2021" crate-type = ["cdylib"] [dependencies] -rb-sys = { version = "0.9.35", features = ["gem"] } +rb-sys = "0.9.37" From a0fcbce415fc045ff16eb74946a40774c0d39ac5 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 11 Nov 2022 16:06:36 +0900 Subject: [PATCH 098/104] Try to use 
ossf/scorecards --- .github/workflows/scorecards.yml | 72 ++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 .github/workflows/scorecards.yml diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml new file mode 100644 index 00000000000000..ad6dde5dc5b8b3 --- /dev/null +++ b/.github/workflows/scorecards.yml @@ -0,0 +1,72 @@ +# This workflow uses actions that are not certified by GitHub. They are provided +# by a third-party and are governed by separate terms of service, privacy +# policy, and support documentation. + +name: Scorecards supply-chain security +on: + # For Branch-Protection check. Only the default branch is supported. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection + branch_protection_rule: + # To guarantee Maintained check is occasionally updated. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained + schedule: + - cron: '22 4 * * 2' + push: + branches: [ "master" ] + +# Declare default permissions as read only. +permissions: read-all + +jobs: + analysis: + name: Scorecards analysis + runs-on: ubuntu-latest + permissions: + # Needed to upload the results to code-scanning dashboard. + security-events: write + # Needed to publish results and get a badge (see publish_results below). + id-token: write + # Uncomment the permissions below if installing in a private repository. + # contents: read + # actions: read + + steps: + - name: "Checkout code" + uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 + with: + persist-credentials: false + + - name: "Run analysis" + uses: ossf/scorecard-action@99c53751e09b9529366343771cc321ec74e9bd3d # v2.0.6 + with: + results_file: results.sarif + results_format: sarif + # (Optional) Read-only PAT token. 
Uncomment the `repo_token` line below if: + # - you want to enable the Branch-Protection check on a *public* repository, or + # - you are installing Scorecards on a *private* repository + # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat. + # repo_token: ${{ secrets.SCORECARD_READ_TOKEN }} + + # Public repositories: + # - Publish results to OpenSSF REST API for easy access by consumers + # - Allows the repository to include the Scorecard badge. + # - See https://github.com/ossf/scorecard-action#publishing-results. + # For private repositories: + # - `publish_results` will always be set to `false`, regardless + # of the value entered here. + publish_results: true + + # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF + # format to the repository Actions tab. + - name: "Upload artifact" + uses: actions/upload-artifact@3cea5372237819ed00197afe530f5a7ea3e805c8 # v3.1.0 + with: + name: SARIF file + path: results.sarif + retention-days: 5 + + # Upload the results to GitHub's code scanning dashboard. 
+ - name: "Upload to code-scanning" + uses: github/codeql-action/upload-sarif@807578363a7869ca324a79039e6db9c843e0e100 # v2.1.27 + with: + sarif_file: results.sarif From 800caf4ee1eca21252da0ec148ed7840df89bc9b Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 11 Nov 2022 16:54:24 +0900 Subject: [PATCH 099/104] Use secrets.SCORECARD_READ_TOKEN --- .github/workflows/scorecards.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index ad6dde5dc5b8b3..751180d1d20536 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -45,7 +45,7 @@ jobs: # - you want to enable the Branch-Protection check on a *public* repository, or # - you are installing Scorecards on a *private* repository # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat. - # repo_token: ${{ secrets.SCORECARD_READ_TOKEN }} + repo_token: ${{ secrets.SCORECARD_READ_TOKEN }} # Public repositories: # - Publish results to OpenSSF REST API for easy access by consumers From 5a1ddc486bad30d795ce94021ee03c085e7a88c1 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 11 Nov 2022 16:54:51 +0900 Subject: [PATCH 100/104] Test with topic-branch --- .github/workflows/scorecards.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 751180d1d20536..0fc51281ef4e1c 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -12,7 +12,7 @@ on: schedule: - cron: '22 4 * * 2' push: - branches: [ "master" ] + # branches: [ "master" ] # Declare default permissions as read only. 
permissions: read-all From 90bbc891b192c30432c517ccb279ed687bb2d0b4 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 11 Nov 2022 17:01:43 +0900 Subject: [PATCH 101/104] Enable only master-branch --- .github/workflows/scorecards.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 0fc51281ef4e1c..751180d1d20536 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -12,7 +12,7 @@ on: schedule: - cron: '22 4 * * 2' push: - # branches: [ "master" ] + branches: [ "master" ] # Declare default permissions as read only. permissions: read-all From 04c5adf80697a310f12f473b5ef772d234576f2b Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Thu, 10 Nov 2022 17:24:10 -0500 Subject: [PATCH 102/104] YJIT: Fix staying in invalidated code after proc calls Previously, there is no instruction boundary patch point after the call to a non-leaf C function we generate for OPTIMIZED_METHOD_TYPE_CALL. This meant that if code GC is triggered while inside the C function, we would keep running invalidated code when we return from the C function. This had the effect of running stale branch stubs, jumping to bad code, etc. Use jit_prepare_routine_call() to make sure we exit from the invalidated region as soon as possible after the C call in case of invalidation. --- yjit/src/codegen.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index b1b854ad7f79f4..421e14c553549f 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -5555,11 +5555,8 @@ fn gen_send_general( let sp = asm.lea(ctx.sp_opnd(0)); - // Write interpreter SP into CFP. - // Needed in case the callee yields to the block. - jit_save_pc(jit, asm); - // Store incremented PC into current control frame in case callee raises. 
- gen_save_sp(jit, asm, ctx); + // Save the PC and SP because the callee can make Ruby calls + jit_prepare_routine_call(jit, ctx, asm); let kw_splat = flags & VM_CALL_KW_SPLAT; let stack_argument_pointer = asm.lea(Opnd::mem(64, sp, -(argc) * SIZEOF_VALUE_I32)); From 4d9ada223bf7d57a62e94b0a2082cc92ba5b7813 Mon Sep 17 00:00:00 2001 From: BurdetteLamar Date: Wed, 9 Nov 2022 22:08:51 +0000 Subject: [PATCH 103/104] [ruby/net-http] Prettify class hierarchies in Net::HTTP https://github.com/ruby/net-http/commit/4a5732e210 --- lib/net/http.rb | 196 +++++++++++++++++++++++++----------------------- 1 file changed, 104 insertions(+), 92 deletions(-) diff --git a/lib/net/http.rb b/lib/net/http.rb index b602d2d0b0f1ca..80dfc95df420b4 100644 --- a/lib/net/http.rb +++ b/lib/net/http.rb @@ -298,98 +298,110 @@ class HTTPHeaderSyntaxError < StandardError; end # # Compression can be disabled through the Accept-Encoding: identity header. # - # == HTTP Request Classes - # - # Here is the HTTP request class hierarchy. - # - # * Net::HTTPRequest - # * Net::HTTP::Get - # * Net::HTTP::Head - # * Net::HTTP::Post - # * Net::HTTP::Patch - # * Net::HTTP::Put - # * Net::HTTP::Proppatch - # * Net::HTTP::Lock - # * Net::HTTP::Unlock - # * Net::HTTP::Options - # * Net::HTTP::Propfind - # * Net::HTTP::Delete - # * Net::HTTP::Move - # * Net::HTTP::Copy - # * Net::HTTP::Mkcol - # * Net::HTTP::Trace - # - # == HTTP Response Classes - # - # Here is HTTP response class hierarchy. All classes are defined in Net - # module and are subclasses of Net::HTTPResponse. 
- # - # HTTPUnknownResponse:: For unhandled HTTP extensions - # HTTPInformation:: 1xx - # HTTPContinue:: 100 - # HTTPSwitchProtocol:: 101 - # HTTPProcessing:: 102 - # HTTPEarlyHints:: 103 - # HTTPSuccess:: 2xx - # HTTPOK:: 200 - # HTTPCreated:: 201 - # HTTPAccepted:: 202 - # HTTPNonAuthoritativeInformation:: 203 - # HTTPNoContent:: 204 - # HTTPResetContent:: 205 - # HTTPPartialContent:: 206 - # HTTPMultiStatus:: 207 - # HTTPAlreadyReported:: 208 - # HTTPIMUsed:: 226 - # HTTPRedirection:: 3xx - # HTTPMultipleChoices:: 300 - # HTTPMovedPermanently:: 301 - # HTTPFound:: 302 - # HTTPSeeOther:: 303 - # HTTPNotModified:: 304 - # HTTPUseProxy:: 305 - # HTTPTemporaryRedirect:: 307 - # HTTPPermanentRedirect:: 308 - # HTTPClientError:: 4xx - # HTTPBadRequest:: 400 - # HTTPUnauthorized:: 401 - # HTTPPaymentRequired:: 402 - # HTTPForbidden:: 403 - # HTTPNotFound:: 404 - # HTTPMethodNotAllowed:: 405 - # HTTPNotAcceptable:: 406 - # HTTPProxyAuthenticationRequired:: 407 - # HTTPRequestTimeOut:: 408 - # HTTPConflict:: 409 - # HTTPGone:: 410 - # HTTPLengthRequired:: 411 - # HTTPPreconditionFailed:: 412 - # HTTPRequestEntityTooLarge:: 413 - # HTTPRequestURITooLong:: 414 - # HTTPUnsupportedMediaType:: 415 - # HTTPRequestedRangeNotSatisfiable:: 416 - # HTTPExpectationFailed:: 417 - # HTTPMisdirectedRequest:: 421 - # HTTPUnprocessableEntity:: 422 - # HTTPLocked:: 423 - # HTTPFailedDependency:: 424 - # HTTPUpgradeRequired:: 426 - # HTTPPreconditionRequired:: 428 - # HTTPTooManyRequests:: 429 - # HTTPRequestHeaderFieldsTooLarge:: 431 - # HTTPUnavailableForLegalReasons:: 451 - # HTTPServerError:: 5xx - # HTTPInternalServerError:: 500 - # HTTPNotImplemented:: 501 - # HTTPBadGateway:: 502 - # HTTPServiceUnavailable:: 503 - # HTTPGatewayTimeOut:: 504 - # HTTPVersionNotSupported:: 505 - # HTTPVariantAlsoNegotiates:: 506 - # HTTPInsufficientStorage:: 507 - # HTTPLoopDetected:: 508 - # HTTPNotExtended:: 510 - # HTTPNetworkAuthenticationRequired:: 511 + # == \HTTP Request Classes + # + # Here is 
the hierarchy of \HTTP request classes + # + # - Net::HTTPRequest + # + # - Net::HTTP::Get + # - Net::HTTP::Head + # - Net::HTTP::Post + # - Net::HTTP::Patch + # - Net::HTTP::Put + # - Net::HTTP::Proppatch + # - Net::HTTP::Lock + # - Net::HTTP::Unlock + # - Net::HTTP::Options + # - Net::HTTP::Propfind + # - Net::HTTP::Delete + # - Net::HTTP::Move + # - Net::HTTP::Copy + # - Net::HTTP::Mkcol + # - Net::HTTP::Trace + # + # == \HTTP Response Classes + # + # Here is the \HTTP response class hierarchy (with status codes): + # + # - Net::HTTPResponse: + # + # - Net::HTTPUnknownResponse (for unhandled \HTTP extensions). + # + # - Net::HTTPInformation: + # + # - Net::HTTPContinue (100) + # - Net::HTTPSwitchProtocol (101) + # - Net::HTTPProcessing (102) + # - Net::HTTPEarlyHints (103) + # + # - Net::HTTPSuccess: + # + # - Net::HTTPOK (200) + # - Net::HTTPCreated (201) + # - Net::HTTPAccepted (202) + # - Net::HTTPNonAuthoritativeInformation (203) + # - Net::HTTPNoContent (204) + # - Net::HTTPResetContent (205) + # - Net::HTTPPartialContent (206) + # - Net::HTTPMultiStatus (207) + # - Net::HTTPAlreadyReported (208) + # - Net::HTTPIMUsed (226) + # + # - HTTPRedirection: + # + # - Net::HTTPMultipleChoices (300) + # - Net::HTTPMovedPermanently (301) + # - Net::HTTPFound (302) + # - Net::HTTPSeeOther (303) + # - Net::HTTPNotModified (304) + # - Net::HTTPUseProxy (305) + # - Net::HTTPTemporaryRedirect (307) + # - Net::HTTPPermanentRedirect (308) + # + # - Net::HTTPClientError: + # + # - Net::HTTPBadRequest (400) + # - Net::HTTPUnauthorized (401) + # - Net::HTTPPaymentRequired (402) + # - Net::HTTPForbidden (403) + # - Net::HTTPNotFound (404) + # - Net::HTTPMethodNotAllowed (405) + # - Net::HTTPNotAcceptable (406) + # - Net::HTTPProxyAuthenticationRequired (407) + # - Net::HTTPRequestTimeOut (408) + # - Net::HTTPConflict (409) + # - Net::HTTPGone (410) + # - Net::HTTPLengthRequired (411) + # - Net::HTTPPreconditionFailed (412) + # - Net::HTTPRequestEntityTooLarge (413) + # - 
Net::HTTPRequestURITooLong (414) + # - Net::HTTPUnsupportedMediaType (415) + # - Net::HTTPRequestedRangeNotSatisfiable (416) + # - Net::HTTPExpectationFailed (417) + # - Net::HTTPMisdirectedRequest (421) + # - Net::HTTPUnprocessableEntity (422) + # - Net::HTTPLocked (423) + # - Net::HTTPFailedDependency (424) + # - Net::HTTPUpgradeRequired (426) + # - Net::HTTPPreconditionRequired (428) + # - Net::HTTPTooManyRequests (429) + # - Net::HTTPRequestHeaderFieldsTooLarge (431) + # - Net::HTTPUnavailableForLegalReasons (451) + # + # - Net::HTTPServerError: + # + # - Net::HTTPInternalServerError (500) + # - Net::HTTPNotImplemented (501) + # - Net::HTTPBadGateway (502) + # - Net::HTTPServiceUnavailable (503) + # - Net::HTTPGatewayTimeOut (504) + # - Net::HTTPVersionNotSupported (505) + # - Net::HTTPVariantAlsoNegotiates (506) + # - Net::HTTPInsufficientStorage (507) + # - Net::HTTPLoopDetected (508) + # - Net::HTTPNotExtended (510) + # - Net::HTTPNetworkAuthenticationRequired (511) # # There is also the Net::HTTPBadResponse exception which is raised when # there is a protocol error. From a48e01ccba99eaec48bf63db326c0602fbd50610 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Thu, 10 Nov 2022 09:36:50 -0600 Subject: [PATCH 104/104] [ruby/net-http] Update lib/net/http.rb https://github.com/ruby/net-http/commit/16d042fad6 Co-authored-by: Peter Zhu --- lib/net/http.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/net/http.rb b/lib/net/http.rb index 80dfc95df420b4..16137cebf22d12 100644 --- a/lib/net/http.rb +++ b/lib/net/http.rb @@ -348,7 +348,7 @@ class HTTPHeaderSyntaxError < StandardError; end # - Net::HTTPAlreadyReported (208) # - Net::HTTPIMUsed (226) # - # - HTTPRedirection: + # - Net::HTTPRedirection: # # - Net::HTTPMultipleChoices (300) # - Net::HTTPMovedPermanently (301)