diff --git a/.appveyor.yml b/.appveyor.yml index ea9b81aa47c6f3..1ea5e592104a31 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -16,6 +16,7 @@ skip_commits: - doc/* - '**/*.md' - '**/*.rdoc' + - '**/.document' environment: ruby_version: "24-%Platform%" zlib_version: "1.2.12" diff --git a/.cirrus.yml b/.cirrus.yml index 0cab0023c227b0..ba4470321164a5 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -10,7 +10,7 @@ env: task: name: Arm64 Graviton2 / $CC - skip: "changesIncludeOnly('doc/**', '**.{md,rdoc}')" + skip: "changesIncludeOnly('doc/**', '**.{md,rdoc}', '.document')" arm_container: # We use the arm64 images at https://github.com/ruby/ruby-ci-image/pkgs/container/ruby-ci-image . image: ghcr.io/ruby/ruby-ci-image:$CC @@ -62,3 +62,72 @@ task: make_test-tool_script: make test-tool make_test-all_script: make test-all make_test-spec_script: make test-spec + +# The following is to test YJIT on ARM64 CPUs available on Cirrus CI +yjit_task: + name: Arm64 Graviton2 / $CC YJIT + auto_cancellation: $CIRRUS_BRANCH != 'master' + skip: "changesIncludeOnly('doc/**', '**.{md,rdoc}')" + arm_container: + # We use the arm64 images at https://github.com/ruby/ruby-ci-image/pkgs/container/ruby-ci-image . + image: ghcr.io/ruby/ruby-ci-image:$CC + # Define the used cpu core in each matrix task. We can use total 16 cpu + # cores in entire matrix. [cpu] = [total cpu: 16] / [number of tasks] + cpu: 8 + # We can request maximum 4 GB per cpu. + # [memory per task] = [memory per cpu: 4 GB] * [cpu] + memory: 32G + env: + CIRRUS_CLONE_DEPTH: 50 + optflags: '-O1' + debugflags: '-ggdb3' + RUBY_PREFIX: /tmp/ruby-prefix + RUBY_DEBUG: ci rgengc + RUBY_TESTOPTS: >- + -q + --color=always + --tty=no + matrix: + CC: clang-12 + CC: gcc-11 + timeout_in: 90m + id_script: id + set_env_script: + # Set `GNUMAKEFLAGS`, because the flags are GNU make specific. Note using + # the `make` environment variable used in compilers.yml causes some rubygems + # tests to fail. 
+ # https://github.com/rubygems/rubygems/issues/4921 + - echo "GNUMAKEFLAGS=-s -j$((1 + $CIRRUS_CPU))" >> $CIRRUS_ENV + print_env_script: + - echo "GNUMAKEFLAGS=$GNUMAKEFLAGS" + # Arm containers are executed in AWS's EKS, and it's not yet supporting IPv6 + # See https://github.com/aws/containers-roadmap/issues/835 + disable_ipv6_script: sudo ./tool/disable_ipv6.sh + install_rust_script: + - sudo apt-get update -y + - sudo apt-get install -y curl + - "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y" + autogen_script: ./autogen.sh + configure_script: >- + source $HOME/.cargo/env && ./configure -C + --enable-debug-env + --disable-install-doc + --with-ext=-test-/cxxanyargs,+ + --prefix="$RUBY_PREFIX" + --enable-yjit=dev + make_miniruby_script: source $HOME/.cargo/env && make miniruby + make_bindgen_script: | + if [[ "$CC" = "clang-12" ]]; then + source $HOME/.cargo/env && make yjit-bindgen + else + echo "only running bindgen on clang image" + fi + boot_miniruby_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 -e0 + test_dump_insns_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-dump-insns -e0 + output_stats_script: RUST_BACKTRACE=1 ./miniruby --yjit-call-threshold=1 --yjit-stats -e0 + full_build_script: source $HOME/.cargo/env && make + cargo_test_script: source $HOME/.cargo/env && cd yjit && cargo test + make_test_script: source $HOME/.cargo/env && make test RUN_OPTS="--yjit-call-threshold=1 --yjit-verify-ctx" + make_test_all_script: source $HOME/.cargo/env && make test-all RUN_OPTS="--yjit-call-threshold=1" TESTOPTS="$RUBY_TESTOPTS"' --test-order=alpha --name=!/TestGCCompact/' + test_gc_compact_script: source $HOME/.cargo/env && make test-all RUN_OPTS="--yjit-call-threshold=1" TESTS="test/ruby/test_gc_compact.rb" + make_test_spec_script: source $HOME/.cargo/env && make test-spec RUN_OPTS="--yjit-call-threshold=1" diff --git a/.gdbinit b/.gdbinit index 8979e8b47c5ab1..34d044caf6317f 100644 --- a/.gdbinit +++ 
b/.gdbinit @@ -544,13 +544,13 @@ end define rp_class printf "(struct RClass *) %p", (void*)$arg0 - if ((struct RClass *)($arg0))->ptr.origin_ != $arg0 - printf " -> %p", ((struct RClass *)($arg0))->ptr.origin_ + if RCLASS_ORIGIN((struct RClass *)($arg0)) != $arg0 + printf " -> %p", RCLASS_ORIGIN((struct RClass *)($arg0)) end printf "\n" rb_classname $arg0 print/x *(struct RClass *)($arg0) - print *((struct RClass *)($arg0))->ptr + print *RCLASS_EXT((struct RClass *)($arg0)) end document rp_class Print the content of a Class/Module. @@ -979,8 +979,8 @@ end define rb_ps_vm print $ps_vm = (rb_vm_t*)$arg0 - set $ps_thread_ln = $ps_vm->living_threads.n.next - set $ps_thread_ln_last = $ps_vm->living_threads.n.prev + set $ps_thread_ln = $ps_vm->ractor.main_ractor.threads.set.n.next + set $ps_thread_ln_last = $ps_vm->ractor.main_ractor.threads.set.n.prev while 1 set $ps_thread_th = (rb_thread_t *)$ps_thread_ln set $ps_thread = (VALUE)($ps_thread_th->self) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index c05a98e306c73d..6c5eac5a0f8026 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -18,3 +18,6 @@ d7ffd3fea402239b16833cc434404a7af82d44f3 f28287d34c03f472ffe90ea262bdde9affd4b965 0d842fecb4f75ab3b1d4097ebdb8e88f51558041 4ba2c66761d6a293abdfba409241d31063cefd62 + +# Make benchmark indentation consistent +fc4acf8cae82e5196186d3278d831f2438479d91 diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index c8d7ec5e0dc19f..b7cd624b0dda95 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -3,9 +3,9 @@ # Code owners will be automatically tagged as reviewers when a pull request is opened # YJIT sources and tests -yjit* @maximecb @xrxr @tenderlove -yjit/* @maximecb @xrxr @tenderlove -doc/yjit/* @maximecb @xrxr @tenderlove -bootstraptest/test_yjit* @maximecb @xrxr @tenderlove -test/ruby/test_yjit* @maximecb @xrxr @tenderlove -.github/workflows/yjit* @maximecb @xrxr @tenderlove +yjit* @maximecb @xrxr @tenderlove @k0kubun +yjit/* @maximecb 
@xrxr @tenderlove @k0kubun +doc/yjit/* @maximecb @xrxr @tenderlove @k0kubun +bootstraptest/test_yjit* @maximecb @xrxr @tenderlove @k0kubun +test/ruby/test_yjit* @maximecb @xrxr @tenderlove @k0kubun +.github/workflows/yjit* @maximecb @xrxr @tenderlove @k0kubun diff --git a/.github/workflows/check_dependencies.yml b/.github/workflows/check_dependencies.yml index 6834d2c9c89816..fab198933518ad 100644 --- a/.github/workflows/check_dependencies.yml +++ b/.github/workflows/check_dependencies.yml @@ -5,11 +5,13 @@ on: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 299c6b220aa6cf..f9fa0a74490162 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -6,11 +6,13 @@ on: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' schedule: - cron: '0 12 * * 4' diff --git a/.github/workflows/compilers.yml b/.github/workflows/compilers.yml index ab29a7acc556a3..340dd20f575ff2 100644 --- a/.github/workflows/compilers.yml +++ b/.github/workflows/compilers.yml @@ -6,11 +6,13 @@ on: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} @@ -147,6 +149,7 @@ jobs: - { name: disable-dln, env: { append_configure: '--disable-dln' } } - { name: enable-mkmf-verbose, env: { append_configure: '--enable-mkmf-verbose' } } - { name: disable-rubygems, env: { append_configure: '--disable-rubygems' } } + - { name: RUBY_DEVEL, env: { append_configure: '--enable-devel' } } - { name: 
OPT_THREADED_CODE=1, env: { cppflags: '-DOPT_THREADED_CODE=1' } } - { name: OPT_THREADED_CODE=2, env: { cppflags: '-DOPT_THREADED_CODE=2' } } @@ -254,6 +257,8 @@ jobs: - run: make test-all TESTS='-- ruby -ext-' if: ${{ matrix.entry.check }} - run: make test-spec + env: + CHECK_LEAKS: true if: ${{ matrix.entry.check }} - run: make test-annocheck if: ${{ matrix.entry.check && endsWith(matrix.entry.name, 'annocheck') }} diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 2f2c876b15384f..72f28a7b615baf 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -5,11 +5,13 @@ on: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} diff --git a/.github/workflows/mingw.yml b/.github/workflows/mingw.yml index 80b7a92f15223c..6f93aa5392e3c2 100644 --- a/.github/workflows/mingw.yml +++ b/.github/workflows/mingw.yml @@ -5,11 +5,13 @@ on: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} diff --git a/.github/workflows/mjit.yml b/.github/workflows/mjit.yml index c2479f9467c029..b5065288c70c88 100644 --- a/.github/workflows/mjit.yml +++ b/.github/workflows/mjit.yml @@ -5,11 +5,13 @@ on: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} diff --git a/.github/workflows/spec_guards.yml b/.github/workflows/spec_guards.yml index 480731ad930616..7bffe25bb26610 100644 --- a/.github/workflows/spec_guards.yml +++ 
b/.github/workflows/spec_guards.yml @@ -6,11 +6,13 @@ on: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} @@ -38,6 +40,8 @@ jobs: - run: gem install webrick - run: ruby ../mspec/bin/mspec working-directory: spec/ruby + env: + CHECK_LEAKS: true - uses: k0kubun/action-slack@v2.0.0 with: payload: | diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 5662ca71adde48..6ea8c06b930534 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -5,11 +5,13 @@ on: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} @@ -19,10 +21,9 @@ jobs: make: strategy: matrix: - test_task: ["check", "test-bundler-parallel", "test-bundled-gems"] + test_task: ["check", "test-syntax-suggest", "test-bundler-parallel", "test-bundled-gems"] os: - ubuntu-20.04 -# - ubuntu-18.04 configure: ["", "cppflags=-DRUBY_DEBUG"] include: - test_task: "check" diff --git a/.github/workflows/wasm.yml b/.github/workflows/wasm.yml index 83688fbacae371..713444b7410882 100644 --- a/.github/workflows/wasm.yml +++ b/.github/workflows/wasm.yml @@ -5,11 +5,13 @@ on: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 2c5b823d202758..4418c78d813cf2 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -5,11 +5,13 @@ on: - 'doc/**' - '**.md' - '**.rdoc' + - 
'**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} @@ -105,6 +107,8 @@ jobs: set | C:\msys64\usr\bin\sort > new.env C:\msys64\usr\bin\comm -13 old.env new.env >> %GITHUB_ENV% del *.env + - name: compiler version + run: cl - name: link libraries run: | for %%I in (C:\vcpkg\installed\x64-windows\bin\*.dll) do ( diff --git a/.github/workflows/yjit-ubuntu.yml b/.github/workflows/yjit-ubuntu.yml index 105489aab46236..bf90b80efb1ee0 100644 --- a/.github/workflows/yjit-ubuntu.yml +++ b/.github/workflows/yjit-ubuntu.yml @@ -5,11 +5,13 @@ on: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' pull_request: paths-ignore: - 'doc/**' - '**.md' - '**.rdoc' + - '**/.document' concurrency: group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }} diff --git a/.gitignore b/.gitignore index 521f4ec807e3ae..c12ec27782a91b 100644 --- a/.gitignore +++ b/.gitignore @@ -219,6 +219,9 @@ lcov*.info /lib/ruby/[1-9]*.* /lib/ruby/vendor_ruby +# /misc/ +/misc/**/__pycache__ + # /spec/bundler /.rspec_status diff --git a/NEWS.md b/NEWS.md index d3fee75302d250..cc7c9d2a933712 100644 --- a/NEWS.md +++ b/NEWS.md @@ -104,6 +104,9 @@ Note: We're only listing outstanding class updates. empty, instead of returning the default value or calling the default proc. [[Bug #16908]] +* Integer + * Integer#ceildiv has been added. [[Feature #18809]] + * Kernel * Kernel#binding raises RuntimeError if called from a non-Ruby frame (such as a method defined in C). [[Bug #18487]] @@ -153,11 +156,18 @@ Note: We're only listing outstanding class updates. ## Stdlib updates +* SyntaxSuggest + + * The feature of `syntax_suggest` formerly `dead_end` is integrated in Ruby. + [[Feature #18159]] + * The following default gems are updated. 
* RubyGems 3.4.0.dev * bigdecimal 3.1.2 * bundler 2.4.0.dev * cgi 0.3.2 + * date 3.2.3 + * error_highlight 0.4.0 * etc 1.4.0 * io-console 0.5.11 * io-nonblock 0.1.1 @@ -172,13 +182,14 @@ Note: We're only listing outstanding class updates. * reline 0.3.1 * securerandom 0.2.0 * stringio 3.0.3 + * syntax_suggest 0.0.1 * timeout 0.3.0 * The following bundled gems are updated. - * minitest 5.16.2 + * minitest 5.16.3 * net-imap 0.2.3 * rbs 2.6.0 * typeprof 0.21.3 - * debug 1.6.1 + * debug 1.6.2 * The following default gems are now bundled gems. ## Compatibility issues @@ -264,6 +275,7 @@ The following deprecated APIs are removed. [Bug #17545]: https://bugs.ruby-lang.org/issues/17545 [Feature #17881]: https://bugs.ruby-lang.org/issues/17881 [Feature #18037]: https://bugs.ruby-lang.org/issues/18037 +[Feature #18159]: https://bugs.ruby-lang.org/issues/18159 [Feature #18351]: https://bugs.ruby-lang.org/issues/18351 [Bug #18487]: https://bugs.ruby-lang.org/issues/18487 [Feature #18571]: https://bugs.ruby-lang.org/issues/18571 @@ -274,3 +286,4 @@ The following deprecated APIs are removed. [Feature #18685]: https://bugs.ruby-lang.org/issues/18685 [Bug #18782]: https://bugs.ruby-lang.org/issues/18782 [Feature #18788]: https://bugs.ruby-lang.org/issues/18788 +[Feature #18809]: https://bugs.ruby-lang.org/issues/18809 diff --git a/array.c b/array.c index b2ebf3c0e9ebee..793a53f17b24b2 100644 --- a/array.c +++ b/array.c @@ -1373,13 +1373,16 @@ ary_make_partial_step(VALUE ary, VALUE klass, long offset, long len, long step) const VALUE *values = RARRAY_CONST_PTR_TRANSIENT(ary); const long orig_len = len; - if ((step > 0 && step >= len) || (step < 0 && (step < -len))) { + if (step > 0 && step >= len) { VALUE result = ary_new(klass, 1); VALUE *ptr = (VALUE *)ARY_EMBED_PTR(result); RB_OBJ_WRITE(result, ptr, values[offset]); ARY_SET_EMBED_LEN(result, 1); return result; } + else if (step < 0 && step < -len) { + step = -len; + } long ustep = (step < 0) ? 
-step : step; len = (len + ustep - 1) / ustep; diff --git a/benchmark/buffer_get.yml b/benchmark/buffer_get.yml index e375dcf85dc04e..bb9ca7e94aca7f 100644 --- a/benchmark/buffer_get.yml +++ b/benchmark/buffer_get.yml @@ -1,9 +1,10 @@ +prelude: | + # frozen_string_literal: true + Warning[:experimental] = false + buffer = IO::Buffer.new(32, IO::Buffer::MAPPED) + string = "\0" * 32 benchmark: - - name: buffer.get - prelude: buffer = IO::Buffer.new(32, IO::Buffer::MAPPED) - script: buffer.get(:U32, 0) - loop_count: 20000000 - - name: string.unpack - prelude: string = "\0" * 32 - script: string.unpack("C") - loop_count: 20000000 + buffer.get_value: | + buffer.get_value(:U32, 0) + string.unpack1: | + string.unpack1("N") diff --git a/benchmark/lib/benchmark_driver/runner/mjit_exec.rb b/benchmark/lib/benchmark_driver/runner/mjit_exec.rb deleted file mode 100644 index eac3dfba8485bd..00000000000000 --- a/benchmark/lib/benchmark_driver/runner/mjit_exec.rb +++ /dev/null @@ -1,237 +0,0 @@ -require 'benchmark_driver/struct' -require 'benchmark_driver/metric' -require 'erb' - -# A special runner dedicated for measuring mjit_exec overhead. 
-class BenchmarkDriver::Runner::MjitExec - METRIC = BenchmarkDriver::Metric.new(name: 'Iteration per second', unit: 'i/s') - - # JobParser returns this, `BenchmarkDriver::Runner.runner_for` searches "*::Job" - Job = ::BenchmarkDriver::Struct.new( - :name, # @param [String] name - This is mandatory for all runner - :metrics, # @param [Array] - :num_methods, # @param [Integer] num_methods - The number of methods to be defined - :loop_count, # @param [Integer] loop_count - :from_jit, # @param [TrueClass,FalseClass] from_jit - Whether the mjit_exec() is from JIT or not - :to_jit, # @param [TrueClass,FalseClass] to_jit - Whether the mjit_exec() is to JIT or not - ) - # Dynamically fetched and used by `BenchmarkDriver::JobParser.parse` - class << JobParser = Module.new - # @param [Array,String] num_methods - # @param [Integer] loop_count - # @param [TrueClass,FalseClass] from_jit - # @param [TrueClass,FalseClass] to_jit - def parse(num_methods:, loop_count:, from_jit:, to_jit:) - if num_methods.is_a?(String) - num_methods = eval(num_methods) - end - - num_methods.map do |num| - if num_methods.size > 1 - suffix = "[#{'%4d' % num}]" - else - suffix = "_#{num}" - end - Job.new( - name: "mjit_exec_#{from_jit ? 'JT' : 'VM'}2#{to_jit ? 
'JT' : 'VM'}#{suffix}", - metrics: [METRIC], - num_methods: num, - loop_count: loop_count, - from_jit: from_jit, - to_jit: to_jit, - ) - end - end - end - - # @param [BenchmarkDriver::Config::RunnerConfig] config - # @param [BenchmarkDriver::Output] output - # @param [BenchmarkDriver::Context] contexts - def initialize(config:, output:, contexts:) - @config = config - @output = output - @contexts = contexts - end - - # This method is dynamically called by `BenchmarkDriver::JobRunner.run` - # @param [Array] jobs - def run(jobs) - @output.with_benchmark do - jobs.each do |job| - @output.with_job(name: job.name) do - @contexts.each do |context| - result = BenchmarkDriver::Repeater.with_repeat(config: @config, larger_better: true, rest_on_average: :average) do - run_benchmark(job, context: context) - end - value, duration = result.value - @output.with_context(name: context.name, executable: context.executable, gems: context.gems, prelude: context.prelude) do - @output.report(values: { METRIC => value }, duration: duration, loop_count: job.loop_count) - end - end - end - end - end - end - - private - - # @param [BenchmarkDriver::Runner::Ips::Job] job - loop_count is not nil - # @param [BenchmarkDriver::Context] context - # @return [BenchmarkDriver::Metrics] - def run_benchmark(job, context:) - if job.from_jit - if job.to_jit - benchmark = BenchmarkJT2JT.new(num_methods: job.num_methods, loop_count: job.loop_count) - else - raise NotImplementedError, "JT2VM is not implemented yet" - end - else - if job.to_jit - benchmark = BenchmarkVM2JT.new(num_methods: job.num_methods, loop_count: job.loop_count) - else - benchmark = BenchmarkVM2VM.new(num_methods: job.num_methods, loop_count: job.loop_count) - end - end - - duration = Tempfile.open(['benchmark_driver-result', '.txt']) do |f| - with_script(benchmark.render(result: f.path)) do |path| - opt = [] - if context.executable.command.any? 
{ |c| c.start_with?('--jit') } - opt << '--jit-min-calls=2' - end - IO.popen([*context.executable.command, '--disable-gems', *opt, path], &:read) - if $?.success? - Float(f.read) - else - BenchmarkDriver::Result::ERROR - end - end - end - - [job.loop_count.to_f / duration, duration] - end - - def with_script(script) - if @config.verbose >= 2 - sep = '-' * 30 - $stdout.puts "\n\n#{sep}[Script begin]#{sep}\n#{script}#{sep}[Script end]#{sep}\n\n" - end - - Tempfile.open(['benchmark_driver-', '.rb']) do |f| - f.puts script - f.close - return yield(f.path) - end - end - - # @param [Integer] num_methods - # @param [Integer] loop_count - BenchmarkVM2VM = ::BenchmarkDriver::Struct.new(:num_methods, :loop_count) do - # @param [String] result - A file to write result - def render(result:) - ERB.new(<<~EOS, trim_mode: '%').result(binding) - % num_methods.times do |i| - def a<%= i %> - nil - end - % end - RubyVM::MJIT.pause if defined?(RubyVM::MJIT) && RubyVM::MJIT.enabled? - - def vm - t = Process.clock_gettime(Process::CLOCK_MONOTONIC) - i = 0 - while i < <%= loop_count / 1000 %> - % 1000.times do |i| - a<%= i % num_methods %> - % end - i += 1 - end - % (loop_count % 1000).times do |i| - a<%= i % num_methods %> - % end - Process.clock_gettime(Process::CLOCK_MONOTONIC) - t - end - - vm # warmup call cache - File.write(<%= result.dump %>, vm) - EOS - end - end - private_constant :BenchmarkVM2VM - - # @param [Integer] num_methods - # @param [Integer] loop_count - BenchmarkVM2JT = ::BenchmarkDriver::Struct.new(:num_methods, :loop_count) do - # @param [String] result - A file to write result - def render(result:) - ERB.new(<<~EOS, trim_mode: '%').result(binding) - % num_methods.times do |i| - def a<%= i %> - nil - end - a<%= i %> - a<%= i %> # --jit-min-calls=2 - % end - RubyVM::MJIT.pause if defined?(RubyVM::MJIT) && RubyVM::MJIT.enabled? 
- - def vm - t = Process.clock_gettime(Process::CLOCK_MONOTONIC) - i = 0 - while i < <%= loop_count / 1000 %> - % 1000.times do |i| - a<%= i % num_methods %> - % end - i += 1 - end - % (loop_count % 1000).times do |i| - a<%= i % num_methods %> - % end - Process.clock_gettime(Process::CLOCK_MONOTONIC) - t - end - - vm # warmup call cache - File.write(<%= result.dump %>, vm) - EOS - end - end - private_constant :BenchmarkVM2JT - - # @param [Integer] num_methods - # @param [Integer] loop_count - BenchmarkJT2JT = ::BenchmarkDriver::Struct.new(:num_methods, :loop_count) do - # @param [String] result - A file to write result - def render(result:) - ERB.new(<<~EOS, trim_mode: '%').result(binding) - % num_methods.times do |i| - def a<%= i %> - nil - end - % end - - # You may need to: - # * Increase `JIT_ISEQ_SIZE_THRESHOLD` to 10000000 in mjit.h - # * Always return false in `inlinable_iseq_p()` of mjit_compile.c - def jit - t = Process.clock_gettime(Process::CLOCK_MONOTONIC) - i = 0 - while i < <%= loop_count / 1000 %> - % 1000.times do |i| - a<%= i % num_methods %> - % end - i += 1 - end - % (loop_count % 1000).times do |i| - a<%= i % num_methods %> - % end - Process.clock_gettime(Process::CLOCK_MONOTONIC) - t - end - - jit - jit - RubyVM::MJIT.pause if defined?(RubyVM::MJIT) && RubyVM::MJIT.enabled? 
- File.write(<%= result.dump %>, jit) - EOS - end - end - private_constant :BenchmarkJT2JT -end diff --git a/benchmark/marshal_dump_load_integer.yml b/benchmark/marshal_dump_load_integer.yml new file mode 100644 index 00000000000000..78ebf823d25fa7 --- /dev/null +++ b/benchmark/marshal_dump_load_integer.yml @@ -0,0 +1,22 @@ +prelude: | + smallint_array = 1000.times.map { |x| x } + bigint32_array = 1000.times.map { |x| x + 2**32 } + bigint64_array = 1000.times.map { |x| x + 2**64 } + + smallint_dump = Marshal.dump(smallint_array) + bigint32_dump = Marshal.dump(bigint32_array) + bigint64_dump = Marshal.dump(bigint64_array) +benchmark: + marshal_dump_integer_small: | + Marshal.dump(smallint_array) + marshal_dump_integer_over_32_bit: | + Marshal.dump(bigint32_array) + marshal_dump_integer_over_64_bit: | + Marshal.dump(bigint64_array) + marshal_load_integer_small: | + Marshal.load(smallint_dump) + marshal_load_integer_over_32_bit: | + Marshal.load(bigint32_dump) + marshal_load_integer_over_64_bit: | + Marshal.load(bigint64_dump) +loop_count: 4000 diff --git a/benchmark/masgn.yml b/benchmark/masgn.yml index 4be9333e232c1f..31cb8ee4a301de 100644 --- a/benchmark/masgn.yml +++ b/benchmark/masgn.yml @@ -1,7 +1,7 @@ prelude: | a = [nil] * 3 b = Class.new{attr_writer :a, :b, :c}.new - c, d, e, f = nil, nil, nil, nil + c = d = e = f = g = h = i = nil benchmark: array2_2: "c = (a[0], a[1] = 1, 2)" array2_3: "c = (a[0], a[1] = 1, 2, 3)" @@ -27,3 +27,27 @@ benchmark: lvar2_3p: "(d, e = 1, 2, 3; nil)" lvar3_2p: "(d, e, f = 1, 2; nil)" lvar3_3p: "(d, e, f = 1, 2, 3; nil)" + array2_2lv: "c = (a[0], a[1] = g, h)" + array2_ilv: "c = (a[0], a[1] = g, h, i)" + arrayi_2lv: "c = (a[0], a[1], a[2] = g, h)" + arrayi_ilv: "c = (a[0], a[1], a[2] = g, h, i)" + attr2_2lv: "c = (b.a, b.b = g, h)" + attr2_ilv: "c = (b.a, b.b = g, h, i)" + attri_2lv: "c = (b.a, b.b, b.c = g, h)" + attri_ilv: "c = (b.a, b.b, b.c = g, h, i)" + lvar2_2lv: "c = (d, e = g, h)" + lvar2_ilv: "c = (d, e = g, h, i)" + 
lvari_2lv: "c = (d, e, f = g, h)" + lvari_ilv: "c = (d, e, f = g, h, i)" + array2_2plv: "(a[0], a[1] = g, h; nil)" + array2_iplv: "(a[0], a[1] = g, h, i; nil)" + arrayi_2plv: "(a[0], a[1], a[2] = g, h; nil)" + arrayi_iplv: "(a[0], a[1], a[2] = g, h, i; nil)" + attr2_2plv: "(b.a, b.b = g, h; nil)" + attr2_iplv: "(b.a, b.b = g, h, i; nil)" + attri_2plv: "(b.a, b.b, b.c = g, h; nil)" + attri_iplv: "(b.a, b.b, b.c = g, h, i; nil)" + lvar2_2plv: "(d, e = g, h; nil)" + lvar2_iplv: "(d, e = g, h, i; nil)" + lvari_2plv: "(d, e, f = g, h; nil)" + lvari_iplv: "(d, e, f = g, h, i; nil)" diff --git a/benchmark/mjit_exec_jt2jt.yml b/benchmark/mjit_exec_jt2jt.yml deleted file mode 100644 index 6c303c7a44a03b..00000000000000 --- a/benchmark/mjit_exec_jt2jt.yml +++ /dev/null @@ -1,6 +0,0 @@ -type: lib/benchmark_driver/runner/mjit_exec -num_methods: [1] -#num_methods: (1..100).to_a + [200, 300, 400, 500, 600, 700, 800, 900, 1000] -loop_count: 50000000 -from_jit: true -to_jit: true diff --git a/benchmark/mjit_exec_vm2jt.yml b/benchmark/mjit_exec_vm2jt.yml deleted file mode 100644 index 764883f070f5ea..00000000000000 --- a/benchmark/mjit_exec_vm2jt.yml +++ /dev/null @@ -1,6 +0,0 @@ -type: lib/benchmark_driver/runner/mjit_exec -num_methods: [1] -#num_methods: (1..100).to_a + [200, 300, 400, 500, 600, 700, 800, 900, 1000] -loop_count: 50000000 -from_jit: false -to_jit: true diff --git a/benchmark/mjit_exec_vm2vm.yml b/benchmark/mjit_exec_vm2vm.yml deleted file mode 100644 index 030aa76c1cd963..00000000000000 --- a/benchmark/mjit_exec_vm2vm.yml +++ /dev/null @@ -1,6 +0,0 @@ -type: lib/benchmark_driver/runner/mjit_exec -num_methods: [1] -#num_methods: (1..100).to_a + [200, 300, 400, 500, 600, 700, 800, 900, 1000] -loop_count: 50000000 -from_jit: false -to_jit: false diff --git a/benchmark/so_nbody.rb b/benchmark/so_nbody.rb index d6c5bb9e61f12a..9884fc4edc6585 100644 --- a/benchmark/so_nbody.rb +++ b/benchmark/so_nbody.rb @@ -12,38 +12,38 @@ def _puts *args end class Planet - 
attr_accessor :x, :y, :z, :vx, :vy, :vz, :mass + attr_accessor :x, :y, :z, :vx, :vy, :vz, :mass - def initialize(x, y, z, vx, vy, vz, mass) - @x, @y, @z = x, y, z - @vx, @vy, @vz = vx * DAYS_PER_YEAR, vy * DAYS_PER_YEAR, vz * DAYS_PER_YEAR - @mass = mass * SOLAR_MASS - end - - def move_from_i(bodies, nbodies, dt, i) - while i < nbodies - b2 = bodies[i] - dx = @x - b2.x - dy = @y - b2.y - dz = @z - b2.z - - distance = Math.sqrt(dx * dx + dy * dy + dz * dz) - mag = dt / (distance * distance * distance) - b_mass_mag, b2_mass_mag = @mass * mag, b2.mass * mag - - @vx -= dx * b2_mass_mag - @vy -= dy * b2_mass_mag - @vz -= dz * b2_mass_mag - b2.vx += dx * b_mass_mag - b2.vy += dy * b_mass_mag - b2.vz += dz * b_mass_mag - i += 1 + def initialize(x, y, z, vx, vy, vz, mass) + @x, @y, @z = x, y, z + @vx, @vy, @vz = vx * DAYS_PER_YEAR, vy * DAYS_PER_YEAR, vz * DAYS_PER_YEAR + @mass = mass * SOLAR_MASS end - @x += dt * @vx - @y += dt * @vy - @z += dt * @vz - end + def move_from_i(bodies, nbodies, dt, i) + while i < nbodies + b2 = bodies[i] + dx = @x - b2.x + dy = @y - b2.y + dz = @z - b2.z + + distance = Math.sqrt(dx * dx + dy * dy + dz * dz) + mag = dt / (distance * distance * distance) + b_mass_mag, b2_mass_mag = @mass * mag, b2.mass * mag + + @vx -= dx * b2_mass_mag + @vy -= dy * b2_mass_mag + @vz -= dz * b2_mass_mag + b2.vx += dx * b_mass_mag + b2.vy += dy * b_mass_mag + b2.vz += dz * b_mass_mag + i += 1 + end + + @x += dt * @vx + @y += dt * @vy + @z += dt * @vz + end end def energy(bodies) diff --git a/benchmark/vm_ivar_set_on_instance.yml b/benchmark/vm_ivar_set_on_instance.yml new file mode 100644 index 00000000000000..91857b7742e0f2 --- /dev/null +++ b/benchmark/vm_ivar_set_on_instance.yml @@ -0,0 +1,35 @@ +prelude: | + class TheClass + def initialize + @v0 = 1 + @v1 = 2 + @v3 = 3 + @levar = 1 + end + + def set_value_loop + # 1M + i = 0 + while i < 1000000 + # 10 times to de-emphasize loop overhead + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + 
@levar = i + @levar = i + @levar = i + @levar = i + @levar = i + i += 1 + end + end + end + + obj = TheClass.new + +benchmark: + vm_ivar_set_on_instance: | + obj.set_value_loop +loop_count: 100 diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb index f9669a4b5bca92..4d7780d1d45a2a 100644 --- a/bootstraptest/test_yjit.rb +++ b/bootstraptest/test_yjit.rb @@ -2903,11 +2903,20 @@ def test foo end + def bar + :bar + end + + test test RubyVM::YJIT.simulate_oom! if defined?(RubyVM::YJIT) + # Old simulate_oom! leaves one byte of space and this fills it up + bar + bar + def foo :new end @@ -3068,3 +3077,183 @@ def pt_inspect(pt) a.length } + +# checktype +assert_equal 'false', %q{ + def function() + [1, 2] in [Integer, String] + end + function() +} + +# opt_send_without_block (VM_METHOD_TYPE_ATTRSET) +assert_equal 'foo', %q{ + class Foo + attr_writer :foo + + def foo() + self.foo = "foo" + end + end + foo = Foo.new + foo.foo +} + +# anytostring, intern +assert_equal 'true', %q{ + def foo() + :"#{true}" + end + foo() +} + +# toregexp, objtostring +assert_equal '/true/', %q{ + def foo() + /#{true}/ + end + foo().inspect +} + +# concatstrings, objtostring +assert_equal '9001', %q{ + def foo() + "#{9001}" + end + foo() +} + +# opt_send_without_block (VM_METHOD_TYPE_CFUNC) +assert_equal 'nil', %q{ + def foo + nil.inspect # argc: 0 + end + foo +} +assert_equal '4', %q{ + def foo + 2.pow(2) # argc: 1 + end + foo +} +assert_equal 'aba', %q{ + def foo + "abc".tr("c", "a") # argc: 2 + end + foo +} +assert_equal 'true', %q{ + def foo + respond_to?(:inspect) # argc: -1 + end + foo +} +assert_equal '["a", "b"]', %q{ + def foo + "a\nb".lines(chomp: true) # kwargs + end + foo +} + +# invokebuiltin +assert_equal '123', %q{ + def foo(obj) + obj.foo = 123 + end + + struct = Struct.new(:foo) + obj = struct.new + foo(obj) +} + +# invokebuiltin_delegate +assert_equal '.', %q{ + def foo(path) + Dir.open(path).path + end + foo(".") +} + +# opt_invokebuiltin_delegate_leave
+assert_equal '[0]', %q{"\x00".unpack("c")} + +# opt_send_without_block (VM_METHOD_TYPE_ISEQ) +assert_equal '1', %q{ + def foo = 1 + def bar = foo + bar +} +assert_equal '[1, 2, 3]', %q{ + def foo(a, b) = [1, a, b] + def bar = foo(2, 3) + bar +} +assert_equal '[1, 2, 3, 4, 5, 6]', %q{ + def foo(a, b, c:, d:, e: 0, f: 6) = [a, b, c, d, e, f] + def bar = foo(1, 2, c: 3, d: 4, e: 5) + bar +} +assert_equal '[1, 2, 3, 4]', %q{ + def foo(a, b = 2) = [a, b] + def bar = foo(1) + foo(3, 4) + bar +} + +assert_equal '1', %q{ + def foo(a) = a + def bar = foo(1) { 2 } + bar +} +assert_equal '[1, 2]', %q{ + def foo(a, &block) = [a, block.call] + def bar = foo(1) { 2 } + bar +} + +# opt_send_without_block (VM_METHOD_TYPE_IVAR) +assert_equal 'foo', %q{ + class Foo + attr_reader :foo + + def initialize + @foo = "foo" + end + end + Foo.new.foo +} + +# opt_send_without_block (VM_METHOD_TYPE_OPTIMIZED) +assert_equal 'foo', %q{ + Foo = Struct.new(:bar) + Foo.new("bar").bar = "foo" +} +assert_equal 'foo', %q{ + Foo = Struct.new(:bar) + Foo.new("foo").bar +} + +# getblockparamproxy +assert_equal 'foo', %q{ + def foo(&block) + block.call + end + foo { "foo" } +} + +# getblockparam +assert_equal 'foo', %q{ + def foo(&block) + block + end + foo { "foo" }.call +} + +assert_equal '[1, 2]', %q{ + def foo + x = [2] + [1, *x] + end + + foo + foo +} diff --git a/class.c b/class.c index 54d9e6e17795dc..5e57068f03c5ec 100644 --- a/class.c +++ b/class.c @@ -1755,6 +1755,15 @@ class_instance_method_list(int argc, const VALUE *argv, VALUE mod, int obj, int * B.instance_methods(true).include?(:method1) #=> true * C.instance_methods(false) #=> [:method3] * C.instance_methods.include?(:method2) #=> true + * + * Note that method visibility changes in the current class, as well as aliases, + * are considered as methods of the current class by this method: + * + * class C < B + * alias method4 method2 + * protected :method2 + * end + * C.instance_methods(false).sort #=> [:method2, :method3, :method4] */ 
VALUE diff --git a/common.mk b/common.mk index 007af481c705ca..851ba1ecb36400 100644 --- a/common.mk +++ b/common.mk @@ -113,7 +113,7 @@ COMMONOBJS = array.$(OBJEXT) \ math.$(OBJEXT) \ memory_view.$(OBJEXT) \ mjit.$(OBJEXT) \ - mjit_compile.$(OBJEXT) \ + mjit_compiler.$(OBJEXT) \ node.$(OBJEXT) \ numeric.$(OBJEXT) \ object.$(OBJEXT) \ @@ -687,18 +687,18 @@ realclean-platform: distclean-platform realclean-spec: distclean-spec realclean-rubyspec: realclean-spec -clean-ext:: ext/clean gems/clean timestamp/clean -distclean-ext:: ext/distclean gems/distclean timestamp/distclean -realclean-ext:: ext/realclean gems/realclean timestamp/realclean +clean-ext:: ext/clean .bundle/clean timestamp/clean +distclean-ext:: ext/distclean .bundle/distclean timestamp/distclean +realclean-ext:: ext/realclean .bundle/realclean timestamp/realclean ext/clean.mk ext/distclean.mk ext/realclean.mk:: ext/clean:: ext/clean.mk ext/distclean:: ext/distclean.mk ext/realclean:: ext/realclean.mk -timestamp/clean:: ext/clean gems/clean -timestamp/distclean:: ext/distclean gems/distclean -timestamp/realclean:: ext/realclean gems/realclean +timestamp/clean:: ext/clean .bundle/clean +timestamp/distclean:: ext/distclean .bundle/distclean +timestamp/realclean:: ext/realclean .bundle/realclean timestamp/clean timestamp/distclean timestamp/realclean:: $(Q)$(RM) $(TIMESTAMPDIR)/.*.time $(TIMESTAMPDIR)/$(arch)/.time @@ -1344,7 +1344,7 @@ update-config_files: PHONY refresh-gems: update-bundled_gems prepare-gems prepare-gems: $(HAVE_BASERUBY:yes=update-gems) $(HAVE_BASERUBY:yes=extract-gems) -update-gems$(gnumake:yes=-nongnumake): PHONY +update-gems$(gnumake:yes=-sequential): PHONY $(ECHO) Downloading bundled gem files... 
$(Q) $(BASERUBY) -C "$(srcdir)" \ -I./tool -rdownloader -answ \ @@ -1358,15 +1358,20 @@ update-gems$(gnumake:yes=-nongnumake): PHONY -e 'FileUtils.rm_rf(old.map{'"|n|"'n.chomp(".gem")})' \ gems/bundled_gems -extract-gems$(gnumake:yes=-nongnumake): PHONY +extract-gems$(gnumake:yes=-sequential): PHONY $(ECHO) Extracting bundled gem files... - $(Q) $(RUNRUBY) -C "$(srcdir)" \ - -Itool -rfileutils -rgem-unpack -answ \ + $(Q) $(BASERUBY) -C "$(srcdir)" \ + -Itool/lib -rfileutils -rbundled_gem -answ \ -e 'BEGIN {d = ".bundle/gems"}' \ - -e 'gem, ver = *$$F' \ + -e 'gem, ver, _, rev = *$$F' \ -e 'next if !ver or /^#/=~gem' \ -e 'g = "#{gem}-#{ver}"' \ - -e 'File.directory?("#{d}/#{g}") or Gem.unpack("gems/#{g}.gem", ".bundle")' \ + -e 'if File.directory?("#{d}/#{g}")' \ + -e 'elsif rev and File.exist?(gs = "gems/src/#{gem}/#{gem}.gemspec")' \ + -e 'BundledGem.copy(gs, ".bundle")' \ + -e 'else' \ + -e 'BundledGem.unpack("gems/#{g}.gem", ".bundle")' \ + -e 'end' \ gems/bundled_gems update-bundled_gems: PHONY @@ -1374,8 +1379,8 @@ update-bundled_gems: PHONY $(tooldir)/update-bundled_gems.rb \ "$(srcdir)/gems/bundled_gems" | \ $(IFCHANGE) "$(srcdir)/gems/bundled_gems" - - git -C "$(srcdir)" diff --no-ext-diff --ignore-submodules --exit-code || \ - git -C "$(srcdir)" commit -m "Update bundled_gems" gems/bundled_gems + $(GIT) -C "$(srcdir)" diff --no-ext-diff --ignore-submodules --exit-code || \ + $(GIT) -C "$(srcdir)" commit -m "Update bundled_gems" gems/bundled_gems PRECHECK_BUNDLED_GEMS = test-bundled-gems-precheck test-bundled-gems-precheck: $(TEST_RUNNABLE)-test-bundled-gems-precheck @@ -1383,7 +1388,7 @@ yes-test-bundled-gems-precheck: main no-test-bundled-gems-precheck: test-bundled-gems-fetch: yes-test-bundled-gems-fetch -yes-test-bundled-gems-fetch: $(PREP) +yes-test-bundled-gems-fetch: $(ACTIONS_GROUP) $(Q) $(BASERUBY) -C $(srcdir)/gems ../tool/fetch-bundled_gems.rb src bundled_gems $(ACTIONS_ENDGROUP) @@ -1410,6 +1415,25 @@ BUNDLED_GEMS = test-bundled-gems-run: 
$(PREPARE_BUNDLED_GEMS) $(gnumake_recursive)$(Q) $(XRUBY) $(tooldir)/test-bundled-gems.rb $(BUNDLED_GEMS) +test-syntax-suggest-precheck: $(TEST_RUNNABLE)-test-syntax-suggest-precheck +no-test-syntax-suggest-precheck: +yes-test-syntax-suggest-precheck: main + +no-test-syntax-suggest-prepare: no-test-syntax-suggest-precheck +yes-test-syntax-suggest-prepare: yes-test-syntax-suggest-precheck + $(ACTIONS_GROUP) + $(XRUBY) -C "$(srcdir)" bin/gem install --no-document \ + --install-dir .bundle --conservative "bundler" "rake" "rspec:~> 3" #"ruby-prof" + $(ACTIONS_ENDGROUP) + +RSPECOPTS = +SYNTAX_SUGGEST_SPECS = +test-syntax-suggest: $(TEST_RUNNABLE)-test-syntax-suggest +yes-test-syntax-suggest: yes-test-syntax-suggest-prepare + $(XRUBY) -C $(srcdir) -Ispec/syntax_suggest .bundle/bin/rspec \ + --require spec_helper $(RSPECOPTS) spec/syntax_suggest/$(SYNTAX_SUGGEST_SPECS) +no-test-syntax-suggest: + test-bundler-precheck: $(TEST_RUNNABLE)-test-bundler-precheck no-test-bundler-precheck: yes-test-bundler-precheck: main @@ -1661,36 +1685,37 @@ help: PHONY " Makefile of Ruby" \ "" \ "targets:" \ - " all (default): builds all of below" \ - " miniruby: builds only miniruby" \ - " encs: builds encodings" \ - " exts: builds extensions" \ - " main: builds encodings, extensions and ruby" \ - " docs: builds documents" \ - " install-capi: builds C API documents" \ - " run: runs test.rb by miniruby" \ - " runruby: runs test.rb by ruby you just built" \ - " gdb: runs test.rb by miniruby under gdb" \ - " gdb-ruby: runs test.rb by ruby under gdb" \ - " check: equals make test test-tool test-all test-spec" \ - " test: ruby core tests [BTESTS=]" \ - " test-all: all ruby tests [TESTOPTS=-j4 TESTS=]" \ - " test-spec: run the Ruby spec suite [SPECOPTS=]" \ - " test-bundler: run the Bundler spec" \ - " test-bundled-gems: run the test suite of bundled gems" \ - " test-tool: tests under the tool/test" \ - " update-gems: download files of the bundled gems" \ - " update-bundled_gems: update the latest 
version of bundled gems" \ - " sync-default-gems: sync default gems from upstream [GEM=]" \ - " up: update local copy and autogenerated files" \ - " benchmark: benchmark this ruby and COMPARE_RUBY." \ - " gcbench: gc benchmark [GCBENCH_ITEM=]" \ - " install: install all ruby distributions" \ - " install-nodoc: install without rdoc" \ - " install-cross: install cross compiling stuff" \ - " clean: clean for tarball" \ - " distclean: clean for repository" \ - " golf: build goruby for golfers" \ + " all (default): builds all of below" \ + " miniruby: builds only miniruby" \ + " encs: builds encodings" \ + " exts: builds extensions" \ + " main: builds encodings, extensions and ruby" \ + " docs: builds documents" \ + " install-capi: builds C API documents" \ + " run: runs test.rb by miniruby" \ + " runruby: runs test.rb by ruby you just built" \ + " gdb: runs test.rb by miniruby under gdb" \ + " gdb-ruby: runs test.rb by ruby under gdb" \ + " check: equals make test test-tool test-all test-spec" \ + " test: ruby core tests [BTESTS=]" \ + " test-all: all ruby tests [TESTOPTS=-j4 TESTS=]" \ + " test-spec: run the Ruby spec suite [SPECOPTS=]" \ + " test-bundler: run the Bundler spec" \ + " test-bundler-parallel: run the Bundler spec with parallel" \ + " test-bundled-gems: run the test suite of bundled gems" \ + " test-tool: tests under the tool/test" \ + " update-gems: download files of the bundled gems" \ + " update-bundled_gems: update the latest version of bundled gems" \ + " sync-default-gems: sync default gems from upstream [GEM=]" \ + " up: update local copy and autogenerated files" \ + " benchmark: benchmark this ruby and COMPARE_RUBY." 
\ + " gcbench: gc benchmark [GCBENCH_ITEM=]" \ + " install: install all ruby distributions" \ + " install-nodoc: install without rdoc" \ + " install-cross: install cross compiling stuff" \ + " clean: clean for tarball" \ + " distclean: clean for repository" \ + " golf: build goruby for golfers" \ $(HELP_EXTRA_TASKS) \ "see DeveloperHowto for more detail: " \ " https://bugs.ruby-lang.org/projects/ruby/wiki/DeveloperHowto" \ @@ -3494,7 +3519,6 @@ cont.$(OBJEXT): {$(VPATH)}thread_native.h cont.$(OBJEXT): {$(VPATH)}vm_core.h cont.$(OBJEXT): {$(VPATH)}vm_debug.h cont.$(OBJEXT): {$(VPATH)}vm_opts.h -cont.$(OBJEXT): {$(VPATH)}yjit.h debug.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h debug.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h debug.$(OBJEXT): $(CCAN_DIR)/list/list.h @@ -6376,7 +6400,6 @@ eval.$(OBJEXT): {$(VPATH)}vm.h eval.$(OBJEXT): {$(VPATH)}vm_core.h eval.$(OBJEXT): {$(VPATH)}vm_debug.h eval.$(OBJEXT): {$(VPATH)}vm_opts.h -eval.$(OBJEXT): {$(VPATH)}yjit.h explicit_bzero.$(OBJEXT): {$(VPATH)}config.h explicit_bzero.$(OBJEXT): {$(VPATH)}explicit_bzero.c explicit_bzero.$(OBJEXT): {$(VPATH)}internal/attr/format.h @@ -6829,7 +6852,6 @@ gc.$(OBJEXT): {$(VPATH)}vm_core.h gc.$(OBJEXT): {$(VPATH)}vm_debug.h gc.$(OBJEXT): {$(VPATH)}vm_opts.h gc.$(OBJEXT): {$(VPATH)}vm_sync.h -gc.$(OBJEXT): {$(VPATH)}yjit.h goruby.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h goruby.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h goruby.$(OBJEXT): $(CCAN_DIR)/list/list.h @@ -8710,12 +8732,15 @@ main.$(OBJEXT): {$(VPATH)}vm_debug.h marshal.$(OBJEXT): $(hdrdir)/ruby/ruby.h marshal.$(OBJEXT): $(top_srcdir)/internal/array.h marshal.$(OBJEXT): $(top_srcdir)/internal/bignum.h +marshal.$(OBJEXT): $(top_srcdir)/internal/bits.h marshal.$(OBJEXT): $(top_srcdir)/internal/class.h marshal.$(OBJEXT): $(top_srcdir)/internal/compilers.h marshal.$(OBJEXT): $(top_srcdir)/internal/encoding.h marshal.$(OBJEXT): $(top_srcdir)/internal/error.h +marshal.$(OBJEXT): $(top_srcdir)/internal/fixnum.h 
marshal.$(OBJEXT): $(top_srcdir)/internal/gc.h marshal.$(OBJEXT): $(top_srcdir)/internal/hash.h +marshal.$(OBJEXT): $(top_srcdir)/internal/numeric.h marshal.$(OBJEXT): $(top_srcdir)/internal/object.h marshal.$(OBJEXT): $(top_srcdir)/internal/serial.h marshal.$(OBJEXT): $(top_srcdir)/internal/static_assert.h @@ -9472,6 +9497,7 @@ mjit.$(OBJEXT): $(top_srcdir)/internal/file.h mjit.$(OBJEXT): $(top_srcdir)/internal/gc.h mjit.$(OBJEXT): $(top_srcdir)/internal/hash.h mjit.$(OBJEXT): $(top_srcdir)/internal/imemo.h +mjit.$(OBJEXT): $(top_srcdir)/internal/process.h mjit.$(OBJEXT): $(top_srcdir)/internal/serial.h mjit.$(OBJEXT): $(top_srcdir)/internal/static_assert.h mjit.$(OBJEXT): $(top_srcdir)/internal/vm.h @@ -9680,208 +9706,208 @@ mjit.$(OBJEXT): {$(VPATH)}vm_debug.h mjit.$(OBJEXT): {$(VPATH)}vm_opts.h mjit.$(OBJEXT): {$(VPATH)}vm_sync.h mjit.$(OBJEXT): {$(VPATH)}yjit.h -mjit_compile.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h -mjit_compile.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h -mjit_compile.$(OBJEXT): $(CCAN_DIR)/list/list.h -mjit_compile.$(OBJEXT): $(CCAN_DIR)/str/str.h -mjit_compile.$(OBJEXT): $(hdrdir)/ruby.h -mjit_compile.$(OBJEXT): $(hdrdir)/ruby/ruby.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/array.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/class.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/compile.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/compilers.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/gc.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/hash.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/imemo.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/object.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/serial.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/static_assert.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/variable.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/vm.h -mjit_compile.$(OBJEXT): $(top_srcdir)/internal/warnings.h -mjit_compile.$(OBJEXT): {$(VPATH)}assert.h 
-mjit_compile.$(OBJEXT): {$(VPATH)}atomic.h -mjit_compile.$(OBJEXT): {$(VPATH)}backward/2/assume.h -mjit_compile.$(OBJEXT): {$(VPATH)}backward/2/attributes.h -mjit_compile.$(OBJEXT): {$(VPATH)}backward/2/bool.h -mjit_compile.$(OBJEXT): {$(VPATH)}backward/2/gcc_version_since.h -mjit_compile.$(OBJEXT): {$(VPATH)}backward/2/inttypes.h -mjit_compile.$(OBJEXT): {$(VPATH)}backward/2/limits.h -mjit_compile.$(OBJEXT): {$(VPATH)}backward/2/long_long.h -mjit_compile.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h -mjit_compile.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h -mjit_compile.$(OBJEXT): {$(VPATH)}builtin.h -mjit_compile.$(OBJEXT): {$(VPATH)}config.h -mjit_compile.$(OBJEXT): {$(VPATH)}constant.h -mjit_compile.$(OBJEXT): {$(VPATH)}debug_counter.h -mjit_compile.$(OBJEXT): {$(VPATH)}defines.h -mjit_compile.$(OBJEXT): {$(VPATH)}id.h -mjit_compile.$(OBJEXT): {$(VPATH)}id_table.h -mjit_compile.$(OBJEXT): {$(VPATH)}insns.def -mjit_compile.$(OBJEXT): {$(VPATH)}insns.inc -mjit_compile.$(OBJEXT): {$(VPATH)}insns_info.inc -mjit_compile.$(OBJEXT): {$(VPATH)}intern.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/abi.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/anyargs.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/char.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/double.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/fixnum.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/gid_t.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/int.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/intptr_t.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/long.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/long_long.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/mode_t.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/off_t.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/pid_t.h -mjit_compile.$(OBJEXT): 
{$(VPATH)}internal/arithmetic/short.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/size_t.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/st_data_t.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/arithmetic/uid_t.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/assume.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/alloc_size.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/artificial.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/cold.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/const.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/constexpr.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/deprecated.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/diagnose_if.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/enum_extensibility.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/error.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/flag_enum.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/forceinline.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/format.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/maybe_unused.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/noalias.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/nodiscard.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/noexcept.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/noinline.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/nonnull.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/noreturn.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/pure.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/restrict.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/returns_nonnull.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/warning.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/attr/weakref.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/cast.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/compiler_is.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/compiler_is/apple.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/compiler_is/clang.h 
-mjit_compile.$(OBJEXT): {$(VPATH)}internal/compiler_is/gcc.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/compiler_is/intel.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/compiler_is/msvc.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/compiler_is/sunpro.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/compiler_since.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/config.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/constant_p.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rarray.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rbasic.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rbignum.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rclass.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rdata.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rfile.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rhash.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/robject.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rregexp.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rstring.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rstruct.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/core/rtypeddata.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/ctype.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/dllexport.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/dosish.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/error.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/eval.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/event.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/fl_type.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/gc.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/glob.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/globals.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/has/attribute.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/has/builtin.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/has/c_attribute.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/has/cpp_attribute.h -mjit_compile.$(OBJEXT): 
{$(VPATH)}internal/has/declspec_attribute.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/has/extension.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/has/feature.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/has/warning.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/array.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/bignum.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/class.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/compar.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/complex.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/cont.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/dir.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/enum.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/enumerator.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/error.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/eval.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/file.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/gc.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/hash.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/io.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/load.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/marshal.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/numeric.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/object.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/parse.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/proc.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/process.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/random.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/range.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/rational.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/re.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/ruby.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/select.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/select/largesize.h -mjit_compile.$(OBJEXT): 
{$(VPATH)}internal/intern/signal.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/sprintf.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/string.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/struct.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/thread.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/time.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/variable.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/intern/vm.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/interpreter.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/iterator.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/memory.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/method.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/module.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/newobj.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/rgengc.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/scan_args.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/special_consts.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/static_assert.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/stdalign.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/stdbool.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/symbol.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/value.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/value_type.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/variable.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/warning_push.h -mjit_compile.$(OBJEXT): {$(VPATH)}internal/xmalloc.h -mjit_compile.$(OBJEXT): {$(VPATH)}iseq.h -mjit_compile.$(OBJEXT): {$(VPATH)}method.h -mjit_compile.$(OBJEXT): {$(VPATH)}missing.h -mjit_compile.$(OBJEXT): {$(VPATH)}mjit.h -mjit_compile.$(OBJEXT): {$(VPATH)}mjit_compile.c -mjit_compile.$(OBJEXT): {$(VPATH)}mjit_compile.inc -mjit_compile.$(OBJEXT): {$(VPATH)}mjit_unit.h -mjit_compile.$(OBJEXT): {$(VPATH)}node.h -mjit_compile.$(OBJEXT): {$(VPATH)}ruby_assert.h -mjit_compile.$(OBJEXT): {$(VPATH)}ruby_atomic.h -mjit_compile.$(OBJEXT): {$(VPATH)}st.h -mjit_compile.$(OBJEXT): {$(VPATH)}subst.h 
-mjit_compile.$(OBJEXT): {$(VPATH)}thread_$(THREAD_MODEL).h -mjit_compile.$(OBJEXT): {$(VPATH)}thread_native.h -mjit_compile.$(OBJEXT): {$(VPATH)}vm_callinfo.h -mjit_compile.$(OBJEXT): {$(VPATH)}vm_core.h -mjit_compile.$(OBJEXT): {$(VPATH)}vm_exec.h -mjit_compile.$(OBJEXT): {$(VPATH)}vm_insnhelper.h -mjit_compile.$(OBJEXT): {$(VPATH)}vm_opts.h -mjit_compile.$(OBJEXT): {$(VPATH)}yjit.h +mjit_compiler.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h +mjit_compiler.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h +mjit_compiler.$(OBJEXT): $(CCAN_DIR)/list/list.h +mjit_compiler.$(OBJEXT): $(CCAN_DIR)/str/str.h +mjit_compiler.$(OBJEXT): $(hdrdir)/ruby.h +mjit_compiler.$(OBJEXT): $(hdrdir)/ruby/ruby.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/array.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/class.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/compile.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/compilers.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/gc.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/hash.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/imemo.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/object.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/serial.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/static_assert.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/variable.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/vm.h +mjit_compiler.$(OBJEXT): $(top_srcdir)/internal/warnings.h +mjit_compiler.$(OBJEXT): {$(VPATH)}assert.h +mjit_compiler.$(OBJEXT): {$(VPATH)}atomic.h +mjit_compiler.$(OBJEXT): {$(VPATH)}backward/2/assume.h +mjit_compiler.$(OBJEXT): {$(VPATH)}backward/2/attributes.h +mjit_compiler.$(OBJEXT): {$(VPATH)}backward/2/bool.h +mjit_compiler.$(OBJEXT): {$(VPATH)}backward/2/gcc_version_since.h +mjit_compiler.$(OBJEXT): {$(VPATH)}backward/2/inttypes.h +mjit_compiler.$(OBJEXT): {$(VPATH)}backward/2/limits.h +mjit_compiler.$(OBJEXT): {$(VPATH)}backward/2/long_long.h +mjit_compiler.$(OBJEXT): 
{$(VPATH)}backward/2/stdalign.h +mjit_compiler.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h +mjit_compiler.$(OBJEXT): {$(VPATH)}builtin.h +mjit_compiler.$(OBJEXT): {$(VPATH)}config.h +mjit_compiler.$(OBJEXT): {$(VPATH)}constant.h +mjit_compiler.$(OBJEXT): {$(VPATH)}debug_counter.h +mjit_compiler.$(OBJEXT): {$(VPATH)}defines.h +mjit_compiler.$(OBJEXT): {$(VPATH)}id.h +mjit_compiler.$(OBJEXT): {$(VPATH)}id_table.h +mjit_compiler.$(OBJEXT): {$(VPATH)}insns.def +mjit_compiler.$(OBJEXT): {$(VPATH)}insns.inc +mjit_compiler.$(OBJEXT): {$(VPATH)}insns_info.inc +mjit_compiler.$(OBJEXT): {$(VPATH)}intern.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/abi.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/anyargs.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/char.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/double.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/fixnum.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/gid_t.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/int.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/intptr_t.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/long.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/long_long.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/mode_t.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/off_t.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/pid_t.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/short.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/size_t.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/st_data_t.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/arithmetic/uid_t.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/assume.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/alloc_size.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/artificial.h +mjit_compiler.$(OBJEXT): 
{$(VPATH)}internal/attr/cold.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/const.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/constexpr.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/deprecated.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/diagnose_if.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/enum_extensibility.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/error.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/flag_enum.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/forceinline.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/format.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/maybe_unused.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/noalias.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/nodiscard.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/noexcept.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/noinline.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/nonnull.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/noreturn.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/pure.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/restrict.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/returns_nonnull.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/warning.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/attr/weakref.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/cast.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/compiler_is.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/compiler_is/apple.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/compiler_is/clang.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/compiler_is/gcc.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/compiler_is/intel.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/compiler_is/msvc.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/compiler_is/sunpro.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/compiler_since.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/config.h +mjit_compiler.$(OBJEXT): 
{$(VPATH)}internal/constant_p.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rarray.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rbasic.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rbignum.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rclass.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rdata.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rfile.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rhash.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/robject.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rregexp.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rstring.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rstruct.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/core/rtypeddata.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/ctype.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/dllexport.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/dosish.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/error.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/eval.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/event.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/fl_type.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/gc.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/glob.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/globals.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/has/attribute.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/has/builtin.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/has/c_attribute.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/has/cpp_attribute.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/has/declspec_attribute.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/has/extension.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/has/feature.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/has/warning.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/array.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/bignum.h +mjit_compiler.$(OBJEXT): 
{$(VPATH)}internal/intern/class.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/compar.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/complex.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/cont.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/dir.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/enum.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/enumerator.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/error.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/eval.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/file.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/gc.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/hash.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/io.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/load.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/marshal.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/numeric.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/object.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/parse.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/proc.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/process.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/random.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/range.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/rational.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/re.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/ruby.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/select.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/select/largesize.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/signal.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/sprintf.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/string.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/struct.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/thread.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/time.h +mjit_compiler.$(OBJEXT): 
{$(VPATH)}internal/intern/variable.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/intern/vm.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/interpreter.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/iterator.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/memory.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/method.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/module.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/newobj.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/rgengc.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/scan_args.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/special_consts.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/static_assert.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/stdalign.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/stdbool.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/symbol.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/value.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/value_type.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/variable.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/warning_push.h +mjit_compiler.$(OBJEXT): {$(VPATH)}internal/xmalloc.h +mjit_compiler.$(OBJEXT): {$(VPATH)}iseq.h +mjit_compiler.$(OBJEXT): {$(VPATH)}method.h +mjit_compiler.$(OBJEXT): {$(VPATH)}missing.h +mjit_compiler.$(OBJEXT): {$(VPATH)}mjit.h +mjit_compiler.$(OBJEXT): {$(VPATH)}mjit_compile.inc +mjit_compiler.$(OBJEXT): {$(VPATH)}mjit_compiler.c +mjit_compiler.$(OBJEXT): {$(VPATH)}mjit_unit.h +mjit_compiler.$(OBJEXT): {$(VPATH)}node.h +mjit_compiler.$(OBJEXT): {$(VPATH)}ruby_assert.h +mjit_compiler.$(OBJEXT): {$(VPATH)}ruby_atomic.h +mjit_compiler.$(OBJEXT): {$(VPATH)}st.h +mjit_compiler.$(OBJEXT): {$(VPATH)}subst.h +mjit_compiler.$(OBJEXT): {$(VPATH)}thread_$(THREAD_MODEL).h +mjit_compiler.$(OBJEXT): {$(VPATH)}thread_native.h +mjit_compiler.$(OBJEXT): {$(VPATH)}vm_callinfo.h +mjit_compiler.$(OBJEXT): {$(VPATH)}vm_core.h +mjit_compiler.$(OBJEXT): {$(VPATH)}vm_exec.h +mjit_compiler.$(OBJEXT): {$(VPATH)}vm_insnhelper.h +mjit_compiler.$(OBJEXT): 
{$(VPATH)}vm_opts.h +mjit_compiler.$(OBJEXT): {$(VPATH)}yjit.h node.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h node.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h node.$(OBJEXT): $(CCAN_DIR)/list/list.h @@ -11289,7 +11315,6 @@ process.$(OBJEXT): {$(VPATH)}thread_native.h process.$(OBJEXT): {$(VPATH)}util.h process.$(OBJEXT): {$(VPATH)}vm_core.h process.$(OBJEXT): {$(VPATH)}vm_opts.h -process.$(OBJEXT): {$(VPATH)}yjit.h ractor.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h ractor.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h ractor.$(OBJEXT): $(CCAN_DIR)/list/list.h @@ -13817,7 +13842,6 @@ signal.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h signal.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h signal.$(OBJEXT): $(CCAN_DIR)/list/list.h signal.$(OBJEXT): $(CCAN_DIR)/str/str.h -signal.$(OBJEXT): $(hdrdir)/ruby.h signal.$(OBJEXT): $(hdrdir)/ruby/ruby.h signal.$(OBJEXT): $(top_srcdir)/internal/array.h signal.$(OBJEXT): $(top_srcdir)/internal/compilers.h @@ -14003,7 +14027,6 @@ signal.$(OBJEXT): {$(VPATH)}internal/warning_push.h signal.$(OBJEXT): {$(VPATH)}internal/xmalloc.h signal.$(OBJEXT): {$(VPATH)}method.h signal.$(OBJEXT): {$(VPATH)}missing.h -signal.$(OBJEXT): {$(VPATH)}mjit.h signal.$(OBJEXT): {$(VPATH)}node.h signal.$(OBJEXT): {$(VPATH)}onigmo.h signal.$(OBJEXT): {$(VPATH)}oniguruma.h @@ -14019,7 +14042,6 @@ signal.$(OBJEXT): {$(VPATH)}thread_native.h signal.$(OBJEXT): {$(VPATH)}vm_core.h signal.$(OBJEXT): {$(VPATH)}vm_debug.h signal.$(OBJEXT): {$(VPATH)}vm_opts.h -signal.$(OBJEXT): {$(VPATH)}yjit.h sprintf.$(OBJEXT): $(hdrdir)/ruby/ruby.h sprintf.$(OBJEXT): $(top_srcdir)/internal/bignum.h sprintf.$(OBJEXT): $(top_srcdir)/internal/bits.h @@ -15422,7 +15444,6 @@ thread.$(OBJEXT): {$(VPATH)}vm_core.h thread.$(OBJEXT): {$(VPATH)}vm_debug.h thread.$(OBJEXT): {$(VPATH)}vm_opts.h thread.$(OBJEXT): {$(VPATH)}vm_sync.h -thread.$(OBJEXT): {$(VPATH)}yjit.h time.$(OBJEXT): $(hdrdir)/ruby/ruby.h time.$(OBJEXT): $(top_srcdir)/internal/array.h 
time.$(OBJEXT): $(top_srcdir)/internal/bignum.h @@ -17588,16 +17609,14 @@ yjit.$(OBJEXT): $(top_srcdir)/internal/array.h yjit.$(OBJEXT): $(top_srcdir)/internal/class.h yjit.$(OBJEXT): $(top_srcdir)/internal/compile.h yjit.$(OBJEXT): $(top_srcdir)/internal/compilers.h +yjit.$(OBJEXT): $(top_srcdir)/internal/fixnum.h yjit.$(OBJEXT): $(top_srcdir)/internal/gc.h yjit.$(OBJEXT): $(top_srcdir)/internal/hash.h yjit.$(OBJEXT): $(top_srcdir)/internal/imemo.h -yjit.$(OBJEXT): $(top_srcdir)/internal/object.h -yjit.$(OBJEXT): $(top_srcdir)/internal/re.h yjit.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h yjit.$(OBJEXT): $(top_srcdir)/internal/serial.h yjit.$(OBJEXT): $(top_srcdir)/internal/static_assert.h yjit.$(OBJEXT): $(top_srcdir)/internal/string.h -yjit.$(OBJEXT): $(top_srcdir)/internal/struct.h yjit.$(OBJEXT): $(top_srcdir)/internal/variable.h yjit.$(OBJEXT): $(top_srcdir)/internal/vm.h yjit.$(OBJEXT): $(top_srcdir)/internal/warnings.h @@ -17615,6 +17634,7 @@ yjit.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h yjit.$(OBJEXT): {$(VPATH)}builtin.h yjit.$(OBJEXT): {$(VPATH)}config.h yjit.$(OBJEXT): {$(VPATH)}constant.h +yjit.$(OBJEXT): {$(VPATH)}debug.h yjit.$(OBJEXT): {$(VPATH)}debug_counter.h yjit.$(OBJEXT): {$(VPATH)}defines.h yjit.$(OBJEXT): {$(VPATH)}encoding.h @@ -17793,6 +17813,7 @@ yjit.$(OBJEXT): {$(VPATH)}thread_native.h yjit.$(OBJEXT): {$(VPATH)}vm_callinfo.h yjit.$(OBJEXT): {$(VPATH)}vm_core.h yjit.$(OBJEXT): {$(VPATH)}vm_debug.h +yjit.$(OBJEXT): {$(VPATH)}vm_insnhelper.h yjit.$(OBJEXT): {$(VPATH)}vm_opts.h yjit.$(OBJEXT): {$(VPATH)}vm_sync.h yjit.$(OBJEXT): {$(VPATH)}yjit.c diff --git a/compile.c b/compile.c index 6a9ed2a5d09453..e906bd1e10b86c 100644 --- a/compile.c +++ b/compile.c @@ -1357,18 +1357,18 @@ new_child_iseq_with_callback(rb_iseq_t *iseq, const struct rb_iseq_new_with_call static void set_catch_except_p(struct rb_iseq_constant_body *body) { - body->catch_except_p = TRUE; + body->catch_except_p = true; if (body->parent_iseq != NULL) { 
set_catch_except_p(ISEQ_BODY(body->parent_iseq)); } } -/* Set body->catch_except_p to TRUE if the ISeq may catch an exception. If it is FALSE, - JIT-ed code may be optimized. If we are extremely conservative, we should set TRUE +/* Set body->catch_except_p to true if the ISeq may catch an exception. If it is false, + JIT-ed code may be optimized. If we are extremely conservative, we should set true if catch table exists. But we want to optimize while loop, which always has catch table entries for break/next/redo. - So this function sets TRUE for limited ISeqs with break/next/redo catch table entries + So this function sets true for limited ISeqs with break/next/redo catch table entries whose child ISeq would really raise an exception. */ static void update_catch_except_flags(struct rb_iseq_constant_body *body) @@ -1399,7 +1399,7 @@ update_catch_except_flags(struct rb_iseq_constant_body *body) if (entry->type != CATCH_TYPE_BREAK && entry->type != CATCH_TYPE_NEXT && entry->type != CATCH_TYPE_REDO) { - body->catch_except_p = TRUE; + body->catch_except_p = true; break; } } @@ -3332,6 +3332,84 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal } } + if (IS_INSN_ID(iobj, newarray)) { + LINK_ELEMENT *next = iobj->link.next; + if (IS_INSN(next) && IS_INSN_ID(next, expandarray) && + OPERAND_AT(next, 1) == INT2FIX(0)) { + VALUE op1, op2; + op1 = OPERAND_AT(iobj, 0); + op2 = OPERAND_AT(next, 0); + ELEM_REMOVE(next); + + if (op1 == op2) { + /* + * newarray 2 + * expandarray 2, 0 + * => + * swap + */ + if (op1 == INT2FIX(2)) { + INSN_OF(iobj) = BIN(swap); + iobj->operand_size = 0; + } + /* + * newarray X + * expandarray X, 0 + * => + * opt_reverse X + */ + else { + INSN_OF(iobj) = BIN(opt_reverse); + } + } + else { + NODE dummy_line_node = generate_dummy_line_node(iobj->insn_info.line_no, iobj->insn_info.node_id); + long diff = FIX2LONG(op1) - FIX2LONG(op2); + INSN_OF(iobj) = BIN(opt_reverse); + OPERAND_AT(iobj, 0) = OPERAND_AT(next, 0); + + if 
(op1 > op2) { + /* X > Y + * newarray X + * expandarray Y, 0 + * => + * pop * (X-Y) + * opt_reverse Y + */ + for (; diff > 0; diff--) { + INSERT_BEFORE_INSN(iobj, &dummy_line_node, pop); + } + } + else { /* (op1 < op2) */ + /* X < Y + * newarray X + * expandarray Y, 0 + * => + * putnil * (Y-X) + * opt_reverse Y + */ + for (; diff < 0; diff++) { + INSERT_BEFORE_INSN(iobj, &dummy_line_node, putnil); + } + } + } + } + } + + if (IS_INSN_ID(iobj, duparray)) { + LINK_ELEMENT *next = iobj->link.next; + /* + * duparray obj + * expandarray X, 0 + * => + * putobject obj + * expandarray X, 0 + */ + if (IS_INSN(next) && IS_INSN_ID(next, expandarray)) { + INSN_OF(iobj) = BIN(putobject); + } + } + if (IS_INSN_ID(iobj, anytostring)) { LINK_ELEMENT *next = iobj->link.next; /* diff --git a/complex.c b/complex.c index d625ced7fa865b..ae40fa7355b5bc 100644 --- a/complex.c +++ b/complex.c @@ -1121,7 +1121,10 @@ nucomp_real_p(VALUE self) static VALUE nucomp_cmp(VALUE self, VALUE other) { - if (nucomp_real_p(self) && k_numeric_p(other)) { + if (!k_numeric_p(other)) { + return rb_num_coerce_cmp(self, other, idCmp); + } + if (nucomp_real_p(self)) { if (RB_TYPE_P(other, T_COMPLEX) && nucomp_real_p(other)) { get_dat2(self, other); return rb_funcall(adat->real, idCmp, 1, bdat->real); diff --git a/configure.ac b/configure.ac index cefd6946dc0562..a7de25bc6ae8c8 100644 --- a/configure.ac +++ b/configure.ac @@ -89,7 +89,9 @@ AC_SUBST(GIT) AC_SUBST(HAVE_GIT) eval `sed -n -e ['s/^@%:@define RUBY_[A-Z_]*VERSION_\([A-Z][A-Z][A-Z_0-9]*\) \([0-9][0-9]*\)$/\1=\2/p'] \ + -e ['s/^@%:@define \(RUBY_ABI_VERSION\) \([0-9][0-9]*\).*/\1=\2/p'] \ -e ['s/^@%:@define \(RUBY_PATCHLEVEL\) \(.*\)/\1=\2/p'] \ + $srcdir/include/ruby/internal/abi.h \ $srcdir/include/ruby/version.h $srcdir/version.h` for v in MAJOR MINOR TEENY; do AS_IF([eval "test \"\$$v\" = ''"], [ @@ -101,6 +103,9 @@ AC_SUBST(MINOR) AC_SUBST(TEENY) AC_SUBST(RUBY_API_VERSION, '$(MAJOR).$(MINOR)') AC_SUBST(RUBY_PROGRAM_VERSION, 
'$(MAJOR).$(MINOR).$(TEENY)') +AS_CASE([$RUBY_PATCHLEVEL], [-*], [ + AC_DEFINE_UNQUOTED(RUBY_ABI_VERSION, [${RUBY_ABI_VERSION}]) +], [RUBY_ABI_VERSION=]) AS_IF([test "$program_prefix" = NONE], [ program_prefix= @@ -623,9 +628,13 @@ AS_IF([test "$fdeclspec" = yes], [ RUBY_APPEND_OPTIONS(CXXFLAGS, -fdeclspec) ]) -AS_CASE([$RUBY_PATCHLEVEL], [-*], - [RUBY_DEVEL=yes], [RUBY_DEVEL=no]) -particular_werror_flags=$RUBY_DEVEL +AC_ARG_ENABLE(devel, + AS_HELP_STRING([--enable-devel], [enable development build]), + [RUBY_DEVEL=$enableval], + [AS_IF([test "x${RUBY_DEVEL-no}" != xyes], [RUBY_DEVEL=])] +)dnl +AC_SUBST(RUBY_DEVEL) +particular_werror_flags=${RUBY_DEVEL:-no} AC_ARG_ENABLE(werror, AS_HELP_STRING([--disable-werror], [don't make warnings into errors @@ -869,7 +878,6 @@ AS_IF([test "$GCC" = yes], [ test "${debugflags+set}" || {RUBY_TRY_CFLAGS(-g3, [debugflags=-g3])} ]) test $ac_cv_prog_cc_g = yes && : ${debugflags=-g} -AS_IF([test "x$RUBY_DEVEL" = xyes], [RUBY_APPEND_OPTION(XCFLAGS, -DRUBY_DEVEL=1)]) AS_IF([test "$GCC" = ""], [ AS_CASE(["$target_os"],[aix*],[warnflags="$warnflags -qinfo=por" rb_cv_warnflags="$rb_cv_warnflags -qinfo=por"]) @@ -4119,6 +4127,7 @@ AS_CASE(["$ruby_version"], AS_IF([test ${RUBY_LIB_VERSION_STYLE+set}], [ { echo "#define RUBY_LIB_VERSION_STYLE $RUBY_LIB_VERSION_STYLE" + echo '@%:@include "confdefs.h"' echo '#define STRINGIZE(x) x' test -f revision.h -o -f "${srcdir}/revision.h" || echo '#define RUBY_REVISION 0' echo '#include "version.h"' diff --git a/debug.c b/debug.c index 3af7f26275033a..3dd0f7190621f5 100644 --- a/debug.c +++ b/debug.c @@ -499,6 +499,7 @@ pretty_filename(const char *path) return path; } +#undef ruby_debug_log void ruby_debug_log(const char *file, int line, const char *func_name, const char *fmt, ...) 
{ diff --git a/debug_counter.h b/debug_counter.h index 3f0dec948fa26a..c6f4176e9752de 100644 --- a/debug_counter.h +++ b/debug_counter.h @@ -347,8 +347,8 @@ RB_DEBUG_COUNTER(vm_sync_lock_enter_nb) RB_DEBUG_COUNTER(vm_sync_lock_enter_cr) RB_DEBUG_COUNTER(vm_sync_barrier) -/* mjit_exec() counts */ -RB_DEBUG_COUNTER(mjit_exec) +/* jit_exec() counts */ +RB_DEBUG_COUNTER(jit_exec) RB_DEBUG_COUNTER(mjit_exec_not_added) RB_DEBUG_COUNTER(mjit_exec_not_ready) RB_DEBUG_COUNTER(mjit_exec_not_compiled) diff --git a/defs/gmake.mk b/defs/gmake.mk index 4019eb3854b2aa..944b9b41ed9912 100644 --- a/defs/gmake.mk +++ b/defs/gmake.mk @@ -137,7 +137,7 @@ config.status: $(wildcard config.cache) STUBPROGRAM = rubystub$(EXEEXT) IGNOREDPATTERNS = %~ .% %.orig %.rej \#%\# SCRIPTBINDIR := $(if $(EXEEXT),,exec/) -SCRIPTPROGRAMS = $(addprefix $(SCRIPTBINDIR),$(addsuffix $(EXEEXT),$(filter-out $(IGNOREDPATTERNS),$(notdir $(wildcard $(srcdir)/libexec/*))))) +SCRIPTPROGRAMS = $(addprefix $(SCRIPTBINDIR),$(addsuffix $(EXEEXT),$(filter-out $(IGNOREDPATTERNS),$(notdir $(wildcard $(srcdir)/bin/*))))) stub: $(STUBPROGRAM) scriptbin: $(SCRIPTPROGRAMS) @@ -163,9 +163,8 @@ $(SCRIPTBINDIR)%$(EXEEXT): bin/% $(STUBPROGRAM) \ $(Q) chmod +x $@ $(Q) $(POSTLINK) -$(TIMESTAMPDIR)/.exec.time: - $(Q) mkdir exec - $(Q) exit > $@ +$(SCRIPTBINDIR): + $(Q) mkdir $@ .PHONY: commit commit: $(if $(filter commit,$(MAKECMDGOALS)),$(filter-out commit,$(MAKECMDGOALS))) up @@ -183,8 +182,8 @@ commit: $(if $(filter commit,$(MAKECMDGOALS)),$(filter-out commit,$(MAKECMDGOALS GITHUB_RUBY_URL = https://github.com/ruby/ruby PR = -COMMIT_GPG_SIGN = $(shell git -C "$(srcdir)" config commit.gpgsign) -REMOTE_GITHUB_URL = $(shell git -C "$(srcdir)" config remote.github.url) +COMMIT_GPG_SIGN = $(shell $(GIT) -C "$(srcdir)" config commit.gpgsign) +REMOTE_GITHUB_URL = $(shell $(GIT) -C "$(srcdir)" config remote.github.url) COMMITS_NOTES = commits .PHONY: fetch-github @@ -197,21 +196,21 @@ define fetch-github exit 1; \ ) $(eval 
REMOTE_GITHUB_URL := $(REMOTE_GITHUB_URL)) - $(if $(REMOTE_GITHUB_URL),, \ - echo adding $(GITHUB_RUBY_URL) as remote github; \ - git -C "$(srcdir)" remote add github $(GITHUB_RUBY_URL); \ - git -C "$(srcdir)" config --add remote.github.fetch +refs/notes/$(COMMITS_NOTES):refs/notes/$(COMMITS_NOTES) - $(eval REMOTE_GITHUB_URL := $(GITHUB_RUBY_URL)) \ + $(if $(REMOTE_GITHUB_URL),, + echo adding $(GITHUB_RUBY_URL) as remote github + $(GIT) -C "$(srcdir)" remote add github $(GITHUB_RUBY_URL) + $(GIT) -C "$(srcdir)" config --add remote.github.fetch +refs/notes/$(COMMITS_NOTES):refs/notes/$(COMMITS_NOTES) + $(eval REMOTE_GITHUB_URL := $(GITHUB_RUBY_URL)) ) - $(if $(git -C "$(srcdir)" rev-parse "github/pull/$(1)/head" -- 2> /dev/null), \ - git -C "$(srcdir)" branch -f "gh-$(1)" "github/pull/$(1)/head", \ - git -C "$(srcdir)" fetch -f github "pull/$(1)/head:gh-$(1)" \ + $(if $(shell $(GIT) -C "$(srcdir)" rev-parse "github/pull/$(1)/head" -- 2> /dev/null), + $(GIT) -C "$(srcdir)" branch -f "gh-$(1)" "github/pull/$(1)/head", + $(GIT) -C "$(srcdir)" fetch -f github "pull/$(1)/head:gh-$(1)" ) endef .PHONY: checkout-github checkout-github: fetch-github - git -C "$(srcdir)" checkout "gh-$(PR)" + $(GIT) -C "$(srcdir)" checkout "gh-$(PR)" .PHONY: update-github update-github: fetch-github @@ -224,31 +223,31 @@ update-github: fetch-github $(eval PR_BRANCH := $(word 2,$(PULL_REQUEST_FORK_BRANCH))) $(eval GITHUB_UPDATE_WORKTREE := $(shell mktemp -d "$(srcdir)/gh-$(PR)-XXXXXX")) - git -C "$(srcdir)" worktree add $(notdir $(GITHUB_UPDATE_WORKTREE)) "gh-$(PR)" - git -C "$(GITHUB_UPDATE_WORKTREE)" merge master --no-edit + $(GIT) -C "$(srcdir)" worktree add $(notdir $(GITHUB_UPDATE_WORKTREE)) "gh-$(PR)" + $(GIT) -C "$(GITHUB_UPDATE_WORKTREE)" merge master --no-edit @$(BASERUBY) -e 'print "Are you sure to push this to PR=$(PR)? 
[Y/n]: "; exit(gets.chomp != "n")' - git -C "$(srcdir)" remote add fork-$(PR) git@github.com:$(FORK_REPO).git - git -C "$(GITHUB_UPDATE_WORKTREE)" push fork-$(PR) gh-$(PR):$(PR_BRANCH) - git -C "$(srcdir)" remote rm fork-$(PR) - git -C "$(srcdir)" worktree remove $(notdir $(GITHUB_UPDATE_WORKTREE)) - git -C "$(srcdir)" branch -D gh-$(PR) + $(GIT) -C "$(srcdir)" remote add fork-$(PR) git@github.com:$(FORK_REPO).git + $(GIT) -C "$(GITHUB_UPDATE_WORKTREE)" push fork-$(PR) gh-$(PR):$(PR_BRANCH) + $(GIT) -C "$(srcdir)" remote rm fork-$(PR) + $(GIT) -C "$(srcdir)" worktree remove $(notdir $(GITHUB_UPDATE_WORKTREE)) + $(GIT) -C "$(srcdir)" branch -D gh-$(PR) .PHONY: pull-github pull-github: fetch-github $(call pull-github,$(PR)) define pull-github - $(eval GITHUB_MERGE_BASE := $(shell git -C "$(srcdir)" log -1 --format=format:%H)) - $(eval GITHUB_MERGE_BRANCH := $(shell git -C "$(srcdir)" symbolic-ref --short HEAD)) + $(eval GITHUB_MERGE_BASE := $(shell $(GIT) -C "$(srcdir)" log -1 --format=format:%H)) + $(eval GITHUB_MERGE_BRANCH := $(shell $(GIT) -C "$(srcdir)" symbolic-ref --short HEAD)) $(eval GITHUB_MERGE_WORKTREE := $(shell mktemp -d "$(srcdir)/gh-$(1)-XXXXXX")) - git -C "$(srcdir)" worktree prune - git -C "$(srcdir)" worktree add $(notdir $(GITHUB_MERGE_WORKTREE)) "gh-$(1)" - git -C "$(GITHUB_MERGE_WORKTREE)" rebase $(GITHUB_MERGE_BRANCH) + $(GIT) -C "$(srcdir)" worktree prune + $(GIT) -C "$(srcdir)" worktree add $(notdir $(GITHUB_MERGE_WORKTREE)) "gh-$(1)" + $(GIT) -C "$(GITHUB_MERGE_WORKTREE)" rebase $(GITHUB_MERGE_BRANCH) $(eval COMMIT_GPG_SIGN := $(COMMIT_GPG_SIGN)) $(if $(filter true,$(COMMIT_GPG_SIGN)), \ - git -C "$(GITHUB_MERGE_WORKTREE)" rebase --exec "git commit --amend --no-edit -S" "$(GITHUB_MERGE_BASE)"; \ + $(GIT) -C "$(GITHUB_MERGE_WORKTREE)" rebase --exec "$(GIT) commit --amend --no-edit -S" "$(GITHUB_MERGE_BASE)"; \ ) - git -C "$(GITHUB_MERGE_WORKTREE)" rebase --exec "git notes add --message 'Merged: $(GITHUB_RUBY_URL)/pull/$(1)'" 
"$(GITHUB_MERGE_BASE)" + $(GIT) -C "$(GITHUB_MERGE_WORKTREE)" rebase --exec "$(GIT) notes add --message 'Merged: $(GITHUB_RUBY_URL)/pull/$(1)'" "$(GITHUB_MERGE_BASE)" endef .PHONY: fetch-github-% @@ -257,23 +256,38 @@ fetch-github-%: .PHONY: checkout-github-% checkout-github-%: fetch-github-% - git -C "$(srcdir)" checkout "gh-$*" + $(GIT) -C "$(srcdir)" checkout "gh-$*" .PHONY: pr-% pull-github-% pr-% pull-github-%: fetch-github-% $(call pull-github,$*) HELP_EXTRA_TASKS = \ - " checkout-github: checkout GitHub Pull Request [PR=1234]" \ - " pull-github: rebase GitHub Pull Request to new worktree [PR=1234]" \ - " update-github: merge master branch and push it to Pull Request [PR=1234]" \ + " checkout-github: checkout GitHub Pull Request [PR=1234]" \ + " pull-github: rebase GitHub Pull Request to new worktree [PR=1234]" \ + " update-github: merge master branch and push it to Pull Request [PR=1234]" \ "" extract-gems: $(HAVE_BASERUBY:yes=update-gems) -bundled-gems := $(shell sed '/^[ ]*\#/d;/^[ ]*$$/d;s/[ ][ ]*/-/;s/[ ].*//' $(srcdir)/gems/bundled_gems) - -update-gems: | $(patsubst %,gems/%.gem,$(bundled-gems)) +# 1. squeeze spaces +# 2. strip and skip comment/empty lines +# 3. "gem x.y.z URL xxxxxx" -> "gem|x.y.z|xxxxxx|URL" +# 4. 
"gem x.y.z URL" -> "gem-x.y.z" +bundled-gems := $(shell sed \ + -e 's/[ ][ ]*/ /g' \ + -e 's/^ //;/\#/d;s/ *$$//;/^$$/d' \ + $(if $(filter yes,$(HAVE_GIT)), \ + -e 's/^\(.*\) \(.*\) \(.*\) \(.*\)/\1|\2|\4|\3/' \ + ) \ + -e 's/ /-/;s/ .*//' \ + $(srcdir)/gems/bundled_gems) + +bundled-gems-rev := $(filter-out $(subst |,,$(bundled-gems)),$(bundled-gems)) +bundled-gems := $(filter-out $(bundled-gems-rev),$(bundled-gems)) + +update-gems: | $(patsubst %,$(srcdir)/gems/%.gem,$(bundled-gems)) +update-gems: | $(foreach g,$(bundled-gems-rev),$(srcdir)/gems/src/$(word 1,$(subst |, ,$(value g)))) test-bundler-precheck: | $(srcdir)/.bundle/cache @@ -281,7 +295,7 @@ $(srcdir)/.bundle/cache: $(MAKEDIRS) $(@D) $(CACHE_DIR) $(LN_S) ../.downloaded-cache $@ -gems/%.gem: +$(srcdir)/gems/%.gem: $(ECHO) Downloading bundled gem $*... $(Q) $(BASERUBY) -C "$(srcdir)" \ -I./tool -rdownloader \ @@ -292,13 +306,40 @@ gems/%.gem: -e 'File.unlink(*old) and' \ -e 'FileUtils.rm_rf(old.map{'"|n|"'n.chomp(".gem")})' -extract-gems: | $(patsubst %,.bundle/gems/%,$(bundled-gems)) +extract-gems: | $(patsubst %,$(srcdir)/.bundle/gems/%,$(bundled-gems)) +extract-gems: | $(foreach g,$(bundled-gems-rev), \ + $(srcdir)/.bundle/gems/$(word 1,$(subst |, ,$(value g)))-$(word 2,$(subst |, ,$(value g)))) -.bundle/gems/%: gems/%.gem | .bundle/gems +$(srcdir)/.bundle/gems/%: $(srcdir)/gems/%.gem | .bundle/gems $(ECHO) Extracting bundle gem $*... 
$(Q) $(BASERUBY) -C "$(srcdir)" \ - -Itool -rgem-unpack \ - -e 'Gem.unpack("gems/$(@F).gem", ".bundle")' + -Itool/lib -rbundled_gem \ + -e 'BundledGem.unpack("gems/$(@F).gem", ".bundle")' + +define copy-gem +$(srcdir)/gems/src/$(1): | $(srcdir)/gems/src + $(ECHO) Cloning $(4) + $(Q) $(GIT) clone $(4) $$(@) + +$(srcdir)/.bundle/gems/$(1)-$(2): | $(srcdir)/gems/src/$(1) .bundle/gems + $(ECHO) Copying $(1)@$(3) to $$(@F) + $(Q) $(CHDIR) "$(srcdir)/gems/src/$(1)" && \ + $(GIT) fetch origin $(3) && \ + $(GIT) checkout --detach $(3) && \ + : + $(Q) $(BASERUBY) -C "$(srcdir)" \ + -Itool/lib -rbundled_gem \ + -e 'BundledGem.copy("gems/src/$(1)/$(1).gemspec", ".bundle")' + +endef +define copy-gem-0 +$(call copy-gem,$(word 1,$(1)),$(word 2,$(1)),$(word 3,$(1)),$(word 4,$(1))) +endef + +$(foreach g,$(bundled-gems-rev),$(eval $(call copy-gem-0,$(subst |, ,$(value g))))) + +$(srcdir)/gems/src: + $(MAKEDIRS) $@ $(srcdir)/.bundle/gems: $(MAKEDIRS) $@ @@ -351,7 +392,7 @@ REVISION_IN_HEADER := none REVISION_LATEST := update else REVISION_IN_HEADER := $(shell sed -n 's/^\#define RUBY_FULL_REVISION "\(.*\)"/\1/p' $(srcdir)/revision.h 2>/dev/null) -REVISION_LATEST := $(shell $(CHDIR) $(srcdir) && git log -1 --format=%H 2>/dev/null) +REVISION_LATEST := $(shell $(CHDIR) $(srcdir) && $(GIT) log -1 --format=%H 2>/dev/null) endif ifneq ($(REVISION_IN_HEADER),$(REVISION_LATEST)) # GNU make treat the target as unmodified when its dependents get @@ -403,19 +444,19 @@ endif update-deps: $(eval update_deps := $(shell date +update-deps-%Y%m%d)) $(eval deps_dir := $(shell mktemp -d)/$(update_deps)) - $(eval GIT_DIR := $(shell git -C $(srcdir) rev-parse --absolute-git-dir)) - git --git-dir=$(GIT_DIR) worktree add $(deps_dir) + $(eval GIT_DIR := $(shell $(GIT) -C $(srcdir) rev-parse --absolute-git-dir)) + $(GIT) --git-dir=$(GIT_DIR) worktree add $(deps_dir) cp $(tooldir)/config.guess $(tooldir)/config.sub $(deps_dir)/tool [ -f config.status ] && cp config.status $(deps_dir) cd $(deps_dir) && 
autoconf && \ exec ./configure -q -C --enable-load-relative --disable-install-doc --disable-rubygems 'optflags=-O0' 'debugflags=-save-temps=obj -g' $(RUNRUBY) -C $(deps_dir) tool/update-deps --fix - git -C $(deps_dir) diff --no-ext-diff --ignore-submodules --exit-code || \ - git -C $(deps_dir) commit --all --message='Update dependencies' - git --git-dir=$(GIT_DIR) worktree remove $(deps_dir) + $(GIT) -C $(deps_dir) diff --no-ext-diff --ignore-submodules --exit-code || \ + $(GIT) -C $(deps_dir) commit --all --message='Update dependencies' + $(GIT) --git-dir=$(GIT_DIR) worktree remove $(deps_dir) $(RMDIR) $(dir $(deps_dir)) - git --git-dir=$(GIT_DIR) merge --no-edit --ff-only $(update_deps) - git --git-dir=$(GIT_DIR) branch --delete $(update_deps) + $(GIT) --git-dir=$(GIT_DIR) merge --no-edit --ff-only $(update_deps) + $(GIT) --git-dir=$(GIT_DIR) branch --delete $(update_deps) # order-only-prerequisites doesn't work for $(RUBYSPEC_CAPIEXT) # because the same named directory exists in the source tree. diff --git a/doc/contributing/documentation_guide.md b/doc/contributing/documentation_guide.md index f01184180903ce..df677477103ece 100644 --- a/doc/contributing/documentation_guide.md +++ b/doc/contributing/documentation_guide.md @@ -376,12 +376,22 @@ Mention aliases in the form In some cases, it is useful to document which methods are related to the current method. For example, documentation for `Hash#[]` might mention `Hash#fetch` as a related method, and `Hash#merge` might mention -`Hash#merge!` as a related method. Consider which methods may be related -to the current method, and if you think the reader would benefit it, -at the end of the method documentation, add a line starting with -"Related: " (e.g. "Related: #fetch"). Don't list more than three -related methods. If you think more than three methods are related, -pick the three you think are most important and list those three. +`Hash#merge!` as a related method. 
+ +- Consider which methods may be related + to the current method, and if you think the reader would benefit from it, + at the end of the method documentation, add a line starting with + "Related: " (e.g. "Related: #fetch."). +- Don't list more than three related methods. + If you think more than three methods are related, + list the three you think are most important. +- Consider adding: + + - A phrase suggesting how the related method is similar to, + or different from, the current method. + See an example at Time#getutc. + - Example code that illustrates the similarities and differences. + See examples at Time#ctime, Time#inspect, Time#to_s. ### Methods Accepting Multiple Argument Types diff --git a/doc/date/calendars.rdoc b/doc/date/calendars.rdoc new file mode 100644 index 00000000000000..b8690841b1d45c --- /dev/null +++ b/doc/date/calendars.rdoc @@ -0,0 +1,62 @@ +== Julian and Gregorian Calendars + +The difference between the +{Julian calendar}[https://en.wikipedia.org/wiki/Julian_calendar] +and the +{Gregorian calendar}[https://en.wikipedia.org/wiki/Gregorian_calendar] +may matter to your program if it uses dates before the switchovers. + +- October 15, 1582. +- September 14, 1752. + +A date will be different in the two calendars, in general. + +=== Different switchover dates + +The reasons for the difference are religious/political histories. + +- On October 15, 1582, several countries changed + from the Julian calendar to the Gregorian calendar; + these included Italy, Poland, Portugal, and Spain. + Other countries in the Western world retained the Julian calendar. +- On September 14, 1752, most of the British empire + changed from the Julian calendar to the Gregorian calendar. + +When your code uses a date before these switchover dates, +it will matter whether it considers the switchover date +to be the earlier date or the later date (or neither). 
+ +See also {a concrete example here}[rdoc-ref:DateTime@When+should+you+use+DateTime+and+when+should+you+use+Time-3F]. + +=== Argument +start+ + +Certain methods in class \Date handle differences in the +{Julian and Gregorian calendars}[rdoc-ref:calendars.rdoc@Julian+and+Gregorian+Calendars] +by accepting an optional argument +start+, whose value may be: + +- Date::ITALY (the default): the created date is Julian + if before October 15, 1582, Gregorian otherwise: + + d = Date.new(1582, 10, 15) + d.prev_day.julian? # => true + d.julian? # => false + d.gregorian? # => true + +- Date::ENGLAND: the created date is Julian if before September 14, 1752, + Gregorian otherwise: + + d = Date.new(1752, 9, 14, Date::ENGLAND) + d.prev_day.julian? # => true + d.julian? # => false + d.gregorian? # => true + +- Date::JULIAN: the created date is Julian regardless of its value: + + d = Date.new(1582, 10, 15, Date::JULIAN) + d.julian? # => true + +- Date::GREGORIAN: the created date is Gregorian regardless of its value: + + d = Date.new(1752, 9, 14, Date::GREGORIAN) + d.prev_day.gregorian? # => true + diff --git a/doc/mjit/mjit.md b/doc/mjit/mjit.md new file mode 100644 index 00000000000000..4d345a95ae61f1 --- /dev/null +++ b/doc/mjit/mjit.md @@ -0,0 +1,16 @@ +# MJIT + +Here are some tips that might be useful when you work on MJIT: + +## Always run make install + +Always run `make install` before running MJIT. It could easily cause a SEGV if you don't. +MJIT looks for the installed header for security reasons. + +## --mjit-debug vs --mjit-debug=-ggdb3 + +`--mjit-debug=[flags]` allows you to specify arbitrary flags while keeping other compiler flags like `-O3`, +which is useful for profiling benchmarks. + +`--mjit-debug` alone, on the other hand, disables `-O3` and adds debug flags. +If you're debugging MJIT, what you need to use is not `--mjit-debug=-ggdb3` but `--mjit-debug`. 
diff --git a/doc/rdoc/markup_reference.rb b/doc/rdoc/markup_reference.rb index 49ad996c2d22e0..66ec6786c07aa4 100644 --- a/doc/rdoc/markup_reference.rb +++ b/doc/rdoc/markup_reference.rb @@ -29,16 +29,37 @@ # see :nodoc:, :doc:, and :notnew. # - \RDoc directives in single-line comments; # see other {Directives}[rdoc-ref:RDoc::MarkupReference@Directives]. -# - The Ruby code itself; -# see {Documentation Derived from Ruby Code}[rdoc-ref:RDoc::MarkupReference@Documentation+Derived+from+Ruby+Code] +# - The Ruby code itself (but not from C code); +# see {Documentation Derived from Ruby Code}[rdoc-ref:RDoc::MarkupReference@Documentation+Derived+from+Ruby+Code]. # # == Markup in Comments # -# A single-line or multi-line comment that immediately precedes -# the definition of a class, module, method, alias, constant, or attribute -# becomes the documentation for that defined object. +# The treatment of markup in comments varies according to the type of file: # -# (\RDoc ignores other such comments that do not precede definitions.) +# - .rb (Ruby code file): markup is parsed from Ruby comments. +# - .c (C code file): markup is parsed from C comments. +# - .rdoc (RDoc text file): markup is parsed from the entire file. +# +# The comment associated with +# a Ruby class, module, method, alias, constant, or attribute +# becomes the documentation for that defined object: +# +# - In a Ruby file, that comment immediately precedes +# the definition of the object. +# - In a C file, that comment immediately precedes +# the function that implements a method, +# or otherwise immediately precedes the definition of the object. +# +# In either a Ruby or a C file, +# \RDoc ignores comments that do not precede object definitions. +# +# In an \RDoc file, the text is not associated with any code object, +# but may (depending on how the documentation is built), +# become a separate page. 
+# +# Almost all examples on this page are RDoc-like; +# that is, they have no comment markers like Ruby # +# or C /* ... */. # # === Margins # @@ -51,23 +72,37 @@ # # === Blocks # -# It's convenient to think of markup input as a sequence of _blocks_, -# such as: -# -# - {Paragraphs}[rdoc-ref:RDoc::MarkupReference@Paragraphs]. -# - {Verbatim text blocks}[rdoc-ref:RDoc::MarkupReference@Verbatim+Text+Blocks]. -# - {Code blocks}[rdoc-ref:RDoc::MarkupReference@Code+Blocks]. -# - {Block quotes}[rdoc-ref:RDoc::MarkupReference@Block+Quotes]. -# - {Bullet lists}[rdoc-ref:RDoc::MarkupReference@Bullet+Lists]. -# - {Numbered lists}[rdoc-ref:RDoc::MarkupReference@Numbered+Lists]. -# - {Lettered lists}[rdoc-ref:RDoc::MarkupReference@Lettered+Lists]. -# - {Labeled lists}[rdoc-ref:RDoc::MarkupReference@Labeled+Lists]. -# - {Headings}[rdoc-ref:RDoc::MarkupReference@Headings]. -# - {Horizontal rules}[rdoc-ref:RDoc::MarkupReference@Horizontal+Rules]. -# - {Directives}[rdoc-ref:RDoc::MarkupReference@Directives]. -# -# All of these except paragraph blocks are distinguished by indentation, -# or by unusual initial or embedded characters. +# It's convenient to think of \RDoc markup input as a sequence of _blocks_ +# of various types (details at the links): +# +# - {Paragraph}[rdoc-ref:RDoc::MarkupReference@Paragraphs]: +# an ordinary paragraph. +# - {Verbatim text block}[rdoc-ref:RDoc::MarkupReference@Verbatim+Text+Blocks]: +# a block of text to be rendered literally. +# - {Code block}[rdoc-ref:RDoc::MarkupReference@Code+Blocks]: +# a verbatim text block containing Ruby code, +# to be rendered with code highlighting. +# - {Block quote}[rdoc-ref:RDoc::MarkupReference@Block+Quotes]: +# a longish quoted passage, to be rendered with indentation +# instead of quote marks. +# - {List}[rdoc-ref:RDoc::MarkupReference@Lists]: items for +# a bullet list, numbered list, lettered list, or labeled list. +# - {Heading}[rdoc-ref:RDoc::MarkupReference@Headings]: +# a section heading. 
+# - {Horizontal rule}[rdoc-ref:RDoc::MarkupReference@Horizontal+Rules]: +# a line across the rendered page. +# - {Directive}[rdoc-ref:RDoc::MarkupReference@Directives]: +# various special directions for the rendering. +# - {Text Markup}[rdoc-ref:RDoc::MarkupReference@Text+Markup]: +# text to be rendered in a special way. +# +# About the blocks: +# +# - Except for a paragraph, a block is distinguished by its indentation, +# or by unusual initial or embedded characters. +# - Any block may appear independently +# (that is, not nested in another block); +# some blocks may be nested, as detailed below. # # ==== Paragraphs # @@ -82,11 +117,11 @@ # # Example input: # -# # \RDoc produces HTML and command-line documentation for Ruby projects. -# # \RDoc includes the rdoc and ri tools for generating and displaying -# # documentation from the command-line. -# # -# # You'll love it. +# \RDoc produces HTML and command-line documentation for Ruby projects. +# \RDoc includes the rdoc and ri tools for generating and displaying +# documentation from the command-line. +# +# You'll love it. # # Rendered HTML: # >>> @@ -98,12 +133,13 @@ # # A paragraph may contain nested blocks, including: # -# - Verbatim text blocks. -# - Code blocks. -# - Block quotes. -# - Lists of any type. -# - Headings. -# - Horizontal rules. +# - {Verbatim text blocks}[rdoc-ref:RDoc::MarkupReference@Verbatim+Text+Blocks]. +# - {Code blocks}[rdoc-ref:RDoc::MarkupReference@Code+Blocks]. +# - {Block quotes}[rdoc-ref:RDoc::MarkupReference@Block+Quotes]. +# - {Lists}[rdoc-ref:RDoc::MarkupReference@Lists]. +# - {Headings}[rdoc-ref:RDoc::MarkupReference@Headings]. +# - {Horizontal rules}[rdoc-ref:RDoc::MarkupReference@Horizontal+Rules]. +# - {Text Markup}[rdoc-ref:RDoc::MarkupReference@Text+Markup]. # # ==== Verbatim Text Blocks # @@ -118,15 +154,15 @@ # # Example input: # -# # This is not verbatim text. -# # -# # This is verbatim text. -# # Whitespace is honored. # See? -# # Whitespace is honored. # See? 
-# # -# # This is still the same verbatim text block. -# # -# # This is not verbatim text. +# This is not verbatim text. +# +# This is verbatim text. +# Whitespace is honored. # See? +# Whitespace is honored. # See? +# +# This is still the same verbatim text block. +# +# This is not verbatim text. # # Rendered HTML: # >>> @@ -140,6 +176,9 @@ # # This is not verbatim text. # +# A verbatim text block may not contain nested blocks of any kind +# -- it's verbatim. +# # ==== Code Blocks # # A special case of verbatim text is the code block, @@ -173,6 +212,9 @@ # Pro tip: If your indented Ruby code does not get highlighted, # it may contain a syntax error. # +# A code block may not contain nested blocks of any kind +# -- it's verbatim. +# # ==== Block Quotes # # You can use the characters >>> (unindented), @@ -181,6 +223,7 @@ # # Example input: # +# Here's a block quote: # >>> # Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer # commodo quam iaculis massa posuere, dictum fringilla justo pulvinar. @@ -194,27 +237,30 @@ # Rendered HTML: # # >>> -# Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer -# commodo quam iaculis massa posuere, dictum fringilla justo pulvinar. -# Quisque turpis erat, pharetra eu dui at, sollicitudin accumsan nulla. +# Here's a block quote: +# >>> +# Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer +# commodo quam iaculis massa posuere, dictum fringilla justo pulvinar. +# Quisque turpis erat, pharetra eu dui at, sollicitudin accumsan nulla. +# +# Aenean congue ligula eu ligula molestie, eu pellentesque purus +# faucibus. In id leo non ligula condimentum lobortis. Duis vestibulum, +# diam in pellentesque aliquet, mi tellus placerat sapien, id euismod +# purus magna ut tortor. # -# Aenean congue ligula eu ligula molestie, eu pellentesque purus -# faucibus. In id leo non ligula condimentum lobortis. 
Duis vestibulum, -# diam in pellentesque aliquet, mi tellus placerat sapien, id euismod -# purus magna ut tortor. +# Note that, unlike verbatim text, single newlines are not honored, +# but that a double newline begins a new paragraph in the block quote. # # A block quote may contain nested blocks, including: # # - Other block quotes. -# - Paragraphs. -# - Verbatim text blocks. -# - Code blocks. -# - Lists of any type. -# - Headings. -# - Horizontal rules. -# -# Note that, unlike verbatim text, single newlines are not honored, -# but that a double newline begins a new paragraph in the block quote. +# - {Paragraphs}[rdoc-ref:RDoc::MarkupReference@Paragraphs]. +# - {Verbatim text blocks}[rdoc-ref:RDoc::MarkupReference@Verbatim+Text+Blocks]. +# - {Code blocks}[rdoc-ref:RDoc::MarkupReference@Code+Blocks]. +# - {Lists}[rdoc-ref:RDoc::MarkupReference@Lists]. +# - {Headings}[rdoc-ref:RDoc::MarkupReference@Headings]. +# - {Horizontal rules}[rdoc-ref:RDoc::MarkupReference@Horizontal+Rules]. +# - {Text Markup}[rdoc-ref:RDoc::MarkupReference@Text+Markup]. # # ==== Lists # @@ -240,12 +286,13 @@ # A list item may contain nested blocks, including: # # - Other lists of any type. -# - Paragraphs. -# - Verbatim text blocks. -# - Code blocks. -# - Block quotes. -# - Headings. -# - Horizontal rules. +# - {Paragraphs}[rdoc-ref:RDoc::MarkupReference@Paragraphs]. +# - {Verbatim text blocks}[rdoc-ref:RDoc::MarkupReference@Verbatim+Text+Blocks]. +# - {Code blocks}[rdoc-ref:RDoc::MarkupReference@Code+Blocks]. +# - {Block quotes}[rdoc-ref:RDoc::MarkupReference@Block+Quotes]. +# - {Headings}[rdoc-ref:RDoc::MarkupReference@Headings]. +# - {Horizontal rules}[rdoc-ref:RDoc::MarkupReference@Horizontal+Rules]. +# - {Text Markup}[rdoc-ref:RDoc::MarkupReference@Text+Markup]. # # ===== Bullet Lists # @@ -253,13 +300,13 @@ # # Example input: # -# # - An item. -# # - Another. -# # - An item spanning -# # multiple lines. -# # -# # * Yet another. -# # - Last one. +# - An item. +# - Another. 
+# - An item spanning +# multiple lines. +# +# * Yet another. +# - Last one. # # Rendered HTML: # >>> @@ -279,13 +326,13 @@ # # Example input: # -# # 100. An item. -# # 10. Another. -# # 1. An item spanning -# # multiple lines. -# # -# # 1. Yet another. -# # 1000. Last one. +# 100. An item. +# 10. Another. +# 1. An item spanning +# multiple lines. +# +# 1. Yet another. +# 1000. Last one. # # Rendered HTML: # >>> @@ -305,13 +352,13 @@ # # Example input: # -# # z. An item. -# # y. Another. -# # x. An item spanning -# # multiple lines. -# # -# # x. Yet another. -# # a. Last one. +# z. An item. +# y. Another. +# x. An item spanning +# multiple lines. +# +# x. Yet another. +# a. Last one. # # Rendered HTML: # >>> @@ -330,13 +377,13 @@ # # Example input: # -# # [foo] An item. -# # bat:: Another. -# # [bag] An item spanning -# # multiple lines. -# # -# # [bar baz] Yet another. -# # bam:: Last one. +# [foo] An item. +# bat:: Another. +# [bag] An item spanning +# multiple lines. +# +# [bar baz] Yet another. +# bam:: Last one. # # Rendered HTML: # >>> @@ -355,45 +402,44 @@ # # Examples: # -# # = Section 1 -# # == Section 1.1 -# # === Section 1.1.1 -# # === Section 1.1.2 -# # == Section 1.2 -# # = Section 2 -# # = Foo -# # == Bar -# # === Baz -# # ==== Bam -# # ===== Bat -# # ====== Bad -# # ============Still a Heading (Level 6) -# # \== Not a Heading +# = Section 1 +# == Section 1.1 +# === Section 1.1.1 +# === Section 1.1.2 +# == Section 1.2 +# = Section 2 +# = Foo +# == Bar +# === Baz +# ==== Bam +# ===== Bat +# ====== Bad +# ============Still a Heading (Level 6) +# \== Not a Heading +# +# A heading may contain only one type of nested block: +# +# - {Text Markup}[rdoc-ref:RDoc::MarkupReference@Text+Markup]. # # ==== Horizontal Rules # -# A horizontal rule begins with three or more hyphens. +# A horizontal rule consists of a line with three or more hyphens +# and nothing more. # # Example input: # -# # ------ -# # Stuff between. -# # -# # \--- Not a horizontal rule. 
-# # -# # -- Also not a horizontal rule. -# # -# # --- +# --- +# --- Not a horizontal rule. +# +# -- Also not a horizontal rule. +# --- # # Rendered HTML: # >>> -# ------ -# Stuff between. -# -# \--- Not a horizontal rule. +# --- +# --- Not a horizontal rule. # # -- Also not a horizontal rule. -# # --- # # ==== Directives @@ -584,83 +630,195 @@ # # For C code, the directive may appear in a stand-alone comment # -# === Text Markup +# ==== Text Markup # -# Text in a paragraph, list item (any type), or heading -# may have markup formatting. +# Text markup is metatext that affects HTML rendering: # -# ==== Italic +# - Typeface: italic, bold, monofont. +# - Character conversions: copyright, trademark, certain punctuation. +# - Links. +# - Escapes: marking text as "not markup." # -# A single word may be italicized by prefixed and suffixed underscores. +# ===== Typeface Markup # -# Examples: +# Typeface markup can specify that text is to be rendered +# as italic, bold, or monofont. # -# # _Word_ in paragraph. -# # - _Word_ in bullet list item. -# # 1. _Word_ in numbered list item. -# # a. _Word_ in lettered list item. -# # [_word_] _Word_ in labeled list item. -# # ====== _Word_ in heading +# Typeface markup may contain only one type of nested block: # -# Any text may be italicized via HTML tag +i+ or +em+. +# - More typeface markup: +# italic, bold, monofont. # -# Examples: +# ====== Italic # -# # Two words in paragraph. -# # - Two words in bullet list item. -# # 1. Two words in numbered list item. -# # a. Two words in lettered list item. -# # [Two words] Two words in labeled list item. -# # ====== Two words in heading +# Text may be marked as italic via HTML tag or . # -# ==== Bold +# Example input: # -# A single word may be made bold by prefixed and suffixed asterisks. +# Italicized words in a paragraph. # -# Examples: +# >>> +# Italicized words in a block quote. # -# # *Word* in paragraph. -# # - *Word* in bullet list item. -# # 1. *Word* in numbered list item. 
-# # a. *Word* in lettered list item. -# # [*word*] *Word* in labeled list item. -# # ====== *Word* in heading +# - Italicized words in a list item. # -# Any text may be made bold via HTML tag +b+. +# ====== Italicized words in a Heading # -# Examples: +# Italicized passage containing *bold* and +monofont+. # -# # Two words in paragraph. -# # - Two words in bullet list item. -# # 1. Two words in numbered list item. -# # a. Two words in lettered list item. -# # [Two words] Two words in labeled list item. -# # ====== Two words in heading +# Rendered HTML: +# >>> +# Italicized words in a paragraph. # -# ==== Monofont +# >>> +# Italicized words in a block quote. # -# A single word may be made monofont -- sometimes called "typewriter font" -- -# by prefixed and suffixed plus-signs. +# - Italicized words in a list item. # -# Examples: +# ====== Italicized words in a Heading # -# # +Word+ in paragraph. -# # - +Word+ in bullet list item. -# # 1. +Word+ in numbered list item. -# # a. +Word+ in lettered list item. -# # [+word+] +Word+ in labeled list item. -# # ====== +Word+ in heading +# Italicized passage containing *bold* and +monofont+. # -# Any text may be made monofont via HTML tag +tt+ or +code+. +# A single word may be italicized via a shorthand: +# prefixed and suffixed underscores. # -# Examples: +# Example input: +# +# _Italic_ in a paragraph. +# +# >>> +# _Italic_ in a block quote. +# +# - _Italic_ in a list item. +# +# ====== _Italic_ in a Heading +# +# Rendered HTML: +# >>> +# _Italic_ in a paragraph. # -# # Two words in paragraph. -# # - Two words in bullet list item. -# # 1. Two words in numbered list item. -# # a. Two words in lettered list item. -# # [Two words] Two words in labeled list item. -# # ====== Two words in heading +# >>> +# _Italic_ in a block quote. +# +# - _Italic_ in a list item. +# +# ====== _Italic_ in a Heading +# +# ====== Bold +# +# Text may be marked as bold via HTML tag . +# +# Example input: +# +# Bold words in a paragraph. 
+# +# >>> +# Bold words in a block quote. +# +# - Bold words in a list item. +# +# ====== Bold words in a Heading +# +# Bold passage containing _italics_ and +monofont+. +# +# Rendered HTML: +# +# >>> +# Bold words in a paragraph. +# +# >>> +# Bold words in a block quote. +# +# - Bold words in a list item. +# +# ====== Bold words in a Heading +# +# Bold passage containing _italics_ and +monofont+. +# +# A single word may be made bold via a shorthand: +# prefixed and suffixed asterisks. +# +# Example input: +# +# *Bold* in a paragraph. +# +# >>> +# *Bold* in a block quote. +# +# - *Bold* in a list item. +# +# ===== *Bold* in a Heading +# +# Rendered HTML: +# +# >>> +# *Bold* in a paragraph. +# +# >>> +# *Bold* in a block quote. +# +# - *Bold* in a list item. +# +# ===== *Bold* in a Heading +# +# ====== Monofont +# +# Text may be marked as monofont +# -- sometimes called 'typewriter font' -- +# via HTML tag or . +# +# Example input: +# +# Monofont words in a paragraph. +# +# >>> +# Monofont words in a block quote. +# +# - Monofont words in a list item. +# +# ====== Monofont words in heading +# +# Monofont passage containing _italics_ and *bold*. +# +# Rendered HTML: +# +# >>> +# Monofont words in a paragraph. +# +# >>> +# Monofont words in a block quote. +# +# - Monofont words in a list item. +# +# ====== Monofont words in heading +# +# Monofont passage containing _italics_ and *bold*. +# +# A single word may be made monofont by a shorthand: +# prefixed and suffixed plus-signs. +# +# Example input: +# +# +Monofont+ in a paragraph. +# +# >>> +# +Monofont+ in a block quote. +# +# - +Monofont+ in a list item. +# +# ====== +Monofont+ in a Heading +# +# Rendered HTML: +# +# >>> +# +Monofont+ in a paragraph. +# +# >>> +# +Monofont+ in a block quote. +# +# - +Monofont+ in a list item. +# +# ====== +Monofont+ in a Heading # # ==== Character Conversions # @@ -708,7 +866,6 @@ # # - On-page: ::dummy_singleton_method links to ::dummy_singleton_method. 
# - Off-pageRDoc::TokenStream::to_html links to RDoc::TokenStream::to_html. -# to \RDoc::TokenStream::to_html. # # Note: Occasionally \RDoc is not linked to a method whose name # has only special characters. Check whether the links you were expecting @@ -885,6 +1042,93 @@ # # {rdoc-image:https://www.ruby-lang.org/images/header-ruby-logo@2x.png}[./Alias.html] # +# === Escaping Text +# +# Text that would otherwise be interpreted as markup +# can be "escaped," so that it is not interpreted as markup; +# the escape character is the backslash ('\\'). +# +# In a verbatim text block or a code block, +# the escape character is always preserved: +# +# Example input: +# +# This is not verbatim text. +# +# This is verbatim text, with an escape character \. +# +# This is not a code block. +# +# def foo +# 'String with an escape character.' +# end +# +# Rendered HTML: +# +# >>> +# This is not verbatim text. +# +# This is verbatim text, with an escape character \. +# +# This is not a code block. +# +# def foo +# 'This is a code block with an escape character \.' +# end +# +# In typeface markup (italic, bold, or monofont), +# an escape character is preserved unless it is immediately +# followed by nested typeface markup. +# +# Example input: +# +# This list is about escapes; it contains: +# +# - Monofont text with unescaped nested _italic_. +# - Monofont text with escaped nested \_italic_. +# - Monofont text with an escape character \. +# +# Rendered HTML: +# +# >>> +# This list is about escapes; it contains: +# +# - Monofont text with unescaped nested _italic_. +# - Monofont text with escaped nested \_italic_. +# - Monofont text with an escape character \ . +# +# In other text-bearing blocks +# (paragraphs, block quotes, list items, headings): +# +# - A single escape character immediately followed by markup +# escapes the markup. +# - A single escape character followed by whitespace is preserved. +# - A single escape character anywhere else is ignored. 
+# - A double escape character is rendered as a single backslash. +# +# Example input: +# +# This list is about escapes; it contains: +# +# - An unescaped class name, RDoc, that will become a link. +# - An escaped class name, \RDoc, that will not become a link. +# - An escape character followed by whitespace \ . +# - An escape character \that is ignored. +# - A double escape character \\ that is rendered +# as a single backslash. +# +# Rendered HTML: +# +# >>> +# This list is about escapes; it contains: +# +# - An unescaped class name, RDoc, that will become a link. +# - An escaped class name, \RDoc, that will not become a link. +# - An escape character followed by whitespace \ . +# - An escape character \that is ignored. +# - A double escape character \\ that is rendered +# as a single backslash. +# # == Documentation Derived from Ruby Code # # [Class] @@ -977,10 +1221,10 @@ def dummy_instance_method(foo, bar); end; # # Here is the :call-seq: directive given for the method: # - # # :call-seq: - # # call_seq_directive(foo, bar) - # # Can be anything -> bar - # # Also anything more -> baz or bat + # :call-seq: + # call_seq_directive(foo, bar) + # Can be anything -> bar + # Also anything more -> baz or bat # def call_seq_directive nil diff --git a/doc/strftime_formatting.rdoc b/doc/strftime_formatting.rdoc index 6c27fa6a2379ec..30a629bf683083 100644 --- a/doc/strftime_formatting.rdoc +++ b/doc/strftime_formatting.rdoc @@ -294,6 +294,124 @@ longhand specifier. DateTime.now.strftime('%a %b %e %H:%M:%S %Z %Y') # => "Wed Jun 29 08:32:18 -05:00 2022" +=== Flags + +Flags may affect certain formatting specifications. + +Multiple flags may be given with a single conversion specified; +order does not matter. 
+ +==== Padding Flags + +- 0 - Pad with zeroes: + + Time.new(10).strftime('%0Y') # => "0010" + +- _ - Pad with blanks: + + Time.new(10).strftime('%_Y') # => " 10" + +- - - Don't pad: + + Time.new(10).strftime('%-Y') # => "10" + +==== Casing Flags + +- ^ - Upcase result: + + Time.new(2022, 1).strftime('%B') # => "January" # No casing flag. + Time.new(2022, 1).strftime('%^B') # => "JANUARY" + +- # - Swapcase result: + + Time.now.strftime('%p') # => "AM" + Time.now.strftime('%^p') # => "AM" + Time.now.strftime('%#p') # => "am" + +==== Timezone Flags + +- : - Put timezone as colon-separated hours and minutes: + + Time.now.strftime('%:z') # => "-05:00" + +- :: - Put timezone as colon-separated hours, minutes, and seconds: + + Time.now.strftime('%::z') # => "-05:00:00" + +=== Width Specifiers + +The integer width specifier gives a minimum width for the returned string: + + Time.new(2002).strftime('%Y') # => "2002" # No width specifier. + Time.new(2002).strftime('%10Y') # => "0000002002" + Time.new(2002, 12).strftime('%B') # => "December" # No width specifier. + Time.new(2002, 12).strftime('%10B') # => " December" + Time.new(2002, 12).strftime('%3B') # => "December" # Ignored if too small. + +== Specialized Format Strings + +Here are a few specialized format strings, +each based on an external standard. + +=== HTTP Format + +The HTTP date format is based on +{RFC 2616}[https://datatracker.ietf.org/doc/html/rfc2616], +and treats dates in the format '%a, %d %b %Y %T GMT': + + d = Date.new(2001, 2, 3) # => # + # Return HTTP-formatted string. + httpdate = d.httpdate # => "Sat, 03 Feb 2001 00:00:00 GMT" + # Return new date parsed from HTTP-formatted string. + Date.httpdate(httpdate) # => # + # Return hash parsed from HTTP-formatted string. 
+ Date._httpdate(httpdate) + # => {:wday=>6, :mday=>3, :mon=>2, :year=>2001, :hour=>0, :min=>0, :sec=>0, :zone=>"GMT", :offset=>0} + +=== RFC 3339 Format + +The RFC 3339 date format is based on +{RFC 3339}[https://datatracker.ietf.org/doc/html/rfc3339]: + + d = Date.new(2001, 2, 3) # => # + # Return 3339-formatted string. + rfc3339 = d.rfc3339 # => "2001-02-03T00:00:00+00:00" + # Return new date parsed from 3339-formatted string. + Date.rfc3339(rfc3339) # => # + # Return hash parsed from 3339-formatted string. + Date._rfc3339(rfc3339) + # => {:year=>2001, :mon=>2, :mday=>3, :hour=>0, :min=>0, :sec=>0, :zone=>"+00:00", :offset=>0} + +=== RFC 2822 Format + +The RFC 2822 date format is based on +{RFC 2822}[https://datatracker.ietf.org/doc/html/rfc2822], +and treats dates in the format '%a, %-d %b %Y %T %z': + + d = Date.new(2001, 2, 3) # => # + # Return 2822-formatted string. + rfc2822 = d.rfc2822 # => "Sat, 3 Feb 2001 00:00:00 +0000" + # Return new date parsed from 2822-formatted string. + Date.rfc2822(rfc2822) # => # + # Return hash parsed from 2822-formatted string. + Date._rfc2822(rfc2822) + # => {:wday=>6, :mday=>3, :mon=>2, :year=>2001, :hour=>0, :min=>0, :sec=>0, :zone=>"+0000", :offset=>0} + +=== JIS X 0301 Format + +The JIS X 0301 format includes the +{Japanese era name}[https://en.wikipedia.org/wiki/Japanese_era_name], +and treats dates in the format '%Y-%m-%d' +with the first letter of the romanized era name prefixed: + + d = Date.new(2001, 2, 3) # => # + # Return 0301-formatted string. + jisx0301 = d.jisx0301 # => "H13.02.03" + # Return new date parsed from 0301-formatted string. + Date.jisx0301(jisx0301) # => # + # Return hash parsed from 0301-formatted string. + Date._jisx0301(jisx0301) # => {:year=>2001, :mon=>2, :mday=>3} + === ISO 8601 Format Specifications This section shows format specifications that are compatible with @@ -407,57 +525,3 @@ separated by the letter +T+. 
For the relevant +strftime+ formats, see {Dates}[rdoc-ref:strftime_formatting.rdoc@Dates] and {Times}[rdoc-ref:strftime_formatting.rdoc@Times] above. - -=== Flags - -Flags may affect certain formatting specifications. - -Multiple flags may be given with a single conversion specified; -order does not matter. - -==== Padding Flags - -- 0 - Pad with zeroes: - - Time.new(10).strftime('%0Y') # => "0010" - -- _ - Pad with blanks: - - Time.new(10).strftime('%_Y') # => " 10" - -- - - Don't pad: - - Time.new(10).strftime('%-Y') # => "10" - -==== Casing Flags - -- ^ - Upcase result: - - Time.new(2022, 1).strftime('%B') # => "January" # No casing flag. - Time.new(2022, 1).strftime('%^B') # => "JANUARY" - -- # - Swapcase result: - - Time.now.strftime('%p') # => "AM" - Time.now.strftime('%^p') # => "AM" - Time.now.strftime('%#p') # => "am" - -==== Timezone Flags - -- : - Put timezone as colon-separated hours and minutes: - - Time.now.strftime('%:z') # => "-05:00" - -- :: - Put timezone as colon-separated hours, minutes, and seconds: - - Time.now.strftime('%::z') # => "-05:00:00" - -=== Width Specifiers - -The integer width specifier gives a minimum width for the returned string: - - Time.new(2002).strftime('%Y') # => "2002" # No width specifier. - Time.new(2002).strftime('%10Y') # => "0000002002" - Time.new(2002, 12).strftime('%B') # => "December" # No width specifier. - Time.new(2002, 12).strftime('%10B') # => " December" - Time.new(2002, 12).strftime('%3B') # => "December" # Ignored if too small. diff --git a/doc/time/in.rdoc b/doc/time/in.rdoc deleted file mode 100644 index f47db76a353f4b..00000000000000 --- a/doc/time/in.rdoc +++ /dev/null @@ -1,7 +0,0 @@ -- in: zone: a timezone _zone_, which may be: - - A string offset from UTC. - - A single letter offset from UTC, in the range 'A'..'Z', - 'J' (the so-called military timezone) excluded. - - An integer number of seconds. - - A timezone object; - see {Timezone Argument}[#class-Time-label-Timezone+Argument] for details. 
diff --git a/doc/time/mon-min.rdoc b/doc/time/mon-min.rdoc deleted file mode 100644 index 5bd430c74a75c2..00000000000000 --- a/doc/time/mon-min.rdoc +++ /dev/null @@ -1,8 +0,0 @@ -- +month+: a month value, which may be: - - An integer month in the range 1..12. - - A 3-character string that matches regular expression - /jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec/i. -- +day+: an integer day in the range 1..31 - (less than 31 for some months). -- +hour+: an integer hour in the range 0..23. -- +min+: an integer minute in the range 0..59. diff --git a/doc/time/msec.rdoc b/doc/time/msec.rdoc deleted file mode 100644 index ce5d1e614516d3..00000000000000 --- a/doc/time/msec.rdoc +++ /dev/null @@ -1,2 +0,0 @@ -- +msec+ is the number of milliseconds (Integer, Float, or Rational) - in the range 0..1000. diff --git a/doc/time/nsec.rdoc b/doc/time/nsec.rdoc deleted file mode 100644 index a2dfe2d6086304..00000000000000 --- a/doc/time/nsec.rdoc +++ /dev/null @@ -1,2 +0,0 @@ -- +nsec+ is the number of nanoseconds (Integer, Float, or Rational) - in the range 0..1000000000. diff --git a/doc/time/sec.rdoc b/doc/time/sec.rdoc deleted file mode 100644 index 049c712110d89a..00000000000000 --- a/doc/time/sec.rdoc +++ /dev/null @@ -1,2 +0,0 @@ -- +sec+ is the number of seconds (Integer, Float, or Rational) - in the range 0..60. diff --git a/doc/time/sec_i.rdoc b/doc/time/sec_i.rdoc deleted file mode 100644 index 99c8eddc652005..00000000000000 --- a/doc/time/sec_i.rdoc +++ /dev/null @@ -1 +0,0 @@ -- +isec_i+ is the integer number of seconds in the range 0..60. diff --git a/doc/time/usec.rdoc b/doc/time/usec.rdoc deleted file mode 100644 index bb5a46419a18af..00000000000000 --- a/doc/time/usec.rdoc +++ /dev/null @@ -1,2 +0,0 @@ -- +usec+ is the number of microseconds (Integer, Float, or Rational) - in the range 0..1000000. 
diff --git a/doc/time/year.rdoc b/doc/time/year.rdoc deleted file mode 100644 index 2222b830d76339..00000000000000 --- a/doc/time/year.rdoc +++ /dev/null @@ -1 +0,0 @@ -- +year+: an integer year. diff --git a/doc/time/zone_and_in.rdoc b/doc/time/zone_and_in.rdoc deleted file mode 100644 index e09e22874beca1..00000000000000 --- a/doc/time/zone_and_in.rdoc +++ /dev/null @@ -1,8 +0,0 @@ -- +zone+: a timezone, which may be: - - A string offset from UTC. - - A single letter offset from UTC, in the range 'A'..'Z', - 'J' (the so-called military timezone) excluded. - - An integer number of seconds. - - A timezone object; - see {Timezone Argument}[#class-Time-label-Timezone+Argument] for details. -- in: zone: a timezone _zone_, which may be as above. diff --git a/doc/timezones.rdoc b/doc/timezones.rdoc new file mode 100644 index 00000000000000..c3aae88fdec809 --- /dev/null +++ b/doc/timezones.rdoc @@ -0,0 +1,108 @@ +== Timezones + +=== Timezone Specifiers + +Certain \Time methods accept arguments that specify timezones: + +- Time.at: keyword argument +in:+. +- Time.new: positional argument +zone+ or keyword argument +in:+. +- Time.now: keyword argument +in:+. +- Time#getlocal: positional argument +zone+. +- Time#localtime: positional argument +zone+. + +The value given with any of these must be one of the following +(each detailed below): + +- {Hours/minutes offset}[rdoc-ref:timezones.rdoc@Hours-2FMinutes+Offsets]. +- {Single-letter offset}[rdoc-ref:timezones.rdoc@Single-Letter+Offsets]. +- {Integer offset}[rdoc-ref:timezones.rdoc@Integer+Offsets]. +- {Timezone object}[rdoc-ref:timezones.rdoc@Timezone+Objects]. + +==== Hours/Minutes Offsets + +The zone value may be a string offset from UTC +in the form '+HH:MM' or '-HH:MM', +where: + +- +HH+ is the 2-digit hour in the range 0..23. +- +MM+ is the 2-digit minute in the range 0..59. 
+ +Examples: + + t = Time.utc(2000, 1, 1, 20, 15, 1) # => 2000-01-01 20:15:01 UTC + Time.at(t, in: '-23:59') # => 1999-12-31 20:16:01 -2359 + Time.at(t, in: '+23:59') # => 2000-01-02 20:14:01 +2359 + +==== Single-Letter Offsets + +The zone value may be a letter in the range 'A'..'I' +or 'K'..'Z'; +see {List of military time zones}[https://en.wikipedia.org/wiki/List_of_military_time_zones]: + + t = Time.utc(2000, 1, 1, 20, 15, 1) # => 2000-01-01 20:15:01 UTC + Time.at(t, in: 'A') # => 2000-01-01 21:15:01 +0100 + Time.at(t, in: 'I') # => 2000-01-02 05:15:01 +0900 + Time.at(t, in: 'K') # => 2000-01-02 06:15:01 +1000 + Time.at(t, in: 'Y') # => 2000-01-01 08:15:01 -1200 + Time.at(t, in: 'Z') # => 2000-01-01 20:15:01 UTC + +==== \Integer Offsets + +The zone value may be an integer number of seconds +in the range -86399..86399: + + t = Time.utc(2000, 1, 1, 20, 15, 1) # => 2000-01-01 20:15:01 UTC + Time.at(t, in: -86399) # => 1999-12-31 20:15:02 -235959 + Time.at(t, in: 86399) # => 2000-01-02 20:15:00 +235959 + +==== Timezone Objects + +In most cases, the zone value may be an object +responding to certain timezone methods. + +\Exceptions (timezone object not allowed): + +- Time.new with positional argument +zone+. +- Time.now with keyword argument +in:+. + +The timezone methods are: + +- +local_to_utc+: + + - Called when Time.new is invoked with +tz+ + as the value of positional argument +zone+ or keyword argument +in:+. + - Argument: a Time::tm object. + - Returns: a \Time-like object in the UTC timezone. + +- +utc_to_local+: + + - Called when Time.at or Time.now is invoked with +tz+ + as the value for keyword argument +in:+, + and when Time#getlocal or Time#localtime is called with +tz+ + as the value for positional argument +zone+. + - Argument: a Time::tm object. + - Returns: a \Time-like object in the local timezone. 
+ +A custom timezone class may have these instance methods, +which will be called if defined: + +- +abbr+: + + - Called when Time#strftime is invoked with a format involving %Z. + - Argument: a Time::tm object. + - Returns: a string abbreviation for the timezone name. + +- +dst?+: + + - Called when Time.at or Time.now is invoked with +tz+ + as the value for keyword argument +in:+, + and when Time#getlocal or Time#localtime is called with +tz+ + as the value for positional argument +zone+. + - Argument: a Time::tm object. + - Returns: whether the time is daylight saving time. + +- +name+: + + - Called when Marshal.dump(t) is invoked + - Argument: none. + - Returns: the string name of the timezone. diff --git a/doc/yjit/yjit.md b/doc/yjit/yjit.md index fd45096beb128b..1eeb75824a950e 100644 --- a/doc/yjit/yjit.md +++ b/doc/yjit/yjit.md @@ -20,6 +20,7 @@ This project is open source and falls under the same license as CRuby. If you wish to learn more about the approach taken, here are some conference talks and publications: - RubyKaigi 2021 talk: [YJIT: Building a New JIT Compiler Inside CRuby](https://www.youtube.com/watch?v=PBVLf3yfMs8) - Blog post: [YJIT: Building a New JIT Compiler Inside CRuby](https://pointersgonewild.com/2021/06/02/yjit-building-a-new-jit-compiler-inside-cruby/) +- VMIL 2021 paper: [YJIT: A Basic Block Versioning JIT Compiler for CRuby](https://dl.acm.org/doi/10.1145/3486606.3486781) - MoreVMs 2021 talk: [YJIT: Building a New JIT Compiler Inside CRuby](https://www.youtube.com/watch?v=vucLAqv7qpc) - ECOOP 2016 talk: [Interprocedural Type Specialization of JavaScript Programs Without Type Analysis](https://www.youtube.com/watch?v=sRNBY7Ss97A) - ECOOP 2016 paper: [Interprocedural Type Specialization of JavaScript Programs Without Type Analysis](https://drops.dagstuhl.de/opus/volltexte/2016/6101/pdf/LIPIcs-ECOOP-2016-7.pdf) @@ -45,7 +46,7 @@ YJIT is a work in progress and as such may not yet be mature enough for mission- - No garbage collection for 
generated code. - Currently supports only macOS and Linux. -- Currently supports only x86-64 CPUs. +- Supports x86-64 and arm64/aarch64 CPUs only. Because there is no GC for generated code yet, your software could run out of executable memory if it is large enough. You can change how much executable memory is allocated using [YJIT's command-line options](#command-line-options). @@ -57,6 +58,7 @@ You will need to install: - A C compiler such as GCC or Clang - GNU Make and Autoconf - The Rust compiler `rustc` and Cargo (if you want to build in dev/debug mode) + - The Rust version must be [>= 1.58.1](../../yjit/Cargo.toml). To install the Rust build toolchain, we suggest following the [recommended installation method][rust-install]. Rust also provides first class [support][editor-tools] for many source code editors. @@ -308,9 +310,9 @@ You can use the Intel syntax for disassembly in LLDB, keeping it consistent with echo "settings set target.x86-disassembly-flavor intel" >> ~/.lldbinit ``` -## Running YJIT on M1 +## Running x86 YJIT on Apple's Rosetta -It is possible to run YJIT on an Apple M1 via Rosetta. You can find basic +For development purposes, it is possible to run x86 YJIT on an Apple M1 via Rosetta. You can find basic instructions below, but there are a few caveats listed further down. First, install Rosetta: @@ -343,10 +345,9 @@ $ rustup default stable-x86_64-apple-darwin While in your i386 shell, install Cargo and Homebrew, then hack away! -### M1 Caveats +### Rosetta Caveats 1. You must install a version of Homebrew for each architecture 2. Cargo will install in $HOME/.cargo by default, and I don't know a good way to change architectures after install -3. `dev` won't work if you have i386 Homebrew installed on an M1 If you use Fish shell you can [read this link](https://tenderlovemaking.com/2022/01/07/homebrew-rosetta-and-ruby.html) for information on making the dev environment easier. 
diff --git a/enc/trans/newline.trans b/enc/trans/newline.trans index 9e763407f9ebce..95e082f5bd0b96 100644 --- a/enc/trans/newline.trans +++ b/enc/trans/newline.trans @@ -17,10 +17,16 @@ map_cr["0a"] = "0d" transcode_generate_node(ActionMap.parse(map_cr), "cr_newline") + + map_normalize = {} + map_normalize["{00-ff}"] = :func_so + + transcode_generate_node(ActionMap.parse(map_normalize), "lf_newline") %> <%= transcode_generated_code %> +#define lf_newline universal_newline #define STATE (sp[0]) #define NORMAL 0 #define JUST_AFTER_CR 1 @@ -126,10 +132,24 @@ rb_cr_newline = { 0, 0, 0, 0 }; +static const rb_transcoder +rb_lf_newline = { + "", "lf_newline", lf_newline, + TRANSCODE_TABLE_INFO, + 1, /* input_unit_length */ + 1, /* max_input */ + 2, /* max_output */ + asciicompat_converter, /* asciicompat_type */ + 2, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */ + 0, 0, 0, fun_so_universal_newline, + universal_newline_finish +}; + void Init_newline(void) { rb_register_transcoder(&rb_universal_newline); rb_register_transcoder(&rb_crlf_newline); rb_register_transcoder(&rb_cr_newline); + rb_register_transcoder(&rb_lf_newline); } diff --git a/enum.c b/enum.c index e3dc177ba17729..97215b627e4c93 100644 --- a/enum.c +++ b/enum.c @@ -4808,7 +4808,7 @@ enum_compact(VALUE obj) * - #grep_v: Returns elements selected by a given object * or objects returned by a given block. * - #reduce, #inject: Returns the object formed by combining all elements. - * - #sum: Returns the sum of the elements, using method +++. + * - #sum: Returns the sum of the elements, using method +. * - #zip: Combines each element with elements from other enumerables; * returns the n-tuples or calls the block with each. * - #cycle: Calls the block with each element, cycling repeatedly. 
diff --git a/enumerator.c b/enumerator.c index ce2eacbd2a8d8a..2c9858cda6d0f5 100644 --- a/enumerator.c +++ b/enumerator.c @@ -1796,7 +1796,8 @@ lazy_initialize(int argc, VALUE *argv, VALUE self) * Expands +lazy+ enumerator to an array. * See Enumerable#to_a. */ -static VALUE lazy_to_a(VALUE self) +static VALUE +lazy_to_a(VALUE self) { } #endif @@ -2753,7 +2754,8 @@ lazy_with_index(int argc, VALUE *argv, VALUE obj) * * Like Enumerable#chunk, but chains operation to be lazy-evaluated. */ -static VALUE lazy_chunk(VALUE self) +static VALUE +lazy_chunk(VALUE self) { } @@ -2763,7 +2765,8 @@ static VALUE lazy_chunk(VALUE self) * * Like Enumerable#chunk_while, but chains operation to be lazy-evaluated. */ -static VALUE lazy_chunk_while(VALUE self) +static VALUE +lazy_chunk_while(VALUE self) { } @@ -2774,7 +2777,8 @@ static VALUE lazy_chunk_while(VALUE self) * * Like Enumerable#slice_after, but chains operation to be lazy-evaluated. */ -static VALUE lazy_slice_after(VALUE self) +static VALUE +lazy_slice_after(VALUE self) { } @@ -2785,7 +2789,8 @@ static VALUE lazy_slice_after(VALUE self) * * Like Enumerable#slice_before, but chains operation to be lazy-evaluated. */ -static VALUE lazy_slice_before(VALUE self) +static VALUE +lazy_slice_before(VALUE self) { } @@ -2795,7 +2800,8 @@ static VALUE lazy_slice_before(VALUE self) * * Like Enumerable#slice_when, but chains operation to be lazy-evaluated. 
*/ -static VALUE lazy_slice_when(VALUE self) +static VALUE +lazy_slice_when(VALUE self) { } # endif @@ -3562,7 +3568,8 @@ product_each(VALUE obj, struct product_state *pstate) VALUE eobj = RARRAY_AREF(enums, pstate->index); rb_block_call(eobj, id_each_entry, 0, NULL, product_each_i, (VALUE)pstate); - } else { + } + else { rb_funcallv(pstate->block, id_call, pstate->argc, pstate->argv); } @@ -3677,7 +3684,8 @@ enumerator_s_product(VALUE klass, VALUE enums) if (rb_block_given_p()) { return enum_product_run(obj, rb_block_proc()); - } else { + } + else { return obj; } } @@ -3794,6 +3802,13 @@ rb_arithmetic_sequence_beg_len_step(VALUE obj, long *begp, long *lenp, long *ste *stepp = step; if (step < 0) { + if (aseq.exclude_end && !NIL_P(aseq.end)) { + /* Handle exclusion before range reversal */ + aseq.end = LONG2NUM(NUM2LONG(aseq.end) + 1); + + /* Don't exclude the previous beginning */ + aseq.exclude_end = 0; + } VALUE tmp = aseq.begin; aseq.begin = aseq.end; aseq.end = tmp; diff --git a/error.c b/error.c index 21a532a8faedc4..07fa04627e8d5d 100644 --- a/error.c +++ b/error.c @@ -357,47 +357,42 @@ warn_vsprintf(rb_encoding *enc, const char *file, int line, const char *fmt, va_ return rb_str_cat2(str, "\n"); } +#define with_warn_vsprintf(file, line, fmt) \ + VALUE str; \ + va_list args; \ + va_start(args, fmt); \ + str = warn_vsprintf(NULL, file, line, fmt, args); \ + va_end(args); + void rb_compile_warn(const char *file, int line, const char *fmt, ...) { - VALUE str; - va_list args; - - if (NIL_P(ruby_verbose)) return; - - va_start(args, fmt); - str = warn_vsprintf(NULL, file, line, fmt, args); - va_end(args); - rb_write_warning_str(str); + if (!NIL_P(ruby_verbose)) { + with_warn_vsprintf(file, line, fmt) { + rb_write_warning_str(str); + } + } } /* rb_compile_warning() reports only in verbose mode */ void rb_compile_warning(const char *file, int line, const char *fmt, ...) 
{ - VALUE str; - va_list args; - - if (!RTEST(ruby_verbose)) return; - - va_start(args, fmt); - str = warn_vsprintf(NULL, file, line, fmt, args); - va_end(args); - rb_write_warning_str(str); + if (RTEST(ruby_verbose)) { + with_warn_vsprintf(file, line, fmt) { + rb_write_warning_str(str); + } + } } void rb_category_compile_warn(rb_warning_category_t category, const char *file, int line, const char *fmt, ...) { - VALUE str; - va_list args; - - if (NIL_P(ruby_verbose)) return; - - va_start(args, fmt); - str = warn_vsprintf(NULL, file, line, fmt, args); - va_end(args); - rb_warn_category(str, rb_warning_category_to_name(category)); + if (!NIL_P(ruby_verbose)) { + with_warn_vsprintf(file, line, fmt) { + rb_warn_category(str, rb_warning_category_to_name(category)); + } + } } RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 0) diff --git a/ext/-test-/abi/extconf.rb b/ext/-test-/abi/extconf.rb index d786b15db98c7f..3b090b75531d90 100644 --- a/ext/-test-/abi/extconf.rb +++ b/ext/-test-/abi/extconf.rb @@ -1,3 +1,4 @@ # frozen_string_literal: false +return unless RUBY_PATCHLEVEL < 0 require_relative "../auto_ext.rb" auto_ext(inc: true) diff --git a/ext/-test-/arith_seq/beg_len_step/beg_len_step.c b/ext/-test-/arith_seq/beg_len_step/beg_len_step.c new file mode 100644 index 00000000000000..40c8cbee82b9fa --- /dev/null +++ b/ext/-test-/arith_seq/beg_len_step/beg_len_step.c @@ -0,0 +1,19 @@ +#include "ruby/ruby.h" + +static VALUE +arith_seq_s_beg_len_step(VALUE mod, VALUE obj, VALUE len, VALUE err) +{ + VALUE r; + long beg, len2, step; + + r = rb_arithmetic_sequence_beg_len_step(obj, &beg, &len2, &step, NUM2LONG(len), NUM2INT(err)); + + return rb_ary_new_from_args(4, r, LONG2NUM(beg), LONG2NUM(len2), LONG2NUM(step)); +} + +void +Init_beg_len_step(void) +{ + VALUE cArithSeq = rb_path2class("Enumerator::ArithmeticSequence"); + rb_define_singleton_method(cArithSeq, "__beg_len_step__", arith_seq_s_beg_len_step, 3); +} diff --git a/ext/-test-/arith_seq/beg_len_step/depend 
b/ext/-test-/arith_seq/beg_len_step/depend new file mode 100644 index 00000000000000..36a2c4c71b1214 --- /dev/null +++ b/ext/-test-/arith_seq/beg_len_step/depend @@ -0,0 +1,161 @@ +# AUTOGENERATED DEPENDENCIES START +beg_len_step.o: $(RUBY_EXTCONF_H) +beg_len_step.o: $(arch_hdrdir)/ruby/config.h +beg_len_step.o: $(hdrdir)/ruby/assert.h +beg_len_step.o: $(hdrdir)/ruby/backward.h +beg_len_step.o: $(hdrdir)/ruby/backward/2/assume.h +beg_len_step.o: $(hdrdir)/ruby/backward/2/attributes.h +beg_len_step.o: $(hdrdir)/ruby/backward/2/bool.h +beg_len_step.o: $(hdrdir)/ruby/backward/2/inttypes.h +beg_len_step.o: $(hdrdir)/ruby/backward/2/limits.h +beg_len_step.o: $(hdrdir)/ruby/backward/2/long_long.h +beg_len_step.o: $(hdrdir)/ruby/backward/2/stdalign.h +beg_len_step.o: $(hdrdir)/ruby/backward/2/stdarg.h +beg_len_step.o: $(hdrdir)/ruby/defines.h +beg_len_step.o: $(hdrdir)/ruby/intern.h +beg_len_step.o: $(hdrdir)/ruby/internal/abi.h +beg_len_step.o: $(hdrdir)/ruby/internal/anyargs.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/char.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/double.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/fixnum.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/gid_t.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/int.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/intptr_t.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/long.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/long_long.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/mode_t.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/off_t.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/pid_t.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/short.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/size_t.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/st_data_t.h +beg_len_step.o: $(hdrdir)/ruby/internal/arithmetic/uid_t.h +beg_len_step.o: 
$(hdrdir)/ruby/internal/assume.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/alloc_size.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/artificial.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/cold.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/const.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/constexpr.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/deprecated.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/diagnose_if.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/enum_extensibility.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/error.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/flag_enum.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/forceinline.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/format.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/maybe_unused.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/noalias.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/nodiscard.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/noexcept.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/noinline.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/nonnull.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/noreturn.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/pure.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/restrict.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/returns_nonnull.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/warning.h +beg_len_step.o: $(hdrdir)/ruby/internal/attr/weakref.h +beg_len_step.o: $(hdrdir)/ruby/internal/cast.h +beg_len_step.o: $(hdrdir)/ruby/internal/compiler_is.h +beg_len_step.o: $(hdrdir)/ruby/internal/compiler_is/apple.h +beg_len_step.o: $(hdrdir)/ruby/internal/compiler_is/clang.h +beg_len_step.o: $(hdrdir)/ruby/internal/compiler_is/gcc.h +beg_len_step.o: $(hdrdir)/ruby/internal/compiler_is/intel.h +beg_len_step.o: $(hdrdir)/ruby/internal/compiler_is/msvc.h +beg_len_step.o: $(hdrdir)/ruby/internal/compiler_is/sunpro.h +beg_len_step.o: $(hdrdir)/ruby/internal/compiler_since.h +beg_len_step.o: $(hdrdir)/ruby/internal/config.h 
+beg_len_step.o: $(hdrdir)/ruby/internal/constant_p.h +beg_len_step.o: $(hdrdir)/ruby/internal/core.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/rarray.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/rbasic.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/rbignum.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/rclass.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/rdata.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/rfile.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/rhash.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/robject.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/rregexp.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/rstring.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/rstruct.h +beg_len_step.o: $(hdrdir)/ruby/internal/core/rtypeddata.h +beg_len_step.o: $(hdrdir)/ruby/internal/ctype.h +beg_len_step.o: $(hdrdir)/ruby/internal/dllexport.h +beg_len_step.o: $(hdrdir)/ruby/internal/dosish.h +beg_len_step.o: $(hdrdir)/ruby/internal/error.h +beg_len_step.o: $(hdrdir)/ruby/internal/eval.h +beg_len_step.o: $(hdrdir)/ruby/internal/event.h +beg_len_step.o: $(hdrdir)/ruby/internal/fl_type.h +beg_len_step.o: $(hdrdir)/ruby/internal/gc.h +beg_len_step.o: $(hdrdir)/ruby/internal/glob.h +beg_len_step.o: $(hdrdir)/ruby/internal/globals.h +beg_len_step.o: $(hdrdir)/ruby/internal/has/attribute.h +beg_len_step.o: $(hdrdir)/ruby/internal/has/builtin.h +beg_len_step.o: $(hdrdir)/ruby/internal/has/c_attribute.h +beg_len_step.o: $(hdrdir)/ruby/internal/has/cpp_attribute.h +beg_len_step.o: $(hdrdir)/ruby/internal/has/declspec_attribute.h +beg_len_step.o: $(hdrdir)/ruby/internal/has/extension.h +beg_len_step.o: $(hdrdir)/ruby/internal/has/feature.h +beg_len_step.o: $(hdrdir)/ruby/internal/has/warning.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/array.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/bignum.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/class.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/compar.h +beg_len_step.o: 
$(hdrdir)/ruby/internal/intern/complex.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/cont.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/dir.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/enum.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/enumerator.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/error.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/eval.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/file.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/gc.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/hash.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/io.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/load.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/marshal.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/numeric.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/object.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/parse.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/proc.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/process.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/random.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/range.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/rational.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/re.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/ruby.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/select.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/select/largesize.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/signal.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/sprintf.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/string.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/struct.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/thread.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/time.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/variable.h +beg_len_step.o: $(hdrdir)/ruby/internal/intern/vm.h +beg_len_step.o: $(hdrdir)/ruby/internal/interpreter.h +beg_len_step.o: $(hdrdir)/ruby/internal/iterator.h +beg_len_step.o: 
$(hdrdir)/ruby/internal/memory.h +beg_len_step.o: $(hdrdir)/ruby/internal/method.h +beg_len_step.o: $(hdrdir)/ruby/internal/module.h +beg_len_step.o: $(hdrdir)/ruby/internal/newobj.h +beg_len_step.o: $(hdrdir)/ruby/internal/rgengc.h +beg_len_step.o: $(hdrdir)/ruby/internal/scan_args.h +beg_len_step.o: $(hdrdir)/ruby/internal/special_consts.h +beg_len_step.o: $(hdrdir)/ruby/internal/static_assert.h +beg_len_step.o: $(hdrdir)/ruby/internal/stdalign.h +beg_len_step.o: $(hdrdir)/ruby/internal/stdbool.h +beg_len_step.o: $(hdrdir)/ruby/internal/symbol.h +beg_len_step.o: $(hdrdir)/ruby/internal/value.h +beg_len_step.o: $(hdrdir)/ruby/internal/value_type.h +beg_len_step.o: $(hdrdir)/ruby/internal/variable.h +beg_len_step.o: $(hdrdir)/ruby/internal/warning_push.h +beg_len_step.o: $(hdrdir)/ruby/internal/xmalloc.h +beg_len_step.o: $(hdrdir)/ruby/missing.h +beg_len_step.o: $(hdrdir)/ruby/ruby.h +beg_len_step.o: $(hdrdir)/ruby/st.h +beg_len_step.o: $(hdrdir)/ruby/subst.h +beg_len_step.o: beg_len_step.c +# AUTOGENERATED DEPENDENCIES END diff --git a/ext/-test-/arith_seq/beg_len_step/extconf.rb b/ext/-test-/arith_seq/beg_len_step/extconf.rb new file mode 100644 index 00000000000000..e72b3ad01f9c09 --- /dev/null +++ b/ext/-test-/arith_seq/beg_len_step/extconf.rb @@ -0,0 +1,2 @@ +# frozen_string_literal: false +create_makefile("-test-/arith_seq/beg_len_step") diff --git a/ext/-test-/econv/append.c b/ext/-test-/econv/append.c new file mode 100644 index 00000000000000..724cd136c02e40 --- /dev/null +++ b/ext/-test-/econv/append.c @@ -0,0 +1,15 @@ +#include "ruby/ruby.h" +#include "ruby/encoding.h" + +static VALUE +econv_append(VALUE self, VALUE src, VALUE dst) +{ + rb_econv_t *ec = DATA_PTR(self); + return rb_econv_str_append(ec, src, dst, 0); +} + +void +Init_econv_append(VALUE klass) +{ + rb_define_method(klass, "append", econv_append, 2); +} diff --git a/ext/-test-/econv/extconf.rb b/ext/-test-/econv/extconf.rb new file mode 100644 index 00000000000000..d786b15db98c7f --- 
/dev/null +++ b/ext/-test-/econv/extconf.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: false +require_relative "../auto_ext.rb" +auto_ext(inc: true) diff --git a/ext/-test-/econv/init.c b/ext/-test-/econv/init.c new file mode 100644 index 00000000000000..9772ebe71ce69f --- /dev/null +++ b/ext/-test-/econv/init.c @@ -0,0 +1,11 @@ +#include "ruby.h" + +#define init(n) {void Init_econv_##n(VALUE klass); Init_econv_##n(klass);} + +void +Init_econv(void) +{ + VALUE mBug = rb_define_module("Bug"); + VALUE klass = rb_define_class_under(mBug, "EConv", rb_path2class("Encoding::Converter")); + TEST_INIT_FUNCS(init); +} diff --git a/ext/cgi/escape/escape.c b/ext/cgi/escape/escape.c index 068647747dd11b..c5b76de596efb5 100644 --- a/ext/cgi/escape/escape.c +++ b/ext/cgi/escape/escape.c @@ -200,7 +200,7 @@ url_unreserved_char(unsigned char c) } static VALUE -optimized_escape(VALUE str) +optimized_escape(VALUE str, int plus_escape) { long i, len, beg = 0; VALUE dest = 0; @@ -220,7 +220,7 @@ optimized_escape(VALUE str) rb_str_cat(dest, cstr + beg, i - beg); beg = i + 1; - if (c == ' ') { + if (plus_escape && c == ' ') { rb_str_cat_cstr(dest, "+"); } else { @@ -242,7 +242,7 @@ optimized_escape(VALUE str) } static VALUE -optimized_unescape(VALUE str, VALUE encoding) +optimized_unescape(VALUE str, VALUE encoding, int unescape_plus) { long i, len, beg = 0; VALUE dest = 0; @@ -265,7 +265,7 @@ optimized_unescape(VALUE str, VALUE encoding) | char_to_number(cstr[i+2])); clen = 2; } - else if (c == '+') { + else if (unescape_plus && c == '+') { buf[0] = ' '; } else { @@ -348,7 +348,7 @@ cgiesc_unescape_html(VALUE self, VALUE str) * call-seq: * CGI.escape(string) -> string * - * Returns URL-escaped string. + * Returns URL-escaped string (+application/x-www-form-urlencoded+). 
* */ static VALUE @@ -357,7 +357,7 @@ cgiesc_escape(VALUE self, VALUE str) StringValue(str); if (rb_enc_str_asciicompat_p(str)) { - return optimized_escape(str); + return optimized_escape(str, 1); } else { return rb_call_super(1, &str); @@ -376,7 +376,7 @@ accept_charset(int argc, VALUE *argv, VALUE self) * call-seq: * CGI.unescape(string, encoding=@@accept_charset) -> string * - * Returns URL-unescaped string. + * Returns URL-unescaped string (+application/x-www-form-urlencoded+). * */ static VALUE @@ -388,7 +388,50 @@ cgiesc_unescape(int argc, VALUE *argv, VALUE self) if (rb_enc_str_asciicompat_p(str)) { VALUE enc = accept_charset(argc-1, argv+1, self); - return optimized_unescape(str, enc); + return optimized_unescape(str, enc, 1); + } + else { + return rb_call_super(argc, argv); + } +} + +/* + * call-seq: + * CGI.escapeURIComponent(string) -> string + * + * Returns URL-escaped string following RFC 3986. + * + */ +static VALUE +cgiesc_escape_uri_component(VALUE self, VALUE str) +{ + StringValue(str); + + if (rb_enc_str_asciicompat_p(str)) { + return optimized_escape(str, 0); + } + else { + return rb_call_super(1, &str); + } +} + +/* + * call-seq: + * CGI.unescapeURIComponent(string, encoding=@@accept_charset) -> string + * + * Returns URL-unescaped string following RFC 3986. 
+ * + */ +static VALUE +cgiesc_unescape_uri_component(int argc, VALUE *argv, VALUE self) +{ + VALUE str = (rb_check_arity(argc, 1, 2), argv[0]); + + StringValue(str); + + if (rb_enc_str_asciicompat_p(str)) { + VALUE enc = accept_charset(argc-1, argv+1, self); + return optimized_unescape(str, enc, 0); } else { return rb_call_super(argc, argv); @@ -414,6 +457,8 @@ InitVM_escape(void) rb_mUtil = rb_define_module_under(rb_cCGI, "Util"); rb_define_method(rb_mEscape, "escapeHTML", cgiesc_escape_html, 1); rb_define_method(rb_mEscape, "unescapeHTML", cgiesc_unescape_html, 1); + rb_define_method(rb_mEscape, "escapeURIComponent", cgiesc_escape_uri_component, 1); + rb_define_method(rb_mEscape, "unescapeURIComponent", cgiesc_unescape_uri_component, -1); rb_define_method(rb_mEscape, "escape", cgiesc_escape, 1); rb_define_method(rb_mEscape, "unescape", cgiesc_unescape, -1); rb_prepend_module(rb_mUtil, rb_mEscape); diff --git a/ext/date/date_core.c b/ext/date/date_core.c index cee7b27faf6d08..83d493c794842a 100644 --- a/ext/date/date_core.c +++ b/ext/date/date_core.c @@ -2486,7 +2486,7 @@ date_s__valid_jd_p(int argc, VALUE *argv, VALUE klass) * * Date.valid_jd?(2451944) # => true * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * * Related: Date.jd. */ @@ -2580,7 +2580,7 @@ date_s__valid_civil_p(int argc, VALUE *argv, VALUE klass) * Date.valid_date?(2001, 2, 29) # => false * Date.valid_date?(2001, 2, -1) # => true * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * * Date.valid_date? is an alias for Date.valid_civil?. * @@ -2670,7 +2670,7 @@ date_s__valid_ordinal_p(int argc, VALUE *argv, VALUE klass) * Date.valid_ordinal?(2001, 34) # => true * Date.valid_ordinal?(2001, 366) # => false * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. 
* * Related: Date.jd, Date.ordinal. */ @@ -2760,7 +2760,7 @@ date_s__valid_commercial_p(int argc, VALUE *argv, VALUE klass) * * See Date.commercial. * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * * Related: Date.jd, Date.commercial. */ @@ -3342,7 +3342,7 @@ static VALUE d_lite_plus(VALUE, VALUE); * * Date.jd(Date::ITALY - 1).julian? # => true * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * * Related: Date.new. */ @@ -3407,7 +3407,7 @@ date_s_jd(int argc, VALUE *argv, VALUE klass) * * Raises an exception if +yday+ is zero or out of range. * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * * Related: Date.jd, Date.new. */ @@ -3484,7 +3484,7 @@ date_s_civil(int argc, VALUE *argv, VALUE klass) * where +n+ is the number of days in the month; * when the argument is negative, counts backward from the end of the month. * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * * Date.civil is an alias for Date.new. * @@ -3592,7 +3592,7 @@ date_initialize(int argc, VALUE *argv, VALUE self) * Date.commercial(2020, 1, 1).to_s # => "2019-12-30" Date.commercial(2020, 1, 7).to_s # => "2020-01-05" * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * * Related: Date.jd, Date.new, Date.ordinal. */ @@ -3777,7 +3777,7 @@ static void set_sg(union DateData *, double); * * Date.today.to_s # => "2022-07-06" * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * */ static VALUE @@ -4409,7 +4409,7 @@ date_s__strptime(int argc, VALUE *argv, VALUE klass) * {Formats for Dates and Times}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html]. 
* (Unlike Date.strftime, does not support flags and width.) * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * * See also {strptime(3)}[https://man7.org/linux/man-pages/man3/strptime.3.html]. * @@ -4505,6 +4505,9 @@ date_s__parse_internal(int argc, VALUE *argv, VALUE klass) * Note: * This method recognizes many forms in +string+, * but it is not a validator. + * For formats, see + * {"Specialized Format Strings" in Formats for Dates and Times}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-Specialized+Format+Strings] + * * If +string+ does not specify a valid date, * the result is unpredictable; * consider using Date._strptime instead. @@ -4537,6 +4540,8 @@ date_s__parse(int argc, VALUE *argv, VALUE klass) * Note: * This method recognizes many forms in +string+, * but it is not a validator. + * For formats, see + * {"Specialized Format Strings" in Formats for Dates and Times}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-Specialized+Format+Strings] * If +string+ does not specify a valid date, * the result is unpredictable; * consider using Date._strptime instead. @@ -4556,7 +4561,7 @@ date_s__parse(int argc, VALUE *argv, VALUE klass) * * See: * - * - Argument {start}[rdoc-ref:Date@Argument+start]. + * - Argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * - Argument {limit}[rdoc-ref:Date@Argument+limit]. * * Related: Date._parse (returns a hash). @@ -4636,7 +4641,7 @@ date_s__iso8601(int argc, VALUE *argv, VALUE klass) * * See: * - * - Argument {start}[rdoc-ref:Date@Argument+start]. + * - Argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * - Argument {limit}[rdoc-ref:Date@Argument+limit]. * * Related: Date._iso8601 (returns a hash). 
@@ -4671,7 +4676,7 @@ date_s_iso8601(int argc, VALUE *argv, VALUE klass) * Date._rfc3339(string, limit: 128) -> hash * * Returns a hash of values parsed from +string+, which should be a valid - * {RFC 3339 format}[https://datatracker.ietf.org/doc/html/rfc3339]: + * {RFC 3339 format}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-RFC+3339+Format]: * * d = Date.new(2001, 2, 3) * s = d.rfc3339 # => "2001-02-03T00:00:00+00:00" @@ -4699,7 +4704,7 @@ date_s__rfc3339(int argc, VALUE *argv, VALUE klass) * * Returns a new \Date object with values parsed from +string+, * which should be a valid - * {RFC 3339 format}[https://datatracker.ietf.org/doc/html/rfc3339]: + * {RFC 3339 format}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-RFC+3339+Format]: * * d = Date.new(2001, 2, 3) * s = d.rfc3339 # => "2001-02-03T00:00:00+00:00" @@ -4707,7 +4712,7 @@ date_s__rfc3339(int argc, VALUE *argv, VALUE klass) * * See: * - * - Argument {start}[rdoc-ref:Date@Argument+start]. + * - Argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * - Argument {limit}[rdoc-ref:Date@Argument+limit]. * * Related: Date._rfc3339 (returns a hash). @@ -4776,7 +4781,7 @@ date_s__xmlschema(int argc, VALUE *argv, VALUE klass) * * See: * - * - Argument {start}[rdoc-ref:Date@Argument+start]. + * - Argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * - Argument {limit}[rdoc-ref:Date@Argument+limit]. * * Related: Date._xmlschema (returns a hash). 
@@ -4811,7 +4816,7 @@ date_s_xmlschema(int argc, VALUE *argv, VALUE klass) * Date._rfc2822(string, limit: 128) -> hash * * Returns a hash of values parsed from +string+, which should be a valid - * {RFC 2822 date format}[https://datatracker.ietf.org/doc/html/rfc2822]: + * {RFC 2822 date format}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-RFC+2822+Format]: * * d = Date.new(2001, 2, 3) * s = d.rfc2822 # => "Sat, 3 Feb 2001 00:00:00 +0000" @@ -4841,7 +4846,7 @@ date_s__rfc2822(int argc, VALUE *argv, VALUE klass) * * Returns a new \Date object with values parsed from +string+, * which should be a valid - * {RFC 2822 date format}[https://datatracker.ietf.org/doc/html/rfc2822]: + * {RFC 2822 date format}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-RFC+2822+Format]: * * d = Date.new(2001, 2, 3) * s = d.rfc2822 # => "Sat, 3 Feb 2001 00:00:00 +0000" @@ -4849,7 +4854,7 @@ date_s__rfc2822(int argc, VALUE *argv, VALUE klass) * * See: * - * - Argument {start}[rdoc-ref:Date@Argument+start]. + * - Argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * - Argument {limit}[rdoc-ref:Date@Argument+limit]. * * Date.rfc822 is an alias for Date.rfc2822. 
@@ -4885,7 +4890,7 @@ date_s_rfc2822(int argc, VALUE *argv, VALUE klass) * Date._httpdate(string, limit: 128) -> hash * * Returns a hash of values parsed from +string+, which should be a valid - * HTTP date format: + * {HTTP date format}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-HTTP+Format]: * * d = Date.new(2001, 2, 3) * s = d.httpdate # => "Sat, 03 Feb 2001 00:00:00 GMT" @@ -4911,7 +4916,7 @@ date_s__httpdate(int argc, VALUE *argv, VALUE klass) * * Returns a new \Date object with values parsed from +string+, * which should be a valid - * {RFC 2616 date format}[https://datatracker.ietf.org/doc/html/rfc2616]: + * {HTTP date format}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-HTTP+Format]: * * d = Date.new(2001, 2, 3) s = d.httpdate # => "Sat, 03 Feb 2001 00:00:00 GMT" @@ -4919,7 +4924,7 @@ date_s__httpdate(int argc, VALUE *argv, VALUE klass) * * See: * - * - Argument {start}[rdoc-ref:Date@Argument+start]. + * - Argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * - Argument {limit}[rdoc-ref:Date@Argument+limit]. * * Related: Date._httpdate (returns a hash). 
@@ -4953,7 +4958,7 @@ date_s_httpdate(int argc, VALUE *argv, VALUE klass) * Date._jisx0301(string, limit: 128) -> hash * * Returns a hash of values parsed from +string+, which should be a valid - * JIS X 0301 date format: + * {JIS X 0301 date format}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-JIS+X+0301+Format]: * * d = Date.new(2001, 2, 3) * s = d.jisx0301 # => "H13.02.03" @@ -4979,7 +4984,7 @@ date_s__jisx0301(int argc, VALUE *argv, VALUE klass) * Date.jisx0301(string = '-4712-01-01', start = Date::ITALY, limit: 128) -> date * * Returns a new \Date object with values parsed from +string+, - * which should be a valid JIS X 0301 format: + * which should be a valid {JIS X 0301 format}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-JIS+X+0301+Format]: * * d = Date.new(2001, 2, 3) * s = d.jisx0301 # => "H13.02.03" @@ -4991,7 +4996,7 @@ date_s__jisx0301(int argc, VALUE *argv, VALUE klass) * * See: * - * - Argument {start}[rdoc-ref:Date@Argument+start]. + * - Argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * - Argument {limit}[rdoc-ref:Date@Argument+limit]. * * Related: Date._jisx0301 (returns a hash). @@ -5755,7 +5760,7 @@ d_lite_leap_p(VALUE self) * Date.new(2001, 2, 3, Date::GREGORIAN).start # => -Infinity * Date.new(2001, 2, 3, Date::JULIAN).start # => Infinity * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. * */ static VALUE @@ -5830,7 +5835,7 @@ dup_obj_with_new_start(VALUE obj, double sg) * d1 = d0.new_start(Date::JULIAN) * d1.julian? # => true * - * See argument {start}[rdoc-ref:Date@Argument+start]. + * See argument {start}[rdoc-ref:calendars.rdoc@Argument+start]. 
* */ static VALUE @@ -8811,7 +8816,7 @@ time_to_datetime(VALUE self) ret = d_complex_new_internal(cDateTime, nth, 0, 0, sf, - of, DEFAULT_SG, + of, GREGORIAN, ry, m, d, h, min, s, HAVE_CIVIL | HAVE_TIME); @@ -8915,12 +8920,17 @@ date_to_datetime(VALUE self) static VALUE datetime_to_time(VALUE self) { - volatile VALUE dup = dup_obj(self); + get_d1(self); + + if (m_julian_p(dat)) { + self = d_lite_gregorian(self); + get_d1a(self); + dat = adat; + } + { VALUE t; - get_d1(dup); - t = rb_funcall(rb_cTime, rb_intern("new"), 7, @@ -9378,199 +9388,65 @@ Init_date_core(void) negative_inf = -INFINITY; /* - * date and datetime class - Tadayoshi Funaba 1998-2011 - * - * 'date' provides two classes: Date and DateTime. - * - * == Terms and Definitions - * - * Some terms and definitions are based on ISO 8601 and JIS X 0301. - * - * === Calendar Date - * - * The calendar date is a particular day of a calendar year, - * identified by its ordinal number within a calendar month within - * that year. - * - * In those classes, this is so-called "civil". - * - * === Ordinal Date - * - * The ordinal date is a particular day of a calendar year identified - * by its ordinal number within the year. - * - * In those classes, this is so-called "ordinal". - * - * === Week Date - * - * The week date is a date identified by calendar week and day numbers. - * - * The calendar week is a seven day period within a calendar year, - * starting on a Monday and identified by its ordinal number within - * the year; the first calendar week of the year is the one that - * includes the first Thursday of that year. In the Gregorian - * calendar, this is equivalent to the week which includes January 4. - * - * In those classes, this is so-called "commercial". + * \Class \Date provides methods for storing and manipulating + * calendar dates. 
* - * === Julian Day Number + * Consider using + * {class Time}[https://docs.ruby-lang.org/en/master/Time.html] + * instead of class \Date if: * - * The Julian day number is in elapsed days since noon (Greenwich Mean - * Time) on January 1, 4713 BCE (in the Julian calendar). + * - You need both dates and times; \Date handles only dates. + * - You need only Gregorian dates (and not Julian dates); + * see {Julian and Gregorian Calendars}[rdoc-ref:calendars.rdoc]. * - * In this document, the astronomical Julian day number is the same as - * the original Julian day number. And the chronological Julian day - * number is a variation of the Julian day number. Its days begin at - * midnight on local time. - * - * In this document, when the term "Julian day number" simply appears, - * it just refers to "chronological Julian day number", not the - * original. - * - * In those classes, those are so-called "ajd" and "jd". - * - * === Modified Julian Day Number - * - * The modified Julian day number is in elapsed days since midnight - * (Coordinated Universal Time) on November 17, 1858 CE (in the - * Gregorian calendar). - * - * In this document, the astronomical modified Julian day number is - * the same as the original modified Julian day number. And the - * chronological modified Julian day number is a variation of the - * modified Julian day number. Its days begin at midnight on local - * time. - * - * In this document, when the term "modified Julian day number" simply - * appears, it just refers to "chronological modified Julian day - * number", not the original. - * - * In those classes, those are so-called "amjd" and "mjd". - * - * == Date - * - * A subclass of Object that includes the Comparable module and - * easily handles date. - * - * A Date object is created with Date::new, Date::jd, Date::ordinal, - * Date::commercial, Date::parse, Date::strptime, Date::today, - * Time#to_date, etc. 
- * - * require 'date' - * - * Date.new(2001,2,3) - * #=> # - * Date.jd(2451944) - * #=> # - * Date.ordinal(2001,34) - * #=> # - * Date.commercial(2001,5,6) - * #=> # - * Date.parse('2001-02-03') - * #=> # - * Date.strptime('03-02-2001', '%d-%m-%Y') - * #=> # - * Time.new(2001,2,3).to_date - * #=> # - * - * All date objects are immutable; hence cannot modify themselves. - * - * The concept of a date object can be represented as a tuple - * of the day count, the offset and the day of calendar reform. - * - * The day count denotes the absolute position of a temporal - * dimension. The offset is relative adjustment, which determines - * decoded local time with the day count. The day of calendar - * reform denotes the start day of the new style. The old style - * of the West is the Julian calendar which was adopted by - * Caesar. The new style is the Gregorian calendar, which is the - * current civil calendar of many countries. - * - * The day count is virtually the astronomical Julian day number. - * The offset in this class is usually zero, and cannot be - * specified directly. - * - * A Date object can be created with an optional argument, - * the day of calendar reform as a Julian day number, which - * should be 2298874 to 2426355 or negative/positive infinity. - * The default value is +Date::ITALY+ (2299161=1582-10-15). - * See also sample/cal.rb. + * A \Date object, once created, is immutable, and cannot be modified. * - * $ ruby sample/cal.rb -c it 10 1582 - * October 1582 - * S M Tu W Th F S - * 1 2 3 4 15 16 - * 17 18 19 20 21 22 23 - * 24 25 26 27 28 29 30 - * 31 + * == Creating a \Date * - * $ ruby sample/cal.rb -c gb 9 1752 - * September 1752 - * S M Tu W Th F S - * 1 2 14 15 16 - * 17 18 19 20 21 22 23 - * 24 25 26 27 28 29 30 + * You can create a date for the current date, using Date.today: * - * A Date object has various methods. See each reference. 
+ * Date.today # => # * - * d = Date.parse('3rd Feb 2001') - * #=> # - * d.year #=> 2001 - * d.mon #=> 2 - * d.mday #=> 3 - * d.wday #=> 6 - * d += 1 #=> # - * d.strftime('%a %d %b %Y') #=> "Sun 04 Feb 2001" + * You can create a specific date from various combinations of arguments: * - * === Argument +start+ + * - Date.new takes integer year, month, and day-of-month: * - * Certain calculations and comparisons for a \Date object - * are affected by what the object considers to have been - * the changeover date from the - * {Julian}[https://en.wikipedia.org/wiki/Julian_calendar] to the - * {Gregorian}[https://en.wikipedia.org/wiki/Gregorian_calendar] - * calendar; - * this is set by argument +start+ when the object is created: + * Date.new(1999, 12, 31) # => # * - * - Dates before the changeover are considered to be Julian. - * - Dates after the changeover are considered to be Gregorian. + * - Date.ordinal takes integer year and day-of-year: * - * The value of the +start+ argument may be: + * Date.ordinal(1999, 365) # => # * - * - Date::ITALY (the default) - the changeover date is October 10, 1582: + * - Date.jd takes integer Julian day: * - * Date::ITALY # => 2299161 - * Date.jd(Date::ITALY).to_s # => "1582-10-15" + * Date.jd(2451544) # => # * - * # Julian base date, Julian result date. - * (Date.new(1581, 1, 1, Date::ITALY) + 365).to_s # => "1582-01-01" - * # Gregorian base date, Gregorian result date. - * (Date.new(1583, 1, 1, Date::ITALY) + 365).to_s # => "1584-01-01" + * - Date.commercial takes integer commercial data (year, week, day-of-week): * - * # Julian base date, Gregorian result date. - * (Date.new(1582, 1, 1, Date::ITALY) + 365).to_s # => "1583-01-11" - * # Gregorian base date, Julian result date. 
- * (Date.new(1583, 1, 1, Date::ITALY) - 365).to_s # => "1581-12-22" + * Date.commercial(1999, 52, 5) # => # * - * - Date::ENGLAND - the changeover date is September 9, 1752: + * - Date.parse takes a string, which it parses heuristically: * - * Date::ENGLAND # => 2361222 - * Date.jd(Date::ENGLAND).to_s # => "1752-09-14" + * Date.parse('1999-12-31') # => # + * Date.parse('31-12-1999') # => # + * Date.parse('1999-365') # => # + * Date.parse('1999-W52-5') # => # * - * # Julian base date, Julian result date. - * (Date.new(1751, 1, 1, Date::ENGLAND) + 365).to_s # => "1752-01-01" - * # Gregorian base date, Gregorian result date. - * (Date.new(1753, 1, 1, Date::ENGLAND) + 365).to_s # => "1754-01-01" + * - Date.strptime takes a date string and a format string, + * then parses the date string according to the format string: * - * # Julian base date, Gregorian result date. - * (Date.new(1752, 1, 1, Date::ENGLAND) + 365).to_s # => "1753-01-11" - * # Gregorian base date, Julian result date. - * (Date.new(1753, 1, 1, Date::ENGLAND) - 365).to_s # => "1751-12-22" + * Date.strptime('1999-12-31', '%Y-%m-%d') # => # + * Date.strptime('31-12-1999', '%d-%m-%Y') # => # + * Date.strptime('1999-365', '%Y-%j') # => # + * Date.strptime('1999-W52-5', '%G-W%V-%u') # => # + * Date.strptime('1999 52 5', '%Y %U %w') # => # + * Date.strptime('1999 52 5', '%Y %W %u') # => # + * Date.strptime('fri31dec99', '%a%d%b%y') # => # * - * - Date::JULIAN - no changeover date; all dates are Julian. - * - Date::GREGORIAN - no changeover date; all dates are Gregorian. 
+ * See also the specialized methods in + * {"Specialized Format Strings" in Formats for Dates and Times}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html#label-Specialized+Format+Strings] * - * === Argument +limit+ + * == Argument +limit+ * * Certain singleton methods in \Date that parse string arguments * also take optional keyword argument +limit+, diff --git a/ext/date/lib/date.rb b/ext/date/lib/date.rb index 88984d7bd295aa..18996114404173 100644 --- a/ext/date/lib/date.rb +++ b/ext/date/lib/date.rb @@ -4,7 +4,7 @@ require 'date_core' class Date - VERSION = '3.2.2' # :nodoc: + VERSION = "3.2.3" # :nodoc: # call-seq: # infinite? -> false diff --git a/ext/extmk.rb b/ext/extmk.rb index 1624ec9099eb40..40fc10ea1c4258 100755 --- a/ext/extmk.rb +++ b/ext/extmk.rb @@ -66,12 +66,17 @@ def system(*args) def atomic_write_open(filename) filename_new = filename + ".new.#$$" - open(filename_new, "wb") do |f| + clean = false + File.open(filename_new, "wbx") do |f| + clean = true yield f end if File.binread(filename_new) != (File.binread(filename) rescue nil) File.rename(filename_new, filename) - else + clean = false + end +ensure + if clean File.unlink(filename_new) end end diff --git a/ext/psych/lib/psych.rb b/ext/psych/lib/psych.rb index 42d79efb832d43..4a2ab58514cbc0 100644 --- a/ext/psych/lib/psych.rb +++ b/ext/psych/lib/psych.rb @@ -307,7 +307,7 @@ def self.unsafe_load yaml, filename: nil, fallback: false, symbolize_names: fals # A Psych::DisallowedClass exception will be raised if the yaml contains a # class that isn't in the +permitted_classes+ list. # - # A Psych::BadAlias exception will be raised if the yaml contains aliases + # A Psych::AliasesNotEnabled exception will be raised if the yaml contains aliases # but the +aliases+ keyword argument is set to false. 
# # +filename+ will be used in the exception message if any exception is raised diff --git a/ext/psych/lib/psych/exception.rb b/ext/psych/lib/psych/exception.rb index f473b95a3bbb6f..d7469a4b308a33 100644 --- a/ext/psych/lib/psych/exception.rb +++ b/ext/psych/lib/psych/exception.rb @@ -6,6 +6,20 @@ class Exception < RuntimeError class BadAlias < Exception end + # Subclasses `BadAlias` for backwards compatibility + class AliasesNotEnabled < BadAlias + def initialize + super "Alias parsing was not enabled. To enable it, pass `aliases: true` to `Psych::load` or `Psych::safe_load`." + end + end + + # Subclasses `BadAlias` for backwards compatibility + class AnchorNotDefined < BadAlias + def initialize anchor_name + super "An alias referenced an unknown anchor: #{anchor_name}" + end + end + class DisallowedClass < Exception def initialize action, klass_name super "Tried to #{action} unspecified class: #{klass_name}" diff --git a/ext/psych/lib/psych/visitors/to_ruby.rb b/ext/psych/lib/psych/visitors/to_ruby.rb index 935bc74f21c347..cce5daf3bbdbfe 100644 --- a/ext/psych/lib/psych/visitors/to_ruby.rb +++ b/ext/psych/lib/psych/visitors/to_ruby.rb @@ -323,7 +323,7 @@ def visit_Psych_Nodes_Stream o end def visit_Psych_Nodes_Alias o - @st.fetch(o.anchor) { raise BadAlias, "Unknown alias: #{o.anchor}" } + @st.fetch(o.anchor) { raise AnchorNotDefined, o.anchor } end private @@ -427,7 +427,7 @@ def resolve_class klassname class NoAliasRuby < ToRuby def visit_Psych_Nodes_Alias o - raise BadAlias, "Unknown alias: #{o.anchor}" + raise AliasesNotEnabled end end end diff --git a/ext/pty/extconf.rb b/ext/pty/extconf.rb index 038bdf4d2c337f..ba0c4286fd31d1 100644 --- a/ext/pty/extconf.rb +++ b/ext/pty/extconf.rb @@ -7,10 +7,12 @@ have_header("sys/stropts.h") have_func("setresuid") have_header("libutil.h") - have_header("util.h") # OpenBSD openpty have_header("pty.h") have_header("pwd.h") - util = have_library("util", "openpty") + if /openbsd/ =~ RUBY_PLATFORM + have_header("util.h") # 
OpenBSD openpty + util = have_library("util", "openpty") + end if have_func("posix_openpt") or (util or have_func("openpty")) or have_func("_getpty") or diff --git a/gc.c b/gc.c index 0fb47108357a48..adc6024eee32ab 100644 --- a/gc.c +++ b/gc.c @@ -4165,7 +4165,8 @@ objspace_each_objects_try(VALUE arg) uintptr_t pstart = (uintptr_t)page->start; uintptr_t pend = pstart + (page->total_slots * size_pool->slot_size); - if ((*data->callback)((void *)pstart, (void *)pend, size_pool->slot_size, data->data)) { + if (!__asan_region_is_poisoned((void *)pstart, pend - pstart) && + (*data->callback)((void *)pstart, (void *)pend, size_pool->slot_size, data->data)) { break; } diff --git a/gem_prelude.rb b/gem_prelude.rb index 94ada316aa8bff..f382021ca3d933 100644 --- a/gem_prelude.rb +++ b/gem_prelude.rb @@ -17,3 +17,10 @@ rescue LoadError warn "`did_you_mean' was not loaded." end if defined?(DidYouMean) + +begin + require 'syntax_suggest/core_ext' +rescue LoadError + warn "`syntax_suggest' was not loaded." 
+end if defined?(SyntaxSuggest) + diff --git a/gems/bundled_gems b/gems/bundled_gems index 4bf063abff3b34..be637125fb5b47 100644 --- a/gems/bundled_gems +++ b/gems/bundled_gems @@ -1,16 +1,16 @@ # gem-name version-to-bundle repository-url [optional-commit-hash-to-test-or-defaults-to-v-version] -minitest 5.16.2 https://github.com/seattlerb/minitest -power_assert 2.0.1 https://github.com/ruby/power_assert -rake 13.0.6 https://github.com/ruby/rake -test-unit 3.5.3 https://github.com/test-unit/test-unit -rexml 3.2.5 https://github.com/ruby/rexml -rss 0.2.9 https://github.com/ruby/rss -net-ftp 0.1.3 https://github.com/ruby/net-ftp -net-imap 0.2.3 https://github.com/ruby/net-imap -net-pop 0.1.1 https://github.com/ruby/net-pop -net-smtp 0.3.1 https://github.com/ruby/net-smtp -matrix 0.4.2 https://github.com/ruby/matrix -prime 0.1.2 https://github.com/ruby/prime -rbs 2.6.0 https://github.com/ruby/rbs 14abbbae8885a09a2ed82de2ef31d67a9c0a108d -typeprof 0.21.3 https://github.com/ruby/typeprof -debug 1.6.1 https://github.com/ruby/debug +minitest 5.16.3 https://github.com/seattlerb/minitest +power_assert 2.0.1 https://github.com/ruby/power_assert +rake 13.0.6 https://github.com/ruby/rake +test-unit 3.5.3 https://github.com/test-unit/test-unit +rexml 3.2.5 https://github.com/ruby/rexml +rss 0.2.9 https://github.com/ruby/rss +net-ftp 0.1.3 https://github.com/ruby/net-ftp +net-imap 0.2.3 https://github.com/ruby/net-imap +net-pop 0.1.1 https://github.com/ruby/net-pop +net-smtp 0.3.1 https://github.com/ruby/net-smtp +matrix 0.4.2 https://github.com/ruby/matrix +prime 0.1.2 https://github.com/ruby/prime +rbs 2.6.0 https://github.com/ruby/rbs 5ec9d53efe4bf0a97f33c3016aed430be135583a +typeprof 0.21.3 https://github.com/ruby/typeprof +debug 1.6.2 https://github.com/ruby/debug e7c37486ff9579251e5d25645b8d38ec96708f12 diff --git a/include/ruby/assert.h b/include/ruby/assert.h index c9f2c3fbef29e2..0c052363bcf79a 100644 --- a/include/ruby/assert.h +++ b/include/ruby/assert.h @@ -103,7 
+103,7 @@ # /* keep NDEBUG undefined */ #elif (RBIMPL_NDEBUG == 0) && (RBIMPL_RUBY_DEBUG == 0) -# /* The (*1) situation in avobe diagram. */ +# /* The (*1) situation in above diagram. */ # define RUBY_DEBUG 0 # define RUBY_NDEBUG 1 # define NDEBUG diff --git a/include/ruby/internal/abi.h b/include/ruby/internal/abi.h index e42a1777ff5eae..fe1977a9a181a0 100644 --- a/include/ruby/internal/abi.h +++ b/include/ruby/internal/abi.h @@ -1,6 +1,8 @@ #ifndef RUBY_ABI_H #define RUBY_ABI_H +#ifdef RUBY_ABI_VERSION /* should match the definition in config.h */ + /* This number represents Ruby's ABI version. * * In development Ruby, it should be bumped every time an ABI incompatible @@ -19,7 +21,7 @@ * - Backwards compatible refactors. * - Editing comments. * - * In released versions of Ruby, this number should not be changed since teeny + * In released versions of Ruby, this number is not defined since teeny * versions of Ruby should guarantee ABI compatibility. */ #define RUBY_ABI_VERSION 2 @@ -49,3 +51,5 @@ ruby_abi_version(void) #endif #endif + +#endif diff --git a/include/ruby/internal/arithmetic.h b/include/ruby/internal/arithmetic.h index 3f7840c3840afe..7ebb4a86f1ec6f 100644 --- a/include/ruby/internal/arithmetic.h +++ b/include/ruby/internal/arithmetic.h @@ -18,7 +18,8 @@ * Do not expect for instance `__VA_ARGS__` is always available. * We assume C99 for ruby itself but we don't assume languages of * extension libraries. They could be written in C++98. - * @brief Conversion between C's arithmtic types and Ruby's numeric types. + * @brief Conversion between C's arithmetic types and Ruby's numeric + * types. 
*/ #include "ruby/internal/arithmetic/char.h" #include "ruby/internal/arithmetic/double.h" diff --git a/include/ruby/internal/attr/nodiscard.h b/include/ruby/internal/attr/nodiscard.h index 087192a7a8b18a..c3ae1189424a96 100644 --- a/include/ruby/internal/attr/nodiscard.h +++ b/include/ruby/internal/attr/nodiscard.h @@ -26,7 +26,7 @@ /** * Wraps (or simulates) `[[nodiscard]]`. In C++ (at least since C++20) a - * nodiscard attribute can have a message why the result shall not be ignoed. + * nodiscard attribute can have a message why the result shall not be ignored. * However GCC attribute and SAL annotation cannot take them. */ #if RBIMPL_HAS_CPP_ATTRIBUTE(nodiscard) diff --git a/include/ruby/internal/encoding/ctype.h b/include/ruby/internal/encoding/ctype.h index 64aaf0a990898a..05c314aeb3d487 100644 --- a/include/ruby/internal/encoding/ctype.h +++ b/include/ruby/internal/encoding/ctype.h @@ -36,8 +36,8 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() * @param[in] p Pointer to a possibly-middle of a character. * @param[in] end End of the string. * @param[in] enc Encoding. - * @retval 0 It isn't. - * @retval otherwise It is. + * @retval false It isn't. + * @retval true It is. */ static inline bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc) @@ -53,11 +53,11 @@ rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc) * encoding. The "character type" here is a set of macros defined in onigmo.h, * like `ONIGENC_CTYPE_PUNCT`. * - * @param[in] c An `OnigCodePoint` value. - * @param[in] t An `OnigCtype` value. - * @param[in] enc A `rb_encoding*` value. - * @retval 1 `c` is of `t` in `enc`. - * @retval 0 Otherwise. + * @param[in] c An `OnigCodePoint` value. + * @param[in] t An `OnigCtype` value. + * @param[in] enc A `rb_encoding*` value. + * @retval true `c` is of `t` in `enc`. + * @retval false Otherwise. 
*/ static inline bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc) @@ -68,10 +68,10 @@ rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc) /** * Identical to rb_isascii(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 0 `c` is out of range of ASCII character set in `enc`. - * @retval 1 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval false `c` is out of range of ASCII character set in `enc`. + * @retval true Otherwise. * * @internal * @@ -87,10 +87,10 @@ rb_enc_isascii(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_isalpha(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "ALPHA". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "ALPHA". + * @retval false Otherwise. */ static inline bool rb_enc_isalpha(OnigCodePoint c, rb_encoding *enc) @@ -101,10 +101,10 @@ rb_enc_isalpha(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_islower(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "LOWER". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "LOWER". + * @retval false Otherwise. */ static inline bool rb_enc_islower(OnigCodePoint c, rb_encoding *enc) @@ -115,10 +115,10 @@ rb_enc_islower(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_isupper(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "UPPER". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "UPPER". + * @retval false Otherwise. 
*/ static inline bool rb_enc_isupper(OnigCodePoint c, rb_encoding *enc) @@ -126,13 +126,27 @@ rb_enc_isupper(OnigCodePoint c, rb_encoding *enc) return ONIGENC_IS_CODE_UPPER(enc, c); } +/** + * Identical to rb_iscntrl(), except it additionally takes an encoding. + * + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "CNTRL". + * @retval false Otherwise. + */ +static inline bool +rb_enc_iscntrl(OnigCodePoint c, rb_encoding *enc) +{ + return ONIGENC_IS_CODE_CNTRL(enc, c); +} + /** * Identical to rb_ispunct(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "PUNCT". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "PUNCT". + * @retval false Otherwise. */ static inline bool rb_enc_ispunct(OnigCodePoint c, rb_encoding *enc) @@ -143,10 +157,10 @@ rb_enc_ispunct(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_isalnum(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "ANUM". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "ANUM". + * @retval false Otherwise. */ static inline bool rb_enc_isalnum(OnigCodePoint c, rb_encoding *enc) @@ -157,10 +171,10 @@ rb_enc_isalnum(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_isprint(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "PRINT". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "PRINT". + * @retval false Otherwise. 
*/ static inline bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc) @@ -171,10 +185,10 @@ rb_enc_isprint(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_isspace(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "PRINT". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "PRINT". + * @retval false Otherwise. */ static inline bool rb_enc_isspace(OnigCodePoint c, rb_encoding *enc) @@ -185,10 +199,10 @@ rb_enc_isspace(OnigCodePoint c, rb_encoding *enc) /** * Identical to rb_isdigit(), except it additionally takes an encoding. * - * @param[in] c A code point. - * @param[in] enc An encoding. - * @retval 1 `enc` classifies `c` as "DIGIT". - * @retval 0 Otherwise. + * @param[in] c A code point. + * @param[in] enc An encoding. + * @retval true `enc` classifies `c` as "DIGIT". + * @retval false Otherwise. */ static inline bool rb_enc_isdigit(OnigCodePoint c, rb_encoding *enc) @@ -235,6 +249,7 @@ RBIMPL_SYMBOL_EXPORT_END() #define rb_enc_isdigit rb_enc_isdigit #define rb_enc_islower rb_enc_islower #define rb_enc_isprint rb_enc_isprint +#define rb_enc_iscntrl rb_enc_iscntrl #define rb_enc_ispunct rb_enc_ispunct #define rb_enc_isspace rb_enc_isspace #define rb_enc_isupper rb_enc_isupper diff --git a/include/ruby/internal/encoding/encoding.h b/include/ruby/internal/encoding/encoding.h index 22deb8f8c908e5..4748ca806b17ca 100644 --- a/include/ruby/internal/encoding/encoding.h +++ b/include/ruby/internal/encoding/encoding.h @@ -643,10 +643,12 @@ rb_enc_code_to_mbclen(int c, rb_encoding *enc) * Identical to rb_enc_uint_chr(), except it writes back to the passed buffer * instead of allocating one. * - * @param[in] c Code point. - * @param[out] buf Return buffer. - * @param[in] enc Target encoding scheme. - * @post `c` is encoded according to `enc`, then written to `buf`. 
+ * @param[in] c Code point. + * @param[out] buf Return buffer. + * @param[in] enc Target encoding scheme. + * @retval <= 0 `c` is invalid in `enc`. + * @return otherwise Number of bytes written to `buf`. + * @post `c` is encoded according to `enc`, then written to `buf`. * * @internal * diff --git a/include/ruby/internal/encoding/transcode.h b/include/ruby/internal/encoding/transcode.h index 60c96a41c9f9bb..7f26d2eae98b03 100644 --- a/include/ruby/internal/encoding/transcode.h +++ b/include/ruby/internal/encoding/transcode.h @@ -476,16 +476,16 @@ enum ruby_econv_flag_type { RUBY_ECONV_UNDEF_HEX_CHARREF = 0x00000030, /** Decorators are there. */ - RUBY_ECONV_DECORATOR_MASK = 0x0000ff00, + RUBY_ECONV_DECORATOR_MASK = 0x0001ff00, /** Newline converters are there. */ - RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00003f00, + RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00007f00, /** (Unclear; seems unused). */ RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK = 0x00000f00, /** (Unclear; seems unused). */ - RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00003000, + RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00007000, /** Universal newline mode. */ RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR = 0x00000100, @@ -496,11 +496,14 @@ enum ruby_econv_flag_type { /** CRLF to CR conversion shall happen. */ RUBY_ECONV_CR_NEWLINE_DECORATOR = 0x00002000, + /** CRLF to LF conversion shall happen. */ + RUBY_ECONV_LF_NEWLINE_DECORATOR = 0x00004000, + /** Texts shall be XML-escaped. */ - RUBY_ECONV_XML_TEXT_DECORATOR = 0x00004000, + RUBY_ECONV_XML_TEXT_DECORATOR = 0x00008000, /** Texts shall be AttrValue escaped */ - RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00008000, + RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00010000, /** (Unclear; seems unused). 
*/ RUBY_ECONV_STATEFUL_DECORATOR_MASK = 0x00f00000, @@ -529,6 +532,7 @@ enum ruby_econv_flag_type { #define ECONV_UNIVERSAL_NEWLINE_DECORATOR RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR} */ #define ECONV_CRLF_NEWLINE_DECORATOR RUBY_ECONV_CRLF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CRLF_NEWLINE_DECORATOR} */ #define ECONV_CR_NEWLINE_DECORATOR RUBY_ECONV_CR_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CR_NEWLINE_DECORATOR} */ +#define ECONV_LF_NEWLINE_DECORATOR RUBY_ECONV_LF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_LF_NEWLINE_DECORATOR} */ #define ECONV_XML_TEXT_DECORATOR RUBY_ECONV_XML_TEXT_DECORATOR /**< @old{RUBY_ECONV_XML_TEXT_DECORATOR} */ #define ECONV_XML_ATTR_CONTENT_DECORATOR RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR} */ #define ECONV_STATEFUL_DECORATOR_MASK RUBY_ECONV_STATEFUL_DECORATOR_MASK /**< @old{RUBY_ECONV_STATEFUL_DECORATOR_MASK} */ @@ -543,10 +547,10 @@ enum ruby_econv_flag_type { */ /** Indicates the input is a part of much larger one. */ - RUBY_ECONV_PARTIAL_INPUT = 0x00010000, + RUBY_ECONV_PARTIAL_INPUT = 0x00020000, /** Instructs the converter to stop after output. */ - RUBY_ECONV_AFTER_OUTPUT = 0x00020000, + RUBY_ECONV_AFTER_OUTPUT = 0x00040000, #define ECONV_PARTIAL_INPUT RUBY_ECONV_PARTIAL_INPUT /**< @old{RUBY_ECONV_PARTIAL_INPUT} */ #define ECONV_AFTER_OUTPUT RUBY_ECONV_AFTER_OUTPUT /**< @old{RUBY_ECONV_AFTER_OUTPUT} */ diff --git a/include/ruby/internal/intern/select/posix.h b/include/ruby/internal/intern/select/posix.h index 5f828e66e2cf14..0a9b0b2e51e688 100644 --- a/include/ruby/internal/intern/select/posix.h +++ b/include/ruby/internal/intern/select/posix.h @@ -136,7 +136,7 @@ rb_fd_max(const rb_fdset_t *f) } /** @cond INTERNAL_MACRO */ -/* :FIXME: What are these? They don't exist for shibling implementations. */ +/* :FIXME: What are these? They don't exist for sibling implementations. 
*/ #define rb_fd_init_copy(d, s) (*(d) = *(s)) #define rb_fd_term(f) ((void)(f)) /** @endcond */ diff --git a/insns.def b/insns.def index 06ca31a85010db..15c4734b8b7257 100644 --- a/insns.def +++ b/insns.def @@ -597,6 +597,25 @@ swap /* none */ } +/* reverse stack top N order. */ +DEFINE_INSN +opt_reverse +(rb_num_t n) +(...) +(...) +// attr rb_snum_t sp_inc = 0; +{ + rb_num_t i; + VALUE *sp = STACK_ADDR_FROM_TOP(n); + + for (i=0; ilimit == 0) + rb_raise(rb_eArgError, "invalid limit: 0 for foreach"); while (!NIL_P(str = rb_io_getline_1(arg->rs, arg->limit, arg->chomp, arg->io))) { rb_lastline_set(str); rb_yield(str); @@ -11776,12 +11778,16 @@ seek_before_access(VALUE argp) * IO.read('| cat t.txt') * # => "First line\nSecond line\n\nThird line\nFourth line\n" * - * With only argument +path+ given, reads and returns the entire content + * With only argument +path+ given, reads in text mode and returns the entire content * of the file at the given path: * * IO.read('t.txt') * # => "First line\nSecond line\n\nThird line\nFourth line\n" * + * On Windows, text mode can terminate reading and leave bytes in the file + * unread when encountering certain special bytes. Consider using + * IO.binread if all bytes in the file should be read. + * * For both forms, command and path, the remaining arguments are the same. * * With argument +length+, returns +length+ bytes if available: @@ -14500,9 +14506,11 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * Either of the following may be suffixed to any of the string read/write modes above: * * - 't': Text data; sets the default external encoding to +Encoding::UTF_8+; - * on Windows, enables conversion between EOL and CRLF. + * on Windows, enables conversion between EOL and CRLF and enables interpreting +0x1A+ + * as an end-of-file marker. * - 'b': Binary data; sets the default external encoding to +Encoding::ASCII_8BIT+; - * on Windows, suppresses conversion between EOL and CRLF. 
+ * on Windows, suppresses conversion between EOL and CRLF and disables interpreting +0x1A+ + * as an end-of-file marker. * * If neither is given, the stream defaults to text data. * diff --git a/iseq.c b/iseq.c index 3d40b88a0de1bc..4a2c9a33ee68ee 100644 --- a/iseq.c +++ b/iseq.c @@ -175,7 +175,7 @@ rb_iseq_free(const rb_iseq_t *iseq) iseq_clear_ic_references(iseq); struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); mjit_free_iseq(iseq); /* Notify MJIT */ -#if YJIT_BUILD +#if USE_YJIT rb_yjit_iseq_free(body->yjit_payload); #endif ruby_xfree((void *)body->iseq_encoded); @@ -438,7 +438,7 @@ rb_iseq_update_references(rb_iseq_t *iseq) #if USE_MJIT mjit_update_references(iseq); #endif -#if YJIT_BUILD +#if USE_YJIT rb_yjit_iseq_update_references(body->yjit_payload); #endif } @@ -526,7 +526,7 @@ rb_iseq_mark(const rb_iseq_t *iseq) #if USE_MJIT mjit_mark_cc_entries(body); #endif -#if YJIT_BUILD +#if USE_YJIT rb_yjit_iseq_mark(body->yjit_payload); #endif } @@ -2411,7 +2411,7 @@ rb_iseq_disasm_recursive(const rb_iseq_t *iseq, VALUE indent) rb_str_cat2(str, "== disasm: "); rb_str_append(str, iseq_inspect(iseq)); - rb_str_catf(str, " (catch: %s)", body->catch_except_p ? "TRUE" : "FALSE"); + rb_str_catf(str, " (catch: %s)", body->catch_except_p ? 
"true" : "false"); if ((l = RSTRING_LEN(str) - indent_len) < header_minlen) { rb_str_modify_expand(str, header_minlen - l); memset(RSTRING_END(str), '=', header_minlen - l); diff --git a/lib/bundler.rb b/lib/bundler.rb index 7df22ab3a54813..24785ef5ebd07b 100644 --- a/lib/bundler.rb +++ b/lib/bundler.rb @@ -53,13 +53,12 @@ module Bundler autoload :GemHelpers, File.expand_path("bundler/gem_helpers", __dir__) autoload :GemVersionPromoter, File.expand_path("bundler/gem_version_promoter", __dir__) autoload :Graph, File.expand_path("bundler/graph", __dir__) - autoload :IncompleteSpecification, File.expand_path("bundler/incomplete_specification", __dir__) autoload :Index, File.expand_path("bundler/index", __dir__) autoload :Injector, File.expand_path("bundler/injector", __dir__) autoload :Installer, File.expand_path("bundler/installer", __dir__) autoload :LazySpecification, File.expand_path("bundler/lazy_specification", __dir__) autoload :LockfileParser, File.expand_path("bundler/lockfile_parser", __dir__) - autoload :MatchPlatform, File.expand_path("bundler/match_platform", __dir__) + autoload :MatchRemoteMetadata, File.expand_path("bundler/match_remote_metadata", __dir__) autoload :ProcessLock, File.expand_path("bundler/process_lock", __dir__) autoload :RemoteSpecification, File.expand_path("bundler/remote_specification", __dir__) autoload :Resolver, File.expand_path("bundler/resolver", __dir__) @@ -332,9 +331,9 @@ def rm_rf(path) FileUtils.remove_entry_secure(path) if path && File.exist?(path) rescue ArgumentError message = < "P", :type => :string, :banner => "Gem trust policy (like gem install -P). Must be one of " + - Bundler.rubygems.security_policy_keys.join("|") + Bundler.rubygems.security_policy_keys.join("|") method_option "without", :type => :array, :banner => "Exclude gems that are part of the specified named group." 
method_option "with", :type => :array, :banner => diff --git a/lib/bundler/cli/platform.rb b/lib/bundler/cli/platform.rb index 16d4e0145add4c..73da8cf80e45ef 100644 --- a/lib/bundler/cli/platform.rb +++ b/lib/bundler/cli/platform.rb @@ -9,7 +9,7 @@ def initialize(options) def run platforms, ruby_version = Bundler.ui.silence do - locked_ruby_version = Bundler.locked_gems && Bundler.locked_gems.ruby_version.gsub(/p\d+\Z/, "") + locked_ruby_version = Bundler.locked_gems && Bundler.locked_gems.ruby_version&.gsub(/p\d+\Z/, "") gemfile_ruby_version = Bundler.definition.ruby_version && Bundler.definition.ruby_version.single_version_string [Bundler.definition.platforms.map {|p| "* #{p}" }, locked_ruby_version || gemfile_ruby_version] diff --git a/lib/bundler/current_ruby.rb b/lib/bundler/current_ruby.rb index 36f26b7ab43e8e..f9987c4da8713f 100644 --- a/lib/bundler/current_ruby.rb +++ b/lib/bundler/current_ruby.rb @@ -36,17 +36,18 @@ class CurrentRuby rbx ruby truffleruby + windows x64_mingw ].freeze def ruby? return true if Bundler::GemHelpers.generic_local_platform == Gem::Platform::RUBY - !mswin? && (RUBY_ENGINE == "ruby" || RUBY_ENGINE == "rbx" || RUBY_ENGINE == "maglev" || RUBY_ENGINE == "truffleruby") + !windows? && (RUBY_ENGINE == "ruby" || RUBY_ENGINE == "rbx" || RUBY_ENGINE == "maglev" || RUBY_ENGINE == "truffleruby") end def mri? - !mswin? && RUBY_ENGINE == "ruby" + !windows? && RUBY_ENGINE == "ruby" end def rbx? @@ -65,16 +66,24 @@ def truffleruby? RUBY_ENGINE == "truffleruby" end - def mswin? + def windows? Gem.win_platform? end + def mswin? + # For backwards compatibility + windows? + + # TODO: This should correctly be: + # windows? && Bundler.local_platform != Gem::Platform::RUBY && Bundler.local_platform.os == "mswin32" && Bundler.local_platform.cpu == "x86" + end + def mswin64? - Gem.win_platform? && Bundler.local_platform != Gem::Platform::RUBY && Bundler.local_platform.os == "mswin64" && Bundler.local_platform.cpu == "x64" + windows? 
&& Bundler.local_platform != Gem::Platform::RUBY && Bundler.local_platform.os == "mswin64" && Bundler.local_platform.cpu == "x64" end def mingw? - Gem.win_platform? && Bundler.local_platform != Gem::Platform::RUBY && Bundler.local_platform.os == "mingw32" && Bundler.local_platform.cpu != "x64" + windows? && Bundler.local_platform != Gem::Platform::RUBY && Bundler.local_platform.os == "mingw32" && Bundler.local_platform.cpu != "x64" end def x64_mingw? diff --git a/lib/bundler/definition.rb b/lib/bundler/definition.rb index 34c23796e8fb78..8bd9e11f32bfb2 100644 --- a/lib/bundler/definition.rb +++ b/lib/bundler/definition.rb @@ -145,8 +145,6 @@ def initialize(lockfile, dependencies, sources, unlock, ruby_version = nil, opti @dependency_changes = converge_dependencies @local_changes = converge_locals - @reresolve = nil - @requires = compute_requires end @@ -218,6 +216,7 @@ def missing_specs? true rescue BundlerError => e @resolve = nil + @resolver = nil @specs = nil @gem_version_promoter = nil @@ -288,7 +287,7 @@ def resolve end else Bundler.ui.debug("Found changes from the lockfile, re-resolving dependencies because #{change_reason}") - @reresolve = reresolve + resolver.start(expanded_dependencies) end end @@ -482,10 +481,16 @@ def unlocking? 
private - def reresolve - last_resolve = converge_locked_specs - expanded_dependencies = expand_dependencies(dependencies + metadata_dependencies, true) - Resolver.resolve(expanded_dependencies, source_requirements, last_resolve, gem_version_promoter, additional_base_requirements_for_resolve, platforms) + def resolver + @resolver ||= begin + last_resolve = converge_locked_specs + remove_ruby_from_platforms_if_necessary!(dependencies) + Resolver.new(source_requirements, last_resolve, gem_version_promoter, additional_base_requirements_for_resolve, platforms) + end + end + + def expanded_dependencies + @expanded_dependencies ||= expand_dependencies(dependencies + metadata_dependencies, true) end def filter_specs(specs, deps) @@ -513,15 +518,13 @@ def materialize(dependencies) raise GemNotFound, "Could not find #{missing_specs_list.join(" nor ")}" end - if @reresolve.nil? + loop do incomplete_specs = specs.incomplete_specs + break if incomplete_specs.empty? - if incomplete_specs.any? - Bundler.ui.debug("The lockfile does not have all gems needed for the current platform though, Bundler will still re-resolve dependencies") - @unlock[:gems].concat(incomplete_specs.map(&:name)) - @resolve = reresolve - specs = resolve.materialize(dependencies) - end + Bundler.ui.debug("The lockfile does not have all gems needed for the current platform though, Bundler will still re-resolve dependencies") + @resolve = resolver.start(expanded_dependencies, :exclude_specs => incomplete_specs) + specs = resolve.materialize(dependencies) end bundler = sources.metadata_source.specs.search(Gem::Dependency.new("bundler", VERSION)).last @@ -714,7 +717,9 @@ def converge_dependencies # commonly happen if the Gemfile has changed since the lockfile was last # generated def converge_locked_specs - resolve = converge_specs(@locked_specs) + converged = converge_specs(@locked_specs) + + resolve = SpecSet.new(converged.reject {|s| @unlock[:gems].include?(s.name) }) diff = nil @@ -738,16 +743,24 @@ def 
converge_specs(specs) specs[dep].any? {|s| s.satisfies?(dep) && (!dep.source || s.source.include?(dep.source)) } end + @specs_that_changed_sources = [] + specs.each do |s| - # Replace the locked dependency's source with the equivalent source from the Gemfile dep = @dependencies.find {|d| s.satisfies?(d) } - s.source = (dep && dep.source) || sources.get(s.source) || sources.default_source + # Replace the locked dependency's source with the equivalent source from the Gemfile + s.source = if dep && dep.source + gemfile_source = dep.source + lockfile_source = s.source - next if @unlock[:sources].include?(s.source.name) + @specs_that_changed_sources << s if gemfile_source != lockfile_source + + gemfile_source + else + sources.get_with_fallback(s.source) + end - # If the spec is from a path source and it doesn't exist anymore - # then we unlock it. + next if @unlock[:sources].include?(s.source.name) # Path sources have special logic if s.source.instance_of?(Source::Path) || s.source.instance_of?(Source::Gemspec) @@ -779,7 +792,7 @@ def converge_specs(specs) end end - SpecSet.new(filter_specs(converged, deps).reject {|s| @unlock[:gems].include?(s.name) }) + filter_specs(converged, deps) end def metadata_dependencies @@ -823,9 +836,18 @@ def source_requirements end source_requirements[:default_bundler] = source_requirements["bundler"] || sources.default_source source_requirements["bundler"] = sources.metadata_source # needs to come last to override + verify_changed_sources! source_requirements end + def verify_changed_sources! + @specs_that_changed_sources.each do |s| + if s.source.specs.search(s.name).empty? + raise GemNotFound, "Could not find gem '#{s.name}' in #{s.source}" + end + end + end + def requested_groups values = groups - Bundler.settings[:without] - @optional_groups + Bundler.settings[:with] values &= Bundler.settings[:only] unless Bundler.settings[:only].empty? 
@@ -859,10 +881,19 @@ def compute_requires def additional_base_requirements_for_resolve return [] unless @locked_gems && unlocking? && !sources.expired_sources?(@locked_gems.sources) converge_specs(@originally_locked_specs).map do |locked_spec| - name = locked_spec.name - dep = Dependency.new(name, ">= #{locked_spec.version}") - DepProxy.get_proxy(dep, locked_spec.platform) - end + Dependency.new(locked_spec.name, ">= #{locked_spec.version}") + end.uniq + end + + def remove_ruby_from_platforms_if_necessary!(dependencies) + return if Bundler.frozen_bundle? || + Bundler.local_platform == Gem::Platform::RUBY || + !platforms.include?(Gem::Platform::RUBY) || + (@new_platform && platforms.last == Gem::Platform::RUBY) || + !@originally_locked_specs.incomplete_ruby_specs?(expand_dependencies(dependencies)) + + remove_platform(Gem::Platform::RUBY) + add_current_platform end def source_map diff --git a/lib/bundler/dependency.rb b/lib/bundler/dependency.rb index 7f94079e096f26..49ce23ec888c15 100644 --- a/lib/bundler/dependency.rb +++ b/lib/bundler/dependency.rb @@ -42,6 +42,7 @@ class Dependency < Gem::Dependency :jruby => Gem::Platform::JAVA, :jruby_18 => Gem::Platform::JAVA, :jruby_19 => Gem::Platform::JAVA, + :windows => Gem::Platform::WINDOWS, :mswin => Gem::Platform::MSWIN, :mswin_18 => Gem::Platform::MSWIN, :mswin_19 => Gem::Platform::MSWIN, @@ -151,7 +152,7 @@ def current_platform? def to_lock out = super out << "!" if source - out << "\n" + out end def specific? 
diff --git a/lib/bundler/endpoint_specification.rb b/lib/bundler/endpoint_specification.rb index e9aa366b41d99e..ea197328ba05c8 100644 --- a/lib/bundler/endpoint_specification.rb +++ b/lib/bundler/endpoint_specification.rb @@ -3,7 +3,7 @@ module Bundler # used for Creating Specifications from the Gemcutter Endpoint class EndpointSpecification < Gem::Specification - include MatchPlatform + include MatchRemoteMetadata attr_reader :name, :version, :platform, :checksum attr_accessor :source, :remote, :dependencies @@ -22,17 +22,6 @@ def initialize(name, version, platform, spec_fetcher, dependencies, metadata = n parse_metadata(metadata) end - def required_ruby_version - @required_ruby_version ||= _remote_specification.required_ruby_version - end - - # A fallback is included because the original version of the specification - # API didn't include that field, so some marshalled specs in the index have it - # set to +nil+. - def required_rubygems_version - @required_rubygems_version ||= _remote_specification.required_rubygems_version || Gem::Requirement.default - end - def fetch_platform @platform end diff --git a/lib/bundler/fetcher.rb b/lib/bundler/fetcher.rb index e9d5dd505ca4c4..e399a50cfd98ba 100644 --- a/lib/bundler/fetcher.rb +++ b/lib/bundler/fetcher.rb @@ -240,8 +240,8 @@ def cis def connection @connection ||= begin needs_ssl = remote_uri.scheme == "https" || - Bundler.settings[:ssl_verify_mode] || - Bundler.settings[:ssl_client_cert] + Bundler.settings[:ssl_verify_mode] || + Bundler.settings[:ssl_client_cert] raise SSLError if needs_ssl && !defined?(OpenSSL::SSL) con = PersistentHTTP.new :name => "bundler", :proxy => :ENV @@ -256,8 +256,8 @@ def connection end ssl_client_cert = Bundler.settings[:ssl_client_cert] || - (Gem.configuration.ssl_client_cert if - Gem.configuration.respond_to?(:ssl_client_cert)) + (Gem.configuration.ssl_client_cert if + Gem.configuration.respond_to?(:ssl_client_cert)) if ssl_client_cert pem = File.read(ssl_client_cert) con.cert = 
OpenSSL::X509::Certificate.new(pem) @@ -288,8 +288,8 @@ def gemspec_cached_path(spec_file_name) def bundler_cert_store store = OpenSSL::X509::Store.new ssl_ca_cert = Bundler.settings[:ssl_ca_cert] || - (Gem.configuration.ssl_ca_cert if - Gem.configuration.respond_to?(:ssl_ca_cert)) + (Gem.configuration.ssl_ca_cert if + Gem.configuration.respond_to?(:ssl_ca_cert)) if ssl_ca_cert if File.directory? ssl_ca_cert store.add_path ssl_ca_cert diff --git a/lib/bundler/gem_version_promoter.rb b/lib/bundler/gem_version_promoter.rb index 3cce3f2139da89..ddf7446dd21f3e 100644 --- a/lib/bundler/gem_version_promoter.rb +++ b/lib/bundler/gem_version_promoter.rb @@ -88,6 +88,10 @@ def sort_versions(dep, spec_groups) end end + def reset + @sort_versions = {} + end + # @return [bool] Convenience method for testing value of level variable. def major? level == :major diff --git a/lib/bundler/incomplete_specification.rb b/lib/bundler/incomplete_specification.rb deleted file mode 100644 index 6d0b9b901c061c..00000000000000 --- a/lib/bundler/incomplete_specification.rb +++ /dev/null @@ -1,12 +0,0 @@ -# frozen_string_literal: true - -module Bundler - class IncompleteSpecification - attr_reader :name, :platform - - def initialize(name, platform) - @name = name - @platform = platform - end - end -end diff --git a/lib/bundler/inline.rb b/lib/bundler/inline.rb index 8ef580f1f00ac4..25e055fbe4cc49 100644 --- a/lib/bundler/inline.rb +++ b/lib/bundler/inline.rb @@ -54,7 +54,7 @@ def definition.lock(*); end Bundler.ui = install ? ui : Bundler::UI::Silent.new if install || definition.missing_specs? 
- Bundler.settings.temporary(:inline => true) do + Bundler.settings.temporary(:inline => true, :no_install => false) do installer = Bundler::Installer.install(Bundler.root, definition, :system => true) installer.post_install_messages.each do |name, message| Bundler.ui.info "Post-install message from #{name}:\n#{message}" diff --git a/lib/bundler/installer.rb b/lib/bundler/installer.rb index b7b0e36dfd85fc..1b17de5d4e2d83 100644 --- a/lib/bundler/installer.rb +++ b/lib/bundler/installer.rb @@ -238,19 +238,14 @@ def load_plugins end def ensure_specs_are_compatible! - system_ruby = Bundler::RubyVersion.system - rubygems_version = Bundler.rubygems.version @definition.specs.each do |spec| - if required_ruby_version = spec.required_ruby_version - unless required_ruby_version.satisfied_by?(system_ruby.gem_version) - raise InstallError, "#{spec.full_name} requires ruby version #{required_ruby_version}, " \ - "which is incompatible with the current version, #{system_ruby}" - end + unless spec.matches_current_ruby? + raise InstallError, "#{spec.full_name} requires ruby version #{spec.required_ruby_version}, " \ + "which is incompatible with the current version, #{Gem.ruby_version}" end - next unless required_rubygems_version = spec.required_rubygems_version - unless required_rubygems_version.satisfied_by?(rubygems_version) - raise InstallError, "#{spec.full_name} requires rubygems version #{required_rubygems_version}, " \ - "which is incompatible with the current version, #{rubygems_version}" + unless spec.matches_current_rubygems? 
+ raise InstallError, "#{spec.full_name} requires rubygems version #{spec.required_rubygems_version}, " \ + "which is incompatible with the current version, #{Gem.rubygems_version}" end end end diff --git a/lib/bundler/lazy_specification.rb b/lib/bundler/lazy_specification.rb index 9f75c7bab263a5..ec141cfa27da29 100644 --- a/lib/bundler/lazy_specification.rb +++ b/lib/bundler/lazy_specification.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require_relative "match_platform" - module Bundler class LazySpecification include MatchPlatform @@ -93,20 +91,12 @@ def materialize_for_installation __materialize__(candidates) end - def materialize_for_resolution - return self unless Gem::Platform.match_spec?(self) - - candidates = source.specs.search(self) - - __materialize__(candidates) - end - def __materialize__(candidates) @specification = begin search = candidates.reverse.find do |spec| spec.is_a?(StubSpecification) || - (spec.required_ruby_version.satisfied_by?(Gem.ruby_version) && - spec.required_rubygems_version.satisfied_by?(Gem.rubygems_version)) + (spec.matches_current_ruby? && + spec.matches_current_rubygems?) end if search.nil? && Bundler.frozen_bundle? search = candidates.last diff --git a/lib/bundler/lockfile_generator.rb b/lib/bundler/lockfile_generator.rb index 0578a93fdc4513..23413dbdd6db21 100644 --- a/lib/bundler/lockfile_generator.rb +++ b/lib/bundler/lockfile_generator.rb @@ -60,7 +60,7 @@ def add_dependencies handled = [] definition.dependencies.sort_by(&:to_s).each do |dep| next if handled.include?(dep.name) - out << dep.to_lock + out << dep.to_lock << "\n" handled << dep.name end end diff --git a/lib/bundler/match_metadata.rb b/lib/bundler/match_metadata.rb new file mode 100644 index 00000000000000..499036ca93efcf --- /dev/null +++ b/lib/bundler/match_metadata.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +module Bundler + module MatchMetadata + def matches_current_ruby? 
+ @required_ruby_version.satisfied_by?(Gem.ruby_version) + end + + def matches_current_rubygems? + @required_rubygems_version.satisfied_by?(Gem.rubygems_version) + end + end +end diff --git a/lib/bundler/match_remote_metadata.rb b/lib/bundler/match_remote_metadata.rb new file mode 100644 index 00000000000000..e1b2f4d0e277f9 --- /dev/null +++ b/lib/bundler/match_remote_metadata.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +module Bundler + module FetchMetadata + def matches_current_ruby? + @required_ruby_version ||= _remote_specification.required_ruby_version + + super + end + + def matches_current_rubygems? + # A fallback is included because the original version of the specification + # API didn't include that field, so some marshalled specs in the index have it + # set to +nil+. + @required_rubygems_version ||= _remote_specification.required_rubygems_version || Gem::Requirement.default + + super + end + end + + module MatchRemoteMetadata + include MatchMetadata + + prepend FetchMetadata + end +end diff --git a/lib/bundler/remote_specification.rb b/lib/bundler/remote_specification.rb index b5d7e3a6c9a61b..601957746f3af6 100644 --- a/lib/bundler/remote_specification.rb +++ b/lib/bundler/remote_specification.rb @@ -6,6 +6,7 @@ module Bundler # be seeded with what we're given from the source's abbreviated index - the # full specification will only be fetched when necessary. class RemoteSpecification + include MatchRemoteMetadata include MatchPlatform include Comparable @@ -28,13 +29,6 @@ def fetch_platform @platform = _remote_specification.platform end - # A fallback is included because the original version of the specification - # API didn't include that field, so some marshalled specs in the index have it - # set to +nil+. 
- def required_rubygems_version - @required_rubygems_version ||= _remote_specification.required_rubygems_version || Gem::Requirement.default - end - def full_name if @original_platform == Gem::Platform::RUBY "#{@name}-#{@version}" diff --git a/lib/bundler/resolver.rb b/lib/bundler/resolver.rb index 40bc247b32253d..a74af45027f935 100644 --- a/lib/bundler/resolver.rb +++ b/lib/bundler/resolver.rb @@ -19,40 +19,48 @@ class Resolver # collection of gemspecs is returned. Otherwise, nil is returned. def self.resolve(requirements, source_requirements = {}, base = [], gem_version_promoter = GemVersionPromoter.new, additional_base_requirements = [], platforms = nil) base = SpecSet.new(base) unless base.is_a?(SpecSet) - metadata_requirements, regular_requirements = requirements.partition {|dep| dep.name.end_with?("\0") } - resolver = new(source_requirements, base, gem_version_promoter, additional_base_requirements, platforms, metadata_requirements) - result = resolver.start(requirements) - SpecSet.new(SpecSet.new(result).for(regular_requirements, false, platforms)) + resolver = new(source_requirements, base, gem_version_promoter, additional_base_requirements, platforms) + resolver.start(requirements) end - def initialize(source_requirements, base, gem_version_promoter, additional_base_requirements, platforms, metadata_requirements) + def initialize(source_requirements, base, gem_version_promoter, additional_base_requirements, platforms) @source_requirements = source_requirements - @metadata_requirements = metadata_requirements + @base = base @resolver = Molinillo::Resolver.new(self, self) + @results_for = {} @search_for = {} - @base_dg = Molinillo::DependencyGraph.new - @base = base.materialized_for_resolution do |ls| - dep = Dependency.new(ls.name, ls.version) - @base_dg.add_vertex(ls.name, DepProxy.get_proxy(dep, ls.platform), true) - end - additional_base_requirements.each {|d| @base_dg.add_vertex(d.name, d) } - @platforms = platforms.reject {|p| p != Gem::Platform::RUBY 
&& (platforms - [p]).any? {|pl| generic(pl) == p } } + @additional_base_requirements = additional_base_requirements + @platforms = platforms @resolving_only_for_ruby = platforms == [Gem::Platform::RUBY] @gem_version_promoter = gem_version_promoter @use_gvp = Bundler.feature_flag.use_gem_version_promoter_for_major_updates? || !@gem_version_promoter.major? end - def start(requirements) + def start(requirements, exclude_specs: []) + @metadata_requirements, regular_requirements = requirements.partition {|dep| dep.name.end_with?("\0") } + + exclude_specs.each do |spec| + remove_from_candidates(spec) + end + + @base_dg = Molinillo::DependencyGraph.new + @base.each do |ls| + dep = Dependency.new(ls.name, ls.version) + @base_dg.add_vertex(ls.name, DepProxy.get_proxy(dep, ls.platform), true) + end + @additional_base_requirements.each {|d| @base_dg.add_vertex(d.name, d) } + @gem_version_promoter.prerelease_specified = @prerelease_specified = {} requirements.each {|dep| @prerelease_specified[dep.name] ||= dep.prerelease? } verify_gemfile_dependencies_are_found!(requirements) - dg = @resolver.resolve(requirements, @base_dg) - dg. + result = @resolver.resolve(requirements, @base_dg). map(&:payload). reject {|sg| sg.name.end_with?("\0") }. map(&:to_specs). 
flatten + + SpecSet.new(SpecSet.new(result).for(regular_requirements, false, @platforms)) rescue Molinillo::VersionConflict => e message = version_conflict_message(e) raise VersionConflict.new(e.conflicts.keys.uniq, message) @@ -176,7 +184,7 @@ def source_for(name) end def results_for(dependency) - index_for(dependency).search(dependency) + @results_for[dependency] ||= index_for(dependency).search(dependency) end def name_for(dependency) @@ -227,6 +235,19 @@ def self.platform_sort_key(platform) private + def remove_from_candidates(spec) + @base.delete(spec) + @gem_version_promoter.reset + + @results_for.keys.each do |dep| + next unless dep.name == spec.name + + @results_for[dep].reject {|s| s.name == spec.name && s.version == spec.version } + end + + @search_for = {} + end + # returns an integer \in (-\infty, 0] # a number closer to 0 means the dependency is less constraining # diff --git a/lib/bundler/resolver/spec_group.rb b/lib/bundler/resolver/spec_group.rb index 4de5b91aa6a38d..4e5b0082d3d316 100644 --- a/lib/bundler/resolver/spec_group.rb +++ b/lib/bundler/resolver/spec_group.rb @@ -97,14 +97,17 @@ def __dependencies(platform) def metadata_dependencies(platform) spec = @specs[platform].first return [] if spec.is_a?(LazySpecification) - dependencies = [] - unless spec.required_ruby_version.none? - dependencies << DepProxy.get_proxy(Dependency.new("Ruby\0", spec.required_ruby_version), platform) - end - unless spec.required_rubygems_version.none? - dependencies << DepProxy.get_proxy(Dependency.new("RubyGems\0", spec.required_rubygems_version), platform) - end - dependencies + + [ + metadata_dependency("Ruby", spec.required_ruby_version, platform), + metadata_dependency("RubyGems", spec.required_rubygems_version, platform), + ].compact + end + + def metadata_dependency(name, requirement, platform) + return if requirement.nil? || requirement.none? 
+ + DepProxy.get_proxy(Dependency.new("#{name}\0", requirement), platform) end end end diff --git a/lib/bundler/ruby_dsl.rb b/lib/bundler/ruby_dsl.rb index f6ba220cd55904..3b3a0583a5bc85 100644 --- a/lib/bundler/ruby_dsl.rb +++ b/lib/bundler/ruby_dsl.rb @@ -9,7 +9,7 @@ def ruby(*ruby_version) raise GemfileError, "Please define :engine" if options[:engine_version] && options[:engine].nil? if options[:engine] == "ruby" && options[:engine_version] && - ruby_version != Array(options[:engine_version]) + ruby_version != Array(options[:engine_version]) raise GemfileEvalError, "ruby_version must match the :engine_version for MRI" end @ruby_version = RubyVersion.new(ruby_version, options[:patchlevel], options[:engine], options[:engine_version]) diff --git a/lib/bundler/rubygems_ext.rb b/lib/bundler/rubygems_ext.rb index 938c58e64d855d..9b8455d0fca748 100644 --- a/lib/bundler/rubygems_ext.rb +++ b/lib/bundler/rubygems_ext.rb @@ -15,6 +15,7 @@ # `Gem::Source` from the redefined `Gem::Specification#source`. 
require "rubygems/source" +require_relative "match_metadata" require_relative "match_platform" # Cherry-pick fixes to `Gem.ruby_version` to be useful for modern Bundler @@ -28,6 +29,7 @@ module Gem class Specification + include ::Bundler::MatchMetadata include ::Bundler::MatchPlatform attr_accessor :remote, :location, :relative_loaded_from @@ -235,6 +237,33 @@ class Platform MINGW = Gem::Platform.new("x86-mingw32") X64_MINGW = [Gem::Platform.new("x64-mingw32"), Gem::Platform.new("x64-mingw-ucrt")].freeze + WINDOWS = [MSWIN, MSWIN64, MINGW, X64_MINGW].flatten.freeze + + if Gem::Platform.new("x86_64-linux-musl") === Gem::Platform.new("x86_64-linux") + remove_method :=== + + def ===(other) + return nil unless Gem::Platform === other + + # universal-mingw32 matches x64-mingw-ucrt + return true if (@cpu == "universal" || other.cpu == "universal") && + @os.start_with?("mingw") && other.os.start_with?("mingw") + + # cpu + ([nil,"universal"].include?(@cpu) || [nil, "universal"].include?(other.cpu) || @cpu == other.cpu || + (@cpu == "arm" && other.cpu.start_with?("arm"))) && + + # os + @os == other.os && + + # version + ( + (@os != "linux" && (@version.nil? 
|| other.version.nil?)) || + (@os == "linux" && (other.version == "gnu#{@version}" || other.version == "musl#{@version}" || @version == "gnu#{other.version}")) || + @version == other.version + ) + end + end end Platform.singleton_class.module_eval do diff --git a/lib/bundler/source_list.rb b/lib/bundler/source_list.rb index a4773397c7df4c..6ea2910d185203 100644 --- a/lib/bundler/source_list.rb +++ b/lib/bundler/source_list.rb @@ -101,6 +101,10 @@ def get(source) source_list_for(source).find {|s| equivalent_source?(source, s) } end + def get_with_fallback(source) + get(source) || default_source + end + def lock_sources lock_other_sources + lock_rubygems_sources end diff --git a/lib/bundler/spec_set.rb b/lib/bundler/spec_set.rb index 735cdac126c916..4965ca9e60c51b 100644 --- a/lib/bundler/spec_set.rb +++ b/lib/bundler/spec_set.rb @@ -7,8 +7,11 @@ class SpecSet include Enumerable include TSort - def initialize(specs) + attr_reader :incomplete_specs + + def initialize(specs, incomplete_specs = []) @specs = specs + @incomplete_specs = incomplete_specs end def for(dependencies, check = false, platforms = [nil]) @@ -19,7 +22,10 @@ def for(dependencies, check = false, platforms = [nil]) loop do break unless dep = deps.shift - key = [dep[0].name, dep[1]] + name = dep[0].name + platform = dep[1] + + key = [name, platform] next if handled.key?(key) handled[key] = true @@ -33,7 +39,7 @@ def for(dependencies, check = false, platforms = [nil]) deps << [d, dep[1]] end elsif check - specs << IncompleteSpecification.new(*key) + @incomplete_specs += lookup[name] end end @@ -51,6 +57,12 @@ def []=(key, value) @sorted = nil end + def delete(spec) + @specs.delete(spec) + @lookup = nil + @sorted = nil + end + def sort! 
self end @@ -66,7 +78,7 @@ def to_hash def materialize(deps) materialized = self.for(deps, true) - SpecSet.new(materialized) + SpecSet.new(materialized, incomplete_specs) end # Materialize for all the specs in the spec set, regardless of what platform they're for @@ -82,23 +94,16 @@ def materialized_for_all_platforms end end - def materialized_for_resolution - materialized = @specs.map do |s| - spec = s.materialize_for_resolution - yield spec if spec - spec - end.compact - SpecSet.new(materialized) + def incomplete_ruby_specs?(deps) + self.for(deps, true, [Gem::Platform::RUBY]) + + @incomplete_specs.any? end def missing_specs @specs.select {|s| s.is_a?(LazySpecification) } end - def incomplete_specs - @specs.select {|s| s.is_a?(IncompleteSpecification) } - end - def merge(set) arr = sorted.dup set.each do |set_spec| diff --git a/lib/cgi/util.rb b/lib/cgi/util.rb index 55e61bf984fa67..5a5c77ac9764dd 100644 --- a/lib/cgi/util.rb +++ b/lib/cgi/util.rb @@ -5,24 +5,57 @@ module Util; end extend Util end module CGI::Util - @@accept_charset="UTF-8" unless defined?(@@accept_charset) - # URL-encode a string. + @@accept_charset = Encoding::UTF_8 unless defined?(@@accept_charset) + + # URL-encode a string into application/x-www-form-urlencoded. + # Space characters (+" "+) are encoded with plus signs (+"+"+) # url_encoded_string = CGI.escape("'Stop!' said Fred") # # => "%27Stop%21%27+said+Fred" def escape(string) encoding = string.encoding - string.b.gsub(/([^ a-zA-Z0-9_.\-~]+)/) do |m| + buffer = string.b + buffer.gsub!(/([^ a-zA-Z0-9_.\-~]+)/) do |m| '%' + m.unpack('H2' * m.bytesize).join('%').upcase - end.tr(' ', '+').force_encoding(encoding) + end + buffer.tr!(' ', '+') + buffer.force_encoding(encoding) end - # URL-decode a string with encoding(optional). + # URL-decode an application/x-www-form-urlencoded string with encoding(optional). # string = CGI.unescape("%27Stop%21%27+said+Fred") # # => "'Stop!' 
said Fred" - def unescape(string,encoding=@@accept_charset) - str=string.tr('+', ' ').b.gsub(/((?:%[0-9a-fA-F]{2})+)/) do |m| + def unescape(string, encoding = @@accept_charset) + str = string.tr('+', ' ') + str = str.b + str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m| + [m.delete('%')].pack('H*') + end + str.force_encoding(encoding) + str.valid_encoding? ? str : str.force_encoding(string.encoding) + end + + # URL-encode a string following RFC 3986 + # Space characters (+" "+) are encoded with (+"%20"+) + # url_encoded_string = CGI.escape("'Stop!' said Fred") + # # => "%27Stop%21%27%20said%20Fred" + def escapeURIComponent(string) + encoding = string.encoding + buffer = string.b + buffer.gsub!(/([^a-zA-Z0-9_.\-~]+)/) do |m| + '%' + m.unpack('H2' * m.bytesize).join('%').upcase + end + buffer.force_encoding(encoding) + end + + # URL-decode a string following RFC 3986 with encoding(optional). + # string = CGI.unescape("%27Stop%21%27+said%20Fred") + # # => "'Stop!'+said Fred" + def unescapeURIComponent(string, encoding = @@accept_charset) + str = string.b + str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m| [m.delete('%')].pack('H*') - end.force_encoding(encoding) + end + str.force_encoding(encoding) str.valid_encoding? ? str : str.force_encoding(string.encoding) end diff --git a/lib/error_highlight/base.rb b/lib/error_highlight/base.rb index 8392979e245784..4c115cc8285c6b 100644 --- a/lib/error_highlight/base.rb +++ b/lib/error_highlight/base.rb @@ -1,12 +1,17 @@ require_relative "version" module ErrorHighlight - # Identify the code fragment that seems associated with a given error + # Identify the code fragment at that a given exception occurred. # - # Arguments: - # node: RubyVM::AbstractSyntaxTree::Node (script_lines should be enabled) - # point_type: :name | :args - # name: The name associated with the NameError/NoMethodError + # Options: + # + # point_type: :name | :args + # :name (default) points the method/variable name that the exception occurred. 
+ # :args points the arguments of the method call that the exception occurred. + # + # backtrace_location: Thread::Backtrace::Location + # It locates the code fragment of the given backtrace_location. + # By default, it uses the first frame of backtrace_locations of the given exception. # # Returns: # { @@ -15,9 +20,57 @@ module ErrorHighlight # last_lineno: Integer, # last_column: Integer, # snippet: String, + # script_lines: [String], # } | nil - def self.spot(...) - Spotter.new(...).spot + # + # Limitations: + # + # Currently, ErrorHighlight.spot only supports a single-line code fragment. + # Therefore, if the return value is not nil, first_lineno and last_lineno will have + # the same value. If the relevant code fragment spans multiple lines + # (e.g., Array#[] of +ary[(newline)expr(newline)]+), the method will return nil. + # This restriction may be removed in the future. + def self.spot(obj, **opts) + case obj + when Exception + exc = obj + loc = opts[:backtrace_location] + opts = { point_type: opts.fetch(:point_type, :name) } + + unless loc + case exc + when TypeError, ArgumentError + opts[:point_type] = :args + end + + locs = exc.backtrace_locations + return nil unless locs + + loc = locs.first + return nil unless loc + + opts[:name] = exc.name if NameError === obj + end + + return nil unless Thread::Backtrace::Location === loc + + node = RubyVM::AbstractSyntaxTree.of(loc, keep_script_lines: true) + + Spotter.new(node, **opts).spot + + when RubyVM::AbstractSyntaxTree::Node + # Just for compatibility + Spotter.new(node, **opts).spot + + else + raise TypeError, "Exception is expected" + end + + rescue SyntaxError, + SystemCallError, # file not found or something + ArgumentError # eval'ed code + + return nil end class Spotter @@ -122,6 +175,7 @@ def spot last_lineno: @end_lineno, last_column: @end_column, snippet: @snippet, + script_lines: @node.script_lines, } else return nil diff --git a/lib/error_highlight/core_ext.rb b/lib/error_highlight/core_ext.rb index 
53e409dd8f6f8c..00d567164846c8 100644 --- a/lib/error_highlight/core_ext.rb +++ b/lib/error_highlight/core_ext.rb @@ -3,36 +3,9 @@ module ErrorHighlight module CoreExt private def generate_snippet - locs = backtrace_locations - return "" unless locs - - loc = locs.first - return "" unless loc - - begin - node = RubyVM::AbstractSyntaxTree.of(loc, keep_script_lines: true) - opts = {} - - case self - when NoMethodError, NameError - opts[:point_type] = :name - opts[:name] = name - when TypeError, ArgumentError - opts[:point_type] = :args - end - - spot = ErrorHighlight.spot(node, **opts) - - rescue SyntaxError - rescue SystemCallError # file not found or something - rescue ArgumentError # eval'ed code - end - - if spot - return ErrorHighlight.formatter.message_for(spot) - end - - "" + spot = ErrorHighlight.spot(self) + return "" unless spot + return ErrorHighlight.formatter.message_for(spot) end if Exception.method_defined?(:detailed_message) @@ -64,9 +37,6 @@ def to_s end NameError.prepend(CoreExt) - - # The extension for TypeError/ArgumentError is temporarily disabled due to many test failures - - #TypeError.prepend(CoreExt) - #ArgumentError.prepend(CoreExt) + TypeError.prepend(CoreExt) + ArgumentError.prepend(CoreExt) end diff --git a/lib/error_highlight/version.rb b/lib/error_highlight/version.rb index 49a34502cb923a..4279b6d05fc9ff 100644 --- a/lib/error_highlight/version.rb +++ b/lib/error_highlight/version.rb @@ -1,3 +1,3 @@ module ErrorHighlight - VERSION = "0.3.0" + VERSION = "0.4.0" end diff --git a/lib/fileutils.rb b/lib/fileutils.rb index 7eb66dda0c83a3..74bb904e28143a 100644 --- a/lib/fileutils.rb +++ b/lib/fileutils.rb @@ -374,7 +374,7 @@ def mkdir_p(list, mode: nil, noop: nil, verbose: nil) path = remove_trailing_slash(item) stack = [] - until File.directory?(path) + until File.directory?(path) || File.dirname(path) == path stack.push path path = File.dirname(path) end @@ -1165,7 +1165,7 @@ def mv(src, dest, force: nil, noop: nil, verbose: nil, secure: 
nil) # # Keyword arguments: # - # - force: true - ignores raised exceptions of StandardError + # - force: true - ignores raised exceptions of Errno::ENOENT # and its descendants. # - noop: true - does not remove files; returns +nil+. # - verbose: true - prints an equivalent command: @@ -1248,7 +1248,7 @@ def rm_f(list, noop: nil, verbose: nil) # # Keyword arguments: # - # - force: true - ignores raised exceptions of StandardError + # - force: true - ignores raised exceptions of Errno::ENOENT # and its descendants. # - noop: true - does not remove entries; returns +nil+. # - secure: true - removes +src+ securely; @@ -1315,7 +1315,7 @@ def rm_rf(list, noop: nil, verbose: nil, secure: nil) # see {Avoiding the TOCTTOU Vulnerability}[rdoc-ref:FileUtils@Avoiding+the+TOCTTOU+Vulnerability]. # # Optional argument +force+ specifies whether to ignore - # raised exceptions of StandardError and its descendants. + # raised exceptions of Errno::ENOENT and its descendants. # # Related: {methods for deleting}[rdoc-ref:FileUtils@Deleting]. # @@ -1384,10 +1384,12 @@ def remove_entry_secure(path, force = false) ent.remove rescue raise unless force + raise unless Errno::ENOENT === $! end end rescue raise unless force + raise unless Errno::ENOENT === $! end module_function :remove_entry_secure @@ -1413,7 +1415,7 @@ def fu_stat_identical_entry?(a, b) #:nodoc: # should be {interpretable as a path}[rdoc-ref:FileUtils@Path+Arguments]. # # Optional argument +force+ specifies whether to ignore - # raised exceptions of StandardError and its descendants. + # raised exceptions of Errno::ENOENT and its descendants. # # Related: FileUtils.remove_entry_secure. # @@ -1423,10 +1425,12 @@ def remove_entry(path, force = false) ent.remove rescue raise unless force + raise unless Errno::ENOENT === $! end end rescue raise unless force + raise unless Errno::ENOENT === $! 
end module_function :remove_entry @@ -1437,7 +1441,7 @@ def remove_entry(path, force = false) # should be {interpretable as a path}[rdoc-ref:FileUtils@Path+Arguments]. # # Optional argument +force+ specifies whether to ignore - # raised exceptions of StandardError and its descendants. + # raised exceptions of Errno::ENOENT and its descendants. # # Related: {methods for deleting}[rdoc-ref:FileUtils@Deleting]. # @@ -1445,6 +1449,7 @@ def remove_file(path, force = false) Entry_.new(path).remove_file rescue raise unless force + raise unless Errno::ENOENT === $! end module_function :remove_file @@ -1456,7 +1461,7 @@ def remove_file(path, force = false) # should be {interpretable as a path}[rdoc-ref:FileUtils@Path+Arguments]. # # Optional argument +force+ specifies whether to ignore - # raised exceptions of StandardError and its descendants. + # raised exceptions of Errno::ENOENT and its descendants. # # Related: {methods for deleting}[rdoc-ref:FileUtils@Deleting]. # @@ -2328,13 +2333,21 @@ def preorder_traverse def postorder_traverse if directory? - entries().each do |ent| + begin + children = entries() + rescue Errno::EACCES + # Failed to get the list of children. + # Assuming there is no children, try to process the parent directory. + yield self + return + end + + children.each do |ent| ent.postorder_traverse do |e| yield e end end end - ensure yield self end diff --git a/lib/irb/input-method.rb b/lib/irb/input-method.rb index fd68239ee38d61..b77fd3207def87 100644 --- a/lib/irb/input-method.rb +++ b/lib/irb/input-method.rb @@ -14,7 +14,6 @@ require_relative 'completion' require 'io/console' require 'reline' -require 'rdoc' module IRB STDIN_FILE_NAME = "(line)" # :nodoc: @@ -321,6 +320,11 @@ def auto_indent(&block) [195, 164], # The "ä" that appears when Alt+d is pressed on xterm. [226, 136, 130] # The "∂" that appears when Alt+d in pressed on iTerm2. 
] + begin + require 'rdoc' + rescue LoadError + return nil + end if just_cursor_moving and completion_journey_data.nil? return nil diff --git a/lib/net/protocol.rb b/lib/net/protocol.rb index 822bc00574500a..c676854b672112 100644 --- a/lib/net/protocol.rb +++ b/lib/net/protocol.rb @@ -120,6 +120,7 @@ def initialize(io, read_timeout: 60, write_timeout: 60, continue_timeout: nil, d @continue_timeout = continue_timeout @debug_output = debug_output @rbuf = ''.b + @rbuf_offset = 0 end attr_reader :io @@ -154,14 +155,15 @@ def read(len, dest = ''.b, ignore_eof = false) LOG "reading #{len} bytes..." read_bytes = 0 begin - while read_bytes + @rbuf.size < len - s = rbuf_consume(@rbuf.size) - read_bytes += s.size - dest << s + while read_bytes + rbuf_size < len + if s = rbuf_consume_all_shareable! + read_bytes += s.bytesize + dest << s + end rbuf_fill end s = rbuf_consume(len - read_bytes) - read_bytes += s.size + read_bytes += s.bytesize dest << s rescue EOFError raise unless ignore_eof @@ -175,9 +177,10 @@ def read_all(dest = ''.b) read_bytes = 0 begin while true - s = rbuf_consume(@rbuf.size) - read_bytes += s.size - dest << s + if s = rbuf_consume_all_shareable! + read_bytes += s.bytesize + dest << s + end rbuf_fill end rescue EOFError @@ -188,14 +191,16 @@ def read_all(dest = ''.b) end def readuntil(terminator, ignore_eof = false) + offset = @rbuf_offset begin - until idx = @rbuf.index(terminator) + until idx = @rbuf.index(terminator, offset) + offset = @rbuf.bytesize rbuf_fill end - return rbuf_consume(idx + terminator.size) + return rbuf_consume(idx + terminator.bytesize - @rbuf_offset) rescue EOFError raise unless ignore_eof - return rbuf_consume(@rbuf.size) + return rbuf_consume end end @@ -208,12 +213,16 @@ def readline BUFSIZE = 1024 * 16 def rbuf_fill - tmp = @rbuf.empty? ? @rbuf : nil + tmp = @rbuf_empty ? 
@rbuf : nil case rv = @io.read_nonblock(BUFSIZE, tmp, exception: false) when String - return if rv.equal?(tmp) - @rbuf << rv - rv.clear + @rbuf_empty = false + if rv.equal?(tmp) + @rbuf_offset = 0 + else + @rbuf << rv + rv.clear + end return when :wait_readable (io = @io.to_io).wait_readable(@read_timeout) or raise Net::ReadTimeout.new(io) @@ -228,13 +237,50 @@ def rbuf_fill end while true end - def rbuf_consume(len) - if len == @rbuf.size + def rbuf_flush + if @rbuf_empty + @rbuf.clear + @rbuf_offset = 0 + end + nil + end + + def rbuf_size + @rbuf.bytesize - @rbuf_offset + end + + # Warning: this method may share the buffer to avoid + # copying. The caller must no longer use the returned + # string once rbuf_fill has been called again + def rbuf_consume_all_shareable! + @rbuf_empty = true + buf = if @rbuf_offset == 0 + @rbuf + else + @rbuf.byteslice(@rbuf_offset..-1) + end + @rbuf_offset = @rbuf.bytesize + buf + end + + def rbuf_consume(len = nil) + if @rbuf_offset == 0 && (len.nil? || len == @rbuf.bytesize) s = @rbuf @rbuf = ''.b + @rbuf_offset = 0 + @rbuf_empty = true + elsif len.nil? 
+ s = @rbuf.byteslice(@rbuf_offset..-1) + @rbuf = ''.b + @rbuf_offset = 0 + @rbuf_empty = true else - s = @rbuf.slice!(0, len) + s = @rbuf.byteslice(@rbuf_offset, len) + @rbuf_offset += len + @rbuf_empty = @rbuf_offset == @rbuf.bytesize + rbuf_flush end + @debug_output << %Q[-> #{s.dump}\n] if @debug_output s end diff --git a/lib/rdoc/markdown.rb b/lib/rdoc/markdown.rb index 25a7217d3c6cd2..26b32f9b73aff4 100644 --- a/lib/rdoc/markdown.rb +++ b/lib/rdoc/markdown.rb @@ -811,7 +811,7 @@ def parse markdown @note_order.each_with_index do |ref, index| label = index + 1 - note = @footnotes[ref] + note = @footnotes[ref] or raise ParseError, "footnote [^#{ref}] not found" link = "{^#{label}}[rdoc-label:footmark-#{label}:foottext-#{label}] " note.parts.unshift link @@ -15533,7 +15533,7 @@ def _Notes return _tmp end - # RawNoteBlock = @StartList:a (!@BlankLine OptionallyIndentedLine:l { a << l })+ < @BlankLine* > { a << text } { a } + # RawNoteBlock = @StartList:a (!@BlankLine !RawNoteReference OptionallyIndentedLine:l { a << l })+ < @BlankLine* > { a << text } { a } def _RawNoteBlock _save = self.pos @@ -15556,6 +15556,14 @@ def _RawNoteBlock self.pos = _save2 break end + _save4 = self.pos + _tmp = apply(:_RawNoteReference) + _tmp = _tmp ? nil : true + self.pos = _save4 + unless _tmp + self.pos = _save2 + break + end _tmp = apply(:_OptionallyIndentedLine) l = @result unless _tmp @@ -15573,26 +15581,34 @@ def _RawNoteBlock if _tmp while true - _save4 = self.pos + _save5 = self.pos while true # sequence - _save5 = self.pos + _save6 = self.pos _tmp = _BlankLine() _tmp = _tmp ? nil : true - self.pos = _save5 + self.pos = _save6 unless _tmp - self.pos = _save4 + self.pos = _save5 + break + end + _save7 = self.pos + _tmp = apply(:_RawNoteReference) + _tmp = _tmp ? 
nil : true + self.pos = _save7 + unless _tmp + self.pos = _save5 break end _tmp = apply(:_OptionallyIndentedLine) l = @result unless _tmp - self.pos = _save4 + self.pos = _save5 break end @result = begin; a << l ; end _tmp = true unless _tmp - self.pos = _save4 + self.pos = _save5 end break end # end sequence @@ -16656,7 +16672,7 @@ def _DefinitionListDefinition Rules[:_Note] = rule_info("Note", "&{ notes? } @NonindentSpace RawNoteReference:ref \":\" @Sp @StartList:a RawNoteBlock:i { a.concat i } (&Indent RawNoteBlock:i { a.concat i })* { @footnotes[ref] = paragraph a nil }") Rules[:_InlineNote] = rule_info("InlineNote", "&{ notes? } \"^[\" @StartList:a (!\"]\" Inline:l { a << l })+ \"]\" { ref = [:inline, @note_order.length] @footnotes[ref] = paragraph a note_for ref }") Rules[:_Notes] = rule_info("Notes", "(Note | SkipBlock)*") - Rules[:_RawNoteBlock] = rule_info("RawNoteBlock", "@StartList:a (!@BlankLine OptionallyIndentedLine:l { a << l })+ < @BlankLine* > { a << text } { a }") + Rules[:_RawNoteBlock] = rule_info("RawNoteBlock", "@StartList:a (!@BlankLine !RawNoteReference OptionallyIndentedLine:l { a << l })+ < @BlankLine* > { a << text } { a }") Rules[:_CodeFence] = rule_info("CodeFence", "&{ github? } Ticks3 (@Sp StrChunk:format)? Spnl < ((!\"`\" Nonspacechar)+ | !Ticks3 /`+/ | Spacechar | @Newline)+ > Ticks3 @Sp @Newline* { verbatim = RDoc::Markup::Verbatim.new text verbatim.format = format.intern if format.instance_of?(String) verbatim }") Rules[:_Table] = rule_info("Table", "&{ github? } TableRow:header TableLine:line TableRow+:body { table = RDoc::Markup::Table.new(header, line, body) }") Rules[:_TableRow] = rule_info("TableRow", "TableItem+:row \"|\" @Newline { row }") diff --git a/lib/rdoc/markup.rb b/lib/rdoc/markup.rb index 7fec1c686970cf..6e9303096509b0 100644 --- a/lib/rdoc/markup.rb +++ b/lib/rdoc/markup.rb @@ -99,355 +99,6 @@ # # See RDoc::MarkupReference. 
# -# === Escaping Text Markup -# -# Text markup can be escaped with a backslash, as in \, which was obtained -# with \\. Except in verbatim sections and between \ tags, -# to produce a backslash you have to double it unless it is followed by a -# space, tab or newline. Otherwise, the HTML formatter will discard it, as it -# is used to escape potential links: -# -# * The \ must be doubled if not followed by white space: \\. -# * But not in \ tags: in a Regexp, \S matches non-space. -# * This is a link to {ruby-lang}[https://www.ruby-lang.org]. -# * This is not a link, however: \{ruby-lang.org}[https://www.ruby-lang.org]. -# * This will not be linked to \RDoc::RDoc#document -# -# generates: -# -# * The \ must be doubled if not followed by white space: \\. -# * But not in \ tags: in a Regexp, \S matches non-space. -# * This is a link to {ruby-lang}[https://www.ruby-lang.org] -# * This is not a link, however: \{ruby-lang.org}[https://www.ruby-lang.org] -# * This will not be linked to \RDoc::RDoc#document -# -# Inside \ tags, more precisely, leading backslashes are removed only if -# followed by a markup character (<*_+), a backslash, or a known link -# reference (a known class or method). So in the example above, the backslash -# of \S would be removed if there was a class or module named +S+ in -# the current context. -# -# This behavior is inherited from RDoc version 1, and has been kept for -# compatibility with existing RDoc documentation. -# -# === Conversion of characters -# -# HTML will convert two/three dashes to an em-dash. Other common characters are -# converted as well: -# -# em-dash:: -- or --- -# ellipsis:: ... -# -# single quotes:: 'text' or `text' -# double quotes:: "text" or ``text'' -# -# copyright:: (c) -# registered trademark:: (r) -# -# produces: -# -# em-dash:: -- or --- -# ellipsis:: ... 
-# -# single quotes:: 'text' or `text' -# double quotes:: "text" or ``text'' -# -# copyright:: (c) -# registered trademark:: (r) -# -# -# == Documenting Source Code -# -# Comment blocks can be written fairly naturally, either using # on -# successive lines of the comment, or by including the comment in -# a =begin/=end block. If you use the latter form, -# the =begin line _must_ be flagged with an +rdoc+ tag: -# -# =begin rdoc -# Documentation to be processed by RDoc. -# -# ... -# =end -# -# RDoc stops processing comments if it finds a comment line starting -# with -- right after the # character (otherwise, -# it will be treated as a rule if it has three dashes or more). -# This can be used to separate external from internal comments, -# or to stop a comment being associated with a method, class, or module. -# Commenting can be turned back on with a line that starts with ++. -# -# ## -# # Extract the age and calculate the date-of-birth. -# #-- -# # FIXME: fails if the birthday falls on February 29th -# #++ -# # The DOB is returned as a Time object. -# -# def get_dob(person) -# # ... -# end -# -# Names of classes, files, and any method names containing an underscore or -# preceded by a hash character are automatically linked from comment text to -# their description. This linking works inside the current class or module, -# and with ancestor methods (in included modules or in the superclass). -# -# Method parameter lists are extracted and displayed with the method -# description. If a method calls +yield+, then the parameters passed to yield -# will also be displayed: -# -# def fred -# ... -# yield line, address -# -# This will get documented as: -# -# fred() { |line, address| ... } -# -# You can override this using a comment containing ':yields: ...' immediately -# after the method definition -# -# def fred # :yields: index, position -# # ... -# -# yield line, address -# -# which will get documented as -# -# fred() { |index, position| ... 
} -# -# +:yields:+ is an example of a documentation directive. These appear -# immediately after the start of the document element they are modifying. -# -# RDoc automatically cross-references words with underscores or camel-case. -# To suppress cross-references, prefix the word with a \ character. To -# include special characters like "\n", you'll need to use -# two \ characters in normal text, but only one in \ text: -# -# "\\n" or "\n" -# -# produces: -# -# "\\n" or "\n" -# -# == Directives -# -# Directives are keywords surrounded by ":" characters. -# -# === Controlling what is documented -# -# [+:nodoc:+ / :nodoc: all] -# This directive prevents documentation for the element from -# being generated. For classes and modules, methods, aliases, -# constants, and attributes directly within the affected class or -# module also will be omitted. By default, though, modules and -# classes within that class or module _will_ be documented. This is -# turned off by adding the +all+ modifier. -# -# module MyModule # :nodoc: -# class Input -# end -# end -# -# module OtherModule # :nodoc: all -# class Output -# end -# end -# -# In the above code, only class MyModule::Input will be documented. -# -# The +:nodoc:+ directive, like +:enddoc:+, +:stopdoc:+ and +:startdoc:+ -# presented below, is local to the current file: if you do not want to -# document a module that appears in several files, specify +:nodoc:+ on each -# appearance, at least once per file. -# -# [+:stopdoc:+ / +:startdoc:+] -# Stop and start adding new documentation elements to the current container. -# For example, if a class has a number of constants that you don't want to -# document, put a +:stopdoc:+ before the first, and a +:startdoc:+ after the -# last. If you don't specify a +:startdoc:+ by the end of the container, -# disables documentation for the rest of the current file. -# -# [+:doc:+] -# Forces a method or attribute to be documented even if it wouldn't be -# otherwise. 
Useful if, for example, you want to include documentation of a -# particular private method. -# -# [+:enddoc:+] -# Document nothing further at the current level: directives +:startdoc:+ and -# +:doc:+ that appear after this will not be honored for the current container -# (file, class or module), in the current file. -# -# [+:notnew:+ / +:not_new:+ / +:not-new:+ ] -# Only applicable to the +initialize+ instance method. Normally RDoc -# assumes that the documentation and parameters for +initialize+ are -# actually for the +new+ method, and so fakes out a +new+ for the class. -# The +:notnew:+ directive stops this. Remember that +initialize+ is private, -# so you won't see the documentation unless you use the +-a+ command line -# option. -# -# === Method arguments -# -# [+:arg:+ or +:args:+ _parameters_] -# Overrides the default argument handling with exactly these parameters. -# -# ## -# # :args: a, b -# -# def some_method(*a) -# end -# -# [+:yield:+ or +:yields:+ _parameters_] -# Overrides the default yield discovery with these parameters. -# -# ## -# # :yields: key, value -# -# def each_thing &block -# @things.each(&block) -# end -# -# [+:call-seq:+] -# Lines up to the next blank line or lines with a common prefix in the -# comment are treated as the method's calling sequence, overriding the -# default parsing of method parameters and yield arguments. -# -# Multiple lines may be used. -# -# # :call-seq: -# # ARGF.readlines(sep=$/) -> array -# # ARGF.readlines(limit) -> array -# # ARGF.readlines(sep, limit) -> array -# # -# # ARGF.to_a(sep=$/) -> array -# # ARGF.to_a(limit) -> array -# # ARGF.to_a(sep, limit) -> array -# # -# # The remaining lines are documentation ... -# -# === Sections -# -# Sections allow you to group methods in a class into sensible containers. 
If -# you use the sections 'Public', 'Internal' and 'Deprecated' (the three -# allowed method statuses from TomDoc) the sections will be displayed in that -# order placing the most useful methods at the top. Otherwise, sections will -# be displayed in alphabetical order. -# -# [+:category:+ _section_] -# Adds this item to the named +section+ overriding the current section. Use -# this to group methods by section in RDoc output while maintaining a -# sensible ordering (like alphabetical). -# -# # :category: Utility Methods -# # -# # CGI escapes +text+ -# -# def convert_string text -# CGI.escapeHTML text -# end -# -# An empty category will place the item in the default category: -# -# # :category: -# # -# # This method is in the default category -# -# def some_method -# # ... -# end -# -# Unlike the :section: directive, :category: is not sticky. The category -# only applies to the item immediately following the comment. -# -# Use the :section: directive to provide introductory text for a section of -# documentation. -# -# [+:section:+ _title_] -# Provides section introductory text in RDoc output. The title following -# +:section:+ is used as the section name and the remainder of the comment -# containing the section is used as introductory text. A section's comment -# block must be separated from following comment blocks. Use an empty title -# to switch to the default section. -# -# The :section: directive is sticky, so subsequent methods, aliases, -# attributes, and classes will be contained in this section until the -# section is changed. The :category: directive will override the :section: -# directive. -# -# A :section: comment block may have one or more lines before the :section: -# directive. These will be removed, and any identical lines at the end of -# the block are also removed. This allows you to add visual cues to the -# section. 
-# -# Example: -# -# # ---------------------------------------- -# # :section: My Section -# # This is the section that I wrote. -# # See it glisten in the noon-day sun. -# # ---------------------------------------- -# -# ## -# # Comment for some_method -# -# def some_method -# # ... -# end -# -# === Other directives -# -# [+:markup:+ _type_] -# Overrides the default markup type for this comment with the specified -# markup type. For Ruby files, if the first comment contains this directive -# it is applied automatically to all comments in the file. -# -# Unless you are converting between markup formats you should use a -# .rdoc_options file to specify the default documentation -# format for your entire project. See RDoc::Options@Saved+Options for -# instructions. -# -# At the top of a file the +:markup:+ directive applies to the entire file: -# -# # coding: UTF-8 -# # :markup: TomDoc -# -# # TomDoc comment here ... -# -# class MyClass -# # ... -# -# For just one comment: -# -# # ... -# end -# -# # :markup: RDoc -# # -# # This is a comment in RDoc markup format ... -# -# def some_method -# # ... -# -# See Markup@CONTRIBUTING for instructions on adding a new markup format. -# -# [+:include:+ _filename_] -# Include the contents of the named file at this point. This directive -# must appear alone on one line, possibly preceded by spaces. In this -# position, it can be escaped with a \ in front of the first colon. -# -# The file will be searched for in the directories listed by the +--include+ -# option, or in the current directory by default. The contents of the file -# will be shifted to have the same indentation as the ':' at the start of -# the +:include:+ directive. -# -# [+:title:+ _text_] -# Sets the title for the document. Equivalent to the --title -# command line parameter. (The command line parameter overrides any :title: -# directive in the source). -# -# [+:main:+ _name_] -# Equivalent to the --main command line parameter. 
-# #-- # Original Author:: Dave Thomas, dave@pragmaticprogrammer.com # License:: Ruby license diff --git a/lib/reline.rb b/lib/reline.rb index 0487232a0d6246..f22b573e6d5539 100644 --- a/lib/reline.rb +++ b/lib/reline.rb @@ -601,24 +601,21 @@ def self.line_editor end require 'reline/general_io' -if RbConfig::CONFIG['host_os'] =~ /mswin|msys|mingw|cygwin|bccwin|wince|emc/ - require 'reline/windows' - if Reline::Windows.msys_tty? - Reline::IOGate = if ENV['TERM'] == 'dumb' - Reline::GeneralIO - else - require 'reline/ansi' - Reline::ANSI - end +io = Reline::GeneralIO +unless ENV['TERM'] == 'dumb' + case RbConfig::CONFIG['host_os'] + when /mswin|msys|mingw|cygwin|bccwin|wince|emc/ + require 'reline/windows' + tty = (io = Reline::Windows).msys_tty? else - Reline::IOGate = Reline::Windows + tty = $stdout.tty? end +end +Reline::IOGate = if tty + require 'reline/ansi' + Reline::ANSI else - Reline::IOGate = if $stdout.isatty - require 'reline/ansi' - Reline::ANSI - else - Reline::GeneralIO - end + io end + Reline::HISTORY = Reline::History.new(Reline.core.config) diff --git a/lib/reline/line_editor.rb b/lib/reline/line_editor.rb index 8d0719ef7ce9e9..af5240547ab810 100644 --- a/lib/reline/line_editor.rb +++ b/lib/reline/line_editor.rb @@ -758,7 +758,6 @@ def add_dialog_proc(name, p, context = nil) @output.write @full_block elsif dialog.scrollbar_pos <= (i * 2) and (i * 2) < (dialog.scrollbar_pos + bar_height) @output.write @upper_half_block - str += '' elsif dialog.scrollbar_pos <= (i * 2 + 1) and (i * 2) < (dialog.scrollbar_pos + bar_height) @output.write @lower_half_block else diff --git a/lib/rubygems.rb b/lib/rubygems.rb index b21f00acc74b90..915a899f38edfb 100644 --- a/lib/rubygems.rb +++ b/lib/rubygems.rb @@ -121,7 +121,7 @@ module Gem # When https://bugs.ruby-lang.org/issues/17259 is available, there is no need to override Kernel#warn KERNEL_WARN_IGNORES_INTERNAL_ENTRIES = RUBY_ENGINE == "truffleruby" || - (RUBY_ENGINE == "ruby" && RUBY_VERSION >= "3.0") + 
(RUBY_ENGINE == "ruby" && RUBY_VERSION >= "3.0") ## # An Array of Regexps that match windows Ruby platforms. @@ -741,8 +741,8 @@ def self.pre_uninstall(&hook) def self.prefix prefix = File.dirname RUBYGEMS_DIR - if prefix != File.expand_path(RbConfig::CONFIG["sitelibdir"]) and - prefix != File.expand_path(RbConfig::CONFIG["libdir"]) and + if prefix != File.expand_path(RbConfig::CONFIG["sitelibdir"]) && + prefix != File.expand_path(RbConfig::CONFIG["libdir"]) && "lib" == File.basename(RUBYGEMS_DIR) prefix end @@ -845,8 +845,8 @@ def self.latest_spec_for(name) # Returns the latest release version of RubyGems. def self.latest_rubygems_version - latest_version_for("rubygems-update") or - raise "Can't find 'rubygems-update' in any repo. Check `gem source list`." + latest_version_for("rubygems-update") || + raise("Can't find 'rubygems-update' in any repo. Check `gem source list`.") end ## @@ -854,7 +854,7 @@ def self.latest_rubygems_version def self.latest_version_for(name) spec = latest_spec_for name - spec and spec.version + spec && spec.version end ## @@ -944,7 +944,7 @@ def self.suffixes ".rb", *%w[DLEXT DLEXT2].map do |key| val = RbConfig::CONFIG[key] - next unless val and not val.empty? + next unless val && !val.empty? ".#{val}" end, ].compact.uniq diff --git a/lib/rubygems/available_set.rb b/lib/rubygems/available_set.rb index 499483d9e94cd4..58b601f6b09dcf 100644 --- a/lib/rubygems/available_set.rb +++ b/lib/rubygems/available_set.rb @@ -26,7 +26,7 @@ def <<(o) s = o.set when Array s = o.map do |sp,so| - if !sp.kind_of?(Gem::Specification) or !so.kind_of?(Gem::Source) + if !sp.kind_of?(Gem::Specification) || !so.kind_of?(Gem::Source) raise TypeError, "Array must be in [[spec, source], ...] form" end @@ -149,8 +149,8 @@ def remove_installed!(dep) @set.reject! do |t| # already locally installed Gem::Specification.any? do |installed_spec| - dep.name == installed_spec.name and - dep.requirement.satisfied_by? 
installed_spec.version + dep.name == installed_spec.name && + dep.requirement.satisfied_by?(installed_spec.version) end end diff --git a/lib/rubygems/basic_specification.rb b/lib/rubygems/basic_specification.rb index 526a5069c24227..dcc64e64096e73 100644 --- a/lib/rubygems/basic_specification.rb +++ b/lib/rubygems/basic_specification.rb @@ -77,7 +77,7 @@ def contains_requirable_file?(file) if Gem::Platform::RUBY == platform || Gem::Platform.local === platform warn "Ignoring #{full_name} because its extensions are not built. " + - "Try: gem pristine #{name} --version #{version}" + "Try: gem pristine #{name} --version #{version}" end return false @@ -131,7 +131,7 @@ def full_gem_path # default Ruby platform. def full_name - if platform == Gem::Platform::RUBY or platform.nil? + if platform == Gem::Platform::RUBY || platform.nil? "#{name}-#{version}".dup.tap(&Gem::UNTAINT) else "#{name}-#{version}-#{platform}".dup.tap(&Gem::UNTAINT) diff --git a/lib/rubygems/command.rb b/lib/rubygems/command.rb index 9fc3360fa13cc8..badc21023af10c 100644 --- a/lib/rubygems/command.rb +++ b/lib/rubygems/command.rb @@ -159,11 +159,11 @@ def show_lookup_failure(gem_name, version, errors, suppress_suggestions = false, gem = "'#{gem_name}' (#{version})" msg = String.new "Could not find a valid gem #{gem}" - if errors and !errors.empty? + if errors && !errors.empty? msg << ", here is why:\n" errors.each {|x| msg << " #{x.wordy}\n" } else - if required_by and gem != required_by + if required_by && gem != required_by msg << " (required by #{required_by}) in any repository" else msg << " in any repository" @@ -186,7 +186,7 @@ def show_lookup_failure(gem_name, version, errors, suppress_suggestions = false, def get_all_gem_names args = options[:args] - if args.nil? or args.empty? + if args.nil? || args.empty? raise Gem::CommandLineError, "Please specify at least one gem name (e.g. 
gem build GEMNAME)" end @@ -216,7 +216,7 @@ def get_all_gem_names_and_versions def get_one_gem_name args = options[:args] - if args.nil? or args.empty? + if args.nil? || args.empty? raise Gem::CommandLineError, "Please specify a gem name on the command line (e.g. gem build GEMNAME)" end @@ -554,7 +554,7 @@ def create_option_parser end def configure_options(header, option_list) - return if option_list.nil? or option_list.empty? + return if option_list.nil? || option_list.empty? header = header.to_s.empty? ? "" : "#{header} " @parser.separator " #{header}Options:" @@ -586,7 +586,7 @@ def wrap(text, width) # :doc: add_common_option("-V", "--[no-]verbose", "Set the verbose level of output") do |value, options| # Set us to "really verbose" so the progress meter works - if Gem.configuration.verbose and value + if Gem.configuration.verbose && value Gem.configuration.verbose = 1 else Gem.configuration.verbose = value diff --git a/lib/rubygems/commands/cert_command.rb b/lib/rubygems/commands/cert_command.rb index 56bf5ce6891b85..17b1d11b1915d1 100644 --- a/lib/rubygems/commands/cert_command.rb +++ b/lib/rubygems/commands/cert_command.rb @@ -152,7 +152,7 @@ def build(email) def build_cert(email, key) # :nodoc: expiration_length_days = options[:expiration_length_days] || - Gem.configuration.cert_expiration_length_days + Gem.configuration.cert_expiration_length_days cert = Gem::Security.create_cert_email( email, diff --git a/lib/rubygems/commands/cleanup_command.rb b/lib/rubygems/commands/cleanup_command.rb index 7f143999eb5006..1ae84924c1410d 100644 --- a/lib/rubygems/commands/cleanup_command.rb +++ b/lib/rubygems/commands/cleanup_command.rb @@ -149,7 +149,7 @@ def get_primary_gems @primary_gems = {} Gem::Specification.each do |spec| - if @primary_gems[spec.name].nil? or + if @primary_gems[spec.name].nil? 
|| @primary_gems[spec.name].version < spec.version @primary_gems[spec.name] = spec end diff --git a/lib/rubygems/commands/contents_command.rb b/lib/rubygems/commands/contents_command.rb index 3dd0b16265fc69..c5fdfca31e5e6e 100644 --- a/lib/rubygems/commands/contents_command.rb +++ b/lib/rubygems/commands/contents_command.rb @@ -77,7 +77,7 @@ def execute gem_contents name end - terminate_interaction 1 unless found or names.length > 1 + terminate_interaction 1 unless found || names.length > 1 end end diff --git a/lib/rubygems/commands/dependency_command.rb b/lib/rubygems/commands/dependency_command.rb index c9ddc9af0a8cab..3f69a95e836895 100644 --- a/lib/rubygems/commands/dependency_command.rb +++ b/lib/rubygems/commands/dependency_command.rb @@ -77,7 +77,7 @@ def fetch_specs(name_pattern, requirement, prerelease) # :nodoc: name_matches = name_pattern ? name_pattern =~ spec.name : true version_matches = requirement.satisfied_by?(spec.version) - name_matches and version_matches + name_matches && version_matches }.map(&:to_spec) end @@ -133,7 +133,7 @@ def execute end def ensure_local_only_reverse_dependencies # :nodoc: - if options[:reverse_dependencies] and remote? and not local? + if options[:reverse_dependencies] && remote? && !local? alert_error "Only reverse dependencies for local gems are supported." terminate_interaction 1 end @@ -182,7 +182,7 @@ def find_reverse_dependencies(spec) # :nodoc: sp.dependencies.each do |dep| dep = Gem::Dependency.new(*dep) unless Gem::Dependency === dep - if spec.name == dep.name and + if spec.name == dep.name && dep.requirement.satisfied_by?(spec.version) result << [sp.full_name, dep] end @@ -197,7 +197,7 @@ def find_reverse_dependencies(spec) # :nodoc: def name_pattern(args) return if args.empty? - if args.length == 1 and args.first =~ /\A(.*)(i)?\z/m + if args.length == 1 && args.first =~ /\A(.*)(i)?\z/m flags = $2 ? 
Regexp::IGNORECASE : nil Regexp.new $1, flags else diff --git a/lib/rubygems/commands/fetch_command.rb b/lib/rubygems/commands/fetch_command.rb index 582563ba8105b2..3a8c435d0e4711 100644 --- a/lib/rubygems/commands/fetch_command.rb +++ b/lib/rubygems/commands/fetch_command.rb @@ -52,8 +52,8 @@ def usage # :nodoc: end def check_version # :nodoc: - if options[:version] != Gem::Requirement.default and - get_all_gem_names.size > 1 + if options[:version] != Gem::Requirement.default && + get_all_gem_names.size > 1 alert_error "Can't use --version with multiple gems. You can specify multiple gems with" \ " version requirements using `gem fetch 'my_gem:1.0.0' 'my_other_gem:~>2.0.0'`" terminate_interaction 1 diff --git a/lib/rubygems/commands/generate_index_command.rb b/lib/rubygems/commands/generate_index_command.rb index 8bb24c9ce3e29d..bc71e60ff0a295 100644 --- a/lib/rubygems/commands/generate_index_command.rb +++ b/lib/rubygems/commands/generate_index_command.rb @@ -68,8 +68,8 @@ def execute # This is always true because it's the only way now. options[:build_modern] = true - if not File.exist?(options[:directory]) or - not File.directory?(options[:directory]) + if !File.exist?(options[:directory]) || + !File.directory?(options[:directory]) alert_error "unknown directory name #{options[:directory]}." 
terminate_interaction 1 else diff --git a/lib/rubygems/commands/install_command.rb b/lib/rubygems/commands/install_command.rb index 690f90c2e42577..724b4fe51dac1c 100644 --- a/lib/rubygems/commands/install_command.rb +++ b/lib/rubygems/commands/install_command.rb @@ -46,8 +46,8 @@ def arguments # :nodoc: def defaults_str # :nodoc: "--both --version '#{Gem::Requirement.default}' --no-force\n" + - "--install-dir #{Gem.dir} --lock\n" + - install_update_defaults_str + "--install-dir #{Gem.dir} --lock\n" + + install_update_defaults_str end def description # :nodoc: @@ -134,15 +134,15 @@ def usage # :nodoc: end def check_install_dir # :nodoc: - if options[:install_dir] and options[:user_install] + if options[:install_dir] && options[:user_install] alert_error "Use --install-dir or --user-install but not both" terminate_interaction 1 end end def check_version # :nodoc: - if options[:version] != Gem::Requirement.default and - get_all_gem_names.size > 1 + if options[:version] != Gem::Requirement.default && + get_all_gem_names.size > 1 alert_error "Can't use --version with multiple gems. You can specify multiple gems with" \ " version requirements using `gem install 'my_gem:1.0.0' 'my_other_gem:~>2.0.0'`" terminate_interaction 1 @@ -191,8 +191,8 @@ def install_from_gemdeps # :nodoc: end def install_gem(name, version) # :nodoc: - return if options[:conservative] and - not Gem::Dependency.new(name, version).matching_specs.empty? + return if options[:conservative] && + !Gem::Dependency.new(name, version).matching_specs.empty? req = Gem::Requirement.create(version) diff --git a/lib/rubygems/commands/pristine_command.rb b/lib/rubygems/commands/pristine_command.rb index d4dadf0736e261..72db53ef378cc3 100644 --- a/lib/rubygems/commands/pristine_command.rb +++ b/lib/rubygems/commands/pristine_command.rb @@ -102,10 +102,10 @@ def execute # `--extensions` must be explicitly given to pristine only gems # with extensions. 
- elsif options[:extensions_set] and - options[:extensions] and options[:args].empty? + elsif options[:extensions_set] && + options[:extensions] && options[:args].empty? Gem::Specification.select do |spec| - spec.extensions and not spec.extensions.empty? + spec.extensions && !spec.extensions.empty? end else get_all_gem_names.sort.map do |gem_name| @@ -135,14 +135,14 @@ def execute end end - unless spec.extensions.empty? or options[:extensions] or options[:only_executables] or options[:only_plugins] + unless spec.extensions.empty? || options[:extensions] || options[:only_executables] || options[:only_plugins] say "Skipped #{spec.full_name}, it needs to compile an extension" next end gem = spec.cache_file - unless File.exist? gem or options[:only_executables] or options[:only_plugins] + unless File.exist?(gem) || options[:only_executables] || options[:only_plugins] require_relative "../remote_fetcher" say "Cached gem for #{spec.full_name} not found, attempting to fetch..." diff --git a/lib/rubygems/commands/setup_command.rb b/lib/rubygems/commands/setup_command.rb index 1ed889a7134be0..c782c3618cf480 100644 --- a/lib/rubygems/commands/setup_command.rb +++ b/lib/rubygems/commands/setup_command.rb @@ -329,9 +329,9 @@ def install_rdoc # ignore end - if File.writable? gem_doc_dir and - (not File.exist? rubygems_doc_dir or - File.writable? 
rubygems_doc_dir) + if File.writable?(gem_doc_dir) && + (!File.exist?(rubygems_doc_dir) || + File.writable?(rubygems_doc_dir)) say "Removing old RubyGems RDoc and ri" if @verbose Dir[File.join(Gem.dir, "doc", "rubygems-[0-9]*")].each do |dir| rm_rf dir @@ -559,7 +559,7 @@ def show_release_notes history_string = "" - until versions.length == 0 or + until versions.length == 0 || versions.shift <= options[:previous_version] do history_string += version_lines.shift + text.shift end diff --git a/lib/rubygems/commands/sources_command.rb b/lib/rubygems/commands/sources_command.rb index a5f2d022c6b20a..5a8f5af9c3da25 100644 --- a/lib/rubygems/commands/sources_command.rb +++ b/lib/rubygems/commands/sources_command.rb @@ -82,7 +82,7 @@ def check_typo_squatting(source) def check_rubygems_https(source_uri) # :nodoc: uri = URI source_uri - if uri.scheme and uri.scheme.downcase == "http" and + if uri.scheme && uri.scheme.downcase == "http" && uri.host.downcase == "rubygems.org" question = <<-QUESTION.chomp https://rubygems.org is recommended for security over #{uri} @@ -215,9 +215,9 @@ def update # :nodoc: def remove_cache_file(desc, path) # :nodoc: FileUtils.rm_rf path - if not File.exist?(path) + if !File.exist?(path) say "*** Removed #{desc} source cache ***" - elsif not File.writable?(path) + elsif !File.writable?(path) say "*** Unable to remove #{desc} source cache (write protected) ***" else say "*** Unable to remove #{desc} source cache ***" diff --git a/lib/rubygems/commands/specification_command.rb b/lib/rubygems/commands/specification_command.rb index 6457a755ae4629..12004a6d5696c4 100644 --- a/lib/rubygems/commands/specification_command.rb +++ b/lib/rubygems/commands/specification_command.rb @@ -88,7 +88,7 @@ def execute raise Gem::CommandLineError, "Unsupported version type: '#{v}'" end - if !req.none? and options[:all] + if !req.none? 
&& options[:all] alert_error "Specify --all or -v, not both" terminate_interaction 1 end @@ -102,7 +102,7 @@ def execute field = get_one_optional_argument raise Gem::CommandLineError, "--ruby and FIELD are mutually exclusive" if - field and options[:format] == :ruby + field && options[:format] == :ruby if local? if File.exist? gem diff --git a/lib/rubygems/commands/uninstall_command.rb b/lib/rubygems/commands/uninstall_command.rb index d03a96bf871891..3c520826e5bf97 100644 --- a/lib/rubygems/commands/uninstall_command.rb +++ b/lib/rubygems/commands/uninstall_command.rb @@ -96,7 +96,7 @@ def arguments # :nodoc: def defaults_str # :nodoc: "--version '#{Gem::Requirement.default}' --no-force " + - "--user-install" + "--user-install" end def description # :nodoc: @@ -114,8 +114,8 @@ def usage # :nodoc: end def check_version # :nodoc: - if options[:version] != Gem::Requirement.default and - get_all_gem_names.size > 1 + if options[:version] != Gem::Requirement.default && + get_all_gem_names.size > 1 alert_error "Can't use --version with multiple gems. You can specify multiple gems with" \ " version requirements using `gem uninstall 'my_gem:1.0.0' 'my_other_gem:~>2.0.0'`" terminate_interaction 1 @@ -125,7 +125,7 @@ def check_version # :nodoc: def execute check_version - if options[:all] and not options[:args].empty? + if options[:all] && !options[:args].empty? 
uninstall_specific elsif options[:all] uninstall_all diff --git a/lib/rubygems/commands/update_command.rb b/lib/rubygems/commands/update_command.rb index a0796426699f0b..7c24fedcded3f3 100644 --- a/lib/rubygems/commands/update_command.rb +++ b/lib/rubygems/commands/update_command.rb @@ -56,7 +56,7 @@ def arguments # :nodoc: def defaults_str # :nodoc: "--no-force --install-dir #{Gem.dir}\n" + - install_update_defaults_str + install_update_defaults_str end def description # :nodoc: @@ -155,7 +155,7 @@ def highest_installed_gems # :nodoc: Gem::Specification.dirs = Gem.user_dir if options[:user_install] Gem::Specification.each do |spec| - if hig[spec.name].nil? or hig[spec.name].version < spec.version + if hig[spec.name].nil? || hig[spec.name].version < spec.version hig[spec.name] = spec end end @@ -292,8 +292,8 @@ def update_rubygems_arguments # :nodoc: args << "--no-document" unless options[:document].include?("rdoc") || options[:document].include?("ri") args << "--no-format-executable" if options[:no_format_executable] args << "--previous-version" << Gem::VERSION if - options[:system] == true or - Gem::Version.new(options[:system]) >= Gem::Version.new(2) + options[:system] == true || + Gem::Version.new(options[:system]) >= Gem::Version.new(2) args end @@ -301,7 +301,7 @@ def which_to_update(highest_installed_gems, gem_names) result = [] highest_installed_gems.each do |l_name, l_spec| - next if not gem_names.empty? and + next if !gem_names.empty? && gem_names.none? {|name| name == l_spec.name } highest_remote_tup = highest_remote_name_tuple l_spec diff --git a/lib/rubygems/commands/which_command.rb b/lib/rubygems/commands/which_command.rb index 78493e9380ab1b..5b9a79b734be9b 100644 --- a/lib/rubygems/commands/which_command.rb +++ b/lib/rubygems/commands/which_command.rb @@ -71,7 +71,7 @@ def find_paths(package_name, dirs) dirs.each do |dir| Gem.suffixes.each do |ext| full_path = File.join dir, "#{package_name}#{ext}" - if File.exist? full_path and not File.directory? 
full_path + if File.exist?(full_path) && !File.directory?(full_path) result << full_path return result unless options[:show_all] end diff --git a/lib/rubygems/config_file.rb b/lib/rubygems/config_file.rb index d711a51bd08ec4..c53e209ae8ace7 100644 --- a/lib/rubygems/config_file.rb +++ b/lib/rubygems/config_file.rb @@ -39,7 +39,7 @@ class Gem::ConfigFile include Gem::UserInteraction - DEFAULT_BACKTRACE = false + DEFAULT_BACKTRACE = true DEFAULT_BULK_THRESHOLD = 1000 DEFAULT_VERBOSITY = true DEFAULT_UPDATE_SOURCES = true @@ -368,7 +368,7 @@ def load_file(filename) # True if the backtrace option has been specified, or debug is on. def backtrace - @backtrace or $DEBUG + @backtrace || $DEBUG end # The name of the configuration file. @@ -477,11 +477,11 @@ def []=(key, value) end def ==(other) # :nodoc: - self.class === other and - @backtrace == other.backtrace and - @bulk_threshold == other.bulk_threshold and - @verbose == other.verbose and - @update_sources == other.update_sources and + self.class === other && + @backtrace == other.backtrace && + @bulk_threshold == other.bulk_threshold && + @verbose == other.verbose && + @update_sources == other.update_sources && @hash == other.hash end diff --git a/lib/rubygems/core_ext/kernel_warn.rb b/lib/rubygems/core_ext/kernel_warn.rb index 7df6c48b8f34bf..8f43e00456b81d 100644 --- a/lib/rubygems/core_ext/kernel_warn.rb +++ b/lib/rubygems/core_ext/kernel_warn.rb @@ -39,7 +39,7 @@ class << self start += 1 if path = loc.path - unless path.start_with?(rubygems_path) or path.start_with?("(other) def ==(other) case other when self.class - @name == other.name and - @version == other.version and + @name == other.name && + @version == other.version && @platform == other.platform when Array to_a == other diff --git a/lib/rubygems/package.rb b/lib/rubygems/package.rb index 77f9f282d8fb79..084dc5d2d9523a 100644 --- a/lib/rubygems/package.rb +++ b/lib/rubygems/package.rb @@ -68,14 +68,14 @@ def initialize(message, source = nil) class 
PathError < Error def initialize(destination, destination_dir) super "installing into parent path %s of %s is not allowed" % - [destination, destination_dir] + [destination, destination_dir] end end class SymlinkError < Error def initialize(name, destination, destination_dir) super "installing symlink '%s' pointing to parent path %s of %s is not allowed" % - [name, destination, destination_dir] + [name, destination, destination_dir] end end @@ -687,7 +687,7 @@ def verify_files(gem) "package content (data.tar.gz) is missing", @gem end - if duplicates = @files.group_by {|f| f }.select {|k,v| v.size > 1 }.map(&:first) and duplicates.any? + if (duplicates = @files.group_by {|f| f }.select {|k,v| v.size > 1 }.map(&:first)) && duplicates.any? raise Gem::Security::Exception, "duplicate files in the package: (#{duplicates.map(&:inspect).join(', ')})" end end diff --git a/lib/rubygems/package/tar_header.rb b/lib/rubygems/package/tar_header.rb index fb70765dde2ff4..ee515a9e050f5a 100644 --- a/lib/rubygems/package/tar_header.rb +++ b/lib/rubygems/package/tar_header.rb @@ -173,23 +173,23 @@ def empty? 
end def ==(other) # :nodoc: - self.class === other and - @checksum == other.checksum and - @devmajor == other.devmajor and - @devminor == other.devminor and - @gid == other.gid and - @gname == other.gname and - @linkname == other.linkname and - @magic == other.magic and - @mode == other.mode and - @mtime == other.mtime and - @name == other.name and - @prefix == other.prefix and - @size == other.size and - @typeflag == other.typeflag and - @uid == other.uid and - @uname == other.uname and - @version == other.version + self.class === other && + @checksum == other.checksum && + @devmajor == other.devmajor && + @devminor == other.devminor && + @gid == other.gid && + @gname == other.gname && + @linkname == other.linkname && + @magic == other.magic && + @mode == other.mode && + @mtime == other.mtime && + @name == other.name && + @prefix == other.prefix && + @size == other.size && + @typeflag == other.typeflag && + @uid == other.uid && + @uname == other.uname && + @version == other.version end def to_s # :nodoc: diff --git a/lib/rubygems/package/tar_writer.rb b/lib/rubygems/package/tar_writer.rb index 6f068f50c22b34..db5242c5e4a46e 100644 --- a/lib/rubygems/package/tar_writer.rb +++ b/lib/rubygems/package/tar_writer.rb @@ -314,7 +314,7 @@ def split_name(name) # :nodoc: prefix = parts.join("/") end - if name.bytesize > 100 or prefix.empty? + if name.bytesize > 100 || prefix.empty? raise Gem::Package::TooLongFileName.new("File \"#{prefix}/#{name}\" has a too long name (should be 100 or less)") end diff --git a/lib/rubygems/platform.rb b/lib/rubygems/platform.rb index 607e3906d6ed0e..06de5ded8da6c9 100644 --- a/lib/rubygems/platform.rb +++ b/lib/rubygems/platform.rb @@ -23,9 +23,9 @@ def self.match(platform) def self.match_platforms?(platform, platforms) platforms.any? do |local_platform| - platform.nil? or - local_platform == platform or - (local_platform != Gem::Platform::RUBY and local_platform =~ platform) + platform.nil? 
|| + local_platform == platform || + (local_platform != Gem::Platform::RUBY && platform =~ local_platform) end end private_class_method :match_platforms? @@ -70,7 +70,7 @@ def initialize(arch) when String then arch = arch.split "-" - if arch.length > 2 and arch.last !~ /\d/ # reassemble x86-linux-gnu + if arch.length > 2 && arch.last !~ /\d+(\.\d+)?$/ # reassemble x86-linux-{libc} extra = arch.pop arch.last << "-#{extra}" end @@ -82,7 +82,7 @@ def initialize(arch) else cpu end - if arch.length == 2 and arch.last =~ /^\d+(\.\d+)?$/ # for command-line + if arch.length == 2 && arch.last =~ /^\d+(\.\d+)?$/ # for command-line @os, @version = arch return end @@ -102,12 +102,12 @@ def initialize(arch) when /^dalvik(\d+)?$/ then [ "dalvik", $1 ] when /^dotnet$/ then [ "dotnet", nil ] when /^dotnet([\d.]*)/ then [ "dotnet", $1 ] - when /linux-?((?!gnu)\w+)?/ then [ "linux", $1 ] + when /linux-?(\w+)?/ then [ "linux", $1 ] when /mingw32/ then [ "mingw32", nil ] when /mingw-?(\w+)?/ then [ "mingw", $1 ] when /(mswin\d+)(\_(\d+))?/ then os, version = $1, $3 - @cpu = "x86" if @cpu.nil? and os =~ /32$/ + @cpu = "x86" if @cpu.nil? && os =~ /32$/ [os, version] when /netbsdelf/ then [ "netbsdelf", nil ] when /openbsd(\d+\.\d+)?/ then [ "openbsd", $1 ] @@ -139,7 +139,7 @@ def to_s # the same CPU, OS and version. def ==(other) - self.class === other and to_a == other.to_a + self.class === other && to_a == other.to_a end alias :eql? :== @@ -151,27 +151,38 @@ def hash # :nodoc: ## # Does +other+ match this platform? Two platforms match if they have the # same CPU, or either has a CPU of 'universal', they have the same OS, and - # they have the same version, or either has no version. + # they have the same version, or either one has no version # # Additionally, the platform will match if the local CPU is 'arm' and the # other CPU starts with "arm" (for generic ARM family support). + # + # Of note, this method is not commutative. 
Indeed the OS 'linux' has a + # special case: the version is the libc name, yet while "no version" stands + # as a wildcard for a binary gem platform (as for other OSes), for the + # runtime platform "no version" stands for 'gnu'. To be able to distinguish + # these, the method receiver is the gem platform, while the argument is + # the runtime platform. def ===(other) return nil unless Gem::Platform === other # universal-mingw32 matches x64-mingw-ucrt - return true if (@cpu == "universal" or other.cpu == "universal") and - @os.start_with?("mingw") and other.os.start_with?("mingw") + return true if (@cpu == "universal" || other.cpu == "universal") && + @os.start_with?("mingw") && other.os.start_with?("mingw") # cpu - ([nil,"universal"].include?(@cpu) or [nil, "universal"].include?(other.cpu) or @cpu == other.cpu or - (@cpu == "arm" and other.cpu.start_with?("arm"))) and - - # os - @os == other.os and - - # version - (@version.nil? or other.version.nil? or @version == other.version) + ([nil,"universal"].include?(@cpu) || [nil, "universal"].include?(other.cpu) || @cpu == other.cpu || + (@cpu == "arm" && other.cpu.start_with?("arm"))) && + + # os + @os == other.os && + + # version + ( + (@os != "linux" && (@version.nil? || other.version.nil?)) || + (@os == "linux" && (other.version == "gnu#{@version}" || other.version == "musl#{@version}" || @version == "gnu#{other.version}")) || + @version == other.version + ) end ## diff --git a/lib/rubygems/query_utils.rb b/lib/rubygems/query_utils.rb index 4601d9374ca93d..a502717f94c96a 100644 --- a/lib/rubygems/query_utils.rb +++ b/lib/rubygems/query_utils.rb @@ -112,7 +112,7 @@ def args end def display_header(type) - if (ui.outs.tty? and Gem.configuration.verbose) or both? + if (ui.outs.tty? && Gem.configuration.verbose) || both? say say "*** #{type} GEMS ***" say @@ -132,7 +132,7 @@ def show_local_gems(name, req = Gem::Requirement.default) name_matches = name ? s.name =~ name : true version_matches = show_prereleases?
|| !s.version.prerelease? - name_matches and version_matches + name_matches && version_matches end spec_tuples = specs.map do |spec| @@ -176,7 +176,7 @@ def specs_type # Check if gem +name+ version +version+ is installed. def installed?(name, req = Gem::Requirement.default) - Gem::Specification.any? {|s| s.name =~ name and req =~ s.version } + Gem::Specification.any? {|s| s.name =~ name && req =~ s.version } end def output_query_results(spec_tuples) @@ -242,7 +242,7 @@ def entry_versions(entry, name_tuples, platforms, specs) return unless options[:versions] list = - if platforms.empty? or options[:details] + if platforms.empty? || options[:details] name_tuples.map {|n| n.version }.uniq else platforms.sort.reverse.map do |version, pls| @@ -289,13 +289,13 @@ def spec_authors(entry, spec) end def spec_homepage(entry, spec) - return if spec.homepage.nil? or spec.homepage.empty? + return if spec.homepage.nil? || spec.homepage.empty? entry << "\n" << format_text("Homepage: #{spec.homepage}", 68, 4) end def spec_license(entry, spec) - return if spec.license.nil? or spec.license.empty? + return if spec.license.nil? || spec.license.empty? licenses = "License#{spec.licenses.length > 1 ? 
's' : ''}: ".dup licenses << spec.licenses.join(", ") diff --git a/lib/rubygems/remote_fetcher.rb b/lib/rubygems/remote_fetcher.rb index d0287398614c7b..0ac6eaa130e9be 100644 --- a/lib/rubygems/remote_fetcher.rb +++ b/lib/rubygems/remote_fetcher.rb @@ -114,7 +114,7 @@ def download(spec, source_uri, install_dir = Gem.dir) cache_dir = if Dir.pwd == install_dir # see fetch_command install_dir - elsif File.writable?(install_cache_dir) || (File.writable?(install_dir) && (not File.exist?(install_cache_dir))) + elsif File.writable?(install_cache_dir) || (File.writable?(install_dir) && (!File.exist?(install_cache_dir))) install_cache_dir else File.join Gem.user_dir, "cache" @@ -247,7 +247,7 @@ def fetch_path(uri, mtime = nil, head = false) data = send "fetch_#{uri.scheme}", uri, mtime, head - if data and !head and uri.to_s.end_with?(".gz") + if data && !head && uri.to_s.end_with?(".gz") begin data = Gem::Util.gunzip data rescue Zlib::GzipFile::Error @@ -288,7 +288,7 @@ def cache_update_path(uri, path = nil, update = true) return Gem.read_binary(path) end - if update and path + if update && path Gem.write_binary(path, data) end diff --git a/lib/rubygems/request.rb b/lib/rubygems/request.rb index d15ba91209fb7c..c3ea46e0ebf344 100644 --- a/lib/rubygems/request.rb +++ b/lib/rubygems/request.rb @@ -173,7 +173,7 @@ def self.get_proxy_from_env(scheme = "http") require "uri" uri = URI(Gem::UriFormatter.new(env_proxy).normalize) - if uri and uri.user.nil? and uri.password.nil? + if uri && uri.user.nil? && uri.password.nil? 
user = ENV["#{_scheme}_proxy_user"] || ENV["#{_SCHEME}_PROXY_USER"] password = ENV["#{_scheme}_proxy_pass"] || ENV["#{_SCHEME}_PROXY_PASS"] diff --git a/lib/rubygems/request/connection_pools.rb b/lib/rubygems/request/connection_pools.rb index a283267674ac93..44280489fbeb68 100644 --- a/lib/rubygems/request/connection_pools.rb +++ b/lib/rubygems/request/connection_pools.rb @@ -39,7 +39,7 @@ def close_all def get_no_proxy_from_env env_no_proxy = ENV["no_proxy"] || ENV["NO_PROXY"] - return [] if env_no_proxy.nil? or env_no_proxy.empty? + return [] if env_no_proxy.nil? || env_no_proxy.empty? env_no_proxy.split(/\s*,\s*/) end @@ -78,7 +78,7 @@ def net_http_args(uri, proxy_uri) no_proxy = get_no_proxy_from_env - if proxy_uri and not no_proxy?(hostname, no_proxy) + if proxy_uri && !no_proxy?(hostname, no_proxy) proxy_hostname = proxy_uri.respond_to?(:hostname) ? proxy_uri.hostname : proxy_uri.host net_http_args + [ proxy_hostname, diff --git a/lib/rubygems/request/http_pool.rb b/lib/rubygems/request/http_pool.rb index f028516db8c407..7b309eedd36f1a 100644 --- a/lib/rubygems/request/http_pool.rb +++ b/lib/rubygems/request/http_pool.rb @@ -26,7 +26,7 @@ def checkin(connection) def close_all until @queue.empty? - if connection = @queue.pop(true) and connection.started? + if (connection = @queue.pop(true)) && connection.started? connection.finish end end diff --git a/lib/rubygems/request_set.rb b/lib/rubygems/request_set.rb index df215e4af38ad9..64701a821407c7 100644 --- a/lib/rubygems/request_set.rb +++ b/lib/rubygems/request_set.rb @@ -443,14 +443,14 @@ def tsort_each_node(&block) # :nodoc: def tsort_each_child(node) # :nodoc: node.spec.dependencies.each do |dep| - next if dep.type == :development and not @development + next if dep.type == :development && !@development match = @requests.find do |r| - dep.match? r.spec.name, r.spec.version, @prerelease + dep.match? 
r.spec.name, r.spec.version, r.spec.is_a?(Gem::Resolver::InstalledSpecification) || @prerelease end unless match - next if dep.type == :development and @development_shallow + next if dep.type == :development && @development_shallow next if @soft_missing raise Gem::DependencyError, "Unresolved dependency found during sorting - #{dep} (requested by #{node.spec.full_name})" diff --git a/lib/rubygems/request_set/gem_dependency_api.rb b/lib/rubygems/request_set/gem_dependency_api.rb index 568d9f952ff8aa..fe75ac5208de5c 100644 --- a/lib/rubygems/request_set/gem_dependency_api.rb +++ b/lib/rubygems/request_set/gem_dependency_api.rb @@ -371,7 +371,7 @@ def gem(name, *requirements) duplicate = @dependencies.include? name @dependencies[name] = - if requirements.empty? and not source_set + if requirements.empty? && !source_set Gem::Requirement.default elsif source_set Gem::Requirement.source_set @@ -789,7 +789,7 @@ def ruby(version, options = {}) raise ArgumentError, "You must specify engine_version along with the Ruby engine" if - engine and not engine_version + engine && !engine_version return true if @installing @@ -800,7 +800,7 @@ def ruby(version, options = {}) raise Gem::RubyVersionMismatch, message end - if engine and engine != Gem.ruby_engine + if engine && engine != Gem.ruby_engine message = "Your Ruby engine is #{Gem.ruby_engine}, " + "but your #{gem_deps_file} requires #{engine}" diff --git a/lib/rubygems/request_set/lockfile/parser.rb b/lib/rubygems/request_set/lockfile/parser.rb index 376d37f9e2d8ae..8446f9df8eb5f1 100644 --- a/lib/rubygems/request_set/lockfile/parser.rb +++ b/lib/rubygems/request_set/lockfile/parser.rb @@ -30,7 +30,7 @@ def parse when "PLATFORMS" then parse_PLATFORMS else - token = get until @tokens.empty? or peek.first == :section + token = get until @tokens.empty? 
|| peek.first == :section end else raise "BUG: unhandled token #{token.type} (#{token.value.inspect}) at line #{token.line} column #{token.column}" @@ -44,7 +44,7 @@ def parse def get(expected_types = nil, expected_value = nil) # :nodoc: token = @tokens.shift - if expected_types and not Array(expected_types).include? token.type + if expected_types && !Array(expected_types).include?(token.type) unget token message = "unexpected token [#{token.type.inspect}, #{token.value.inspect}], " + @@ -53,7 +53,7 @@ def get(expected_types = nil, expected_value = nil) # :nodoc: raise Gem::RequestSet::Lockfile::ParseError.new message, token.column, token.line, @filename end - if expected_value and expected_value != token.value + if expected_value && expected_value != token.value unget token message = "unexpected token [#{token.type.inspect}, #{token.value.inspect}], " + @@ -67,7 +67,7 @@ def get(expected_types = nil, expected_value = nil) # :nodoc: end def parse_DEPENDENCIES # :nodoc: - while not @tokens.empty? and :text == peek.type do + while !@tokens.empty? && :text == peek.type do token = get :text requirements = [] @@ -127,7 +127,7 @@ def parse_GEM # :nodoc: set = Gem::Resolver::LockSet.new sources last_specs = nil - while not @tokens.empty? and :text == peek.type do + while !@tokens.empty? && :text == peek.type do token = get :text name = token.value column = token.column @@ -144,7 +144,7 @@ def parse_GEM # :nodoc: type = token.type data = token.value - if type == :text and column == 4 + if type == :text && column == 4 version, platform = data.split "-", 2 platform = @@ -183,7 +183,7 @@ def parse_GIT # :nodoc: type = peek.type value = peek.value - if type == :entry and %w[branch ref tag].include? value + if type == :entry && %w[branch ref tag].include?(value) get get :text @@ -199,7 +199,7 @@ def parse_GIT # :nodoc: last_spec = nil - while not @tokens.empty? and :text == peek.type do + while !@tokens.empty? 
&& :text == peek.type do token = get :text name = token.value column = token.column @@ -214,7 +214,7 @@ def parse_GIT # :nodoc: type = token.type data = token.value - if type == :text and column == 4 + if type == :text && column == 4 last_spec = set.add_git_spec name, data, repository, revision, true else dependency = parse_dependency name, data @@ -246,7 +246,7 @@ def parse_PATH # :nodoc: set = Gem::Resolver::VendorSet.new last_spec = nil - while not @tokens.empty? and :text == peek.first do + while !@tokens.empty? && :text == peek.first do token = get :text name = token.value column = token.column @@ -261,7 +261,7 @@ def parse_PATH # :nodoc: type = token.type data = token.value - if type == :text and column == 4 + if type == :text && column == 4 last_spec = set.add_vendor_gem name, directory else dependency = parse_dependency name, data @@ -281,7 +281,7 @@ def parse_PATH # :nodoc: end def parse_PLATFORMS # :nodoc: - while not @tokens.empty? and :text == peek.first do + while !@tokens.empty? && :text == peek.first do name = get(:text).value @platforms << name diff --git a/lib/rubygems/request_set/lockfile/tokenizer.rb b/lib/rubygems/request_set/lockfile/tokenizer.rb index 79c573a02d9fb3..4476a041c426dd 100644 --- a/lib/rubygems/request_set/lockfile/tokenizer.rb +++ b/lib/rubygems/request_set/lockfile/tokenizer.rb @@ -1,4 +1,4 @@ -# frozen_string_literal: true +#) frozen_string_literal: true require_relative "parser" class Gem::RequestSet::Lockfile::Tokenizer @@ -26,7 +26,7 @@ def to_a end def skip(type) - @tokens.shift while not @tokens.empty? and peek.type == type + @tokens.shift while !@tokens.empty? && peek.type == type end ## diff --git a/lib/rubygems/requirement.rb b/lib/rubygems/requirement.rb index 12bf371f4ed17e..4f19b8c5b0362d 100644 --- a/lib/rubygems/requirement.rb +++ b/lib/rubygems/requirement.rb @@ -253,7 +253,7 @@ def satisfied_by?(version) def specific? return true if @requirements.length > 1 # GIGO, > 1, > 2 is silly - not %w[> >=].include? 
@requirements.first.first # grab the operator + !%w[> >=].include? @requirements.first.first # grab the operator end def to_s # :nodoc: diff --git a/lib/rubygems/resolver.rb b/lib/rubygems/resolver.rb index 097e8243eedb3a..bf7d6d943b8fb7 100644 --- a/lib/rubygems/resolver.rb +++ b/lib/rubygems/resolver.rb @@ -153,10 +153,10 @@ def requests(s, act, reqs=[]) # :nodoc: s.fetch_development_dependencies if @development s.dependencies.reverse_each do |d| - next if d.type == :development and not @development - next if d.type == :development and @development_shallow and + next if d.type == :development && !@development + next if d.type == :development && @development_shallow && act.development? - next if d.type == :development and @development_shallow and + next if d.type == :development && @development_shallow && act.parent reqs << Gem::Resolver::DependencyRequest.new(d, act) @@ -192,7 +192,7 @@ def resolve conflict = e.conflicts.values.first raise Gem::DependencyResolutionError, Conflict.new(conflict.requirement_trees.first.first, conflict.existing, conflict.requirement) ensure - @output.close if defined?(@output) and !debug? + @output.close if defined?(@output) && !debug? 
end ## diff --git a/lib/rubygems/resolver/api_specification.rb b/lib/rubygems/resolver/api_specification.rb index 7af4d9cff369ec..1e65d5e5a9dc2e 100644 --- a/lib/rubygems/resolver/api_specification.rb +++ b/lib/rubygems/resolver/api_specification.rb @@ -40,10 +40,10 @@ def initialize(set, api_data) end def ==(other) # :nodoc: - self.class === other and - @set == other.set and - @name == other.name and - @version == other.version and + self.class === other && + @set == other.set && + @name == other.name && + @version == other.version && @platform == other.platform end diff --git a/lib/rubygems/resolver/best_set.rb b/lib/rubygems/resolver/best_set.rb index ab91ebca087e41..075ee1ef5c94ba 100644 --- a/lib/rubygems/resolver/best_set.rb +++ b/lib/rubygems/resolver/best_set.rb @@ -25,7 +25,7 @@ def pick_sets # :nodoc: end def find_all(req) # :nodoc: - pick_sets if @remote and @sets.empty? + pick_sets if @remote && @sets.empty? super rescue Gem::RemoteFetcher::FetchError => e @@ -35,7 +35,7 @@ def find_all(req) # :nodoc: end def prefetch(reqs) # :nodoc: - pick_sets if @remote and @sets.empty? + pick_sets if @remote && @sets.empty? super end @@ -63,7 +63,7 @@ def replace_failed_api_set(error) # :nodoc: uri = uri + "." 
raise error unless api_set = @sets.find do |set| - Gem::Resolver::APISet === set and set.dep_uri == uri + Gem::Resolver::APISet === set && set.dep_uri == uri end index_set = Gem::Resolver::IndexSet.new api_set.source diff --git a/lib/rubygems/resolver/conflict.rb b/lib/rubygems/resolver/conflict.rb index 54a7ca4641a21b..aba6d73ea730b2 100644 --- a/lib/rubygems/resolver/conflict.rb +++ b/lib/rubygems/resolver/conflict.rb @@ -27,9 +27,9 @@ def initialize(dependency, activated, failed_dep=dependency) end def ==(other) # :nodoc: - self.class === other and - @dependency == other.dependency and - @activated == other.activated and + self.class === other && + @dependency == other.dependency && + @activated == other.activated && @failed_dep == other.failed_dep end diff --git a/lib/rubygems/resolver/git_specification.rb b/lib/rubygems/resolver/git_specification.rb index d1e04737da02d3..6a178ea82e2db1 100644 --- a/lib/rubygems/resolver/git_specification.rb +++ b/lib/rubygems/resolver/git_specification.rb @@ -6,9 +6,9 @@ class Gem::Resolver::GitSpecification < Gem::Resolver::SpecSpecification def ==(other) # :nodoc: - self.class === other and - @set == other.set and - @spec == other.spec and + self.class === other && + @set == other.set && + @spec == other.spec && @source == other.source end diff --git a/lib/rubygems/resolver/installed_specification.rb b/lib/rubygems/resolver/installed_specification.rb index 7c7ad8d85b9bc8..8932e068be8640 100644 --- a/lib/rubygems/resolver/installed_specification.rb +++ b/lib/rubygems/resolver/installed_specification.rb @@ -5,8 +5,8 @@ class Gem::Resolver::InstalledSpecification < Gem::Resolver::SpecSpecification def ==(other) # :nodoc: - self.class === other and - @set == other.set and + self.class === other && + @set == other.set && @spec == other.spec end diff --git a/lib/rubygems/resolver/installer_set.rb b/lib/rubygems/resolver/installer_set.rb index 15580d7095e27f..f663ce4ad58157 100644 --- a/lib/rubygems/resolver/installer_set.rb +++ 
b/lib/rubygems/resolver/installer_set.rb @@ -61,13 +61,12 @@ def add_always_install(dependency) found = find_all request found.delete_if do |s| - s.version.prerelease? and not s.local? + s.version.prerelease? && !s.local? end unless dependency.prerelease? found = found.select do |s| - Gem::Source::SpecificFile === s.source or - Gem::Platform::RUBY == s.platform or - Gem::Platform.local === s.platform + Gem::Source::SpecificFile === s.source || + Gem::Platform.match(s.platform) end found = found.sort_by do |s| @@ -111,14 +110,14 @@ def add_local(dep_name, spec, source) # Should local gems should be considered? def consider_local? # :nodoc: - @domain == :both or @domain == :local + @domain == :both || @domain == :local end ## # Should remote gems should be considered? def consider_remote? # :nodoc: - @domain == :both or @domain == :remote + @domain == :both || @domain == :remote end ## @@ -137,8 +136,8 @@ def find_all(req) dep = req.dependency - return res if @ignore_dependencies and - @always_install.none? {|spec| dep.match? spec } + return res if @ignore_dependencies && + @always_install.none? {|spec| dep.match? spec } name = dep.name @@ -168,10 +167,6 @@ def find_all(req) end end - res.delete_if do |spec| - spec.version.prerelease? and not dep.prerelease? - end - res.concat @remote_set.find_all req if consider_remote? res diff --git a/lib/rubygems/resolver/lock_set.rb b/lib/rubygems/resolver/lock_set.rb index ff6c6c912f99a6..b1a5433cb54307 100644 --- a/lib/rubygems/resolver/lock_set.rb +++ b/lib/rubygems/resolver/lock_set.rb @@ -54,7 +54,7 @@ def load_spec(name, version, platform, source) # :nodoc: dep = Gem::Dependency.new name, version found = @specs.find do |spec| - dep.matches_spec? 
spec and spec.platform == platform + dep.matches_spec?(spec) && spec.platform == platform end tuple = Gem::NameTuple.new found.name, found.version, found.platform diff --git a/lib/rubygems/resolver/lock_specification.rb b/lib/rubygems/resolver/lock_specification.rb index 4a30dcf8490212..7de2a146589de2 100644 --- a/lib/rubygems/resolver/lock_specification.rb +++ b/lib/rubygems/resolver/lock_specification.rb @@ -71,7 +71,7 @@ def pretty_print(q) # :nodoc: def spec @spec ||= Gem::Specification.find do |spec| - spec.name == @name and spec.version == @version + spec.name == @name && spec.version == @version end @spec ||= Gem::Specification.new do |s| diff --git a/lib/rubygems/resolver/vendor_specification.rb b/lib/rubygems/resolver/vendor_specification.rb index 8dfe5940f2a954..600a98a2bfd17b 100644 --- a/lib/rubygems/resolver/vendor_specification.rb +++ b/lib/rubygems/resolver/vendor_specification.rb @@ -6,9 +6,9 @@ class Gem::Resolver::VendorSpecification < Gem::Resolver::SpecSpecification def ==(other) # :nodoc: - self.class === other and - @set == other.set and - @spec == other.spec and + self.class === other && + @set == other.set && + @spec == other.spec && @source == other.source end diff --git a/lib/rubygems/security.rb b/lib/rubygems/security.rb index 4eb402305546a4..dd16283a982296 100644 --- a/lib/rubygems/security.rb +++ b/lib/rubygems/security.rb @@ -618,7 +618,7 @@ def self.write(pemmable, path, permissions = 0600, passphrase = nil, cipher = KE path = File.expand_path path File.open path, "wb", permissions do |io| - if passphrase and cipher + if passphrase && cipher io.write pemmable.to_pem cipher, passphrase else io.write pemmable.to_pem diff --git a/lib/rubygems/security/policy.rb b/lib/rubygems/security/policy.rb index 43588fd7f10a78..959880ddc1e653 100644 --- a/lib/rubygems/security/policy.rb +++ b/lib/rubygems/security/policy.rb @@ -88,16 +88,16 @@ def check_cert(signer, issuer, time) message = "certificate #{signer.subject}" - if not_before = 
signer.not_before and not_before > time + if (not_before = signer.not_before) && not_before > time raise Gem::Security::Exception, "#{message} not valid before #{not_before}" end - if not_after = signer.not_after and not_after < time + if (not_after = signer.not_after) && not_after < time raise Gem::Security::Exception, "#{message} not valid after #{not_after}" end - if issuer and not signer.verify issuer.public_key + if issuer && !signer.verify(issuer.public_key) raise Gem::Security::Exception, "#{message} was not issued by #{issuer.subject}" end @@ -109,7 +109,7 @@ def check_cert(signer, issuer, time) # Ensures the public key of +key+ matches the public key in +signer+ def check_key(signer, key) - unless signer and key + unless signer && key return true unless @only_signed raise Gem::Security::Exception, "missing key or signature" @@ -231,7 +231,7 @@ def verify(chain, key = nil, digests = {}, signatures = {}, if @verify_data raise Gem::Security::Exception, "no digests provided (probable bug)" if - signer_digests.nil? or signer_digests.empty? + signer_digests.nil? || signer_digests.empty? else signer_digests = {} end @@ -248,7 +248,7 @@ def verify(chain, key = nil, digests = {}, signatures = {}, if @only_trusted check_trust chain, digester, trust_dir - elsif signatures.empty? and digests.empty? + elsif signatures.empty? && digests.empty? # trust is irrelevant if there's no signatures to verify else alert_warning "#{subject signer} is not trusted for #{full_name}" diff --git a/lib/rubygems/security/signer.rb b/lib/rubygems/security/signer.rb index b1308c4e4212a6..cca82f1cf85a88 100644 --- a/lib/rubygems/security/signer.rb +++ b/lib/rubygems/security/signer.rb @@ -141,7 +141,7 @@ def sign(data) raise Gem::Security::Exception, "no certs provided" if @cert_chain.empty? 
- if @cert_chain.length == 1 and @cert_chain.last.not_after < Time.now + if @cert_chain.length == 1 && @cert_chain.last.not_after < Time.now alert("Your certificate has expired, trying to re-sign it...") re_sign_key( diff --git a/lib/rubygems/source.rb b/lib/rubygems/source.rb index 7c3b6786458d6a..fc72a1038aa68a 100644 --- a/lib/rubygems/source.rb +++ b/lib/rubygems/source.rb @@ -62,7 +62,7 @@ def <=>(other) end def ==(other) # :nodoc: - self.class === other and @uri == other.uri + self.class === other && @uri == other.uri end alias_method :eql?, :== # :nodoc: diff --git a/lib/rubygems/source/git.rb b/lib/rubygems/source/git.rb index 22355adcfa90e8..2609a309e818cb 100644 --- a/lib/rubygems/source/git.rb +++ b/lib/rubygems/source/git.rb @@ -76,10 +76,10 @@ def <=>(other) end def ==(other) # :nodoc: - super and - @name == other.name and - @repository == other.repository and - @reference == other.reference and + super && + @name == other.name && + @repository == other.repository && + @reference == other.reference && @need_submodules == other.need_submodules end diff --git a/lib/rubygems/spec_fetcher.rb b/lib/rubygems/spec_fetcher.rb index 43e7e05b63d693..0d06d1f144d523 100644 --- a/lib/rubygems/spec_fetcher.rb +++ b/lib/rubygems/spec_fetcher.rb @@ -98,7 +98,7 @@ def search_for_dependency(dependency, matching_platform=true) found[source] = specs.select do |tup| if dependency.match?(tup) - if matching_platform and !Gem::Platform.match_gem?(tup.platform, tup.name) + if matching_platform && !Gem::Platform.match_gem?(tup.platform, tup.name) pm = ( rejected_specs[dependency] ||= \ Gem::PlatformMismatch.new(tup.name, tup.version)) diff --git a/lib/rubygems/specification.rb b/lib/rubygems/specification.rb index 28ad176b535578..af07cd36e25e81 100644 --- a/lib/rubygems/specification.rb +++ b/lib/rubygems/specification.rb @@ -473,7 +473,7 @@ def author=(o) # spec.platform = Gem::Platform.local def platform=(platform) - if @original_platform.nil? or + if @original_platform.nil? 
|| @original_platform == Gem::Platform::RUBY @original_platform = platform end @@ -1046,7 +1046,7 @@ def self.find_inactive_by_path(path) def self.find_active_stub_by_path(path) stub = @@active_stub_with_requirable_file[path] ||= (stubs.find do |s| - s.activated? and s.contains_requirable_file? path + s.activated? && s.contains_requirable_file?(path) end || NOT_FOUND) stub.this end @@ -1234,7 +1234,7 @@ def self.outdated_and_latest_version latest_remote = remotes.sort.last yield [local_spec, latest_remote] if - latest_remote and local_spec.version < latest_remote + latest_remote && local_spec.version < latest_remote end nil @@ -1556,7 +1556,7 @@ def add_self_to_load_path # Singular reader for #authors. Returns the first author in the list def author - val = authors and val.first + (val = authors) && val.first end ## @@ -1668,7 +1668,7 @@ def conflicts conflicts = {} self.runtime_dependencies.each do |dep| spec = Gem.loaded_specs[dep.name] - if spec and not spec.satisfies_requirement? dep + if spec && !spec.satisfies_requirement?(dep) (conflicts[spec] ||= []) << dep end end @@ -1695,7 +1695,7 @@ def has_conflicts? self.dependencies.any? do |dep| if dep.runtime? spec = Gem.loaded_specs[dep.name] - spec and not spec.satisfies_requirement? dep + spec && !spec.satisfies_requirement?(dep) else false end @@ -1716,7 +1716,7 @@ def date DateLike = Object.new # :nodoc: def DateLike.===(obj) # :nodoc: - defined?(::Date) and Date === obj + defined?(::Date) && Date === obj end DateTimeFormat = # :nodoc: @@ -1756,9 +1756,9 @@ def date=(date) # executable now. See Gem.bin_path. 
def default_executable # :nodoc: - if defined?(@default_executable) and @default_executable + if defined?(@default_executable) && @default_executable result = @default_executable - elsif @executables and @executables.size == 1 + elsif @executables && @executables.size == 1 result = Array(@executables).first else result = nil @@ -1875,7 +1875,7 @@ def eql?(other) # :nodoc: # Singular accessor for #executables def executable - val = executables and val.first + (val = executables) && val.first end ## @@ -1987,7 +1987,7 @@ def has_rdoc=(ignored) # :nodoc: # True if this gem has files in test_files def has_unit_tests? # :nodoc: - not test_files.empty? + !test_files.empty? end # :stopdoc: @@ -2040,7 +2040,7 @@ def initialize(name = nil, version = nil) self.name = name if name self.version = version if version - if platform = Gem.platforms.last and platform != Gem::Platform::RUBY and platform != Gem::Platform.local + if (platform = Gem.platforms.last) && platform != Gem::Platform::RUBY && platform != Gem::Platform.local self.platform = platform end @@ -2155,8 +2155,8 @@ def method_missing(sym, *a, &b) # :nodoc: return end - if @specification_version > CURRENT_SPECIFICATION_VERSION and - sym.to_s.end_with?("=") + if @specification_version > CURRENT_SPECIFICATION_VERSION && + sym.to_s.end_with?("=") warn "ignoring #{sym} loading #{full_name}" if $DEBUG else super @@ -2182,7 +2182,7 @@ def missing_extensions? # file list. def normalize - if defined?(@extra_rdoc_files) and @extra_rdoc_files + if defined?(@extra_rdoc_files) && @extra_rdoc_files @extra_rdoc_files.uniq! @files ||= [] @files.concat(@extra_rdoc_files) @@ -2207,7 +2207,7 @@ def name_tuple # platform. For use with legacy gems. def original_name # :nodoc: - if platform == Gem::Platform::RUBY or platform.nil? + if platform == Gem::Platform::RUBY || platform.nil? 
"#{@name}-#{@version}" else "#{@name}-#{@version}-#{@original_platform}" @@ -2240,8 +2240,8 @@ def pretty_print(q) # :nodoc: attributes.each do |attr_name| current_value = self.send attr_name current_value = current_value.sort if %i[files test_files].include? attr_name - if current_value != default_value(attr_name) or - self.class.required_attribute? attr_name + if current_value != default_value(attr_name) || + self.class.required_attribute?(attr_name) q.text "s.#{attr_name} = " @@ -2299,7 +2299,7 @@ def rdoc_options=(options) # Singular accessor for #require_paths def require_path - val = require_paths and val.first + (val = require_paths) && val.first end ## @@ -2374,7 +2374,7 @@ def same_attributes?(spec) def satisfies_requirement?(dependency) return @name == dependency.name && - dependency.requirement.satisfied_by?(@version) + dependency.requirement.satisfied_by?(@version) end ## @@ -2428,7 +2428,7 @@ def summary=(str) # Singular accessor for #test_files def test_file # :nodoc: - val = test_files and val.first + (val = test_files) && val.first end ## @@ -2450,7 +2450,7 @@ def test_files # :nodoc: @test_files = [@test_suite_file].flatten @test_suite_file = nil end - if defined?(@test_files) and @test_files + if defined?(@test_files) && @test_files @test_files else @test_files = [] @@ -2474,13 +2474,13 @@ def to_ruby result << " s.name = #{ruby_code name}" result << " s.version = #{ruby_code version}" - unless platform.nil? or platform == Gem::Platform::RUBY + unless platform.nil? || platform == Gem::Platform::RUBY result << " s.platform = #{ruby_code original_platform}" end result << "" result << " s.required_rubygems_version = #{ruby_code required_rubygems_version} if s.respond_to? :required_rubygems_version=" - if metadata and !metadata.empty? + if metadata && !metadata.empty? result << " s.metadata = #{ruby_code metadata} if s.respond_to? 
:metadata=" end result << " s.require_paths = #{ruby_code raw_require_paths}" diff --git a/lib/rubygems/specification_policy.rb b/lib/rubygems/specification_policy.rb index 332189ae9f4c30..44b31211e536ca 100644 --- a/lib/rubygems/specification_policy.rb +++ b/lib/rubygems/specification_policy.rb @@ -188,7 +188,7 @@ def validate_dependencies # :nodoc: prerelease_dep && !@specification.version.prerelease? open_ended = dep.requirement.requirements.all? do |op, version| - not version.prerelease? and (op == ">" or op == ">=") + !version.prerelease? && (op == ">" || op == ">=") end if open_ended @@ -203,7 +203,7 @@ def validate_dependencies # :nodoc: else bugfix = if op == ">" ", '> #{dep_version}'" - elsif op == ">=" and base != segments + elsif op == ">=" && base != segments ", '>= #{dep_version}'" end @@ -338,7 +338,7 @@ def validate_array_attribute(field) String end - unless Array === val and val.all? {|x| x.kind_of?(klass) } + unless Array === val && val.all? {|x| x.kind_of?(klass) } error "#{field} must be an Array of #{klass}" end end @@ -404,7 +404,7 @@ def validate_lazy_metadata homepage = @specification.homepage # Make sure a homepage is valid HTTP/HTTPS URI - if homepage and not homepage.empty? + if homepage && !homepage.empty? require "uri" begin homepage_uri = URI.parse(homepage) diff --git a/lib/rubygems/uninstaller.rb b/lib/rubygems/uninstaller.rb index 1ae301a44de850..5883ed1c41d82d 100644 --- a/lib/rubygems/uninstaller.rb +++ b/lib/rubygems/uninstaller.rb @@ -105,8 +105,8 @@ def uninstall @default_specs_matching_uninstall_params = default_specs list, other_repo_specs = list.partition do |spec| - @gem_home == spec.base_dir or - (@user_install and spec.base_dir == Gem.user_dir) + @gem_home == spec.base_dir || + (@user_install && spec.base_dir == Gem.user_dir) end list.sort! 
@@ -239,8 +239,8 @@ def remove_all(list) # spec:: the spec of the gem to be uninstalled def remove(spec) - unless path_ok?(@gem_home, spec) or - (@user_install and path_ok?(Gem.user_dir, spec)) + unless path_ok?(@gem_home, spec) || + (@user_install && path_ok?(Gem.user_dir, spec)) e = Gem::GemNotInHomeException.new \ "Gem '#{spec.full_name}' is not installed in directory #{@gem_home}" e.spec = spec diff --git a/lib/rubygems/user_interaction.rb b/lib/rubygems/user_interaction.rb index 4b0a7c60bb6b61..2fa505423becd2 100644 --- a/lib/rubygems/user_interaction.rb +++ b/lib/rubygems/user_interaction.rb @@ -284,7 +284,7 @@ def ask_yes_no(question, default=nil) # Ask a question. Returns an answer if connected to a tty, nil otherwise. def ask(question) - return nil if not tty? + return nil if !tty? @outs.print(question + " ") @outs.flush @@ -298,7 +298,7 @@ def ask(question) # Ask for a password. Does not echo response to terminal. def ask_for_password(question) - return nil if not tty? + return nil if !tty? @outs.print(question, " ") @outs.flush diff --git a/lib/rubygems/version.rb b/lib/rubygems/version.rb index bb41374ffc3d02..f67889ef1a315b 100644 --- a/lib/rubygems/version.rb +++ b/lib/rubygems/version.rb @@ -252,7 +252,7 @@ def bump # same precision. Version "1.0" is not the same as version "1". 
def eql?(other) - self.class === other and @version == other._version + self.class === other && @version == other._version end def hash # :nodoc: diff --git a/lib/syntax_suggest.rb b/lib/syntax_suggest.rb new file mode 100644 index 00000000000000..1a45dfa6760a48 --- /dev/null +++ b/lib/syntax_suggest.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +require_relative "syntax_suggest/core_ext" diff --git a/lib/syntax_suggest/api.rb b/lib/syntax_suggest/api.rb new file mode 100644 index 00000000000000..5b725e13d71972 --- /dev/null +++ b/lib/syntax_suggest/api.rb @@ -0,0 +1,199 @@ +# frozen_string_literal: true + +require_relative "version" + +require "tmpdir" +require "stringio" +require "pathname" +require "ripper" +require "timeout" + +module SyntaxSuggest + # Used to indicate a default value that cannot + # be confused with another input. + DEFAULT_VALUE = Object.new.freeze + + class Error < StandardError; end + TIMEOUT_DEFAULT = ENV.fetch("SYNTAX_SUGGEST_TIMEOUT", 1).to_i + + # SyntaxSuggest.handle_error [Public] + # + # Takes a `SyntaxError` exception, uses the + # error message to locate the file. Then the file + # will be analyzed to find the location of the syntax + # error and emit that location to stderr. + # + # Example: + # + # begin + # require 'bad_file' + # rescue => e + # SyntaxSuggest.handle_error(e) + # end + # + # By default it will re-raise the exception unless + # `re_raise: false`. The message output location + # can be configured using the `io: $stderr` input. + # + # If a valid filename cannot be determined, the original + # exception will be re-raised (even with + # `re_raise: false`). 
+ def self.handle_error(e, re_raise: true, io: $stderr) + unless e.is_a?(SyntaxError) + io.puts("SyntaxSuggest: Must pass a SyntaxError, got: #{e.class}") + raise e + end + + file = PathnameFromMessage.new(e.message, io: io).call.name + raise e unless file + + io.sync = true + + call( + io: io, + source: file.read, + filename: file + ) + + raise e if re_raise + end + + # SyntaxSuggest.call [Private] + # + # Main private interface + def self.call(source:, filename: DEFAULT_VALUE, terminal: DEFAULT_VALUE, record_dir: DEFAULT_VALUE, timeout: TIMEOUT_DEFAULT, io: $stderr) + search = nil + filename = nil if filename == DEFAULT_VALUE + Timeout.timeout(timeout) do + record_dir ||= ENV["DEBUG"] ? "tmp" : nil + search = CodeSearch.new(source, record_dir: record_dir).call + end + + blocks = search.invalid_blocks + DisplayInvalidBlocks.new( + io: io, + blocks: blocks, + filename: filename, + terminal: terminal, + code_lines: search.code_lines + ).call + rescue Timeout::Error => e + io.puts "Search timed out SYNTAX_SUGGEST_TIMEOUT=#{timeout}, run with DEBUG=1 for more info" + io.puts e.backtrace.first(3).join($/) + end + + # SyntaxSuggest.record_dir [Private] + # + # Used to generate a unique directory to record + # search steps for debugging + def self.record_dir(dir) + time = Time.now.strftime("%Y-%m-%d-%H-%M-%s-%N") + dir = Pathname(dir) + dir.join(time).tap { |path| + path.mkpath + FileUtils.ln_sf(time, dir.join("last")) + } + end + + # SyntaxSuggest.valid_without? [Private] + # + # This will tell you if the `code_lines` would be valid + # if you removed the `without_lines`. In short it's a + # way to detect if we've found the lines with syntax errors + # in our document yet. 
+ # + # code_lines = [ + # CodeLine.new(line: "def foo\n", index: 0) + # CodeLine.new(line: " def bar\n", index: 1) + # CodeLine.new(line: "end\n", index: 2) + # ] + # + # SyntaxSuggest.valid_without?( + # without_lines: code_lines[1], + # code_lines: code_lines + # ) # => true + # + # SyntaxSuggest.valid?(code_lines) # => false + def self.valid_without?(without_lines:, code_lines:) + lines = code_lines - Array(without_lines).flatten + + if lines.empty? + true + else + valid?(lines) + end + end + + # SyntaxSuggest.invalid? [Private] + # + # Opposite of `SyntaxSuggest.valid?` + def self.invalid?(source) + source = source.join if source.is_a?(Array) + source = source.to_s + + Ripper.new(source).tap(&:parse).error? + end + + # SyntaxSuggest.valid? [Private] + # + # Returns truthy if a given input source is valid syntax + # + # SyntaxSuggest.valid?(<<~EOM) # => true + # def foo + # end + # EOM + # + # SyntaxSuggest.valid?(<<~EOM) # => false + # def foo + # def bar # Syntax error here + # end + # EOM + # + # You can also pass in an array of lines and they'll be + # joined before evaluating + # + # SyntaxSuggest.valid?( + # [ + # "def foo\n", + # "end\n" + # ] + # ) # => true + # + # SyntaxSuggest.valid?( + # [ + # "def foo\n", + # " def bar\n", # Syntax error here + # "end\n" + # ] + # ) # => false + # + # As an FYI the CodeLine class instances respond to `to_s` + # so passing a CodeLine in as an object or as an array + # will convert it to it's code representation. 
+ def self.valid?(source) + !invalid?(source) + end +end + +# Integration +require_relative "cli" + +# Core logic +require_relative "code_search" +require_relative "code_frontier" +require_relative "explain_syntax" +require_relative "clean_document" + +# Helpers +require_relative "lex_all" +require_relative "code_line" +require_relative "code_block" +require_relative "block_expand" +require_relative "ripper_errors" +require_relative "priority_queue" +require_relative "unvisited_lines" +require_relative "around_block_scan" +require_relative "priority_engulf_queue" +require_relative "pathname_from_message" +require_relative "display_invalid_blocks" +require_relative "parse_blocks_from_indent_line" diff --git a/lib/syntax_suggest/around_block_scan.rb b/lib/syntax_suggest/around_block_scan.rb new file mode 100644 index 00000000000000..2a57d1b19e6b7e --- /dev/null +++ b/lib/syntax_suggest/around_block_scan.rb @@ -0,0 +1,224 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # This class is useful for exploring contents before and after + # a block + # + # It searches above and below the passed in block to match for + # whatever criteria you give it: + # + # Example: + # + # def dog # 1 + # puts "bark" # 2 + # puts "bark" # 3 + # end # 4 + # + # scan = AroundBlockScan.new( + # code_lines: code_lines + # block: CodeBlock.new(lines: code_lines[1]) + # ) + # + # scan.scan_while { true } + # + # puts scan.before_index # => 0 + # puts scan.after_index # => 3 + # + # Contents can also be filtered using AroundBlockScan#skip + # + # To grab the next surrounding indentation use AroundBlockScan#scan_adjacent_indent + class AroundBlockScan + def initialize(code_lines:, block:) + @code_lines = code_lines + @orig_before_index = block.lines.first.index + @orig_after_index = block.lines.last.index + @orig_indent = block.current_indent + @skip_array = [] + @after_array = [] + @before_array = [] + @stop_after_kw = false + + @skip_hidden = false + @skip_empty = false + end + + 
def skip(name) + case name + when :hidden? + @skip_hidden = true + when :empty? + @skip_empty = true + else + raise "Unsupported skip #{name}" + end + self + end + + def stop_after_kw + @stop_after_kw = true + self + end + + def scan_while + stop_next = false + + kw_count = 0 + end_count = 0 + index = before_lines.reverse_each.take_while do |line| + next false if stop_next + next true if @skip_hidden && line.hidden? + next true if @skip_empty && line.empty? + + kw_count += 1 if line.is_kw? + end_count += 1 if line.is_end? + if @stop_after_kw && kw_count > end_count + stop_next = true + end + + yield line + end.last&.index + + if index && index < before_index + @before_index = index + end + + stop_next = false + kw_count = 0 + end_count = 0 + index = after_lines.take_while do |line| + next false if stop_next + next true if @skip_hidden && line.hidden? + next true if @skip_empty && line.empty? + + kw_count += 1 if line.is_kw? + end_count += 1 if line.is_end? + if @stop_after_kw && end_count > kw_count + stop_next = true + end + + yield line + end.last&.index + + if index && index > after_index + @after_index = index + end + self + end + + def capture_neighbor_context + lines = [] + kw_count = 0 + end_count = 0 + before_lines.reverse_each do |line| + next if line.empty? + break if line.indent < @orig_indent + next if line.indent != @orig_indent + + kw_count += 1 if line.is_kw? + end_count += 1 if line.is_end? + if kw_count != 0 && kw_count == end_count + lines << line + break + end + + lines << line + end + + lines.reverse! + + kw_count = 0 + end_count = 0 + after_lines.each do |line| + next if line.empty? + break if line.indent < @orig_indent + next if line.indent != @orig_indent + + kw_count += 1 if line.is_kw? + end_count += 1 if line.is_end? 
+ if kw_count != 0 && kw_count == end_count + lines << line + break + end + + lines << line + end + + lines + end + + def on_falling_indent + last_indent = @orig_indent + before_lines.reverse_each do |line| + next if line.empty? + if line.indent < last_indent + yield line + last_indent = line.indent + end + end + + last_indent = @orig_indent + after_lines.each do |line| + next if line.empty? + if line.indent < last_indent + yield line + last_indent = line.indent + end + end + end + + def scan_neighbors + scan_while { |line| line.not_empty? && line.indent >= @orig_indent } + end + + def next_up + @code_lines[before_index.pred] + end + + def next_down + @code_lines[after_index.next] + end + + def scan_adjacent_indent + before_after_indent = [] + before_after_indent << (next_up&.indent || 0) + before_after_indent << (next_down&.indent || 0) + + indent = before_after_indent.min + scan_while { |line| line.not_empty? && line.indent >= indent } + + self + end + + def start_at_next_line + before_index + after_index + @before_index -= 1 + @after_index += 1 + self + end + + def code_block + CodeBlock.new(lines: lines) + end + + def lines + @code_lines[before_index..after_index] + end + + def before_index + @before_index ||= @orig_before_index + end + + def after_index + @after_index ||= @orig_after_index + end + + private def before_lines + @code_lines[0...before_index] || [] + end + + private def after_lines + @code_lines[after_index.next..-1] || [] + end + end +end diff --git a/lib/syntax_suggest/block_expand.rb b/lib/syntax_suggest/block_expand.rb new file mode 100644 index 00000000000000..396b2c3a1acc88 --- /dev/null +++ b/lib/syntax_suggest/block_expand.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # This class is responsible for taking a code block that exists + # at a far indentaion and then iteratively increasing the block + # so that it captures everything within the same indentation block. 
+ # + # def dog + # puts "bow" + # puts "wow" + # end + # + # block = BlockExpand.new(code_lines: code_lines) + # .call(CodeBlock.new(lines: code_lines[1])) + # + # puts block.to_s + # # => puts "bow" + # puts "wow" + # + # + # Once a code block has captured everything at a given indentation level + # then it will expand to capture surrounding indentation. + # + # block = BlockExpand.new(code_lines: code_lines) + # .call(block) + # + # block.to_s + # # => def dog + # puts "bow" + # puts "wow" + # end + # + class BlockExpand + def initialize(code_lines:) + @code_lines = code_lines + end + + def call(block) + if (next_block = expand_neighbors(block)) + return next_block + end + + expand_indent(block) + end + + def expand_indent(block) + AroundBlockScan.new(code_lines: @code_lines, block: block) + .skip(:hidden?) + .stop_after_kw + .scan_adjacent_indent + .code_block + end + + def expand_neighbors(block) + expanded_lines = AroundBlockScan.new(code_lines: @code_lines, block: block) + .skip(:hidden?) + .stop_after_kw + .scan_neighbors + .scan_while { |line| line.empty? } # Slurp up empties + .lines + + if block.lines == expanded_lines + nil + else + CodeBlock.new(lines: expanded_lines) + end + end + + # Managable rspec errors + def inspect + "#" + end + end +end diff --git a/lib/syntax_suggest/capture_code_context.rb b/lib/syntax_suggest/capture_code_context.rb new file mode 100644 index 00000000000000..c74a366a258d3f --- /dev/null +++ b/lib/syntax_suggest/capture_code_context.rb @@ -0,0 +1,233 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Turns a "invalid block(s)" into useful context + # + # There are three main phases in the algorithm: + # + # 1. Sanitize/format input source + # 2. Search for invalid blocks + # 3. Format invalid blocks into something meaninful + # + # This class handles the third part. 
+ # + # The algorithm is very good at capturing all of a syntax + # error in a single block in number 2, however the results + # can contain ambiguities. Humans are good at pattern matching + # and filtering and can mentally remove extraneous data, but + # they can't add extra data that's not present. + # + # In the case of known ambiguious cases, this class adds context + # back to the ambiguitiy so the programmer has full information. + # + # Beyond handling these ambiguities, it also captures surrounding + # code context information: + # + # puts block.to_s # => "def bark" + # + # context = CaptureCodeContext.new( + # blocks: block, + # code_lines: code_lines + # ) + # + # lines = context.call.map(&:original) + # puts lines.join + # # => + # class Dog + # def bark + # end + # + class CaptureCodeContext + attr_reader :code_lines + + def initialize(blocks:, code_lines:) + @blocks = Array(blocks) + @code_lines = code_lines + @visible_lines = @blocks.map(&:visible_lines).flatten + @lines_to_output = @visible_lines.dup + end + + def call + @blocks.each do |block| + capture_first_kw_end_same_indent(block) + capture_last_end_same_indent(block) + capture_before_after_kws(block) + capture_falling_indent(block) + end + + @lines_to_output.select!(&:not_empty?) + @lines_to_output.uniq! + @lines_to_output.sort! + + @lines_to_output + end + + # Shows the context around code provided by "falling" indentation + # + # Converts: + # + # it "foo" do + # + # into: + # + # class OH + # def hello + # it "foo" do + # end + # end + # + # + def capture_falling_indent(block) + AroundBlockScan.new( + block: block, + code_lines: @code_lines + ).on_falling_indent do |line| + @lines_to_output << line + end + end + + # Shows surrounding kw/end pairs + # + # The purpose of showing these extra pairs is due to cases + # of ambiguity when only one visible line is matched. 
+ # + # For example: + # + # 1 class Dog + # 2 def bark + # 4 def eat + # 5 end + # 6 end + # + # In this case either line 2 could be missing an `end` or + # line 4 was an extra line added by mistake (it happens). + # + # When we detect the above problem it shows the issue + # as only being on line 2 + # + # 2 def bark + # + # Showing "neighbor" keyword pairs gives extra context: + # + # 2 def bark + # 4 def eat + # 5 end + # + def capture_before_after_kws(block) + return unless block.visible_lines.count == 1 + + around_lines = AroundBlockScan.new(code_lines: @code_lines, block: block) + .start_at_next_line + .capture_neighbor_context + + around_lines -= block.lines + + @lines_to_output.concat(around_lines) + end + + # When there is an invalid block with a keyword + # missing an end right before another end, + # it is unclear where which keyword is missing the + # end + # + # Take this example: + # + # class Dog # 1 + # def bark # 2 + # puts "woof" # 3 + # end # 4 + # + # However due to https://github.com/zombocom/syntax_suggest/issues/32 + # the problem line will be identified as: + # + # ❯ class Dog # 1 + # + # Because lines 2, 3, and 4 are technically valid code and are expanded + # first, deemed valid, and hidden. We need to un-hide the matching end + # line 4. Also work backwards and if there's a mis-matched keyword, show it + # too + def capture_last_end_same_indent(block) + return if block.visible_lines.length != 1 + return unless block.visible_lines.first.is_kw? + + visible_line = block.visible_lines.first + lines = @code_lines[visible_line.index..block.lines.last.index] + + # Find first end with same indent + # (this would return line 4) + # + # end # 4 + matching_end = lines.detect { |line| line.indent == block.current_indent && line.is_end? 
} + return unless matching_end + + @lines_to_output << matching_end + + # Work backwards from the end to + # see if there are mis-matched + # keyword/end pairs + # + # Return the first mis-matched keyword + # this would find line 2 + # + # def bark # 2 + # puts "woof" # 3 + # end # 4 + end_count = 0 + kw_count = 0 + kw_line = @code_lines[visible_line.index..matching_end.index].reverse.detect do |line| + end_count += 1 if line.is_end? + kw_count += 1 if line.is_kw? + + !kw_count.zero? && kw_count >= end_count + end + return unless kw_line + @lines_to_output << kw_line + end + + # The logical inverse of `capture_last_end_same_indent` + # + # When there is an invalid block with an `end` + # missing a keyword right after another `end`, + # it is unclear where which end is missing the + # keyword. + # + # Take this example: + # + # class Dog # 1 + # puts "woof" # 2 + # end # 3 + # end # 4 + # + # the problem line will be identified as: + # + # ❯ end # 4 + # + # This happens because lines 1, 2, and 3 are technically valid code and are expanded + # first, deemed valid, and hidden. We need to un-hide the matching keyword on + # line 1. Also work backwards and if there's a mis-matched end, show it + # too + def capture_first_kw_end_same_indent(block) + return if block.visible_lines.length != 1 + return unless block.visible_lines.first.is_end? + + visible_line = block.visible_lines.first + lines = @code_lines[block.lines.first.index..visible_line.index] + matching_kw = lines.reverse.detect { |line| line.indent == block.current_indent && line.is_kw? } + return unless matching_kw + + @lines_to_output << matching_kw + + kw_count = 0 + end_count = 0 + orphan_end = @code_lines[matching_kw.index..visible_line.index].detect do |line| + kw_count += 1 if line.is_kw? + end_count += 1 if line.is_end? 
+ + end_count >= kw_count + end + + return unless orphan_end + @lines_to_output << orphan_end + end + end +end diff --git a/lib/syntax_suggest/clean_document.rb b/lib/syntax_suggest/clean_document.rb new file mode 100644 index 00000000000000..b572189259e53f --- /dev/null +++ b/lib/syntax_suggest/clean_document.rb @@ -0,0 +1,304 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Parses and sanitizes source into a lexically aware document + # + # Internally the document is represented by an array with each + # index containing a CodeLine correlating to a line from the source code. + # + # There are three main phases in the algorithm: + # + # 1. Sanitize/format input source + # 2. Search for invalid blocks + # 3. Format invalid blocks into something meaninful + # + # This class handles the first part. + # + # The reason this class exists is to format input source + # for better/easier/cleaner exploration. + # + # The CodeSearch class operates at the line level so + # we must be careful to not introduce lines that look + # valid by themselves, but when removed will trigger syntax errors + # or strange behavior. + # + # ## Join Trailing slashes + # + # Code with a trailing slash is logically treated as a single line: + # + # 1 it "code can be split" \ + # 2 "across multiple lines" do + # + # In this case removing line 2 would add a syntax error. We get around + # this by internally joining the two lines into a single "line" object + # + # ## Logically Consecutive lines + # + # Code that can be broken over multiple + # lines such as method calls are on different lines: + # + # 1 User. + # 2 where(name: "schneems"). + # 3 first + # + # Removing line 2 can introduce a syntax error. To fix this, all lines + # are joined into one. + # + # ## Heredocs + # + # A heredoc is an way of defining a multi-line string. They can cause many + # problems. If left as a single line, Ripper would try to parse the contents + # as ruby code rather than as a string. 
Even without this problem, we still + # hit an issue with indentation + # + # 1 foo = <<~HEREDOC + # 2 "Be yourself; everyone else is already taken."" + # 3 ― Oscar Wilde + # 4 puts "I look like ruby code" # but i'm still a heredoc + # 5 HEREDOC + # + # If we didn't join these lines then our algorithm would think that line 4 + # is separate from the rest, has a higher indentation, then look at it first + # and remove it. + # + # If the code evaluates line 5 by itself it will think line 5 is a constant, + # remove it, and introduce a syntax errror. + # + # All of these problems are fixed by joining the whole heredoc into a single + # line. + # + # ## Comments and whitespace + # + # Comments can throw off the way the lexer tells us that the line + # logically belongs with the next line. This is valid ruby but + # results in a different lex output than before: + # + # 1 User. + # 2 where(name: "schneems"). + # 3 # Comment here + # 4 first + # + # To handle this we can replace comment lines with empty lines + # and then re-lex the source. This removal and re-lexing preserves + # line index and document size, but generates an easier to work with + # document. + # + class CleanDocument + def initialize(source:) + lines = clean_sweep(source: source) + @document = CodeLine.from_source(lines.join, lines: lines) + end + + # Call all of the document "cleaners" + # and return self + def call + join_trailing_slash! + join_consecutive! + join_heredoc! 
+ + self + end + + # Return an array of CodeLines in the + # document + def lines + @document + end + + # Renders the document back to a string + def to_s + @document.join + end + + # Remove comments and whitespace only lines + # + # replace with empty newlines + # + # source = <<~'EOM' + # # Comment 1 + # puts "hello" + # # Comment 2 + # puts "world" + # EOM + # + # lines = CleanDocument.new(source: source).lines + # expect(lines[0].to_s).to eq("\n") + # expect(lines[1].to_s).to eq("puts "hello") + # expect(lines[2].to_s).to eq("\n") + # expect(lines[3].to_s).to eq("puts "world") + # + # Important: This must be done before lexing. + # + # After this change is made, we lex the document because + # removing comments can change how the doc is parsed. + # + # For example: + # + # values = LexAll.new(source: <<~EOM)) + # User. + # # comment + # where(name: 'schneems') + # EOM + # expect( + # values.count {|v| v.type == :on_ignored_nl} + # ).to eq(1) + # + # After the comment is removed: + # + # values = LexAll.new(source: <<~EOM)) + # User. + # + # where(name: 'schneems') + # EOM + # expect( + # values.count {|v| v.type == :on_ignored_nl} + # ).to eq(2) + # + def clean_sweep(source:) + source.lines.map do |line| + if line.match?(/^\s*(#[^{].*)?$/) # https://rubular.com/r/LLE10D8HKMkJvs + $/ + else + line + end + end + end + + # Smushes all heredoc lines into one line + # + # source = <<~'EOM' + # foo = <<~HEREDOC + # lol + # hehehe + # HEREDOC + # EOM + # + # lines = CleanDocument.new(source: source).join_heredoc!.lines + # expect(lines[0].to_s).to eq(source) + # expect(lines[1].to_s).to eq("") + def join_heredoc! 
+ start_index_stack = [] + heredoc_beg_end_index = [] + lines.each do |line| + line.lex.each do |lex_value| + case lex_value.type + when :on_heredoc_beg + start_index_stack << line.index + when :on_heredoc_end + start_index = start_index_stack.pop + end_index = line.index + heredoc_beg_end_index << [start_index, end_index] + end + end + end + + heredoc_groups = heredoc_beg_end_index.map { |start_index, end_index| @document[start_index..end_index] } + + join_groups(heredoc_groups) + self + end + + # Smushes logically "consecutive" lines + # + # source = <<~'EOM' + # User. + # where(name: 'schneems'). + # first + # EOM + # + # lines = CleanDocument.new(source: source).join_consecutive!.lines + # expect(lines[0].to_s).to eq(source) + # expect(lines[1].to_s).to eq("") + # + # The one known case this doesn't handle is: + # + # Ripper.lex <<~EOM + # a && + # b || + # c + # EOM + # + # For some reason this introduces `on_ignore_newline` but with BEG type + # + def join_consecutive! + consecutive_groups = @document.select(&:ignore_newline_not_beg?).map do |code_line| + take_while_including(code_line.index..-1) do |line| + line.ignore_newline_not_beg? + end + end + + join_groups(consecutive_groups) + self + end + + # Join lines with a trailing slash + # + # source = <<~'EOM' + # it "code can be split" \ + # "across multiple lines" do + # EOM + # + # lines = CleanDocument.new(source: source).join_consecutive!.lines + # expect(lines[0].to_s).to eq(source) + # expect(lines[1].to_s).to eq("") + def join_trailing_slash! + trailing_groups = @document.select(&:trailing_slash?).map do |code_line| + take_while_including(code_line.index..-1) { |x| x.trailing_slash? } + end + join_groups(trailing_groups) + self + end + + # Helper method for joining "groups" of lines + # + # Input is expected to be type Array> + # + # The outer array holds the various "groups" while the + # inner array holds code lines. + # + # All code lines are "joined" into the first line in + # their group. 
+ # + # To preserve document size, empty lines are placed + # in the place of the lines that were "joined" + def join_groups(groups) + groups.each do |lines| + line = lines.first + + # Handle the case of multiple groups in a a row + # if one is already replaced, move on + next if @document[line.index].empty? + + # Join group into the first line + @document[line.index] = CodeLine.new( + lex: lines.map(&:lex).flatten, + line: lines.join, + index: line.index + ) + + # Hide the rest of the lines + lines[1..-1].each do |line| + # The above lines already have newlines in them, if add more + # then there will be double newline, use an empty line instead + @document[line.index] = CodeLine.new(line: "", index: line.index, lex: []) + end + end + self + end + + # Helper method for grabbing elements from document + # + # Like `take_while` except when it stops + # iterating, it also returns the line + # that caused it to stop + def take_while_including(range = 0..-1) + take_next_and_stop = false + @document[range].take_while do |line| + next if take_next_and_stop + + take_next_and_stop = !(yield line) + true + end + end + end +end diff --git a/lib/syntax_suggest/cli.rb b/lib/syntax_suggest/cli.rb new file mode 100644 index 00000000000000..b89fa5d0138577 --- /dev/null +++ b/lib/syntax_suggest/cli.rb @@ -0,0 +1,129 @@ +# frozen_string_literal: true + +require "pathname" +require "optparse" + +module SyntaxSuggest + # All the logic of the exe/syntax_suggest CLI in one handy spot + # + # Cli.new(argv: ["--help"]).call + # Cli.new(argv: [".rb"]).call + # Cli.new(argv: [".rb", "--record=tmp"]).call + # Cli.new(argv: [".rb", "--terminal"]).call + # + class Cli + attr_accessor :options + + # ARGV is Everything passed to the executable, does not include executable name + # + # All other intputs are dependency injection for testing + def initialize(argv:, exit_obj: Kernel, io: $stdout, env: ENV) + @options = {} + @parser = nil + options[:record_dir] = env["SYNTAX_SUGGEST_RECORD_DIR"] + 
options[:record_dir] = "tmp" if env["DEBUG"] + options[:terminal] = SyntaxSuggest::DEFAULT_VALUE + + @io = io + @argv = argv + @exit_obj = exit_obj + end + + def call + if @argv.empty? + # Display help if raw command + parser.parse! %w[--help] + return + else + # Mutates @argv + parse + return if options[:exit] + end + + file_name = @argv.first + if file_name.nil? + @io.puts "No file given" + @exit_obj.exit(1) + return + end + + file = Pathname(file_name) + if !file.exist? + @io.puts "file not found: #{file.expand_path} " + @exit_obj.exit(1) + return + end + + @io.puts "Record dir: #{options[:record_dir]}" if options[:record_dir] + + display = SyntaxSuggest.call( + io: @io, + source: file.read, + filename: file.expand_path, + terminal: options.fetch(:terminal, SyntaxSuggest::DEFAULT_VALUE), + record_dir: options[:record_dir] + ) + + if display.document_ok? + @exit_obj.exit(0) + else + @exit_obj.exit(1) + end + end + + def parse + parser.parse!(@argv) + + self + end + + def parser + @parser ||= OptionParser.new do |opts| + opts.banner = <<~EOM + Usage: syntax_suggest [options] + + Parses a ruby source file and searches for syntax error(s) such as + unexpected `end', expecting end-of-input. + + Example: + + $ syntax_suggest dog.rb + + # ... 
+ + ❯ 10 defdog + ❯ 15 end + + ENV options: + + SYNTAX_SUGGEST_RECORD_DIR= + + Records the steps used to search for a syntax error + to the given directory + + Options: + EOM + + opts.version = SyntaxSuggest::VERSION + + opts.on("--help", "Help - displays this message") do |v| + @io.puts opts + options[:exit] = true + @exit_obj.exit + end + + opts.on("--record ", "Records the steps used to search for a syntax error to the given directory") do |v| + options[:record_dir] = v + end + + opts.on("--terminal", "Enable terminal highlighting") do |v| + options[:terminal] = true + end + + opts.on("--no-terminal", "Disable terminal highlighting") do |v| + options[:terminal] = false + end + end + end + end +end diff --git a/lib/syntax_suggest/code_block.rb b/lib/syntax_suggest/code_block.rb new file mode 100644 index 00000000000000..61e7986da47146 --- /dev/null +++ b/lib/syntax_suggest/code_block.rb @@ -0,0 +1,100 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Multiple lines form a singular CodeBlock + # + # Source code is made of multiple CodeBlocks. + # + # Example: + # + # code_block.to_s # => + # # def foo + # # puts "foo" + # # end + # + # code_block.valid? # => true + # code_block.in_valid? # => false + # + # + class CodeBlock + UNSET = Object.new.freeze + attr_reader :lines, :starts_at, :ends_at + + def initialize(lines: []) + @lines = Array(lines) + @valid = UNSET + @deleted = false + @starts_at = @lines.first.number + @ends_at = @lines.last.number + end + + def delete + @deleted = true + end + + def deleted? + @deleted + end + + def visible_lines + @lines.select(&:visible?).select(&:not_empty?) + end + + def mark_invisible + @lines.map(&:mark_invisible) + end + + def is_end? + to_s.strip == "end" + end + + def hidden? + @lines.all?(&:hidden?) + end + + # This is used for frontier ordering, we are searching from + # the largest indentation to the smallest. 
This allows us to + # populate an array with multiple code blocks then call `sort!` + # on it without having to specify the sorting criteria + def <=>(other) + out = current_indent <=> other.current_indent + return out if out != 0 + + # Stable sort + starts_at <=> other.starts_at + end + + def current_indent + @current_indent ||= lines.select(&:not_empty?).map(&:indent).min || 0 + end + + def invalid? + !valid? + end + + def valid? + if @valid == UNSET + # Performance optimization + # + # If all the lines were previously hidden + # and we expand to capture additional empty + # lines then the result cannot be invalid + # + # That means there's no reason to re-check all + # lines with ripper (which is expensive). + # Benchmark in commit message + @valid = if lines.all? { |l| l.hidden? || l.empty? } + true + else + SyntaxSuggest.valid?(lines.map(&:original).join) + end + else + @valid + end + end + + def to_s + @lines.join + end + end +end diff --git a/lib/syntax_suggest/code_frontier.rb b/lib/syntax_suggest/code_frontier.rb new file mode 100644 index 00000000000000..8e93b32514e0a9 --- /dev/null +++ b/lib/syntax_suggest/code_frontier.rb @@ -0,0 +1,178 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # The main function of the frontier is to hold the edges of our search and to + # evaluate when we can stop searching. + + # There are three main phases in the algorithm: + # + # 1. Sanitize/format input source + # 2. Search for invalid blocks + # 3. Format invalid blocks into something meaninful + # + # The Code frontier is a critical part of the second step + # + # ## Knowing where we've been + # + # Once a code block is generated it is added onto the frontier. Then it will be + # sorted by indentation and frontier can be filtered. Large blocks that fully enclose a + # smaller block will cause the smaller block to be evicted. 
+ # + # CodeFrontier#<<(block) # Adds block to frontier + # CodeFrontier#pop # Removes block from frontier + # + # ## Knowing where we can go + # + # Internally the frontier keeps track of "unvisited" lines which are exposed via `next_indent_line` + # when called, this method returns, a line of code with the highest indentation. + # + # The returned line of code can be used to build a CodeBlock and then that code block + # is added back to the frontier. Then, the lines are removed from the + # "unvisited" so we don't double-create the same block. + # + # CodeFrontier#next_indent_line # Shows next line + # CodeFrontier#register_indent_block(block) # Removes lines from unvisited + # + # ## Knowing when to stop + # + # The frontier knows how to check the entire document for a syntax error. When blocks + # are added onto the frontier, they're removed from the document. When all code containing + # syntax errors has been added to the frontier, the document will be parsable without a + # syntax error and the search can stop. + # + # CodeFrontier#holds_all_syntax_errors? # Returns true when frontier holds all syntax errors + # + # ## Filtering false positives + # + # Once the search is completed, the frontier may have multiple blocks that do not contain + # the syntax error. To limit the result to the smallest subset of "invalid blocks" call: + # + # CodeFrontier#detect_invalid_blocks + # + class CodeFrontier + def initialize(code_lines:, unvisited: UnvisitedLines.new(code_lines: code_lines)) + @code_lines = code_lines + @unvisited = unvisited + @queue = PriorityEngulfQueue.new + + @check_next = true + end + + def count + @queue.length + end + + # Performance optimization + # + # Parsing with ripper is expensive + # If we know we don't have any blocks with invalid + # syntax, then we know we cannot have found + # the incorrect syntax yet. + # + # When an invalid block is added onto the frontier + # check document state + private def can_skip_check? 
+ check_next = @check_next + @check_next = false + + if check_next + false + else + true + end + end + + # Returns true if the document is valid with all lines + # removed. By default it checks all blocks in present in + # the frontier array, but can be used for arbitrary arrays + # of codeblocks as well + def holds_all_syntax_errors?(block_array = @queue, can_cache: true) + return false if can_cache && can_skip_check? + + without_lines = block_array.to_a.flat_map do |block| + block.lines + end + + SyntaxSuggest.valid_without?( + without_lines: without_lines, + code_lines: @code_lines + ) + end + + # Returns a code block with the largest indentation possible + def pop + @queue.pop + end + + def next_indent_line + @unvisited.peek + end + + def expand? + return false if @queue.empty? + return true if @unvisited.empty? + + frontier_indent = @queue.peek.current_indent + unvisited_indent = next_indent_line.indent + + if ENV["SYNTAX_SUGGEST_DEBUG"] + puts "```" + puts @queue.peek.to_s + puts "```" + puts " @frontier indent: #{frontier_indent}" + puts " @unvisited indent: #{unvisited_indent}" + end + + # Expand all blocks before moving to unvisited lines + frontier_indent >= unvisited_indent + end + + # Keeps track of what lines have been added to blocks and which are not yet + # visited. + def register_indent_block(block) + @unvisited.visit_block(block) + self + end + + # When one element fully encapsulates another we remove the smaller + # block from the frontier. This prevents double expansions and all-around + # weird behavior. However this guarantee is quite expensive to maintain + def register_engulf_block(block) + end + + # Add a block to the frontier + # + # This method ensures the frontier always remains sorted (in indentation order) + # and that each code block's lines are removed from the indentation hash so we + # don't re-evaluate the same line multiple times. 
+ def <<(block) + @unvisited.visit_block(block) + + @queue.push(block) + + @check_next = true if block.invalid? + + self + end + + # Example: + # + # combination([:a, :b, :c, :d]) + # # => [[:a], [:b], [:c], [:d], [:a, :b], [:a, :c], [:a, :d], [:b, :c], [:b, :d], [:c, :d], [:a, :b, :c], [:a, :b, :d], [:a, :c, :d], [:b, :c, :d], [:a, :b, :c, :d]] + def self.combination(array) + guesses = [] + 1.upto(array.length).each do |size| + guesses.concat(array.combination(size).to_a) + end + guesses + end + + # Given that we know our syntax error exists somewhere in our frontier, we want to find + # the smallest possible set of blocks that contain all the syntax errors + def detect_invalid_blocks + self.class.combination(@queue.to_a.select(&:invalid?)).detect do |block_array| + holds_all_syntax_errors?(block_array, can_cache: false) + end || [] + end + end +end diff --git a/lib/syntax_suggest/code_line.rb b/lib/syntax_suggest/code_line.rb new file mode 100644 index 00000000000000..dc738ab12890da --- /dev/null +++ b/lib/syntax_suggest/code_line.rb @@ -0,0 +1,239 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Represents a single line of code of a given source file + # + # This object contains metadata about the line such as + # amount of indentation, if it is empty or not, and + # lexical data, such as if it has an `end` or a keyword + # in it. + # + # Visibility of lines can be toggled off. Marking a line as invisible + # indicates that it should not be used for syntax checks. + # It's functionally the same as commenting it out. + # + # Example: + # + # line = CodeLine.from_source("def foo\n").first + # line.number => 1 + # line.empty? # => false + # line.visible? # => true + # line.mark_invisible + # line.visible? 
# => false + # + class CodeLine + TRAILING_SLASH = ("\\" + $/).freeze + + # Returns an array of CodeLine objects + # from the source string + def self.from_source(source, lines: nil) + lines ||= source.lines + lex_array_for_line = LexAll.new(source: source, source_lines: lines).each_with_object(Hash.new { |h, k| h[k] = [] }) { |lex, hash| hash[lex.line] << lex } + lines.map.with_index do |line, index| + CodeLine.new( + line: line, + index: index, + lex: lex_array_for_line[index + 1] + ) + end + end + + attr_reader :line, :index, :lex, :line_number, :indent + def initialize(line:, index:, lex:) + @lex = lex + @line = line + @index = index + @original = line + @line_number = @index + 1 + strip_line = line.dup + strip_line.lstrip! + + if strip_line.empty? + @empty = true + @indent = 0 + else + @empty = false + @indent = line.length - strip_line.length + end + + set_kw_end + end + + # Used for stable sort via indentation level + # + # Ruby's sort is not "stable" meaning that when + # multiple elements have the same value, they are + # not guaranteed to return in the same order they + # were put in. + # + # So when multiple code lines have the same indentation + # level, they're sorted by their index value which is unique + # and consistent. + # + # This is mostly needed for consistency of the test suite + def indent_index + @indent_index ||= [indent, index] + end + alias_method :number, :line_number + + # Returns true if the code line is determined + # to contain a keyword that matches with an `end` + # + # For example: `def`, `do`, `begin`, `ensure`, etc. + def is_kw? + @is_kw + end + + # Returns true if the code line is determined + # to contain an `end` keyword + def is_end? 
+ @is_end + end + + # Used to hide lines + # + # The search algorithm will group lines into blocks + # then if those blocks are determined to represent + # valid code they will be hidden + def mark_invisible + @line = "" + end + + # Means the line was marked as "invisible" + # Confusingly, "empty" lines are visible...they + # just don't contain any source code other than a newline ("\n"). + def visible? + !line.empty? + end + + # Opposite of `visible?` (note: different than `empty?`) + def hidden? + !visible? + end + + # An `empty?` line is one that was originally left + # empty in the source code, while a "hidden" line + # is one that we've since marked as "invisible" + def empty? + @empty + end + + # Opposite of `empty?` (note: different than `visible?`) + def not_empty? + !empty? + end + + # Renders the given line + # + # Also allows us to represent source code as + # an array of code lines. + # + # When we have an array of code line elements + # calling `join` on the array will call `to_s` + # on each element, which essentially converts + # it back into its original source string. + def to_s + line + end + + # When the code line is marked invisible + # we retain the original value of its line + # this is useful for debugging and for + # showing extra context + # + # DisplayCodeWithLineNumbers will render + # all lines given to it, not just visible + # lines, it uses the original method to + # obtain them. + attr_reader :original + + # Comparison operator, needed for equality + # and sorting + def <=>(other) + index <=> other.index + end + + # [Not stable API] + # + # Lines that have a `on_ignored_nl` type token and NOT + # a `BEG` type seem to be a good proxy for the ability + # to join multiple lines into one. + # + # This predicate method is used to determine when those + # two criteria have been met.
+ # + # The one known case this doesn't handle is: + # + # Ripper.lex <<~EOM + # a && + # b || + # c + # EOM + # + # For some reason this introduces `on_ignore_newline` but with BEG type + def ignore_newline_not_beg? + @ignore_newline_not_beg + end + + # Determines if the given line has a trailing slash + # + # lines = CodeLine.from_source(<<~EOM) + # it "foo" \ + # EOM + # expect(lines.first.trailing_slash?).to eq(true) + # + def trailing_slash? + last = @lex.last + return false unless last + return false unless last.type == :on_sp + + last.token == TRAILING_SLASH + end + + # Endless method detection + # + # From https://github.com/ruby/irb/commit/826ae909c9c93a2ddca6f9cfcd9c94dbf53d44ab + # Detecting a "oneliner" seems to need a state machine. + # This can be done by looking mostly at the "state" (last value): + # + # ENDFN -> BEG (token = '=' ) -> END + # + private def set_kw_end + oneliner_count = 0 + in_oneliner_def = nil + + kw_count = 0 + end_count = 0 + + @ignore_newline_not_beg = false + @lex.each do |lex| + kw_count += 1 if lex.is_kw? + end_count += 1 if lex.is_end? + + if lex.type == :on_ignored_nl + @ignore_newline_not_beg = !lex.expr_beg? + end + + if in_oneliner_def.nil? 
+ in_oneliner_def = :ENDFN if lex.state.allbits?(Ripper::EXPR_ENDFN) + elsif lex.state.allbits?(Ripper::EXPR_ENDFN) + # Continue + elsif lex.state.allbits?(Ripper::EXPR_BEG) + in_oneliner_def = :BODY if lex.token == "=" + elsif lex.state.allbits?(Ripper::EXPR_END) + # We found an endless method, count it + oneliner_count += 1 if in_oneliner_def == :BODY + + in_oneliner_def = nil + else + in_oneliner_def = nil + end + end + + kw_count -= oneliner_count + + @is_kw = (kw_count - end_count) > 0 + @is_end = (end_count - kw_count) > 0 + end + end +end diff --git a/lib/syntax_suggest/code_search.rb b/lib/syntax_suggest/code_search.rb new file mode 100644 index 00000000000000..2a86dfea90eb18 --- /dev/null +++ b/lib/syntax_suggest/code_search.rb @@ -0,0 +1,139 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Searches code for a syntax error + # + # There are three main phases in the algorithm: + # + # 1. Sanitize/format input source + # 2. Search for invalid blocks + # 3. Format invalid blocks into something meaningful + # + # This class handles the second part. + # + # The bulk of the heavy lifting is done in: + # + # - CodeFrontier (Holds information for generating blocks and determining if we can stop searching) + # - ParseBlocksFromIndentLine (Creates blocks into the frontier) + # - BlockExpand (Expands existing blocks to search more code) + # + # ## Syntax error detection + # + # When the frontier holds the syntax error, we can stop searching + # + # search = CodeSearch.new(<<~EOM) + # def dog + # def lol + # end + # EOM + # + # search.call + # + # search.invalid_blocks.map(&:to_s) # => + # # => ["def lol\n"] + # + class CodeSearch + private + + attr_reader :frontier + + public + + attr_reader :invalid_blocks, :record_dir, :code_lines + + def initialize(source, record_dir: DEFAULT_VALUE) + record_dir = if record_dir == DEFAULT_VALUE + ENV["SYNTAX_SUGGEST_RECORD_DIR"] || ENV["SYNTAX_SUGGEST_DEBUG"] ?
"tmp" : nil + else + record_dir + end + + if record_dir + @record_dir = SyntaxSuggest.record_dir(record_dir) + @write_count = 0 + end + + @tick = 0 + @source = source + @name_tick = Hash.new { |hash, k| hash[k] = 0 } + @invalid_blocks = [] + + @code_lines = CleanDocument.new(source: source).call.lines + + @frontier = CodeFrontier.new(code_lines: @code_lines) + @block_expand = BlockExpand.new(code_lines: @code_lines) + @parse_blocks_from_indent_line = ParseBlocksFromIndentLine.new(code_lines: @code_lines) + end + + # Used for debugging + def record(block:, name: "record") + return unless @record_dir + @name_tick[name] += 1 + filename = "#{@write_count += 1}-#{name}-#{@name_tick[name]}-(#{block.starts_at}__#{block.ends_at}).txt" + if ENV["SYNTAX_SUGGEST_DEBUG"] + puts "\n\n==== #{filename} ====" + puts "\n```#{block.starts_at}..#{block.ends_at}" + puts block.to_s + puts "```" + puts " block indent: #{block.current_indent}" + end + @record_dir.join(filename).open(mode: "a") do |f| + document = DisplayCodeWithLineNumbers.new( + lines: @code_lines.select(&:visible?), + terminal: false, + highlight_lines: block.lines + ).call + + f.write(" Block lines: #{block.starts_at..block.ends_at} (#{name}) \n\n#{document}") + end + end + + def push(block, name:) + record(block: block, name: name) + + block.mark_invisible if block.valid? 
+ frontier << block + end + + # Parses the most indented lines into blocks that are marked + # and added to the frontier + def create_blocks_from_untracked_lines + max_indent = frontier.next_indent_line&.indent + + while (line = frontier.next_indent_line) && (line.indent == max_indent) + @parse_blocks_from_indent_line.each_neighbor_block(frontier.next_indent_line) do |block| + push(block, name: "add") + end + end + end + + # Given an already existing block in the frontier, expand it to see + # if it contains our invalid syntax + def expand_existing + block = frontier.pop + return unless block + + record(block: block, name: "before-expand") + + block = @block_expand.call(block) + push(block, name: "expand") + end + + # Main search loop + def call + until frontier.holds_all_syntax_errors? + @tick += 1 + + if frontier.expand? + expand_existing + else + create_blocks_from_untracked_lines + end + end + + @invalid_blocks.concat(frontier.detect_invalid_blocks) + @invalid_blocks.sort_by! { |block| block.starts_at } + self + end + end +end diff --git a/lib/syntax_suggest/core_ext.rb b/lib/syntax_suggest/core_ext.rb new file mode 100644 index 00000000000000..40f5fe13759c77 --- /dev/null +++ b/lib/syntax_suggest/core_ext.rb @@ -0,0 +1,101 @@ +# frozen_string_literal: true + +# Ruby 3.2+ has a cleaner way to hook into Ruby that doesn't use `require` +if SyntaxError.method_defined?(:detailed_message) + module SyntaxSuggest + class MiniStringIO + def initialize(isatty: $stderr.isatty) + @string = +"" + @isatty = isatty + end + + attr_reader :isatty + def puts(value = $/, **) + @string << value + end + + attr_reader :string + end + end + + SyntaxError.prepend Module.new { + def detailed_message(highlight: true, syntax_suggest: true, **kwargs) + return super unless syntax_suggest + + require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) + + message = super + file = if highlight + SyntaxSuggest::PathnameFromMessage.new(super(highlight: false, 
**kwargs)).call.name + else + SyntaxSuggest::PathnameFromMessage.new(message).call.name + end + + io = SyntaxSuggest::MiniStringIO.new + + if file + SyntaxSuggest.call( + io: io, + source: file.read, + filename: file, + terminal: highlight + ) + annotation = io.string + + annotation + message + else + message + end + rescue => e + if ENV["SYNTAX_SUGGEST_DEBUG"] + $stderr.warn(e.message) + $stderr.warn(e.backtrace) + end + + # Ignore internal errors + message + end + } +else + autoload :Pathname, "pathname" + + # Monkey patch kernel to ensure that all `require` calls call the same + # method + module Kernel + module_function + + alias_method :syntax_suggest_original_require, :require + alias_method :syntax_suggest_original_require_relative, :require_relative + alias_method :syntax_suggest_original_load, :load + + def load(file, wrap = false) + syntax_suggest_original_load(file) + rescue SyntaxError => e + require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) + + SyntaxSuggest.handle_error(e) + end + + def require(file) + syntax_suggest_original_require(file) + rescue SyntaxError => e + require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) + + SyntaxSuggest.handle_error(e) + end + + def require_relative(file) + if Pathname.new(file).absolute? 
+ syntax_suggest_original_require file + else + relative_from = caller_locations(1..1).first + relative_from_path = relative_from.absolute_path || relative_from.path + syntax_suggest_original_require File.expand_path("../#{file}", relative_from_path) + end + rescue SyntaxError => e + require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) + + SyntaxSuggest.handle_error(e) + end + end +end diff --git a/lib/syntax_suggest/display_code_with_line_numbers.rb b/lib/syntax_suggest/display_code_with_line_numbers.rb new file mode 100644 index 00000000000000..23f4b2d1eeace7 --- /dev/null +++ b/lib/syntax_suggest/display_code_with_line_numbers.rb @@ -0,0 +1,70 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Outputs code with highlighted lines + # + # Whatever is passed to this class will be rendered + # even if it is "marked invisible" any filtering of + # output should be done before calling this class. + # + # DisplayCodeWithLineNumbers.new( + # lines: lines, + # highlight_lines: [lines[2], lines[3]] + # ).call + # # => + # 1 + # 2 def cat + # ❯ 3 Dir.chdir + # ❯ 4 end + # 5 end + # 6 + class DisplayCodeWithLineNumbers + TERMINAL_HIGHLIGHT = "\e[1;3m" # Bold, italics + TERMINAL_END = "\e[0m" + + def initialize(lines:, highlight_lines: [], terminal: false) + @lines = Array(lines).sort + @terminal = terminal + @highlight_line_hash = Array(highlight_lines).each_with_object({}) { |line, h| h[line] = true } + @digit_count = @lines.last&.line_number.to_s.length + end + + def call + @lines.map do |line| + format_line(line) + end.join + end + + private def format_line(code_line) + # Handle trailing slash lines + code_line.original.lines.map.with_index do |contents, i| + format( + empty: code_line.empty?, + number: (code_line.number + i).to_s, + contents: contents, + highlight: @highlight_line_hash[code_line] + ) + end.join + end + + private def format(contents:, number:, empty:, highlight: false) + string = +"" + string << if highlight + "❯ " + else 
+ " " + end + + string << number.rjust(@digit_count).to_s + if empty + string << contents + else + string << " " + string << TERMINAL_HIGHLIGHT if @terminal && highlight + string << contents + string << TERMINAL_END if @terminal + end + string + end + end +end diff --git a/lib/syntax_suggest/display_invalid_blocks.rb b/lib/syntax_suggest/display_invalid_blocks.rb new file mode 100644 index 00000000000000..bc1143f4b0d5da --- /dev/null +++ b/lib/syntax_suggest/display_invalid_blocks.rb @@ -0,0 +1,84 @@ +# frozen_string_literal: true + +require_relative "capture_code_context" +require_relative "display_code_with_line_numbers" + +module SyntaxSuggest + # Used for formatting invalid blocks + class DisplayInvalidBlocks + attr_reader :filename + + def initialize(code_lines:, blocks:, io: $stderr, filename: nil, terminal: DEFAULT_VALUE) + @io = io + @blocks = Array(blocks) + @filename = filename + @code_lines = code_lines + + @terminal = terminal == DEFAULT_VALUE ? io.isatty : terminal + end + + def document_ok? + @blocks.none? { |b| !b.hidden? } + end + + def call + if document_ok? 
+ @io.puts "Syntax OK" + return self + end + + if filename + @io.puts("--> #{filename}") + @io.puts + end + @blocks.each do |block| + display_block(block) + end + + self + end + + private def display_block(block) + # Build explanation + explain = ExplainSyntax.new( + code_lines: block.lines + ).call + + # Enhance code output + # Also handles several ambiguous cases + lines = CaptureCodeContext.new( + blocks: block, + code_lines: @code_lines + ).call + + # Build code output + document = DisplayCodeWithLineNumbers.new( + lines: lines, + terminal: @terminal, + highlight_lines: block.lines + ).call + + # Output syntax error explanation + explain.errors.each do |e| + @io.puts e + end + @io.puts + + # Output code + @io.puts(document) + end + + private def code_with_context + lines = CaptureCodeContext.new( + blocks: @blocks, + code_lines: @code_lines + ).call + + DisplayCodeWithLineNumbers.new( + lines: lines, + terminal: @terminal, + highlight_lines: @invalid_lines + ).call + end + end +end diff --git a/lib/syntax_suggest/explain_syntax.rb b/lib/syntax_suggest/explain_syntax.rb new file mode 100644 index 00000000000000..142ed2e269ea14 --- /dev/null +++ b/lib/syntax_suggest/explain_syntax.rb @@ -0,0 +1,103 @@ +# frozen_string_literal: true + +require_relative "left_right_lex_count" + +module SyntaxSuggest + # Explains syntax errors based on their source + # + # example: + # + # source = "def foo; puts 'lol'" # Note missing end + # explain = ExplainSyntax.new( + # code_lines: CodeLine.from_source(source) + # ).call + # explain.errors.first + # # => "Unmatched keyword, missing `end' ?" + # + # When the error cannot be determined by lexical counting + # then ripper is run against the input and the raw ripper + # errors returned.
+ # + # Example: + # + # source = "1 * " # Note missing a second number + # explain = ExplainSyntax.new( + # code_lines: CodeLine.from_source(source) + # ).call + # explain.errors.first + # # => "syntax error, unexpected end-of-input" + class ExplainSyntax + INVERSE = { + "{" => "}", + "}" => "{", + "[" => "]", + "]" => "[", + "(" => ")", + ")" => "(", + "|" => "|" + }.freeze + + def initialize(code_lines:) + @code_lines = code_lines + @left_right = LeftRightLexCount.new + @missing = nil + end + + def call + @code_lines.each do |line| + line.lex.each do |lex| + @left_right.count_lex(lex) + end + end + + self + end + + # Returns an array of missing elements + # + # For example this: + # + # ExplainSyntax.new(code_lines: lines).missing + # # => ["}"] + # + # Would indicate that the source is missing + # a `}` character in the source code + def missing + @missing ||= @left_right.missing + end + + # Converts a missing string to + # a human-understandable explanation. + # + # Example: + # + # explain.why("}") + # # => "Unmatched `{', missing `}' ?" + # + def why(miss) + case miss + when "keyword" + "Unmatched `end', missing keyword (`do', `def`, `if`, etc.) ?" + when "end" + "Unmatched keyword, missing `end' ?" + else + inverse = INVERSE.fetch(miss) { + raise "Unknown explain syntax char or key: #{miss.inspect}" + } + "Unmatched `#{inverse}', missing `#{miss}' ?" + end + end + + # Returns an array of syntax error messages + # + # If no missing pairs are found it falls back + # on the original ripper error messages + def errors + if missing.empty?
+ return RipperErrors.new(@code_lines.map(&:original).join).call.errors + end + + missing.map { |miss| why(miss) } + end + end +end diff --git a/lib/syntax_suggest/left_right_lex_count.rb b/lib/syntax_suggest/left_right_lex_count.rb new file mode 100644 index 00000000000000..6fcae7482b83b0 --- /dev/null +++ b/lib/syntax_suggest/left_right_lex_count.rb @@ -0,0 +1,168 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Find mis-matched syntax based on lexical count + # + # Used for detecting missing pairs of elements + # each keyword needs an end, each '{' needs a '}' + # etc. + # + # Example: + # + # left_right = LeftRightLexCount.new + # left_right.count_kw + # left_right.missing.first + # # => "end" + # + # left_right = LeftRightLexCount.new + # source = "{ a: b, c: d" # Note missing '}' + # LexAll.new(source: source).each do |lex| + # left_right.count_lex(lex) + # end + # left_right.missing.first + # # => "}" + class LeftRightLexCount + def initialize + @kw_count = 0 + @end_count = 0 + + @count_for_char = { + "{" => 0, + "}" => 0, + "[" => 0, + "]" => 0, + "(" => 0, + ")" => 0, + "|" => 0 + } + end + + def count_kw + @kw_count += 1 + end + + def count_end + @end_count += 1 + end + + # Count source code characters + # + # Example: + # + # left_right = LeftRightLexCount.new + # left_right.count_lex(LexValue.new(1, :on_lbrace, "{", Ripper::EXPR_BEG)) + # left_right.count_for_char("{") + # # => 1 + # left_right.count_for_char("}") + # # => 0 + def count_lex(lex) + case lex.type + when :on_tstring_content + # ^^^ + # Means it's a string or a symbol `"{"` rather than being + # part of a data structure (like a hash) `{ a: b }` + # ignore it. + when :on_words_beg, :on_symbos_beg, :on_qwords_beg, + :on_qsymbols_beg, :on_regexp_beg, :on_tstring_beg + # ^^^ + # Handle shorthand syntaxes like `%Q{ i am a string }` + # + # The start token will be the full thing `%Q{` but we + # need to count it as if it's a `{`. 
Any token + # can be used + char = lex.token[-1] + @count_for_char[char] += 1 if @count_for_char.key?(char) + when :on_embexpr_beg + # ^^^ + # Embedded string expressions like `"#{foo} <-embed"` + # are parsed with chars: + # + # `#{` as :on_embexpr_beg + # `}` as :on_embexpr_end + # + # We cannot ignore both :on_emb_expr_beg and :on_embexpr_end + # because sometimes the lexer thinks something is an embed + # string end, when it is not like `lol = }` (no clue why). + # + # When we see `#{` count it as a `{` or we will + # have a mis-match count. + # + case lex.token + when "\#{" + @count_for_char["{"] += 1 + end + else + @end_count += 1 if lex.is_end? + @kw_count += 1 if lex.is_kw? + @count_for_char[lex.token] += 1 if @count_for_char.key?(lex.token) + end + end + + def count_for_char(char) + @count_for_char[char] + end + + # Returns an array of missing syntax characters + # or `"end"` or `"keyword"` + # + # left_right.missing + # # => ["}"] + def missing + out = missing_pairs + out << missing_pipe + out << missing_keyword_end + out.compact! + out + end + + PAIRS = { + "{" => "}", + "[" => "]", + "(" => ")" + }.freeze + + # Opening characters like `{` need closing characters # like `}`. + # + # When a mis-match count is detected, suggest the + # missing member. + # + # For example if there are 3 `}` and only two `{` + # return `"{"` + private def missing_pairs + PAIRS.map do |(left, right)| + case @count_for_char[left] <=> @count_for_char[right] + when 1 + right + when 0 + nil + when -1 + left + end + end + end + + # Keywords need ends and ends need keywords + # + # If we have more keywords, there's a missing `end` + # if we have more `end`-s, there's a missing keyword + private def missing_keyword_end + case @kw_count <=> @end_count + when 1 + "end" + when 0 + nil + when -1 + "keyword" + end + end + + # Pipes come in pairs. + # If there's an odd number of pipes then we + # are missing one + private def missing_pipe + if @count_for_char["|"].odd? 
+ "|" + end + end + end +end diff --git a/lib/syntax_suggest/lex_all.rb b/lib/syntax_suggest/lex_all.rb new file mode 100644 index 00000000000000..132cba9f5d3b4c --- /dev/null +++ b/lib/syntax_suggest/lex_all.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Ripper.lex is not guaranteed to lex the entire source document + # + # lex = LexAll.new(source: source) + # lex.each do |value| + # puts value.line + # end + class LexAll + include Enumerable + + def initialize(source:, source_lines: nil) + @lex = Ripper::Lexer.new(source, "-", 1).parse.sort_by(&:pos) + lineno = @lex.last.pos.first + 1 + source_lines ||= source.lines + last_lineno = source_lines.length + + until lineno >= last_lineno + lines = source_lines[lineno..-1] + + @lex.concat( + Ripper::Lexer.new(lines.join, "-", lineno + 1).parse.sort_by(&:pos) + ) + lineno = @lex.last.pos.first + 1 + end + + last_lex = nil + @lex.map! { |elem| + last_lex = LexValue.new(elem.pos.first, elem.event, elem.tok, elem.state, last_lex) + } + end + + def to_a + @lex + end + + def each + return @lex.each unless block_given? 
+ @lex.each do |x| + yield x + end + end + + def [](index) + @lex[index] + end + + def last + @lex.last + end + end +end + +require_relative "lex_value" diff --git a/lib/syntax_suggest/lex_value.rb b/lib/syntax_suggest/lex_value.rb new file mode 100644 index 00000000000000..008cc105b5398c --- /dev/null +++ b/lib/syntax_suggest/lex_value.rb @@ -0,0 +1,70 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Value object for accessing lex values + # + # This lex: + # + # [1, 0], :on_ident, "describe", CMDARG + # + # Would translate into: + # + # lex.line # => 1 + # lex.type # => :on_ident + # lex.token # => "describe" + class LexValue + attr_reader :line, :type, :token, :state + + def initialize(line, type, token, state, last_lex = nil) + @line = line + @type = type + @token = token + @state = state + + set_kw_end(last_lex) + end + + private def set_kw_end(last_lex) + @is_end = false + @is_kw = false + return if type != :on_kw + # + return if last_lex && last_lex.fname? # https://github.com/ruby/ruby/commit/776759e300e4659bb7468e2b97c8c2d4359a2953 + + case token + when "if", "unless", "while", "until" + # Only count if/unless when it's not a "trailing" if/unless + # https://github.com/ruby/ruby/blob/06b44f819eb7b5ede1ff69cecb25682b56a1d60c/lib/irb/ruby-lex.rb#L374-L375 + @is_kw = true unless expr_label? + when "def", "case", "for", "begin", "class", "module", "do" + @is_kw = true + when "end" + @is_end = true + end + end + + def fname? + state.allbits?(Ripper::EXPR_FNAME) + end + + def ignore_newline? + type == :on_ignored_nl + end + + def is_end? + @is_end + end + + def is_kw? + @is_kw + end + + def expr_beg? + state.anybits?(Ripper::EXPR_BEG) + end + + def expr_label?
+ state.allbits?(Ripper::EXPR_LABEL) + end + end +end diff --git a/lib/syntax_suggest/parse_blocks_from_indent_line.rb b/lib/syntax_suggest/parse_blocks_from_indent_line.rb new file mode 100644 index 00000000000000..d1071732fe2a7a --- /dev/null +++ b/lib/syntax_suggest/parse_blocks_from_indent_line.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # This class is responsible for generating initial code blocks + # that will then later be expanded. + # + # The biggest concern when guessing code blocks, is accidentally + # grabbing one that contains only an "end". In this example: + # + # def dog + # begonn # misspelled `begin` + # puts "bark" + # end + # end + # + # The following lines would be matched (from bottom to top): + # + # 1) end + # + # 2) puts "bark" + # end + # + # 3) begonn + # puts "bark" + # end + # + # At this point it has nowhere else to expand, and it will yield this inner + # code as a block + class ParseBlocksFromIndentLine + attr_reader :code_lines + + def initialize(code_lines:) + @code_lines = code_lines + end + + # Builds blocks from bottom up + def each_neighbor_block(target_line) + scan = AroundBlockScan.new(code_lines: code_lines, block: CodeBlock.new(lines: target_line)) + .skip(:empty?) + .skip(:hidden?) + .scan_while { |line| line.indent >= target_line.indent } + + neighbors = scan.code_block.lines + + block = CodeBlock.new(lines: neighbors) + if neighbors.length <= 2 || block.valid? + yield block + else + until neighbors.empty? + lines = [neighbors.pop] + while (block = CodeBlock.new(lines: lines)) && block.invalid? && neighbors.any?
+ lines.prepend neighbors.pop + end + + yield block if block + end + end + end + end +end diff --git a/lib/syntax_suggest/pathname_from_message.rb b/lib/syntax_suggest/pathname_from_message.rb new file mode 100644 index 00000000000000..ea1a90856e90ed --- /dev/null +++ b/lib/syntax_suggest/pathname_from_message.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Converts a SyntaxError message to a path + # + # Handles the case where the filename has a colon in it + # such as on a windows file system: https://github.com/zombocom/syntax_suggest/issues/111 + # + # Example: + # + # message = "/tmp/scratch:2:in `require_relative': /private/tmp/bad.rb:1: syntax error, unexpected `end' (SyntaxError)" + # puts PathnameFromMessage.new(message).call.name + # # => "/tmp/scratch.rb" + # + class PathnameFromMessage + EVAL_RE = /^\(eval\):\d+/ + STREAMING_RE = /^-:\d+/ + attr_reader :name + + def initialize(message, io: $stderr) + @line = message.lines.first + @parts = @line.split(":") + @guess = [] + @name = nil + @io = io + end + + def call + if skip_missing_file_name? + if ENV["SYNTAX_SUGGEST_DEBUG"] + @io.puts "SyntaxSuggest: Could not find filename from #{@line.inspect}" + end + else + until stop? + @guess << @parts.shift + @name = Pathname(@guess.join(":")) + end + + if @parts.empty? + @io.puts "SyntaxSuggest: Could not find filename from #{@line.inspect}" + @name = nil + end + end + + self + end + + def stop? + return true if @parts.empty? + return false if @guess.empty? + + @name&.exist? + end + + def skip_missing_file_name? 
+ @line.match?(EVAL_RE) || @line.match?(STREAMING_RE) + end + end +end diff --git a/lib/syntax_suggest/priority_engulf_queue.rb b/lib/syntax_suggest/priority_engulf_queue.rb new file mode 100644 index 00000000000000..2d1e9b1b631b4e --- /dev/null +++ b/lib/syntax_suggest/priority_engulf_queue.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Keeps track of what elements are in the queue in + # priority and also ensures that when one element + # engulfs/covers/eats another that the larger element + # evicts the smaller element + class PriorityEngulfQueue + def initialize + @queue = PriorityQueue.new + end + + def to_a + @queue.to_a + end + + def empty? + @queue.empty? + end + + def length + @queue.length + end + + def peek + @queue.peek + end + + def pop + @queue.pop + end + + def push(block) + prune_engulf(block) + @queue << block + flush_deleted + + self + end + + private def flush_deleted + while @queue&.peek&.deleted? + @queue.pop + end + end + + private def prune_engulf(block) + # If we're about to pop off the same block, we can skip deleting + # things from the frontier this iteration since we'll get it + # on the next iteration + return if @queue.peek && (block <=> @queue.peek) == 1 + + if block.starts_at != block.ends_at # A block of size 1 cannot engulf another + @queue.to_a.each { |b| + if b.starts_at >= block.starts_at && b.ends_at <= block.ends_at + b.delete + true + end + } + end + end + end +end diff --git a/lib/syntax_suggest/priority_queue.rb b/lib/syntax_suggest/priority_queue.rb new file mode 100644 index 00000000000000..1abda2a444d0cd --- /dev/null +++ b/lib/syntax_suggest/priority_queue.rb @@ -0,0 +1,105 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Holds elements in a priority heap on insert + # + # Instead of constantly calling `sort!`, put + # the element where it belongs the first time + # around + # + # Example: + # + # queue = PriorityQueue.new + # queue << 33 + # queue << 44 + # queue << 1 + # + 
# puts queue.peek # => 44 + # + class PriorityQueue + attr_reader :elements + + def initialize + @elements = [] + end + + def <<(element) + @elements << element + bubble_up(last_index, element) + end + + def pop + exchange(0, last_index) + max = @elements.pop + bubble_down(0) + max + end + + def length + @elements.length + end + + def empty? + @elements.empty? + end + + def peek + @elements.first + end + + def to_a + @elements + end + + # Used for testing, extremely not performant + def sorted + out = [] + elements = @elements.dup + while (element = pop) + out << element + end + @elements = elements + out.reverse + end + + private def last_index + @elements.size - 1 + end + + private def bubble_up(index, element) + return if index <= 0 + + parent_index = (index - 1) / 2 + parent = @elements[parent_index] + + return if (parent <=> element) >= 0 + + exchange(index, parent_index) + bubble_up(parent_index, element) + end + + private def bubble_down(index) + child_index = (index * 2) + 1 + + return if child_index > last_index + + not_the_last_element = child_index < last_index + left_element = @elements[child_index] + right_element = @elements[child_index + 1] + + child_index += 1 if not_the_last_element && (right_element <=> left_element) == 1 + + return if (@elements[index] <=> @elements[child_index]) >= 0 + + exchange(index, child_index) + bubble_down(child_index) + end + + def exchange(source, target) + a = @elements[source] + b = @elements[target] + @elements[source] = b + @elements[target] = a + end + end +end diff --git a/lib/syntax_suggest/ripper_errors.rb b/lib/syntax_suggest/ripper_errors.rb new file mode 100644 index 00000000000000..48eb206e4874b7 --- /dev/null +++ b/lib/syntax_suggest/ripper_errors.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Capture parse errors from ripper + # + # Example: + # + # puts RipperErrors.new(" def foo").call.errors + # # => ["syntax error, unexpected end-of-input, expecting ';' or '\\n'"] + 
class RipperErrors < Ripper + attr_reader :errors + + # Comes from ripper, called + # on every parse error, msg + # is a string + def on_parse_error(msg) + @errors ||= [] + @errors << msg + end + + alias_method :on_alias_error, :on_parse_error + alias_method :on_assign_error, :on_parse_error + alias_method :on_class_name_error, :on_parse_error + alias_method :on_param_error, :on_parse_error + alias_method :compile_error, :on_parse_error + + def call + @run_once ||= begin + @errors = [] + parse + true + end + self + end + end +end diff --git a/lib/syntax_suggest/syntax_suggest.gemspec b/lib/syntax_suggest/syntax_suggest.gemspec new file mode 100644 index 00000000000000..acf9be7710150e --- /dev/null +++ b/lib/syntax_suggest/syntax_suggest.gemspec @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +begin + require_relative "lib/syntax_suggest/version" +rescue LoadError # Fallback to load version file in ruby core repository + require_relative "version" +end + +Gem::Specification.new do |spec| + spec.name = "syntax_suggest" + spec.version = SyntaxSuggest::VERSION + spec.authors = ["schneems"] + spec.email = ["richard.schneeman+foo@gmail.com"] + + spec.summary = "Find syntax errors in your source in a snap" + spec.description = 'When you get an "unexpected end" in your syntax this gem helps you find it' + spec.homepage = "https://github.com/zombocom/syntax_suggest.git" + spec.license = "MIT" + spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0") + + spec.metadata["homepage_uri"] = spec.homepage + spec.metadata["source_code_uri"] = "https://github.com/zombocom/syntax_suggest.git" + + # Specify which files should be added to the gem when it is released. + # The `git ls-files -z` loads the files in the RubyGem that have been added into git. 
+ spec.files = Dir.chdir(File.expand_path("..", __FILE__)) do + `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features|assets)/}) } + end + spec.bindir = "exe" + spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } + spec.require_paths = ["lib"] +end diff --git a/lib/syntax_suggest/unvisited_lines.rb b/lib/syntax_suggest/unvisited_lines.rb new file mode 100644 index 00000000000000..32808db63402ae --- /dev/null +++ b/lib/syntax_suggest/unvisited_lines.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Tracks which lines various code blocks have expanded to + # and which are still unexplored + class UnvisitedLines + def initialize(code_lines:) + @unvisited = code_lines.sort_by(&:indent_index) + @visited_lines = {} + @visited_lines.compare_by_identity + end + + def empty? + @unvisited.empty? + end + + def peek + @unvisited.last + end + + def pop + @unvisited.pop + end + + def visit_block(block) + block.lines.each do |line| + next if @visited_lines[line] + @visited_lines[line] = true + end + + while @visited_lines[@unvisited.last] + @unvisited.pop + end + end + end +end diff --git a/lib/syntax_suggest/version.rb b/lib/syntax_suggest/version.rb new file mode 100644 index 00000000000000..a5176dcf2e732e --- /dev/null +++ b/lib/syntax_suggest/version.rb @@ -0,0 +1,5 @@ +# frozen_string_literal: true + +module SyntaxSuggest + VERSION = "0.0.1" +end diff --git a/libexec/syntax_suggest b/libexec/syntax_suggest new file mode 100755 index 00000000000000..e4a0b0b658e67f --- /dev/null +++ b/libexec/syntax_suggest @@ -0,0 +1,7 @@ +#!/usr/bin/env ruby + +require_relative "../lib/syntax_suggest/api" + +SyntaxSuggest::Cli.new( + argv: ARGV +).call diff --git a/marshal.c b/marshal.c index 43102a54c5b78b..1eeebf7729c6fe 100644 --- a/marshal.c +++ b/marshal.c @@ -28,6 +28,7 @@ #include "internal/encoding.h" #include "internal/error.h" #include "internal/hash.h" +#include "internal/numeric.h" #include "internal/object.h" 
#include "internal/struct.h" #include "internal/symbol.h" @@ -171,6 +172,7 @@ struct dump_arg { st_table *data; st_table *compat_tbl; st_table *encodings; + unsigned long num_entries; }; struct dump_call_arg { @@ -754,6 +756,60 @@ w_objivar(VALUE obj, struct dump_call_arg *arg) w_ivar_each(obj, num, arg); } +// Optimized dump for fixnum larger than 31-bits +static void +w_bigfixnum(VALUE obj, struct dump_arg *arg) +{ + RUBY_ASSERT(FIXNUM_P(obj)); + + w_byte(TYPE_BIGNUM, arg); + +#if SIZEOF_LONG == SIZEOF_VALUE + long num, slen_num; + num = FIX2LONG(obj); +#else + long long num, slen_num; + num = NUM2LL(obj); +#endif + + char sign = num < 0 ? '-' : '+'; + w_byte(sign, arg); + + // Guaranteed not to overflow, as FIXNUM is 1-bit less than long + if (num < 0) num = -num; + + // calculate the size in shorts + int slen = 0; + { + slen_num = num; + while (slen_num) { + slen++; + slen_num = SHORTDN(slen_num); + } + } + + RUBY_ASSERT(slen > 0 && slen <= SIZEOF_LONG / 2); + + w_long((long)slen, arg); + + for (int i = 0; i < slen; i++) { + w_short(num & SHORTMASK, arg); + num = SHORTDN(num); + } + + // We aren't adding this object to the link table, but we need to increment + // the index. 
+ arg->num_entries++; + + RUBY_ASSERT(num == 0); +} + +static void +w_remember(VALUE obj, struct dump_arg *arg) +{ + st_add_direct(arg->data, obj, arg->num_entries++); +} + static void w_object(VALUE obj, struct dump_arg *arg, int limit) { @@ -767,17 +823,6 @@ w_object(VALUE obj, struct dump_arg *arg, int limit) rb_raise(rb_eArgError, "exceed depth limit"); } - if (limit > 0) limit--; - c_arg.limit = limit; - c_arg.arg = arg; - c_arg.obj = obj; - - if (st_lookup(arg->data, obj, &num)) { - w_byte(TYPE_LINK, arg); - w_long((long)num, arg); - return; - } - if (NIL_P(obj)) { w_byte(TYPE_NIL, arg); } @@ -797,19 +842,32 @@ w_object(VALUE obj, struct dump_arg *arg, int limit) w_long(FIX2LONG(obj), arg); } else { - w_object(rb_int2big(FIX2LONG(obj)), arg, limit); + w_bigfixnum(obj, arg); } #endif } else if (SYMBOL_P(obj)) { w_symbol(obj, arg); } - else if (FLONUM_P(obj)) { - st_add_direct(arg->data, obj, arg->data->num_entries); - w_byte(TYPE_FLOAT, arg); - w_float(RFLOAT_VALUE(obj), arg); - } else { + if (st_lookup(arg->data, obj, &num)) { + w_byte(TYPE_LINK, arg); + w_long((long)num, arg); + return; + } + + if (limit > 0) limit--; + c_arg.limit = limit; + c_arg.arg = arg; + c_arg.obj = obj; + + if (FLONUM_P(obj)) { + w_remember(obj, arg); + w_byte(TYPE_FLOAT, arg); + w_float(RFLOAT_VALUE(obj), arg); + return; + } + VALUE v; if (!RBASIC_CLASS(obj)) { @@ -818,7 +876,7 @@ w_object(VALUE obj, struct dump_arg *arg, int limit) } if (rb_obj_respond_to(obj, s_mdump, TRUE)) { - st_add_direct(arg->data, obj, arg->data->num_entries); + w_remember(obj, arg); v = dump_funcall(arg, obj, s_mdump, 0, 0); w_class(TYPE_USRMARSHAL, obj, arg, FALSE); @@ -848,11 +906,11 @@ w_object(VALUE obj, struct dump_arg *arg, int limit) if (hasiv) { w_ivar(hasiv, ivobj, encname, &c_arg); } - st_add_direct(arg->data, obj, arg->data->num_entries); + w_remember(obj, arg); return; } - st_add_direct(arg->data, obj, arg->data->num_entries); + w_remember(obj, arg); hasiv = has_ivars(obj, (encname = 
encoding_name(obj, arg)), &ivobj); { @@ -1044,6 +1102,7 @@ clear_dump_arg(struct dump_arg *arg) arg->symbols = 0; st_free_table(arg->data); arg->data = 0; + arg->num_entries = 0; if (arg->compat_tbl) { st_free_table(arg->compat_tbl); arg->compat_tbl = 0; @@ -1126,6 +1185,7 @@ rb_marshal_dump_limited(VALUE obj, VALUE port, int limit) arg->dest = 0; arg->symbols = st_init_numtable(); arg->data = rb_init_identtable(); + arg->num_entries = 0; arg->compat_tbl = 0; arg->encodings = 0; arg->str = rb_str_buf_new(0); @@ -1881,10 +1941,28 @@ r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int typ sign = r_byte(arg); len = r_long(arg); - data = r_bytes0(len * 2, arg); - v = rb_integer_unpack(RSTRING_PTR(data), len, 2, 0, - INTEGER_PACK_LITTLE_ENDIAN | (sign == '-' ? INTEGER_PACK_NEGATIVE : 0)); - rb_str_resize(data, 0L); + + if (SIZEOF_VALUE >= 8 && len <= 4) { + // Representable within uintptr, likely FIXNUM + VALUE num = 0; + for (int i = 0; i < len; i++) { + num |= (VALUE)r_byte(arg) << (i * 16); + num |= (VALUE)r_byte(arg) << (i * 16 + 8); + } +#if SIZEOF_VALUE == SIZEOF_LONG + v = ULONG2NUM(num); +#else + v = ULL2NUM(num); +#endif + if (sign == '-') { + v = rb_int_uminus(v); + } + } else { + data = r_bytes0(len * 2, arg); + v = rb_integer_unpack(RSTRING_PTR(data), len, 2, 0, + INTEGER_PACK_LITTLE_ENDIAN | (sign == '-' ? 
INTEGER_PACK_NEGATIVE : 0)); + rb_str_resize(data, 0L); + } v = r_entry(v, arg); v = r_leave(v, arg, false); } @@ -2260,7 +2338,8 @@ rb_marshal_load_with_proc(VALUE port, VALUE proc, bool freeze) return v; } -static VALUE marshal_load(rb_execution_context_t *ec, VALUE mod, VALUE source, VALUE proc, VALUE freeze) +static VALUE +marshal_load(rb_execution_context_t *ec, VALUE mod, VALUE source, VALUE proc, VALUE freeze) { return rb_marshal_load_with_proc(source, proc, RTEST(freeze)); } diff --git a/misc/lldb_cruby.py b/misc/lldb_cruby.py index c38b9c62a03651..595d54dfab3dde 100755 --- a/misc/lldb_cruby.py +++ b/misc/lldb_cruby.py @@ -9,15 +9,16 @@ from __future__ import print_function import lldb import os +import inspect +import sys import shlex import platform +import glob -HEAP_PAGE_ALIGN_LOG = 16 - -HEAP_PAGE_ALIGN_MASK = (~(~0 << HEAP_PAGE_ALIGN_LOG)) -HEAP_PAGE_ALIGN = (1 << HEAP_PAGE_ALIGN_LOG) -HEAP_PAGE_SIZE = HEAP_PAGE_ALIGN +from lldb_rb.constants import * +# BEGIN FUNCTION STYLE DECLS +# This will be refactored to use class style decls in the misc/commands dir class BackTrace: VM_FRAME_MAGIC_METHOD = 0x11110001 VM_FRAME_MAGIC_BLOCK = 0x22220001 @@ -468,19 +469,6 @@ def check_bits(page, bitmap_name, bitmap_index, bitmap_bit, v): else: return ' ' -def heap_page(debugger, command, ctx, result, internal_dict): - target = debugger.GetSelectedTarget() - process = target.GetProcess() - thread = process.GetSelectedThread() - frame = thread.GetSelectedFrame() - - val = frame.EvaluateExpression(command) - page = get_page(lldb, target, val) - page_type = target.FindFirstType("struct heap_page").GetPointerType() - page.Cast(page_type) - append_command_output(debugger, "p (struct heap_page *) %0#x" % page.GetValueAsUnsigned(), result) - append_command_output(debugger, "p *(struct heap_page *) %0#x" % page.GetValueAsUnsigned(), result) - def heap_page_body(debugger, command, ctx, result, internal_dict): target = debugger.GetSelectedTarget() process = target.GetProcess() 
@@ -723,37 +711,37 @@ def rb_id2str(debugger, command, result, internal_dict): pos = (num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE id_str = rb_ary_entry(target, ary, pos, result) lldb_inspect(debugger, target, result, id_str) +# END FUNCTION STYLE DECLS -def rb_rclass_ext(debugger, command, result, internal_dict): - if not ('RUBY_Qfalse' in globals()): - lldb_init(debugger) - target = debugger.GetSelectedTarget() - process = target.GetProcess() - thread = process.GetSelectedThread() - frame = thread.GetSelectedFrame() +load_dir, _ = os.path.split(os.path.realpath(__file__)) - uintptr_t = target.FindFirstType("uintptr_t") - rclass_t = target.FindFirstType("struct RClass") - rclass_ext_t = target.FindFirstType("rb_classext_t") - - rclass_addr = target.EvaluateExpression(command).Cast(uintptr_t) - rclass_ext_addr = (rclass_addr.GetValueAsUnsigned() + rclass_t.GetByteSize()) - debugger.HandleCommand("p *(rb_classext_t *)%0#x" % rclass_ext_addr) +for fname in glob.glob(f"{load_dir}/lldb_rb/commands/*_command.py"): + _, basename = os.path.split(fname) + mname, _ = os.path.splitext(basename) + exec(f"import lldb_rb.commands.{mname}") def __lldb_init_module(debugger, internal_dict): + # Register all classes that subclass RbBaseCommand + + for memname, mem in inspect.getmembers(sys.modules["lldb_rb.rb_base_command"]): + if inspect.isclass(mem): + for sclass in mem.__subclasses__(): + sclass.register_lldb_command(debugger, f"{__name__}.{sclass.__module__}") + + + ## FUNCTION INITS - These should be removed when converted to class commands debugger.HandleCommand("command script add -f lldb_cruby.lldb_rp rp") debugger.HandleCommand("command script add -f lldb_cruby.count_objects rb_count_objects") debugger.HandleCommand("command script add -f lldb_cruby.stack_dump_raw SDR") debugger.HandleCommand("command script add -f lldb_cruby.dump_node dump_node") - debugger.HandleCommand("command script add -f lldb_cruby.heap_page heap_page") debugger.HandleCommand("command script add -f 
lldb_cruby.heap_page_body heap_page_body") debugger.HandleCommand("command script add -f lldb_cruby.rb_backtrace rbbt") debugger.HandleCommand("command script add -f lldb_cruby.dump_page dump_page") debugger.HandleCommand("command script add -f lldb_cruby.dump_page_rvalue dump_page_rvalue") debugger.HandleCommand("command script add -f lldb_cruby.rb_id2str rb_id2str") - debugger.HandleCommand("command script add -f lldb_cruby.rb_rclass_ext rclass_ext") - lldb_init(debugger) + lldb_rb.rb_base_command.RbBaseCommand.lldb_init(debugger) + print("lldb scripts for ruby has been installed.") diff --git a/misc/lldb_rb/commands/command_template.py b/misc/lldb_rb/commands/command_template.py new file mode 100644 index 00000000000000..21014a993e4d71 --- /dev/null +++ b/misc/lldb_rb/commands/command_template.py @@ -0,0 +1,30 @@ +# This is a command template for implementing a helper function inside LLDB. To +# use this file +# 1. Copy it and rename the copy so it ends with `_command.py`. +# 2. Rename the class to something descriptive that ends with Command. +# 3. Change the program variable to be a descriptive command name +# 4. Ensure you are inheriting from RbBaseCommand or another command that +# implements the same interface + +import lldb + +from lldb_rb.constants import * +from lldb_rb.rb_base_command import RbBaseCommand + +# This test command inherits from RbBaseCommand which provides access to Ruby +# globals and utility helpers +class TestCommand(RbBaseCommand): + # program is the keyword the user will type in lldb to execute this command + program = "test" + + # help_string will be displayed in lldb when the user uses the help functions + help_string = "This is a test command to show how to implement lldb commands" + + # call is where our command logic will be implemented + def call(self, debugger, command, exe_ctx, result): + # This method will be called once the LLDB environment has been setup. 
+ # You will have access to self.target, self.process, self.frame, and + # self.thread + # + # This is where we should implement our command logic + pass diff --git a/misc/lldb_rb/commands/heap_page_command.py b/misc/lldb_rb/commands/heap_page_command.py new file mode 100644 index 00000000000000..edb74a415bf1bc --- /dev/null +++ b/misc/lldb_rb/commands/heap_page_command.py @@ -0,0 +1,26 @@ +import lldb + +from lldb_rb.constants import * +from lldb_rb.rb_base_command import RbBaseCommand + +class HeapPageCommand(RbBaseCommand): + program = "heap_page" + help_string = "prints out 'struct heap_page' for a VALUE pointer in the page" + + def call(self, debugger, command, exe_ctx, result): + self.t_heap_page_body = self.target.FindFirstType("struct heap_page_body") + self.t_heap_page_ptr = self.target.FindFirstType("struct heap_page").GetPointerType() + + page = self._get_page(self.frame.EvaluateExpression(command)) + page.Cast(self.t_heap_page_ptr) + + self._append_command_output(debugger, "p (struct heap_page *) %0#x" % page.GetValueAsUnsigned(), result) + self._append_command_output(debugger, "p *(struct heap_page *) %0#x" % page.GetValueAsUnsigned(), result) + + def _get_page(self, val): + addr = val.GetValueAsUnsigned() + page_addr = addr & ~(HEAP_PAGE_ALIGN_MASK) + address = lldb.SBAddress(page_addr, self.target) + body = self.target.CreateValueFromAddress("page", address, self.t_heap_page_body) + + return body.GetValueForExpressionPath("->header.page") diff --git a/misc/lldb_rb/commands/rclass_ext_command.py b/misc/lldb_rb/commands/rclass_ext_command.py new file mode 100644 index 00000000000000..8bae91145764e8 --- /dev/null +++ b/misc/lldb_rb/commands/rclass_ext_command.py @@ -0,0 +1,14 @@ +from lldb_rb.rb_base_command import RbBaseCommand + +class RclassExtCommand(RbBaseCommand): + program = "rclass_ext" + help_string = "retrieves and prints the rb_classext_struct for the VALUE pointer passed in" + + def call(self, debugger, command, exe_ctx, result): + uintptr_t 
= self.target.FindFirstType("uintptr_t") + rclass_t = self.target.FindFirstType("struct RClass") + rclass_ext_t = self.target.FindFirstType("rb_classext_t") + + rclass_addr = self.target.EvaluateExpression(command).Cast(uintptr_t) + rclass_ext_addr = (rclass_addr.GetValueAsUnsigned() + rclass_t.GetByteSize()) + debugger.HandleCommand("p *(rb_classext_t *)%0#x" % rclass_ext_addr) diff --git a/misc/lldb_rb/constants.py b/misc/lldb_rb/constants.py new file mode 100644 index 00000000000000..ec3050a399888b --- /dev/null +++ b/misc/lldb_rb/constants.py @@ -0,0 +1,4 @@ +HEAP_PAGE_ALIGN_LOG = 16 +HEAP_PAGE_ALIGN_MASK = (~(~0 << HEAP_PAGE_ALIGN_LOG)) +HEAP_PAGE_ALIGN = (1 << HEAP_PAGE_ALIGN_LOG) +HEAP_PAGE_SIZE = HEAP_PAGE_ALIGN diff --git a/misc/lldb_rb/rb_base_command.py b/misc/lldb_rb/rb_base_command.py new file mode 100644 index 00000000000000..bf98b6761255bc --- /dev/null +++ b/misc/lldb_rb/rb_base_command.py @@ -0,0 +1,69 @@ +import lldb +from pydoc import locate + +class RbBaseCommand: + @classmethod + def register_lldb_command(cls, debugger, module_name): + # Add any commands contained in this module to LLDB + command = f"command script add -c {module_name}.{cls.__name__} {cls.program}" + debugger.HandleCommand(command) + + @classmethod + def lldb_init(cls, debugger): + target = debugger.GetSelectedTarget() + global SIZEOF_VALUE + SIZEOF_VALUE = target.FindFirstType("VALUE").GetByteSize() + + value_types = [] + g = globals() + + imemo_types = target.FindFirstType("enum imemo_type") + + for member in imemo_types.GetEnumMembers(): + g[member.GetName()] = member.GetValueAsUnsigned() + + for enum in target.FindFirstGlobalVariable("ruby_dummy_gdb_enums"): + enum = enum.GetType() + members = enum.GetEnumMembers() + for i in range(0, members.GetSize()): + member = members.GetTypeEnumMemberAtIndex(i) + name = member.GetName() + value = member.GetValueAsUnsigned() + g[name] = value + + if name.startswith("RUBY_T_"): + value_types.append(name) + g["value_types"] = value_types 
+ + def __init__(self, debugger, _internal_dict): + self.internal_dict = _internal_dict + + def __call__(self, debugger, command, exe_ctx, result): + if not ("RUBY_Qfalse" in globals()): + RbBaseCommand.lldb_init(debugger) + + self.build_environment(debugger) + self.call(debugger, command, exe_ctx, result) + + def call(self, debugger, command, exe_ctx, result): + raise NotImplementedError("subclasses must implement call") + + def get_short_help(self): + return self.__class__.help_string + + def get_long_help(self): + return self.__class__.help_string + + def build_environment(self, debugger): + self.target = debugger.GetSelectedTarget() + self.process = self.target.GetProcess() + self.thread = self.process.GetSelectedThread() + self.frame = self.thread.GetSelectedFrame() + + def _append_command_output(self, debugger, command, result): + output1 = result.GetOutput() + debugger.GetCommandInterpreter().HandleCommand(command, result) + output2 = result.GetOutput() + result.Clear() + result.write(output1) + result.write(output2) diff --git a/mjit.c b/mjit.c index 98f4af3d18fa71..a873b3d9f84576 100644 --- a/mjit.c +++ b/mjit.c @@ -76,6 +76,7 @@ #include "internal/cont.h" #include "internal/file.h" #include "internal/hash.h" +#include "internal/process.h" #include "internal/warnings.h" #include "vm_sync.h" #include "ractor_core.h" @@ -98,14 +99,9 @@ #include "insns_info.inc" #include "internal/compile.h" -#ifdef _WIN32 -#include -#include -#else #include #include #include -#endif #include #ifdef HAVE_FCNTL_H #include @@ -122,34 +118,11 @@ # define MAXPATHLEN 1024 #endif -#ifdef _WIN32 -#define dlopen(name,flag) ((void*)LoadLibrary(name)) -#define dlerror() strerror(rb_w32_map_errno(GetLastError())) -#define dlsym(handle,name) ((void*)GetProcAddress((handle),(name))) -#define dlclose(handle) (!FreeLibrary(handle)) -#define RTLD_NOW -1 - -#define waitpid(pid,stat_loc,options) (WaitForSingleObject((HANDLE)(pid), INFINITE), GetExitCodeProcess((HANDLE)(pid), 
(LPDWORD)(stat_loc)), CloseHandle((HANDLE)pid), (pid)) -#define WIFEXITED(S) ((S) != STILL_ACTIVE) -#define WEXITSTATUS(S) (S) -#define WIFSIGNALED(S) (0) -typedef intptr_t pid_t; -#endif - // Atomically set function pointer if possible. #define MJIT_ATOMIC_SET(var, val) (void)ATOMIC_PTR_EXCHANGE(var, val) #define MJIT_TMP_PREFIX "_ruby_mjit_" -// JIT compaction requires the header transformation because linking multiple .o files -// doesn't work without having `static` in the same function definitions. We currently -// don't support transforming the MJIT header on Windows. -#ifdef _WIN32 -# define USE_JIT_COMPACTION 0 -#else -# define USE_JIT_COMPACTION 1 -#endif - // Linked list of struct rb_mjit_unit. struct rb_mjit_unit_list { struct ccan_list_head head; @@ -237,15 +210,8 @@ static struct rb_mjit_unit *current_cc_unit = NULL; // PID of currently running C compiler process. 0 if nothing is running. static pid_t current_cc_pid = 0; // TODO: make this part of unit? -#ifndef _MSC_VER // Name of the header file. static char *header_file; -#endif - -#ifdef _WIN32 -// Linker option to enable libruby. -static char *libruby_pathflag; -#endif #include "mjit_config.h" @@ -261,7 +227,7 @@ static char *libruby_pathflag; // Use `-nodefaultlibs -nostdlib` for GCC where possible, which does not work on cygwin, AIX, and OpenBSD. // This seems to improve MJIT performance on GCC. 
-#if defined __GNUC__ && !defined __clang__ && !defined(_WIN32) && !defined(__CYGWIN__) && !defined(_AIX) && !defined(__OpenBSD__) +#if defined __GNUC__ && !defined __clang__ && !defined(__CYGWIN__) && !defined(_AIX) && !defined(__OpenBSD__) # define GCC_NOSTDLIB_FLAGS "-nodefaultlibs", "-nostdlib", #else # define GCC_NOSTDLIB_FLAGS // empty @@ -286,7 +252,7 @@ static const char *const CC_LINKER_ARGS[] = { }; static const char *const CC_LIBS[] = { -#if defined(_WIN32) || defined(__CYGWIN__) +#if defined(__CYGWIN__) MJIT_LIBS // mswin, cygwin #endif #if defined __GNUC__ && !defined __clang__ @@ -371,22 +337,6 @@ remove_file(const char *filename) } } -// Lazily delete .so files. -static void -clean_temp_files(struct rb_mjit_unit *unit) -{ -#if defined(_WIN32) - if (unit->so_file) { - char *so_file = unit->so_file; - - unit->so_file = NULL; - // unit->so_file is set only when mjit_opts.save_temps is false. - remove_file(so_file); - free(so_file); - } -#endif -} - // This is called in the following situations: // 1) On dequeue or `unload_units()`, associated ISeq is already GCed. // 2) The unit is not called often and unloaded by `unload_units()`. @@ -409,7 +359,6 @@ free_unit(struct rb_mjit_unit *unit) if (unit->handle && dlclose(unit->handle)) { // handle is NULL if it's in queue mjit_warning("failed to close handle for u%d: %s", unit->id, dlerror()); } - clean_temp_files(unit); free(unit); } @@ -431,10 +380,12 @@ CRITICAL_SECTION_FINISH(int level, const char *msg) rb_native_mutex_unlock(&mjit_engine_mutex); } +static pid_t mjit_pid = 0; + static int sprint_uniq_filename(char *str, size_t size, unsigned long id, const char *prefix, const char *suffix) { - return snprintf(str, size, "%s/%sp%"PRI_PIDT_PREFIX"uu%lu%s", tmp_dir, prefix, getpid(), id, suffix); + return snprintf(str, size, "%s/%sp%"PRI_PIDT_PREFIX"uu%lu%s", tmp_dir, prefix, mjit_pid, id, suffix); } // Return time in milliseconds as a double. 
@@ -554,22 +505,6 @@ start_process(const char *abspath, char *const *argv) } pid_t pid; -#ifdef _WIN32 - extern HANDLE rb_w32_start_process(const char *abspath, char *const *argv, int out_fd); - int out_fd = 0; - if (mjit_opts.verbose <= 1) { - // Discard cl.exe's outputs like: - // _ruby_mjit_p12u3.c - // Creating library C:.../_ruby_mjit_p12u3.lib and object C:.../_ruby_mjit_p12u3.exp - out_fd = dev_null; - } - - pid = (pid_t)rb_w32_start_process(abspath, argv, out_fd); - if (pid == 0) { - verbose(1, "MJIT: Failed to create process: %s", dlerror()); - return -1; - } -#else if ((pid = vfork()) == 0) { /* TODO: reuse some function in process.c */ umask(0077); if (mjit_opts.verbose == 0) { @@ -587,7 +522,6 @@ start_process(const char *abspath, char *const *argv) verbose(1, "MJIT: Error in execv: %s", abspath); _exit(1); } -#endif (void)close(dev_null); return pid; } @@ -627,14 +561,7 @@ exec_process(const char *path, char *const argv[]) static void remove_so_file(const char *so_file, struct rb_mjit_unit *unit) { -#if defined(_WIN32) - // Windows can't remove files while it's used. - unit->so_file = strdup(so_file); // lazily delete on `clean_temp_files()` - if (unit->so_file == NULL) - mjit_warning("failed to allocate memory to lazily remove '%s': %s", so_file, strerror(errno)); -#else remove_file(so_file); -#endif } // Print _mjitX, but make a human-readable funcname when --mjit-debug is used @@ -681,87 +608,6 @@ static const int c_file_access_mode = #define append_str(p, str) append_str2(p, str, sizeof(str)-1) #define append_lit(p, str) append_str2(p, str, rb_strlen_lit(str)) -#ifdef _MSC_VER -// Compile C file to so. It returns true if it succeeds. 
(mswin) -static bool -compile_c_to_so(const char *c_file, const char *so_file) -{ - const char *files[] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, "-link", libruby_pathflag, NULL }; - char *p; - - // files[0] = "-Fe*.dll" - files[0] = p = alloca(sizeof(char) * (rb_strlen_lit("-Fe") + strlen(so_file) + 1)); - p = append_lit(p, "-Fe"); - p = append_str2(p, so_file, strlen(so_file)); - *p = '\0'; - - // files[1] = "-Fo*.obj" - // We don't need .obj file, but it's somehow created to cwd without -Fo and we want to control the output directory. - files[1] = p = alloca(sizeof(char) * (rb_strlen_lit("-Fo") + strlen(so_file) - rb_strlen_lit(DLEXT) + rb_strlen_lit(".obj") + 1)); - char *obj_file = p = append_lit(p, "-Fo"); - p = append_str2(p, so_file, strlen(so_file) - rb_strlen_lit(DLEXT)); - p = append_lit(p, ".obj"); - *p = '\0'; - - // files[2] = "-Yu*.pch" - files[2] = p = alloca(sizeof(char) * (rb_strlen_lit("-Yu") + strlen(pch_file) + 1)); - p = append_lit(p, "-Yu"); - p = append_str2(p, pch_file, strlen(pch_file)); - *p = '\0'; - - // files[3] = "C:/.../rb_mjit_header-*.obj" - files[3] = p = alloca(sizeof(char) * (strlen(pch_file) + 1)); - p = append_str2(p, pch_file, strlen(pch_file) - strlen(".pch")); - p = append_lit(p, ".obj"); - *p = '\0'; - - // files[4] = "-Tc*.c" - files[4] = p = alloca(sizeof(char) * (rb_strlen_lit("-Tc") + strlen(c_file) + 1)); - p = append_lit(p, "-Tc"); - p = append_str2(p, c_file, strlen(c_file)); - *p = '\0'; - - // files[5] = "-Fd*.pdb" - // Generate .pdb file in temporary directory instead of cwd. - files[5] = p = alloca(sizeof(char) * (rb_strlen_lit("-Fd") + strlen(so_file) - rb_strlen_lit(DLEXT) + rb_strlen_lit(".pdb") + 1)); - p = append_lit(p, "-Fd"); - p = append_str2(p, so_file, strlen(so_file) - rb_strlen_lit(DLEXT)); - p = append_lit(p, ".pdb"); - *p = '\0'; - - // files[6] = "-Z7" - // Put this last to override any debug options that came previously. 
- files[6] = p = alloca(sizeof(char) * rb_strlen_lit("-Z7") + 1); - p = append_lit(p, "-Z7"); - *p = '\0'; - - char **args = form_args(5, CC_LDSHARED_ARGS, CC_CODEFLAG_ARGS, - files, CC_LIBS, CC_DLDFLAGS_ARGS); - if (args == NULL) - return false; - - int exit_code = exec_process(cc_path, args); - free(args); - - if (exit_code == 0) { - // remove never-used files (.obj, .lib, .exp, .pdb). XXX: Is there any way not to generate this? - if (!mjit_opts.save_temps) { - char *before_dot; - remove_file(obj_file); - - before_dot = obj_file + strlen(obj_file) - rb_strlen_lit(".obj"); - append_lit(before_dot, ".lib"); remove_file(obj_file); - append_lit(before_dot, ".exp"); remove_file(obj_file); - append_lit(before_dot, ".pdb"); remove_file(obj_file); - } - } - else { - verbose(2, "compile_c_to_so: compile error: %d", exit_code); - } - return exit_code == 0; -} -#else // _MSC_VER - // The function producing the pre-compiled header. static void make_pch(void) @@ -798,14 +644,11 @@ make_pch(void) } } -static pid_t -start_compiling_c_to_so(const char *c_file, const char *so_file) +static int +compile_c_to_so(const char *c_file, const char *so_file) { const char *so_args[] = { "-o", so_file, -# ifdef _WIN32 - libruby_pathflag, -# endif # ifdef __clang__ "-include-pch", pch_file, # endif @@ -821,22 +664,16 @@ start_compiling_c_to_so(const char *c_file, const char *so_file) char **args = form_args(8, CC_LDSHARED_ARGS, CC_CODEFLAG_ARGS, cc_added_args, so_args, loader_args, CC_LIBS, CC_DLDFLAGS_ARGS, CC_LINKER_ARGS); - if (args == NULL) return -1; + if (args == NULL) return 1; - rb_vm_t *vm = GET_VM(); - rb_native_mutex_lock(&vm->waitpid_lock); - - pid_t pid = start_process(cc_path, args); - mjit_add_waiting_pid(vm, pid); - - rb_native_mutex_unlock(&vm->waitpid_lock); + int exit_code = exec_process(cc_path, args); + if (!mjit_opts.save_temps) + remove_file(c_file); free(args); - return pid; + return exit_code; } -#endif // _MSC_VER -#if USE_JIT_COMPACTION static void 
compile_prelude(FILE *f); // Compile all JIT code into a single .c file @@ -888,8 +725,8 @@ mjit_compact(char* c_file) // Compile all cached .c files and build a single .so file. Reload all JIT func from it. // This improves the code locality for better performance in terms of iTLB and iCache. -static pid_t -start_mjit_compact(struct rb_mjit_unit *unit) +static int +mjit_compact_unit(struct rb_mjit_unit *unit) { static const char c_ext[] = ".c"; static const char so_ext[] = DLEXT; @@ -900,9 +737,30 @@ start_mjit_compact(struct rb_mjit_unit *unit) bool success = mjit_compact(c_file); if (success) { - return start_compiling_c_to_so(c_file, so_file); + return compile_c_to_so(c_file, so_file); + } + return 1; +} + +static pid_t +start_mjit_compact(struct rb_mjit_unit *unit) +{ + rb_vm_t *vm = GET_VM(); + rb_native_mutex_lock(&vm->waitpid_lock); + + pid_t pid = rb_fork(); + if (pid == 0) { + rb_native_mutex_unlock(&vm->waitpid_lock); + + int exit_code = mjit_compact_unit(unit); + exit(exit_code); + } + else { + mjit_add_waiting_pid(vm, pid); + rb_native_mutex_unlock(&vm->waitpid_lock); + + return pid; } - return -1; } static void @@ -942,7 +800,6 @@ load_compact_funcs_from_so(struct rb_mjit_unit *unit, char *c_file, char *so_fil } verbose(1, "JIT compaction (%.1fms): Compacted %d methods %s -> %s", end_time - current_cc_ms, active_units.length, c_file, so_file); } -#endif // USE_JIT_COMPACTION static void * load_func_from_so(const char *so_file, const char *funcname, struct rb_mjit_unit *unit) @@ -996,17 +853,12 @@ compile_prelude(FILE *f) } fprintf(f, "\"\n"); #endif - -#ifdef _WIN32 - fprintf(f, "void _pei386_runtime_relocator(void){}\n"); - fprintf(f, "int __stdcall DllMainCRTStartup(void* hinstDLL, unsigned int fdwReason, void* lpvReserved) { return 1; }\n"); -#endif } // Compile ISeq in UNIT and return function pointer of JIT-ed code. // It may return NOT_COMPILED_JIT_ISEQ_FUNC if something went wrong. 
-static pid_t -start_mjit_compile(struct rb_mjit_unit *unit) +static int +mjit_compile_unit(struct rb_mjit_unit *unit) { static const char c_ext[] = ".c"; static const char so_ext[] = DLEXT; @@ -1022,7 +874,7 @@ start_mjit_compile(struct rb_mjit_unit *unit) int e = errno; if (fd >= 0) (void)close(fd); verbose(1, "Failed to fopen '%s', giving up JIT for it (%s)", c_file, strerror(e)); - return -1; + return 1; } // print #include of MJIT header, etc. @@ -1047,82 +899,32 @@ start_mjit_compile(struct rb_mjit_unit *unit) if (!mjit_opts.save_temps) remove_file(c_file); verbose(1, "JIT failure: %s@%s:%ld -> %s", iseq_label, iseq_path, iseq_lineno, c_file); - return -1; + return 1; } - return start_compiling_c_to_so(c_file, so_file); + return compile_c_to_so(c_file, so_file); } -#ifdef _WIN32 -// Compile ISeq in UNIT and return function pointer of JIT-ed code. -// It may return NOT_COMPILED_JIT_ISEQ_FUNC if something went wrong. -static mjit_func_t -convert_unit_to_func(struct rb_mjit_unit *unit) +static pid_t +start_mjit_compile(struct rb_mjit_unit *unit) { - static const char c_ext[] = ".c"; - static const char so_ext[] = DLEXT; - char c_file[MAXPATHLEN], so_file[MAXPATHLEN], funcname[MAXPATHLEN]; - - sprint_uniq_filename(c_file, (int)sizeof(c_file), unit->id, MJIT_TMP_PREFIX, c_ext); - sprint_uniq_filename(so_file, (int)sizeof(so_file), unit->id, MJIT_TMP_PREFIX, so_ext); - sprint_funcname(funcname, unit); - - FILE *f; - int fd = rb_cloexec_open(c_file, c_file_access_mode, 0600); - if (fd < 0 || (f = fdopen(fd, "w")) == NULL) { - int e = errno; - if (fd >= 0) (void)close(fd); - verbose(1, "Failed to fopen '%s', giving up JIT for it (%s)", c_file, strerror(e)); - return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC; - } - - // print #include of MJIT header, etc. - compile_prelude(f); - - // To make MJIT worker thread-safe against GC.compact, copy ISeq values while `in_jit` is true. 
- long iseq_lineno = 0; - if (FIXNUM_P(ISEQ_BODY(unit->iseq)->location.first_lineno)) - // FIX2INT may fallback to rb_num2long(), which is a method call and dangerous in MJIT worker. So using only FIX2LONG. - iseq_lineno = FIX2LONG(ISEQ_BODY(unit->iseq)->location.first_lineno); - char *iseq_label = alloca(RSTRING_LEN(ISEQ_BODY(unit->iseq)->location.label) + 1); - char *iseq_path = alloca(RSTRING_LEN(rb_iseq_path(unit->iseq)) + 1); - strcpy(iseq_label, RSTRING_PTR(ISEQ_BODY(unit->iseq)->location.label)); - strcpy(iseq_path, RSTRING_PTR(rb_iseq_path(unit->iseq))); - - verbose(2, "start compilation: %s@%s:%ld -> %s", iseq_label, iseq_path, iseq_lineno, c_file); - fprintf(f, "/* %s@%s:%ld */\n\n", iseq_label, iseq_path, iseq_lineno); - bool success = mjit_compile(f, unit->iseq, funcname, unit->id); - - fclose(f); - if (!success) { - if (!mjit_opts.save_temps) - remove_file(c_file); - verbose(1, "JIT failure: %s@%s:%ld -> %s", iseq_label, iseq_path, iseq_lineno, c_file); - return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC; - } + rb_vm_t *vm = GET_VM(); + rb_native_mutex_lock(&vm->waitpid_lock); - double start_time = real_ms_time(); - success = compile_c_to_so(c_file, so_file); - if (!mjit_opts.save_temps) - remove_file(c_file); - double end_time = real_ms_time(); + pid_t pid = rb_fork(); + if (pid == 0) { + rb_native_mutex_unlock(&vm->waitpid_lock); - if (!success) { - verbose(2, "Failed to generate so: %s", so_file); - return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC; + int exit_code = mjit_compile_unit(unit); + exit(exit_code); } + else { + mjit_add_waiting_pid(vm, pid); + rb_native_mutex_unlock(&vm->waitpid_lock); - void *func = load_func_from_so(so_file, funcname, unit); - if (!mjit_opts.save_temps) - remove_so_file(so_file, unit); - - if ((uintptr_t)func > (uintptr_t)LAST_JIT_ISEQ_FUNC) { - verbose(1, "JIT success (%.1fms): %s@%s:%ld -> %s", - end_time - start_time, iseq_label, iseq_path, iseq_lineno, c_file); + return pid; } - return (mjit_func_t)func; } -#endif // 
Capture cc entries of `captured_iseq` and append them to `compiled_iseq->jit_unit->cc_entries`. // This is needed when `captured_iseq` is inlined by `compiled_iseq` and GC needs to mark inlined cc. @@ -1372,7 +1174,6 @@ free_list(struct rb_mjit_unit_list *list, bool close_handle_p) if (unit->handle && dlclose(unit->handle)) { mjit_warning("failed to close handle for u%d: %s", unit->id, dlerror()); } - clean_temp_files(unit); free(unit); } else { @@ -1470,15 +1271,6 @@ check_unit_queue(void) struct rb_mjit_unit *unit = get_from_list(&unit_queue); if (unit == NULL) return; -#ifdef _WIN32 - // Synchronously compile methods on Windows. - // mswin: No SIGCHLD, MinGW: directly compiling .c to .so doesn't work - mjit_func_t func = convert_unit_to_func(unit); - MJIT_ATOMIC_SET(ISEQ_BODY(unit->iseq)->jit_func, func); - if ((uintptr_t)func > (uintptr_t)LAST_JIT_ISEQ_FUNC) { - add_to_list(unit, &active_units); - } -#else current_cc_ms = real_ms_time(); current_cc_unit = unit; current_cc_pid = start_mjit_compile(unit); @@ -1494,7 +1286,6 @@ check_unit_queue(void) if (mjit_opts.wait) { mjit_wait(unit->iseq->body); } -#endif } // Create unit for `iseq`. This function may be called from an MJIT worker. @@ -1521,7 +1312,6 @@ create_unit(const rb_iseq_t *iseq) static void check_compaction(void) { -#if USE_JIT_COMPACTION // Allow only `max_cache_size / 100` times (default: 100) of compaction. // Note: GC of compacted code has not been implemented yet. int max_compact_size = mjit_opts.max_cache_size / 100; @@ -1543,7 +1333,6 @@ check_compaction(void) // TODO: check -1 } } -#endif } // Check the current CC process if any, and start a next C compiler process as needed. 
@@ -1556,8 +1345,6 @@ mjit_notify_waitpid(int status) // Delete .c file char c_file[MAXPATHLEN]; sprint_uniq_filename(c_file, (int)sizeof(c_file), current_cc_unit->id, MJIT_TMP_PREFIX, ".c"); - if (!mjit_opts.save_temps) - remove_file(c_file); // Check the result bool success = false; @@ -1578,12 +1365,8 @@ mjit_notify_waitpid(int status) char so_file[MAXPATHLEN]; sprint_uniq_filename(so_file, (int)sizeof(so_file), current_cc_unit->id, MJIT_TMP_PREFIX, DLEXT); if (current_cc_unit->compact_p) { // Compact unit -#if USE_JIT_COMPACTION load_compact_funcs_from_so(current_cc_unit, c_file, so_file); current_cc_unit = NULL; -#else - RUBY_ASSERT(!current_cc_unit->compact_p); -#endif } else { // Normal unit // Load the function from so @@ -1795,16 +1578,6 @@ init_header_filename(void) const char *basedir = ""; size_t baselen = 0; char *p; -#ifdef _WIN32 - static const char libpathflag[] = -# ifdef _MSC_VER - "-LIBPATH:" -# else - "-L" -# endif - ; - const size_t libpathflag_len = sizeof(libpathflag) - 1; -#endif #ifdef LOAD_RELATIVE basedir_val = ruby_prefix_path; @@ -1846,7 +1619,6 @@ init_header_filename(void) } else #endif -#ifndef _MSC_VER { // A name of the header file included in any C file generated by MJIT for iseqs. 
static const char header_name[] = MJIT_HEADER_INSTALL_DIR "/" MJIT_MIN_HEADER_NAME; @@ -1866,56 +1638,15 @@ init_header_filename(void) } pch_file = get_uniq_filename(0, MJIT_TMP_PREFIX "h", ".h.gch"); -#else - { - static const char pch_name[] = MJIT_HEADER_INSTALL_DIR "/" MJIT_PRECOMPILED_HEADER_NAME; - const size_t pch_name_len = sizeof(pch_name) - 1; - - pch_file = xmalloc(baselen + pch_name_len + 1); - p = append_str2(pch_file, basedir, baselen); - p = append_str2(p, pch_name, pch_name_len + 1); - if ((fd = rb_cloexec_open(pch_file, O_RDONLY, 0)) < 0) { - verbose(1, "Cannot access precompiled header file: %s", pch_file); - xfree(pch_file); - pch_file = NULL; - return false; - } - (void)close(fd); - } -#endif - -#ifdef _WIN32 - basedir_val = ruby_archlibdir_path; - basedir = StringValuePtr(basedir_val); - baselen = RSTRING_LEN(basedir_val); - libruby_pathflag = p = xmalloc(libpathflag_len + baselen + 1); - p = append_str(p, libpathflag); - p = append_str2(p, basedir, baselen); - *p = '\0'; -#endif return true; } -#ifdef _WIN32 -UINT rb_w32_system_tmpdir(WCHAR *path, UINT len); -#endif - static char * system_default_tmpdir(void) { // c.f. 
ext/etc/etc.c:etc_systmpdir() -#ifdef _WIN32 - WCHAR tmppath[_MAX_PATH]; - UINT len = rb_w32_system_tmpdir(tmppath, numberof(tmppath)); - if (len) { - int blen = WideCharToMultiByte(CP_UTF8, 0, tmppath, len, NULL, 0, NULL, NULL); - char *tmpdir = xmalloc(blen + 1); - WideCharToMultiByte(CP_UTF8, 0, tmppath, len, tmpdir, blen, NULL, NULL); - tmpdir[blen] = '\0'; - return tmpdir; - } -#elif defined _CS_DARWIN_USER_TEMP_DIR +#if defined _CS_DARWIN_USER_TEMP_DIR char path[MAXPATHLEN]; size_t len = confstr(_CS_DARWIN_USER_TEMP_DIR, path, sizeof(path)); if (len > 0) { @@ -1943,19 +1674,17 @@ check_tmpdir(const char *dir) # define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) #endif if (!S_ISDIR(st.st_mode)) return FALSE; -#ifndef _WIN32 -# ifndef S_IWOTH +#ifndef S_IWOTH # define S_IWOTH 002 -# endif +#endif if (st.st_mode & S_IWOTH) { -# ifdef S_ISVTX +#ifdef S_ISVTX if (!(st.st_mode & S_ISVTX)) return FALSE; -# else +#else return FALSE; -# endif +#endif } if (access(dir, W_OK)) return FALSE; -#endif return TRUE; } @@ -2092,6 +1821,7 @@ mjit_init(const struct mjit_options *opts) mjit_opts = *opts; mjit_enabled = true; mjit_call_p = true; + mjit_pid = getpid(); // Normalize options if (mjit_opts.min_calls == 0) @@ -2102,11 +1832,7 @@ mjit_init(const struct mjit_options *opts) mjit_opts.max_cache_size = MIN_CACHE_SIZE; // Initialize variables for compilation -#ifdef _MSC_VER - pch_status = PCH_SUCCESS; // has prebuilt precompiled header -#else pch_status = PCH_NOT_READY; -#endif cc_path = CC_COMMON_ARGS[0]; verbose(2, "MJIT: CC defaults to %s", cc_path); cc_common_args = xmalloc(sizeof(CC_COMMON_ARGS)); @@ -2150,10 +1876,8 @@ mjit_init(const struct mjit_options *opts) // Initialize worker thread start_worker(); -#ifndef _MSC_VER // TODO: Consider running C compiler asynchronously make_pch(); -#endif } static void @@ -2272,12 +1996,10 @@ mjit_finish(bool close_handle_p) mjit_dump_total_calls(); #endif -#ifndef _MSC_VER // mswin has prebuilt precompiled header if 
(!mjit_opts.save_temps && getpid() == pch_owner_pid) remove_file(pch_file); xfree(header_file); header_file = NULL; -#endif xfree((void *)cc_common_args); cc_common_args = NULL; for (char **flag = cc_added_args; *flag != NULL; flag++) xfree(*flag); diff --git a/mjit.h b/mjit.h index fad18208fbb900..045612d7be92a0 100644 --- a/mjit.h +++ b/mjit.h @@ -17,7 +17,6 @@ #include "debug_counter.h" #include "ruby.h" #include "vm_core.h" -#include "yjit.h" // Special address values of a function generated from the // corresponding iseq by MJIT: @@ -103,101 +102,6 @@ extern void mjit_cont_free(struct mjit_cont *cont); extern void mjit_mark_cc_entries(const struct rb_iseq_constant_body *const body); extern void mjit_notify_waitpid(int status); -# ifdef MJIT_HEADER -NOINLINE(static COLDFUNC VALUE mjit_exec_slowpath(rb_execution_context_t *ec, const rb_iseq_t *iseq, struct rb_iseq_constant_body *body)); -# else -static inline VALUE mjit_exec_slowpath(rb_execution_context_t *ec, const rb_iseq_t *iseq, struct rb_iseq_constant_body *body); -# endif -static VALUE -mjit_exec_slowpath(rb_execution_context_t *ec, const rb_iseq_t *iseq, struct rb_iseq_constant_body *body) -{ - uintptr_t func_i = (uintptr_t)(body->jit_func); - ASSUME(func_i <= LAST_JIT_ISEQ_FUNC); - switch ((enum rb_mjit_iseq_func)func_i) { - case NOT_ADDED_JIT_ISEQ_FUNC: - RB_DEBUG_COUNTER_INC(mjit_exec_not_added); - if (body->total_calls == mjit_opts.min_calls) { - rb_mjit_add_iseq_to_process(iseq); - if (UNLIKELY(mjit_opts.wait)) { - return rb_mjit_wait_call(ec, body); - } - } - break; - case NOT_READY_JIT_ISEQ_FUNC: - RB_DEBUG_COUNTER_INC(mjit_exec_not_ready); - break; - case NOT_COMPILED_JIT_ISEQ_FUNC: - RB_DEBUG_COUNTER_INC(mjit_exec_not_compiled); - break; - default: // to avoid warning with LAST_JIT_ISEQ_FUNC - break; - } - return Qundef; -} - -// Try to execute the current iseq in ec. Use JIT code if it is ready. -// If it is not, add ISEQ to the compilation queue and return Qundef for MJIT. 
-// YJIT compiles on the thread running the iseq. -static inline VALUE -mjit_exec(rb_execution_context_t *ec) -{ - const rb_iseq_t *iseq = ec->cfp->iseq; - struct rb_iseq_constant_body *body = ISEQ_BODY(iseq); - bool yjit_enabled = false; -#ifndef MJIT_HEADER - // Don't want to compile with YJIT or use code generated by YJIT - // when running inside code generated by MJIT. - yjit_enabled = rb_yjit_enabled_p(); -#endif - - if (mjit_call_p || yjit_enabled) { - body->total_calls++; - } - -#ifndef MJIT_HEADER - if (yjit_enabled && !mjit_call_p && body->total_calls == rb_yjit_call_threshold()) { - // If we couldn't generate any code for this iseq, then return - // Qundef so the interpreter will handle the call. - if (!rb_yjit_compile_iseq(iseq, ec)) { - return Qundef; - } - } -#endif - - if (!(mjit_call_p || yjit_enabled)) - return Qundef; - - RB_DEBUG_COUNTER_INC(mjit_exec); - - mjit_func_t func = body->jit_func; - - // YJIT tried compiling this function once before and couldn't do - // it, so return Qundef so the interpreter handles it. 
- if (yjit_enabled && func == 0) { - return Qundef; - } - - if (UNLIKELY((uintptr_t)func <= LAST_JIT_ISEQ_FUNC)) { -# ifdef MJIT_HEADER - RB_DEBUG_COUNTER_INC(mjit_frame_JT2VM); -# else - RB_DEBUG_COUNTER_INC(mjit_frame_VM2VM); -# endif - return mjit_exec_slowpath(ec, iseq, body); - } - -# ifdef MJIT_HEADER - RB_DEBUG_COUNTER_INC(mjit_frame_JT2JT); -# else - RB_DEBUG_COUNTER_INC(mjit_frame_VM2JT); -# endif - RB_DEBUG_COUNTER_INC(mjit_exec_call_func); - // Under SystemV x64 calling convention - // ec -> RDI - // cfp -> RSI - return func(ec, ec->cfp); -} - void mjit_child_after_fork(void); # ifdef MJIT_HEADER @@ -216,7 +120,7 @@ static inline struct mjit_cont *mjit_cont_new(rb_execution_context_t *ec){return static inline void mjit_cont_free(struct mjit_cont *cont){} static inline void mjit_free_iseq(const rb_iseq_t *iseq){} static inline void mjit_mark(void){} -static inline VALUE mjit_exec(rb_execution_context_t *ec) { return Qundef; /* unreachable */ } +static inline VALUE jit_exec(rb_execution_context_t *ec) { return Qundef; /* unreachable */ } static inline void mjit_child_after_fork(void){} #define mjit_enabled false diff --git a/mjit_compile.c b/mjit_compiler.c similarity index 99% rename from mjit_compile.c rename to mjit_compiler.c index 2c7996c2589398..c02397e53ce1c5 100644 --- a/mjit_compile.c +++ b/mjit_compiler.c @@ -1,6 +1,6 @@ /********************************************************************** - mjit_compile.c - MRI method JIT compiler + mjit_compiler.c - MRI method JIT compiler Copyright (C) 2017 Takashi Kokubun . @@ -21,6 +21,7 @@ #include "internal/variable.h" #include "mjit.h" #include "mjit_unit.h" +#include "yjit.h" #include "vm_core.h" #include "vm_callinfo.h" #include "vm_exec.h" @@ -369,7 +370,7 @@ mjit_compile_body(FILE *f, const rb_iseq_t *iseq, struct compile_status *status) } // Simulate `opt_pc` in setup_parameters_complex. 
Other PCs which may be passed by catch tables - // are not considered since vm_exec doesn't call mjit_exec for catch tables. + // are not considered since vm_exec doesn't call jit_exec for catch tables. if (body->param.flags.has_opt) { int i; fprintf(f, "\n"); @@ -586,9 +587,6 @@ mjit_compile(FILE *f, const rb_iseq_t *iseq, const char *funcname, int id) return false; } -#ifdef _WIN32 - fprintf(f, "__declspec(dllexport)\n"); -#endif fprintf(f, "VALUE\n%s(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp)\n{\n", funcname); bool success = mjit_compile_body(f, iseq, &status); fprintf(f, "\n} // end of %s\n", funcname); diff --git a/mjit_unit.h b/mjit_unit.h index 2e23a8d5fc60c4..8f80a070e07dc3 100644 --- a/mjit_unit.h +++ b/mjit_unit.h @@ -1,5 +1,5 @@ -#ifndef INTERNAL_MJIT_H -#define INTERNAL_MJIT_H +#ifndef MJIT_UNIT_H +#define MJIT_UNIT_H #include "ccan/list/list.h" @@ -26,4 +26,4 @@ struct rb_mjit_unit { unsigned int cc_entries_size; // ISEQ_BODY(iseq)->ci_size + ones of inlined iseqs }; -#endif /* INTERNAL_MJIT_H */ +#endif /* MJIT_UNIT_H */ diff --git a/numeric.rb b/numeric.rb index 9f2200d2a87642..c2091465f88fd5 100644 --- a/numeric.rb +++ b/numeric.rb @@ -227,6 +227,23 @@ def zero? Primitive.attr! 'inline' Primitive.cexpr! 'rb_int_zero_p(self)' end + + # call-seq: + # ceildiv(other) -> integer + # + # Returns the result of division +self+ by +other+. The result is rounded up to the nearest integer. 
+ # + # 3.ceildiv(3) # => 1 + # 4.ceildiv(3) # => 2 + # + # 4.ceildiv(-3) # => -1 + # -4.ceildiv(3) # => -1 + # -4.ceildiv(-3) # => 2 + # + # 3.ceildiv(1.2) # => 3 + def ceildiv(other) + -div(-other) + end end # call-seq: diff --git a/object.c b/object.c index 84a31a4acde3ac..358dba9c32ed4a 100644 --- a/object.c +++ b/object.c @@ -1992,13 +1992,7 @@ rb_class_new_instance_kw(int argc, const VALUE *argv, VALUE klass, int kw_splat) VALUE rb_class_new_instance(int argc, const VALUE *argv, VALUE klass) { - VALUE obj; - Check_Type(klass, T_CLASS); - - obj = rb_class_alloc(klass); - rb_obj_call_init_kw(obj, argc, argv, RB_NO_KEYWORDS); - - return obj; + return rb_class_new_instance_kw(argc, argv, klass, RB_NO_KEYWORDS); } /** diff --git a/parse.y b/parse.y index b4c3106b8cf141..e6fee29595e26e 100644 --- a/parse.y +++ b/parse.y @@ -5035,9 +5035,7 @@ ssym : tSYMBEG sym ; sym : fname - | tIVAR - | tGVAR - | tCVAR + | nonlocal_var ; dsym : tSYMBEG string_contents tSTRING_END @@ -5073,10 +5071,8 @@ nonlocal_var : tIVAR ; user_variable : tIDENTIFIER - | tIVAR - | tGVAR | tCONSTANT - | tCVAR + | nonlocal_var ; keyword_variable: keyword_nil {$$ = KWD2EID(nil, $1);} @@ -5779,8 +5775,7 @@ rbracket : opt_nl ']' rbrace : opt_nl '}' ; -trailer : /* none */ - | '\n' +trailer : opt_nl | ',' ; @@ -7201,6 +7196,10 @@ tokadd_string(struct parser_params *p, { int c; bool erred = false; +#ifdef RIPPER + const int heredoc_end = (p->heredoc_end ? 
p->heredoc_end + 1 : 0); + int top_of_line = FALSE; +#endif #define mixed_error(enc1, enc2) \ (void)(erred || (parser_mixed_error(p, enc1, enc2), erred = true)) @@ -7211,6 +7210,12 @@ tokadd_string(struct parser_params *p, if (p->heredoc_indent > 0) { parser_update_heredoc_indent(p, c); } +#ifdef RIPPER + if (top_of_line && heredoc_end == p->ruby_sourceline) { + pushback(p, c); + break; + } +#endif if (paren && c == paren) { ++*nest; @@ -7337,6 +7342,9 @@ tokadd_string(struct parser_params *p, } } tokadd(p, c); +#ifdef RIPPER + top_of_line = (c == '\n'); +#endif } terminate: if (*enc) *encp = *enc; @@ -9299,6 +9307,7 @@ parser_yylex(struct parser_params *p) case '\004': /* ^D */ case '\032': /* ^Z */ case -1: /* end of script. */ + p->eofp = 1; return 0; /* white spaces */ @@ -13611,7 +13620,7 @@ ripper_validate_object(VALUE self, VALUE x) { if (x == Qfalse) return x; if (x == Qtrue) return x; - if (x == Qnil) return x; + if (NIL_P(x)) return x; if (x == Qundef) rb_raise(rb_eArgError, "Qundef given"); if (FIXNUM_P(x)) return x; diff --git a/proc.c b/proc.c index a525562230b3c8..f9bd469618a1c8 100644 --- a/proc.c +++ b/proc.c @@ -40,7 +40,6 @@ struct METHOD { const VALUE iclass; const rb_method_entry_t * const me; /* for bound methods, `me' should be rb_callable_method_entry_t * */ - rb_method_visibility_t visibility; }; VALUE rb_cUnboundMethod; @@ -1664,7 +1663,6 @@ mnew_missing(VALUE klass, VALUE obj, ID id, VALUE mclass) me = rb_method_entry_create(id, klass, METHOD_VISI_UNDEF, def); RB_OBJ_WRITE(method, &data->me, me); - data->visibility = METHOD_ENTRY_VISI(me); return method; } @@ -1686,7 +1684,6 @@ mnew_internal(const rb_method_entry_t *me, VALUE klass, VALUE iclass, VALUE method; rb_method_visibility_t visi = METHOD_VISI_UNDEF; - again: if (UNDEFINED_METHOD_ENTRY_P(me)) { if (respond_to_missing_p(klass, obj, ID2SYM(id), scope)) { return mnew_missing(klass, obj, id, mclass); @@ -1702,19 +1699,6 @@ mnew_internal(const rb_method_entry_t *me, VALUE klass, VALUE 
iclass, rb_print_inaccessible(klass, id, visi); } } - if (me->def->type == VM_METHOD_TYPE_ZSUPER) { - if (me->defined_class) { - VALUE klass = RCLASS_SUPER(RCLASS_ORIGIN(me->defined_class)); - id = me->def->original_id; - me = (rb_method_entry_t *)rb_callable_method_entry_with_refinements(klass, id, &iclass); - } - else { - VALUE klass = RCLASS_SUPER(RCLASS_ORIGIN(me->owner)); - id = me->def->original_id; - me = rb_method_entry_without_refinements(klass, id, &iclass); - } - goto again; - } method = TypedData_Make_Struct(mclass, struct METHOD, &method_data_type, data); @@ -1722,7 +1706,6 @@ mnew_internal(const rb_method_entry_t *me, VALUE klass, VALUE iclass, RB_OBJ_WRITE(method, &data->klass, klass); RB_OBJ_WRITE(method, &data->iclass, iclass); RB_OBJ_WRITE(method, &data->me, me); - data->visibility = visi; return method; } @@ -1755,6 +1738,27 @@ mnew_unbound(VALUE klass, ID id, VALUE mclass, int scope) return mnew_from_me(me, klass, iclass, Qundef, id, mclass, scope); } +static const rb_method_entry_t* +zsuper_resolve(const rb_method_entry_t *me) +{ + const rb_method_entry_t *super_me; + while (me->def->type == VM_METHOD_TYPE_ZSUPER) { + VALUE defined_class = me->defined_class ? 
me->defined_class : me->owner; + VALUE super_class = RCLASS_SUPER(RCLASS_ORIGIN(defined_class)); + if (!super_class) { + break; + } + ID id = me->def->original_id; + VALUE iclass; + super_me = (rb_method_entry_t *)rb_callable_method_entry_with_refinements(super_class, id, &iclass); + if (!super_me) { + break; + } + me = super_me; + } + return me; +} + static inline VALUE method_entry_defined_class(const rb_method_entry_t *me) { @@ -1815,12 +1819,14 @@ method_eq(VALUE method, VALUE other) m1 = (struct METHOD *)DATA_PTR(method); m2 = (struct METHOD *)DATA_PTR(other); - klass1 = method_entry_defined_class(m1->me); - klass2 = method_entry_defined_class(m2->me); + const rb_method_entry_t *m1_me = zsuper_resolve(m1->me); + const rb_method_entry_t *m2_me = zsuper_resolve(m2->me); + + klass1 = method_entry_defined_class(m1_me); + klass2 = method_entry_defined_class(m2_me); - if (!rb_method_entry_eq(m1->me, m2->me) || + if (!rb_method_entry_eq(m1_me, m2_me) || klass1 != klass2 || - m1->visibility != m2->visibility || m1->klass != m2->klass || m1->recv != m2->recv) { return Qfalse; @@ -1874,7 +1880,6 @@ method_unbind(VALUE obj) RB_OBJ_WRITE(method, &data->klass, orig->klass); RB_OBJ_WRITE(method, &data->iclass, orig->iclass); RB_OBJ_WRITE(method, &data->me, rb_method_entry_clone(orig->me)); - data->visibility = orig->visibility; return method; } @@ -1939,7 +1944,15 @@ method_original_name(VALUE obj) * call-seq: * meth.owner -> class_or_module * - * Returns the class or module that defines the method. + * Returns the class or module on which this method is defined. + * In other words, + * + * meth.owner.instance_methods(false).include?(meth.name) # => true + * + * holds as long as the method is not removed/undefined/replaced, + * (with private_instance_methods instead of instance_methods if the method + * is private). + * * See also Method#receiver. 
* * (1..3).method(:map).owner #=> Enumerable @@ -2390,7 +2403,6 @@ method_clone(VALUE self) RB_OBJ_WRITE(clone, &data->klass, orig->klass); RB_OBJ_WRITE(clone, &data->iclass, orig->iclass); RB_OBJ_WRITE(clone, &data->me, rb_method_entry_clone(orig->me)); - data->visibility = orig->visibility; return clone; } @@ -2549,7 +2561,7 @@ rb_method_call_with_block(int argc, const VALUE *argv, VALUE method, VALUE passe */ static void -convert_umethod_to_method_components(const struct METHOD *data, VALUE recv, VALUE *methclass_out, VALUE *klass_out, VALUE *iclass_out, const rb_method_entry_t **me_out) +convert_umethod_to_method_components(const struct METHOD *data, VALUE recv, VALUE *methclass_out, VALUE *klass_out, VALUE *iclass_out, const rb_method_entry_t **me_out, const bool clone) { VALUE methclass = data->me->owner; VALUE iclass = data->me->defined_class; @@ -2571,9 +2583,19 @@ convert_umethod_to_method_components(const struct METHOD *data, VALUE recv, VALU } } - const rb_method_entry_t *me = rb_method_entry_clone(data->me); + const rb_method_entry_t *me; + if (clone) { + me = rb_method_entry_clone(data->me); + } else { + me = data->me; + } if (RB_TYPE_P(me->owner, T_MODULE)) { + if (!clone) { + // if we didn't previously clone the method entry, then we need to clone it now + // because this branch manipualtes it in rb_method_entry_complement_defined_class + me = rb_method_entry_clone(me); + } VALUE ic = rb_class_search_ancestor(klass, me->owner); if (ic) { klass = ic; @@ -2633,7 +2655,7 @@ umethod_bind(VALUE method, VALUE recv) const rb_method_entry_t *me; const struct METHOD *data; TypedData_Get_Struct(method, struct METHOD, &method_data_type, data); - convert_umethod_to_method_components(data, recv, &methclass, &klass, &iclass, &me); + convert_umethod_to_method_components(data, recv, &methclass, &klass, &iclass, &me, true); struct METHOD *bound; method = TypedData_Make_Struct(rb_cMethod, struct METHOD, &method_data_type, bound); @@ -2641,7 +2663,6 @@ 
umethod_bind(VALUE method, VALUE recv) RB_OBJ_WRITE(method, &bound->klass, klass); RB_OBJ_WRITE(method, &bound->iclass, iclass); RB_OBJ_WRITE(method, &bound->me, me); - bound->visibility = data->visibility; return method; } @@ -2676,8 +2697,8 @@ umethod_bind_call(int argc, VALUE *argv, VALUE method) else { VALUE methclass, klass, iclass; const rb_method_entry_t *me; - convert_umethod_to_method_components(data, recv, &methclass, &klass, &iclass, &me); - struct METHOD bound = { recv, klass, 0, me, METHOD_ENTRY_VISI(me) }; + convert_umethod_to_method_components(data, recv, &methclass, &klass, &iclass, &me, false); + struct METHOD bound = { recv, klass, 0, me }; return call_method_data(ec, &bound, argc, argv, passed_procval, RB_PASS_CALLED_KEYWORDS); } @@ -2948,6 +2969,14 @@ rb_method_entry_location(const rb_method_entry_t *me) return method_def_location(me->def); } +static const rb_method_definition_t * +zsuper_ref_method_def(VALUE method) +{ + const struct METHOD *data; + TypedData_Get_Struct(method, struct METHOD, &method_data_type, data); + return zsuper_resolve(data->me)->def; +} + /* * call-seq: * meth.source_location -> [String, Integer] @@ -2959,7 +2988,7 @@ rb_method_entry_location(const rb_method_entry_t *me) VALUE rb_method_location(VALUE method) { - return method_def_location(rb_method_def(method)); + return method_def_location(zsuper_ref_method_def(method)); } static const rb_method_definition_t * @@ -3047,7 +3076,7 @@ method_def_parameters(const rb_method_definition_t *def) static VALUE rb_method_parameters(VALUE method) { - return method_def_parameters(rb_method_def(method)); + return method_def_parameters(zsuper_ref_method_def(method)); } /* @@ -3110,7 +3139,7 @@ method_inspect(VALUE method) defined_class = data->me->def->body.alias.original_me->owner; } else { - defined_class = method_entry_defined_class(data->me); + defined_class = method_entry_defined_class(zsuper_resolve(data->me)); } if (RB_TYPE_P(defined_class, T_ICLASS)) { @@ -3354,51 +3383,6 @@ 
method_super_method(VALUE method) return mnew_internal(me, me->owner, iclass, data->recv, mid, rb_obj_class(method), FALSE, FALSE); } -/* - * call-seq: - * meth.public? -> true or false - * - * Returns whether the method is public. - */ - -static VALUE -method_public_p(VALUE method) -{ - const struct METHOD *data; - TypedData_Get_Struct(method, struct METHOD, &method_data_type, data); - return RBOOL(data->visibility == METHOD_VISI_PUBLIC); -} - -/* - * call-seq: - * meth.protected? -> true or false - * - * Returns whether the method is protected. - */ - -static VALUE -method_protected_p(VALUE method) -{ - const struct METHOD *data; - TypedData_Get_Struct(method, struct METHOD, &method_data_type, data); - return RBOOL(data->visibility == METHOD_VISI_PROTECTED); -} - -/* - * call-seq: - * meth.private? -> true or false - * - * Returns whether the method is private. - */ - -static VALUE -method_private_p(VALUE method) -{ - const struct METHOD *data; - TypedData_Get_Struct(method, struct METHOD, &method_data_type, data); - return RBOOL(data->visibility == METHOD_VISI_PRIVATE); -} - /* * call-seq: * local_jump_error.exit_value -> obj @@ -4339,9 +4323,6 @@ Init_Proc(void) rb_define_method(rb_cMethod, "source_location", rb_method_location, 0); rb_define_method(rb_cMethod, "parameters", rb_method_parameters, 0); rb_define_method(rb_cMethod, "super_method", method_super_method, 0); - rb_define_method(rb_cMethod, "public?", method_public_p, 0); - rb_define_method(rb_cMethod, "protected?", method_protected_p, 0); - rb_define_method(rb_cMethod, "private?", method_private_p, 0); rb_define_method(rb_mKernel, "method", rb_obj_method, 1); rb_define_method(rb_mKernel, "public_method", rb_obj_public_method, 1); rb_define_method(rb_mKernel, "singleton_method", rb_obj_singleton_method, 1); @@ -4365,9 +4346,6 @@ Init_Proc(void) rb_define_method(rb_cUnboundMethod, "source_location", rb_method_location, 0); rb_define_method(rb_cUnboundMethod, "parameters", rb_method_parameters, 0); 
rb_define_method(rb_cUnboundMethod, "super_method", method_super_method, 0); - rb_define_method(rb_cUnboundMethod, "public?", method_public_p, 0); - rb_define_method(rb_cUnboundMethod, "protected?", method_protected_p, 0); - rb_define_method(rb_cUnboundMethod, "private?", method_private_p, 0); /* Module#*_method */ rb_define_method(rb_cModule, "instance_method", rb_mod_instance_method, 1); diff --git a/process.c b/process.c index 5d16a3854a6b18..57df2dc06fbf89 100644 --- a/process.c +++ b/process.c @@ -3773,16 +3773,12 @@ rb_exec_atfork(void* arg, char *errmsg, size_t errmsg_buflen) return rb_exec_async_signal_safe(arg, errmsg, errmsg_buflen); /* hopefully async-signal-safe */ } -#if SIZEOF_INT == SIZEOF_LONG -#define proc_syswait (VALUE (*)(VALUE))rb_syswait -#else static VALUE proc_syswait(VALUE pid) { - rb_syswait((int)pid); + rb_syswait((rb_pid_t)pid); return Qnil; } -#endif static int move_fds_to_avoid_crash(int *fdp, int n, VALUE fds) @@ -4332,12 +4328,30 @@ rb_fork_ruby(int *status) return pid; } +static rb_pid_t +proc_fork_pid(void) +{ + rb_pid_t pid = rb_fork_ruby(NULL); + + if (pid == -1) { + rb_sys_fail("fork(2)"); + } + + return pid; +} + rb_pid_t rb_call_proc__fork(void) { - VALUE pid = rb_funcall(rb_mProcess, rb_intern("_fork"), 0); - - return NUM2PIDT(pid); + ID id__fork; + CONST_ID(id__fork, "_fork"); + if (rb_method_basic_definition_p(CLASS_OF(rb_mProcess), id__fork)) { + return proc_fork_pid(); + } + else { + VALUE pid = rb_funcall(rb_mProcess, id__fork, 0); + return NUM2PIDT(pid); + } } #endif @@ -4353,16 +4367,18 @@ rb_call_proc__fork(void) * This method is not for casual code but for application monitoring * libraries. You can add custom code before and after fork events * by overriding this method. + * + * Note: Process.daemon may be implemented using fork(2) BUT does not go + * through this method. + * Thus, depending on your reason to hook into this method, you + * may also want to hook into that one. 
+ * See {this issue}[https://bugs.ruby-lang.org/issues/18911] for a + * more detailed discussion of this. */ VALUE rb_proc__fork(VALUE _obj) { - rb_pid_t pid = rb_fork_ruby(NULL); - - if (pid == -1) { - rb_sys_fail("fork(2)"); - } - + rb_pid_t pid = proc_fork_pid(); return PIDT2NUM(pid); } diff --git a/ractor.c b/ractor.c index 0306736c18a5e8..0eddc165fa4b23 100644 --- a/ractor.c +++ b/ractor.c @@ -74,7 +74,9 @@ static void ractor_lock_self(rb_ractor_t *cr, const char *file, int line) { VM_ASSERT(cr == GET_RACTOR()); +#if RACTOR_CHECK_MODE > 0 VM_ASSERT(cr->sync.locked_by != cr->pub.self); +#endif ractor_lock(cr, file, line); } @@ -94,7 +96,9 @@ static void ractor_unlock_self(rb_ractor_t *cr, const char *file, int line) { VM_ASSERT(cr == GET_RACTOR()); +#if RACTOR_CHECK_MODE > 0 VM_ASSERT(cr->sync.locked_by == cr->pub.self); +#endif ractor_unlock(cr, file, line); } diff --git a/ractor.rb b/ractor.rb index 953d3ceddc197f..8e229d47005521 100644 --- a/ractor.rb +++ b/ractor.rb @@ -1,4 +1,4 @@ -# Ractor is a Actor-model abstraction for Ruby that provides thread-safe parallel execution. +# Ractor is an Actor-model abstraction for Ruby that provides thread-safe parallel execution. # # Ractor.new can make a new Ractor, and it will run in parallel. 
# diff --git a/ractor_core.h b/ractor_core.h index 412971decfc418..a065f5f809d0cf 100644 --- a/ractor_core.h +++ b/ractor_core.h @@ -5,7 +5,7 @@ #include "vm_debug.h" #ifndef RACTOR_CHECK_MODE -#define RACTOR_CHECK_MODE (0 || VM_CHECK_MODE || RUBY_DEBUG) +#define RACTOR_CHECK_MODE (VM_CHECK_MODE || RUBY_DEBUG) && (SIZEOF_UINT64_T == SIZEOF_VALUE) #endif enum rb_ractor_basket_type { diff --git a/re.c b/re.c index a633d1bb7b312d..5091f9a124de62 100644 --- a/re.c +++ b/re.c @@ -3499,13 +3499,18 @@ rb_reg_match2(VALUE re) * * With no block given, returns the MatchData object * that describes the match, if any, or +nil+ if none; - * the search begins at the given byte +offset+ in +self+: + * the search begins at the given character +offset+ in +string+: * * /abra/.match('abracadabra') # => # * /abra/.match('abracadabra', 4) # => # * /abra/.match('abracadabra', 8) # => nil * /abra/.match('abracadabra', 800) # => nil * + * string = "\u{5d0 5d1 5e8 5d0}cadabra" + * /abra/.match(string, 7) #=> # + * /abra/.match(string, 8) #=> nil + * /abra/.match(string.b, 8) #=> # + * * With a block given, calls the block if and only if a match is found; * returns the block's value: * diff --git a/ruby.c b/ruby.c index 337c218230278b..698ac25202d036 100644 --- a/ruby.c +++ b/ruby.c @@ -98,6 +98,8 @@ void rb_warning_category_update(unsigned int mask, unsigned int bits); SEP \ X(did_you_mean) \ SEP \ + X(syntax_suggest) \ + SEP \ X(rubyopt) \ SEP \ X(frozen_string_literal) \ @@ -117,7 +119,7 @@ void rb_warning_category_update(unsigned int mask, unsigned int bits); enum feature_flag_bits { EACH_FEATURES(DEFINE_FEATURE, COMMA), feature_debug_flag_first, -#if defined(MJIT_FORCE_ENABLE) || !YJIT_BUILD +#if defined(MJIT_FORCE_ENABLE) || !USE_YJIT DEFINE_FEATURE(jit) = feature_mjit, #else DEFINE_FEATURE(jit) = feature_yjit, @@ -258,7 +260,7 @@ usage(const char *name, int help, int highlight, int columns) #define M(shortopt, longopt, desc) RUBY_OPT_MESSAGE(shortopt, longopt, desc) -#if YJIT_BUILD 
+#if USE_YJIT # define PLATFORM_JIT_OPTION "--yjit" #else # define PLATFORM_JIT_OPTION "--mjit" @@ -288,7 +290,7 @@ usage(const char *name, int help, int highlight, int columns) #if USE_MJIT M("--mjit", "", "enable C compiler-based JIT compiler (experimental)"), #endif -#if YJIT_BUILD +#if USE_YJIT M("--yjit", "", "enable in-process JIT compiler (experimental)"), #endif #if USE_MMTK @@ -320,12 +322,13 @@ usage(const char *name, int help, int highlight, int columns) M("gems", "", "rubygems (only for debugging, default: "DEFAULT_RUBYGEMS_ENABLED")"), M("error_highlight", "", "error_highlight (default: "DEFAULT_RUBYGEMS_ENABLED")"), M("did_you_mean", "", "did_you_mean (default: "DEFAULT_RUBYGEMS_ENABLED")"), + M("syntax_suggest", "", "syntax_suggest (default: "DEFAULT_RUBYGEMS_ENABLED")"), M("rubyopt", "", "RUBYOPT environment variable (default: enabled)"), M("frozen-string-literal", "", "freeze all string literals (default: disabled)"), #if USE_MJIT M("mjit", "", "C compiler-based JIT compiler (default: disabled)"), #endif -#if YJIT_BUILD +#if USE_YJIT M("yjit", "", "in-process JIT compiler (default: disabled)"), #endif #if USE_MMTK @@ -339,7 +342,7 @@ usage(const char *name, int help, int highlight, int columns) #if USE_MJIT extern const struct ruby_opt_message mjit_option_messages[]; #endif -#if YJIT_BUILD +#if USE_YJIT static const struct ruby_opt_message yjit_options[] = { #if YJIT_STATS M("--yjit-stats", "", "Enable collecting YJIT statistics"), @@ -386,7 +389,7 @@ usage(const char *name, int help, int highlight, int columns) for (i = 0; mjit_option_messages[i].str; ++i) SHOW(mjit_option_messages[i]); #endif -#if YJIT_BUILD +#if USE_YJIT printf("%s""YJIT options (experimental):%s\n", sb, se); for (i = 0; i < numberof(yjit_options); ++i) SHOW(yjit_options[i]); @@ -655,7 +658,13 @@ ruby_init_loadpath(void) #if defined(LOAD_RELATIVE) || defined(__MACH__) VALUE libruby_path = runtime_libruby_path(); # if defined(__MACH__) - rb_libruby_selfpath = libruby_path; + 
VALUE selfpath = libruby_path; +# if defined(LOAD_RELATIVE) + selfpath = rb_str_dup(selfpath); +# endif + rb_obj_hide(selfpath); + OBJ_FREEZE_RAW(selfpath); + rb_libruby_selfpath = selfpath; rb_gc_register_address(&rb_libruby_selfpath); # endif #endif @@ -1067,7 +1076,7 @@ set_option_encoding_once(const char *type, VALUE *name, const char *e, long elen #define yjit_opt_match_arg(s, l, name) \ opt_match(s, l, name) && (*(s) && *(s+1) ? 1 : (rb_raise(rb_eRuntimeError, "--yjit-" name " needs an argument"), 0)) -#if YJIT_BUILD +#if USE_YJIT static bool setup_yjit_options(const char *s) { @@ -1472,7 +1481,7 @@ proc_options(long argc, char **argv, ruby_cmdline_options_t *opt, int envopt) #endif } else if (is_option_with_optarg("yjit", '-', true, false, false)) { -#if YJIT_BUILD +#if USE_YJIT FEATURE_SET(opt->features, FEATURE_BIT(yjit)); setup_yjit_options(s); #else @@ -1565,7 +1574,8 @@ void rb_call_builtin_inits(void); #if RBIMPL_HAS_ATTRIBUTE(weak) __attribute__((weak)) #endif -void Init_extra_exts(void) +void +Init_extra_exts(void) { } @@ -1582,6 +1592,9 @@ ruby_opt_init(ruby_cmdline_options_t *opt) if (opt->features.set & FEATURE_BIT(did_you_mean)) { rb_define_module("DidYouMean"); } + if (opt->features.set & FEATURE_BIT(syntax_suggest)) { + rb_define_module("SyntaxSuggest"); + } } rb_warning_category_update(opt->warn.mask, opt->warn.set); @@ -1864,7 +1877,7 @@ process_options(int argc, char **argv, ruby_cmdline_options_t *opt) rb_warning("-K is specified; it is for 1.8 compatibility and may cause odd behavior"); if (!(FEATURE_SET_BITS(opt->features) & feature_jit_mask)) { -#if YJIT_BUILD +#if USE_YJIT if (!FEATURE_USED_P(opt->features, yjit) && getenv("RUBY_YJIT_ENABLE")) { FEATURE_SET(opt->features, FEATURE_BIT(yjit)); } @@ -1880,7 +1893,7 @@ process_options(int argc, char **argv, ruby_cmdline_options_t *opt) opt->mjit.on = TRUE; /* set mjit.on for ruby_show_version() API and check to call mjit_init() */ } #endif -#if YJIT_BUILD +#if USE_YJIT if 
(FEATURE_SET_P(opt->features, yjit)) { rb_yjit_init(); } diff --git a/rubystub.c b/rubystub.c index e7f46e78a561fb..75aeca18699ed3 100644 --- a/rubystub.c +++ b/rubystub.c @@ -1,4 +1,5 @@ #include "internal.h" +#include "internal/missing.h" #if defined HAVE_DLADDR #include #endif diff --git a/spec/bundler/bundler/bundler_spec.rb b/spec/bundler/bundler/bundler_spec.rb index aeadcf9720b525..9e79bc165f15ec 100644 --- a/spec/bundler/bundler/bundler_spec.rb +++ b/spec/bundler/bundler/bundler_spec.rb @@ -167,9 +167,9 @@ allow(::Bundler::FileUtils).to receive(:remove_entry_secure).and_raise(ArgumentError) allow(File).to receive(:world_writable?).and_return(true) message = < Bundler::Source::Git.new({}), + } + end + + it "returns formatted string with exclamation mark" do + expect(dependency.to_lock).to eq(" test_gem (= 1.0.0)!") + end + end + end +end diff --git a/spec/bundler/commands/init_spec.rb b/spec/bundler/commands/init_spec.rb index 683a453c7d7577..eaf8fa170ade5d 100644 --- a/spec/bundler/commands/init_spec.rb +++ b/spec/bundler/commands/init_spec.rb @@ -42,7 +42,7 @@ context "when the dir is not writable by the current user" do let(:subdir) { "child_dir" } - it "notifies the user that it can not write to it" do + it "notifies the user that it cannot write to it" do FileUtils.mkdir bundled_app(subdir) # chmod a-w it mode = File.stat(bundled_app(subdir)).mode ^ 0o222 diff --git a/spec/bundler/commands/install_spec.rb b/spec/bundler/commands/install_spec.rb index 7bf36ee0204422..56945346e15143 100644 --- a/spec/bundler/commands/install_spec.rb +++ b/spec/bundler/commands/install_spec.rb @@ -285,7 +285,7 @@ end it "installs gems for windows" do - simulate_platform mswin + simulate_platform x86_mswin32 install_gemfile <<-G source "#{file_uri_for(gem_repo1)}" @@ -293,7 +293,7 @@ G run "require 'platform_specific' ; puts PLATFORM_SPECIFIC" - expect(out).to eq("1.0.0 MSWIN") + expect(out).to eq("1.0 x86-mswin32") end end diff --git a/spec/bundler/commands/lock_spec.rb 
b/spec/bundler/commands/lock_spec.rb index b314169a9850a7..007e53f4e2e1d5 100644 --- a/spec/bundler/commands/lock_spec.rb +++ b/spec/bundler/commands/lock_spec.rb @@ -217,7 +217,7 @@ def read_lockfile(file = "Gemfile.lock") allow(Bundler::SharedHelpers).to receive(:find_gemfile).and_return(bundled_app_gemfile) lockfile = Bundler::LockfileParser.new(read_lockfile) - expect(lockfile.platforms).to match_array([java, mingw, specific_local_platform].uniq) + expect(lockfile.platforms).to match_array([java, x86_mingw32, specific_local_platform].uniq) end it "supports adding new platforms with force_ruby_platform = true" do @@ -241,7 +241,7 @@ def read_lockfile(file = "Gemfile.lock") allow(Bundler::SharedHelpers).to receive(:find_gemfile).and_return(bundled_app_gemfile) lockfile = Bundler::LockfileParser.new(read_lockfile) - expect(lockfile.platforms).to contain_exactly(rb, linux, java, mingw) + expect(lockfile.platforms).to contain_exactly(rb, linux, java, x86_mingw32) end it "supports adding the `ruby` platform" do @@ -262,12 +262,12 @@ def read_lockfile(file = "Gemfile.lock") allow(Bundler::SharedHelpers).to receive(:find_gemfile).and_return(bundled_app_gemfile) lockfile = Bundler::LockfileParser.new(read_lockfile) - expect(lockfile.platforms).to match_array([java, mingw, specific_local_platform].uniq) + expect(lockfile.platforms).to match_array([java, x86_mingw32, specific_local_platform].uniq) bundle "lock --remove-platform java" lockfile = Bundler::LockfileParser.new(read_lockfile) - expect(lockfile.platforms).to match_array([mingw, specific_local_platform].uniq) + expect(lockfile.platforms).to match_array([x86_mingw32, specific_local_platform].uniq) end it "errors when removing all platforms" do @@ -280,7 +280,7 @@ def read_lockfile(file = "Gemfile.lock") build_repo4 do build_gem "ffi", "1.9.14" build_gem "ffi", "1.9.14" do |s| - s.platform = mingw + s.platform = x86_mingw32 end build_gem "gssapi", "0.1" @@ -312,7 +312,7 @@ def read_lockfile(file = "Gemfile.lock") 
gem "gssapi" G - simulate_platform(mingw) { bundle :lock } + simulate_platform(x86_mingw32) { bundle :lock } expect(lockfile).to eq <<~G GEM diff --git a/spec/bundler/commands/platform_spec.rb b/spec/bundler/commands/platform_spec.rb index 0b964eac8cd255..4e8e3946fe14cc 100644 --- a/spec/bundler/commands/platform_spec.rb +++ b/spec/bundler/commands/platform_spec.rb @@ -234,6 +234,29 @@ expect(out).to eq("ruby 1.0.0") end + it "handles when there is a lockfile with no requirement" do + gemfile <<-G + source "#{file_uri_for(gem_repo1)}" + G + + lockfile <<-L + GEM + remote: #{file_uri_for(gem_repo1)}/ + specs: + + PLATFORMS + ruby + + DEPENDENCIES + + BUNDLED WITH + #{Bundler::VERSION} + L + + bundle "platform --ruby" + expect(out).to eq("No ruby version specified") + end + it "handles when there is a requirement in the gemfile" do gemfile <<-G source "#{file_uri_for(gem_repo1)}" diff --git a/spec/bundler/commands/remove_spec.rb b/spec/bundler/commands/remove_spec.rb index 093130f7d5f805..d757e0be4bbbab 100644 --- a/spec/bundler/commands/remove_spec.rb +++ b/spec/bundler/commands/remove_spec.rb @@ -522,7 +522,7 @@ end end - context "when gems can not be removed from other gemfile" do + context "when gems cannot be removed from other gemfile" do it "shows error" do create_file "Gemfile-other", <<-G gem "rails"; gem "rack" @@ -574,7 +574,7 @@ end context "when gem present in gemfiles but could not be removed from one from one of them" do - it "removes gem which can be removed and shows warning for file from which it can not be removed" do + it "removes gem which can be removed and shows warning for file from which it cannot be removed" do create_file "Gemfile-other", <<-G gem "rack" G diff --git a/spec/bundler/commands/update_spec.rb b/spec/bundler/commands/update_spec.rb index 8ca537ac10da74..11ff49bf89a3da 100644 --- a/spec/bundler/commands/update_spec.rb +++ b/spec/bundler/commands/update_spec.rb @@ -301,6 +301,66 @@ expect(lockfile).to eq(previous_lockfile) end + 
it "does not downgrade direct dependencies when run with --conservative" do + build_repo4 do + build_gem "oauth2", "2.0.6" do |s| + s.add_dependency "faraday", ">= 0.17.3", "< 3.0" + end + + build_gem "oauth2", "1.4.10" do |s| + s.add_dependency "faraday", ">= 0.17.3", "< 3.0" + s.add_dependency "multi_json", "~> 1.3" + end + + build_gem "faraday", "2.5.2" + + build_gem "multi_json", "1.15.0" + + build_gem "quickbooks-ruby", "1.0.19" do |s| + s.add_dependency "oauth2", "~> 1.4" + end + + build_gem "quickbooks-ruby", "0.1.9" do |s| + s.add_dependency "oauth2" + end + end + + gemfile <<-G + source "#{file_uri_for(gem_repo4)}" + + gem "oauth2" + gem "quickbooks-ruby" + G + + lockfile <<~L + GEM + remote: #{file_uri_for(gem_repo4)}/ + specs: + faraday (2.5.2) + multi_json (1.15.0) + oauth2 (1.4.10) + faraday (>= 0.17.3, < 3.0) + multi_json (~> 1.3) + quickbooks-ruby (1.0.19) + oauth2 (~> 1.4) + + PLATFORMS + #{lockfile_platforms} + + DEPENDENCIES + oauth2 + quickbooks-ruby + + BUNDLED WITH + #{Bundler::VERSION} + L + + bundle "update --conservative --verbose" + + expect(out).not_to include("Installing quickbooks-ruby 0.1.9") + expect(out).to include("Installing quickbooks-ruby 1.0.19").and include("Installing oauth2 1.4.10") + end + it "does not downgrade indirect dependencies unnecessarily" do build_repo4 do build_gem "a" do |s| diff --git a/spec/bundler/install/gemfile/gemspec_spec.rb b/spec/bundler/install/gemfile/gemspec_spec.rb index 941f1c6db9038f..7e2e7c345a8248 100644 --- a/spec/bundler/install/gemfile/gemspec_spec.rb +++ b/spec/bundler/install/gemfile/gemspec_spec.rb @@ -436,7 +436,7 @@ simulate_new_machine simulate_platform("jruby") { bundle "install" } - simulate_platform(x64_mingw) { bundle "install" } + simulate_platform(x64_mingw32) { bundle "install" } end context "on ruby" do diff --git a/spec/bundler/install/gemfile/platform_spec.rb b/spec/bundler/install/gemfile/platform_spec.rb index a357a922723d93..62e6bda4cd9449 100644 --- 
a/spec/bundler/install/gemfile/platform_spec.rb +++ b/spec/bundler/install/gemfile/platform_spec.rb @@ -501,7 +501,7 @@ RSpec.describe "when a gem has no architecture" do it "still installs correctly" do - simulate_platform mswin + simulate_platform x86_mswin32 build_repo2 do # The rcov gem is platform mswin32, but has no arch diff --git a/spec/bundler/install/gemfile/specific_platform_spec.rb b/spec/bundler/install/gemfile/specific_platform_spec.rb index 48349aaef4353a..699672f357e087 100644 --- a/spec/bundler/install/gemfile/specific_platform_spec.rb +++ b/spec/bundler/install/gemfile/specific_platform_spec.rb @@ -227,9 +227,9 @@ it "adds the foreign platform" do setup_multiplatform_gem install_gemfile(google_protobuf) - bundle "lock --add-platform=#{x64_mingw}" + bundle "lock --add-platform=#{x64_mingw32}" - expect(the_bundle.locked_gems.platforms).to eq([x64_mingw, pl("x86_64-darwin-15")]) + expect(the_bundle.locked_gems.platforms).to eq([x64_mingw32, pl("x86_64-darwin-15")]) expect(the_bundle.locked_gems.specs.map(&:full_name)).to eq(%w[ google-protobuf-3.0.0.alpha.5.0.5.1-universal-darwin google-protobuf-3.0.0.alpha.5.0.5.1-x64-mingw32 @@ -374,6 +374,189 @@ ERROR end + it "automatically fixes the lockfile if RUBY platform is locked and some gem has no RUBY variant available" do + build_repo4 do + build_gem("sorbet-static-and-runtime", "0.5.10160") do |s| + s.add_runtime_dependency "sorbet", "= 0.5.10160" + s.add_runtime_dependency "sorbet-runtime", "= 0.5.10160" + end + + build_gem("sorbet", "0.5.10160") do |s| + s.add_runtime_dependency "sorbet-static", "= 0.5.10160" + end + + build_gem("sorbet-runtime", "0.5.10160") + + build_gem("sorbet-static", "0.5.10160") do |s| + s.platform = Gem::Platform.local + end + end + + gemfile <<~G + source "#{file_uri_for(gem_repo4)}" + + gem "sorbet-static-and-runtime" + G + + lockfile <<~L + GEM + remote: #{file_uri_for(gem_repo4)}/ + specs: + sorbet (0.5.10160) + sorbet-static (= 0.5.10160) + sorbet-runtime (0.5.10160) + 
sorbet-static (0.5.10160-#{Gem::Platform.local}) + sorbet-static-and-runtime (0.5.10160) + sorbet (= 0.5.10160) + sorbet-runtime (= 0.5.10160) + + PLATFORMS + #{lockfile_platforms_for([specific_local_platform, "ruby"])} + + DEPENDENCIES + sorbet-static-and-runtime + + BUNDLED WITH + #{Bundler::VERSION} + L + + bundle "update" + + expect(lockfile).to eq <<~L + GEM + remote: #{file_uri_for(gem_repo4)}/ + specs: + sorbet (0.5.10160) + sorbet-static (= 0.5.10160) + sorbet-runtime (0.5.10160) + sorbet-static (0.5.10160-#{Gem::Platform.local}) + sorbet-static-and-runtime (0.5.10160) + sorbet (= 0.5.10160) + sorbet-runtime (= 0.5.10160) + + PLATFORMS + #{lockfile_platforms} + + DEPENDENCIES + sorbet-static-and-runtime + + BUNDLED WITH + #{Bundler::VERSION} + L + end + + it "automatically fixes the lockfile if only RUBY platform is locked and some gem has no RUBY variant available" do + build_repo4 do + build_gem("sorbet-static-and-runtime", "0.5.10160") do |s| + s.add_runtime_dependency "sorbet", "= 0.5.10160" + s.add_runtime_dependency "sorbet-runtime", "= 0.5.10160" + end + + build_gem("sorbet", "0.5.10160") do |s| + s.add_runtime_dependency "sorbet-static", "= 0.5.10160" + end + + build_gem("sorbet-runtime", "0.5.10160") + + build_gem("sorbet-static", "0.5.10160") do |s| + s.platform = Gem::Platform.local + end + end + + gemfile <<~G + source "#{file_uri_for(gem_repo4)}" + + gem "sorbet-static-and-runtime" + G + + lockfile <<~L + GEM + remote: #{file_uri_for(gem_repo4)}/ + specs: + sorbet (0.5.10160) + sorbet-static (= 0.5.10160) + sorbet-runtime (0.5.10160) + sorbet-static (0.5.10160-#{Gem::Platform.local}) + sorbet-static-and-runtime (0.5.10160) + sorbet (= 0.5.10160) + sorbet-runtime (= 0.5.10160) + + PLATFORMS + ruby + + DEPENDENCIES + sorbet-static-and-runtime + + BUNDLED WITH + #{Bundler::VERSION} + L + + bundle "update" + + expect(lockfile).to eq <<~L + GEM + remote: #{file_uri_for(gem_repo4)}/ + specs: + sorbet (0.5.10160) + sorbet-static (= 0.5.10160) + 
sorbet-runtime (0.5.10160) + sorbet-static (0.5.10160-#{Gem::Platform.local}) + sorbet-static-and-runtime (0.5.10160) + sorbet (= 0.5.10160) + sorbet-runtime (= 0.5.10160) + + PLATFORMS + #{lockfile_platforms} + + DEPENDENCIES + sorbet-static-and-runtime + + BUNDLED WITH + #{Bundler::VERSION} + L + end + + it "does not remove ruby if gems for other platforms, and not present in the lockfile, exist in the Gemfile" do + build_repo4 do + build_gem "nokogiri", "1.13.8" + build_gem "nokogiri", "1.13.8" do |s| + s.platform = Gem::Platform.local + end + end + + gemfile <<~G + source "#{file_uri_for(gem_repo4)}" + + gem "nokogiri" + + gem "tzinfo", "~> 1.2", platform: :#{not_local_tag} + G + + original_lockfile = <<~L + GEM + remote: #{file_uri_for(gem_repo4)}/ + specs: + nokogiri (1.13.8) + nokogiri (1.13.8-#{Gem::Platform.local}) + + PLATFORMS + #{lockfile_platforms_for([specific_local_platform, "ruby"])} + + DEPENDENCIES + nokogiri + tzinfo (~> 1.2) + + BUNDLED WITH + #{Bundler::VERSION} + L + + lockfile original_lockfile + + bundle "lock --update" + + expect(lockfile).to eq(original_lockfile) + end + it "can fallback to a source gem when platform gems are incompatible with current ruby version" do setup_multiplatform_gem_with_source_gem diff --git a/spec/bundler/install/gems/dependency_api_spec.rb b/spec/bundler/install/gems/dependency_api_spec.rb index 79317a7fad3aad..9a83e5ffad8b39 100644 --- a/spec/bundler/install/gems/dependency_api_spec.rb +++ b/spec/bundler/install/gems/dependency_api_spec.rb @@ -119,7 +119,7 @@ end it "falls back when the API errors out" do - simulate_platform mswin + simulate_platform x86_mswin32 build_repo2 do # The rcov gem is platform mswin32, but has no arch @@ -443,6 +443,22 @@ def require(*args) expect(the_bundle).to include_gems "back_deps 1.0" end + it "does not fetch all marshaled specs" do + build_repo2 do + build_gem "foo", "1.0" + build_gem "foo", "2.0" + end + + install_gemfile <<-G, :artifice => "endpoint", :env => { 
"BUNDLER_SPEC_GEM_REPO" => gem_repo2.to_s }, :verbose => true + source "#{source_uri}" + + gem "foo" + G + + expect(out).to include("foo-2.0.gemspec.rz") + expect(out).not_to include("foo-1.0.gemspec.rz") + end + it "does not refetch if the only unmet dependency is bundler" do build_repo2 do build_gem "bundler_dep" do |s| diff --git a/spec/bundler/install/gems/resolving_spec.rb b/spec/bundler/install/gems/resolving_spec.rb index 9c0d6bfe56cba7..9f4da23162671f 100644 --- a/spec/bundler/install/gems/resolving_spec.rb +++ b/spec/bundler/install/gems/resolving_spec.rb @@ -305,6 +305,77 @@ end end + context "in a transitive dependencies in a lockfile" do + before do + build_repo2 do + build_gem "rubocop", "1.28.2" do |s| + s.required_ruby_version = ">= #{current_ruby_minor}" + + s.add_dependency "rubocop-ast", ">= 1.17.0", "< 2.0" + end + + build_gem "rubocop", "1.35.0" do |s| + s.required_ruby_version = ">= #{next_ruby_minor}" + + s.add_dependency "rubocop-ast", ">= 1.20.1", "< 2.0" + end + + build_gem "rubocop-ast", "1.17.0" do |s| + s.required_ruby_version = ">= #{current_ruby_minor}" + end + + build_gem "rubocop-ast", "1.21.0" do |s| + s.required_ruby_version = ">= #{next_ruby_minor}" + end + end + + gemfile <<-G + source "http://localgemserver.test/" + gem 'rubocop' + G + + lockfile <<~L + GEM + remote: http://localgemserver.test/ + specs: + rubocop (1.35.0) + rubocop-ast (>= 1.20.1, < 2.0) + rubocop-ast (1.21.0) + + PLATFORMS + #{lockfile_platforms} + + DEPENDENCIES + parallel_tests + + BUNDLED WITH + #{Bundler::VERSION} + L + end + + it "automatically updates lockfile to use the older compatible versions" do + bundle "install --verbose", :artifice => "compact_index", :env => { "BUNDLER_SPEC_GEM_REPO" => gem_repo2.to_s } + + expect(lockfile).to eq <<~L + GEM + remote: http://localgemserver.test/ + specs: + rubocop (1.28.2) + rubocop-ast (>= 1.17.0, < 2.0) + rubocop-ast (1.17.0) + + PLATFORMS + #{lockfile_platforms} + + DEPENDENCIES + rubocop + + BUNDLED WITH + 
#{Bundler::VERSION} + L + end + end + it "gives a meaningful error on ruby version mismatches between dependencies" do build_repo4 do build_gem "requires-old-ruby" do |s| @@ -352,13 +423,13 @@ s.required_ruby_version = "> 9000" end build_gem "rack", "1.2" do |s| - s.platform = mingw + s.platform = x86_mingw32 s.required_ruby_version = "> 9000" end build_gem "rack", "1.2" end - simulate_platform mingw do + simulate_platform x86_mingw32 do install_gemfile <<-G, :artifice => "compact_index", :env => { "BUNDLER_SPEC_GEM_REPO" => gem_repo4.to_s } ruby "#{Gem.ruby_version}" source "http://localgemserver.test/" diff --git a/spec/bundler/install/yanked_spec.rb b/spec/bundler/install/yanked_spec.rb index 44fbb0bda3a5f1..09a5ba0be1fa60 100644 --- a/spec/bundler/install/yanked_spec.rb +++ b/spec/bundler/install/yanked_spec.rb @@ -43,6 +43,63 @@ end end +RSpec.context "when resolving a bundle that includes yanked gems, but unlocking an unrelated gem" do + before(:each) do + build_repo4 do + build_gem "foo", "10.0.0" + + build_gem "bar", "1.0.0" + build_gem "bar", "2.0.0" + end + + lockfile <<-L + GEM + remote: #{file_uri_for(gem_repo4)} + specs: + foo (9.0.0) + bar (1.0.0) + + PLATFORMS + #{lockfile_platforms} + + DEPENDENCIES + foo + bar + + BUNDLED WITH + #{Bundler::VERSION} + L + + gemfile <<-G + source "#{file_uri_for(gem_repo4)}" + gem "foo" + gem "bar" + G + end + + it "does not update the yanked gem" do + bundle "lock --update bar" + + expect(lockfile).to eq <<~L + GEM + remote: #{file_uri_for(gem_repo4)}/ + specs: + bar (2.0.0) + foo (9.0.0) + + PLATFORMS + #{lockfile_platforms} + + DEPENDENCIES + bar + foo + + BUNDLED WITH + #{Bundler::VERSION} + L + end +end + RSpec.context "when using gem before installing" do it "does not suggest the author has yanked the gem" do gemfile <<-G diff --git a/spec/bundler/resolver/basic_spec.rb b/spec/bundler/resolver/basic_spec.rb index 7182d1e29c6ffc..ee62dc3577aba8 100644 --- a/spec/bundler/resolver/basic_spec.rb +++ 
b/spec/bundler/resolver/basic_spec.rb @@ -233,7 +233,7 @@ it "resolves foo only to latest patch - changing dependency declared case" do # bar is locked AND a declared dependency in the Gemfile, so it will not move, and therefore # foo can only move up to 1.4.4. - @base << Bundler::LazySpecification.new("bar", "2.0.3", nil) + @base << build_spec("bar", "2.0.3").first should_conservative_resolve_and_include :patch, ["foo"], %w[foo-1.4.4 bar-2.0.3] end diff --git a/spec/bundler/resolver/platform_spec.rb b/spec/bundler/resolver/platform_spec.rb index 8eaed4220abb5f..418293365cd985 100644 --- a/spec/bundler/resolver/platform_spec.rb +++ b/spec/bundler/resolver/platform_spec.rb @@ -82,6 +82,79 @@ should_resolve_as %w[foo-1.0.0-x64-mingw32] end + describe "on a linux platform", :rubygems => ">= 3.1.0.pre.1" do + # Ruby's platform is *-linux => platform's libc is glibc, so not musl + # Ruby's platform is *-linux-musl => platform's libc is musl, so not glibc + # Gem's platform is *-linux => gem is glibc + maybe musl compatible + # Gem's platform is *-linux-musl => gem is musl compatible but not glibc + + it "favors the platform version-specific gem on a version-specifying linux platform" do + @index = build_index do + gem "foo", "1.0.0" + gem "foo", "1.0.0", "x86_64-linux" + gem "foo", "1.0.0", "x86_64-linux-musl" + end + dep "foo" + platforms "x86_64-linux-musl" + + should_resolve_as %w[foo-1.0.0-x86_64-linux-musl] + end + + it "favors the version-less gem over the version-specific gem on a gnu linux platform" do + @index = build_index do + gem "foo", "1.0.0" + gem "foo", "1.0.0", "x86_64-linux" + gem "foo", "1.0.0", "x86_64-linux-musl" + end + dep "foo" + platforms "x86_64-linux" + + should_resolve_as %w[foo-1.0.0-x86_64-linux] + end + + it "ignores the platform version-specific gem on a gnu linux platform" do + @index = build_index do + gem "foo", "1.0.0", "x86_64-linux-musl" + end + dep "foo" + platforms "x86_64-linux" + + should_not_resolve + end + + it "falls back to 
the platform version-less gem on a linux platform with a version" do + @index = build_index do + gem "foo", "1.0.0" + gem "foo", "1.0.0", "x86_64-linux" + end + dep "foo" + platforms "x86_64-linux-musl" + + should_resolve_as %w[foo-1.0.0-x86_64-linux] + end + + it "falls back to the ruby platform gem on a gnu linux platform when only a version-specifying gem is available" do + @index = build_index do + gem "foo", "1.0.0" + gem "foo", "1.0.0", "x86_64-linux-musl" + end + dep "foo" + platforms "x86_64-linux" + + should_resolve_as %w[foo-1.0.0] + end + + it "falls back to the platform version-less gem on a version-specifying linux platform and no ruby platform gem is available" do + @index = build_index do + gem "foo", "1.0.0", "x86_64-linux" + end + dep "foo" + platforms "x86_64-linux-musl" + + should_resolve_as %w[foo-1.0.0-x86_64-linux] + end + end + it "takes the latest ruby gem if the platform specific gem doesn't match the required_ruby_version" do @index = build_index do gem "foo", "1.0.0" diff --git a/spec/bundler/runtime/inline_spec.rb b/spec/bundler/runtime/inline_spec.rb index dd22c86f90457a..e3cf5020ecdd99 100644 --- a/spec/bundler/runtime/inline_spec.rb +++ b/spec/bundler/runtime/inline_spec.rb @@ -355,6 +355,20 @@ def confirm(msg, newline = nil) expect(err).to be_empty end + it "still installs if the application has `bundle package` no_install config set" do + bundle "config set --local no_install true" + + script <<-RUBY + gemfile do + source "#{file_uri_for(gem_repo1)}" + gem "rack" + end + RUBY + + expect(last_command).to be_success + expect(system_gem_path("gems/rack-1.0.0")).to exist + end + it "preserves previous BUNDLE_GEMFILE value" do ENV["BUNDLE_GEMFILE"] = "" script <<-RUBY diff --git a/spec/bundler/runtime/platform_spec.rb b/spec/bundler/runtime/platform_spec.rb index a7161c9cfea263..84c8dfcab3ec63 100644 --- a/spec/bundler/runtime/platform_spec.rb +++ b/spec/bundler/runtime/platform_spec.rb @@ -386,7 +386,7 @@ s.add_dependency 
"platform_specific" end end - simulate_windows x64_mingw do + simulate_windows x64_mingw32 do lockfile <<-L GEM remote: #{file_uri_for(gem_repo2)}/ @@ -412,4 +412,36 @@ expect(the_bundle).to include_gem "platform_specific 1.0 x64-mingw32" end end + + %w[x86-mswin32 x64-mswin64 x86-mingw32 x64-mingw32 x64-mingw-ucrt].each do |arch| + it "allows specifying platform windows on #{arch} arch" do + platform = send(arch.tr("-", "_")) + + simulate_windows platform do + lockfile <<-L + GEM + remote: #{file_uri_for(gem_repo1)}/ + specs: + platform_specific (1.0-#{platform}) + requires_platform_specific (1.0) + platform_specific + + PLATFORMS + #{platform} + + DEPENDENCIES + requires_platform_specific + L + + install_gemfile <<-G + source "#{file_uri_for(gem_repo1)}" + gem "platform_specific", :platforms => [:windows] + G + + bundle "install" + + expect(the_bundle).to include_gems "platform_specific 1.0 #{platform}" + end + end + end end diff --git a/spec/bundler/support/artifice/compact_index_api_missing.rb b/spec/bundler/support/artifice/compact_index_api_missing.rb index 6514fde01e4263..2fd8b6d2e982cb 100644 --- a/spec/bundler/support/artifice/compact_index_api_missing.rb +++ b/spec/bundler/support/artifice/compact_index_api_missing.rb @@ -6,12 +6,7 @@ class CompactIndexApiMissing < CompactIndexAPI get "/fetch/actual/gem/:id" do - warn params[:id] - if params[:id] == "rack-1.0.gemspec.rz" - halt 404 - else - File.binread("#{gem_repo2}/quick/Marshal.4.8/#{params[:id]}") - end + halt 404 end end diff --git a/spec/bundler/support/builders.rb b/spec/bundler/support/builders.rb index a4d4c9f085366b..2af11e9874bb22 100644 --- a/spec/bundler/support/builders.rb +++ b/spec/bundler/support/builders.rb @@ -110,19 +110,27 @@ def build_repo1 build_gem "platform_specific" do |s| s.platform = "x86-mswin32" - s.write "lib/platform_specific.rb", "PLATFORM_SPECIFIC = '1.0.0 MSWIN'" + s.write "lib/platform_specific.rb", "PLATFORM_SPECIFIC = '1.0 x86-mswin32'" + end + + build_gem 
"platform_specific" do |s| + s.platform = "x64-mswin64" + s.write "lib/platform_specific.rb", "PLATFORM_SPECIFIC = '1.0 x64-mswin64'" end build_gem "platform_specific" do |s| s.platform = "x86-mingw32" + s.write "lib/platform_specific.rb", "PLATFORM_SPECIFIC = '1.0 x86-mingw32'" end build_gem "platform_specific" do |s| s.platform = "x64-mingw32" + s.write "lib/platform_specific.rb", "PLATFORM_SPECIFIC = '1.0 x64-mingw32'" end build_gem "platform_specific" do |s| s.platform = "x64-mingw-ucrt" + s.write "lib/platform_specific.rb", "PLATFORM_SPECIFIC = '1.0 x64-mingw-ucrt'" end build_gem "platform_specific" do |s| diff --git a/spec/bundler/support/helpers.rb b/spec/bundler/support/helpers.rb index af6e33885348fe..f4ee93ccc0a639 100644 --- a/spec/bundler/support/helpers.rb +++ b/spec/bundler/support/helpers.rb @@ -445,7 +445,7 @@ def simulate_platform(platform) ENV["BUNDLER_SPEC_PLATFORM"] = old if block_given? end - def simulate_windows(platform = mswin) + def simulate_windows(platform = x86_mswin32) old = ENV["BUNDLER_SPEC_WINDOWS"] ENV["BUNDLER_SPEC_WINDOWS"] = "true" simulate_platform platform do diff --git a/spec/bundler/support/indexes.rb b/spec/bundler/support/indexes.rb index 55d798a90a11ff..c496679ee63562 100644 --- a/spec/bundler/support/indexes.rb +++ b/spec/bundler/support/indexes.rb @@ -33,6 +33,10 @@ def resolve(args = []) Bundler::Resolver.resolve(deps, source_requirements, *args) end + def should_not_resolve + expect { resolve }.to raise_error(Bundler::GemNotFound) + end + def should_resolve_as(specs) got = resolve got = got.map(&:full_name).sort diff --git a/spec/bundler/support/platforms.rb b/spec/bundler/support/platforms.rb index 1ad7778403f350..d3aefe004ac3d8 100644 --- a/spec/bundler/support/platforms.rb +++ b/spec/bundler/support/platforms.rb @@ -24,20 +24,32 @@ def linux Gem::Platform.new(["x86", "linux", nil]) end - def mswin + def x86_mswin32 Gem::Platform.new(["x86", "mswin32", nil]) end - def mingw + def x64_mswin64 + 
Gem::Platform.new(["x64", "mswin64", nil]) + end + + def x86_mingw32 Gem::Platform.new(["x86", "mingw32", nil]) end - def x64_mingw + def x64_mingw32 Gem::Platform.new(["x64", "mingw32", nil]) end + def x64_mingw_ucrt + Gem::Platform.new(["x64", "mingw", "ucrt"]) + end + + def windows_platforms + [x86_mswin32, x64_mswin64, x86_mingw32, x64_mingw32, x64_mingw_ucrt] + end + def all_platforms - [rb, java, linux, mswin, mingw, x64_mingw] + [rb, java, linux, windows_platforms].flatten end def local diff --git a/spec/mspec/lib/mspec/runner/actions/leakchecker.rb b/spec/mspec/lib/mspec/runner/actions/leakchecker.rb index 596b120d9f18ce..69181b71d3a4ad 100644 --- a/spec/mspec/lib/mspec/runner/actions/leakchecker.rb +++ b/spec/mspec/lib/mspec/runner/actions/leakchecker.rb @@ -173,7 +173,8 @@ def check_tempfile_leak def find_threads Thread.list.find_all {|t| - t != Thread.current && t.alive? + t != Thread.current && t.alive? && + !(t.thread_variable?(:"\0__detached_thread__") && t.thread_variable_get(:"\0__detached_thread__")) } end diff --git a/spec/ruby/CONTRIBUTING.md b/spec/ruby/CONTRIBUTING.md index 20258e5c36adbc..adfc2fb0ca4c87 100644 --- a/spec/ruby/CONTRIBUTING.md +++ b/spec/ruby/CONTRIBUTING.md @@ -175,9 +175,10 @@ end #### Guard for bug -In case there is a bug in MRI but the expected behavior is obvious. +In case there is a bug in MRI and the fix will be backported to previous versions. +If it is not backported or not likely, use `ruby_version_is` instead. First, file a bug at https://bugs.ruby-lang.org/. -It is better to use a `ruby_version_is` guard if there was a release with the fix. +The problem is `ruby_bug` would make non-MRI implementations fail this spec while MRI itself does not pass it, so it should only be used if the bug is/will be fixed and backported. 
```ruby ruby_bug '#13669', ''...'3.2' do diff --git a/spec/ruby/README.md b/spec/ruby/README.md index 55b248a6c31d39..24b4719fdda515 100644 --- a/spec/ruby/README.md +++ b/spec/ruby/README.md @@ -144,10 +144,9 @@ The file `/etc/services` is required for socket specs (package `netbase` on Debi ### Socket specs from rubysl-socket -Most specs under `library/socket` were imported from [the rubysl-socket project](https://github.com/rubysl/rubysl-socket). +Most specs under `library/socket` were imported from the rubysl-socket project (which is no longer on GitHub). The 3 copyright holders of rubysl-socket, Yorick Peterse, Chuck Remes and -Brian Shirai, [agreed to relicense those specs](https://github.com/rubysl/rubysl-socket/issues/15) -under the MIT license in ruby/spec. +Brian Shirai, agreed to relicense those specs under the MIT license in ruby/spec. ### History and RubySpec diff --git a/spec/ruby/core/array/shared/slice.rb b/spec/ruby/core/array/shared/slice.rb index 3b09fdcbc6fc2c..8fb33738b9ce6f 100644 --- a/spec/ruby/core/array/shared/slice.rb +++ b/spec/ruby/core/array/shared/slice.rb @@ -784,6 +784,102 @@ def to.to_int() -2 end a.send(@method, (...-9)).should == [] end + ruby_version_is "3.2" do + describe "can be sliced with Enumerator::ArithmeticSequence" do + it "with infinite/inverted ranges and negative steps" do + @array = [0, 1, 2, 3, 4, 5] + @array.send(@method, (2..).step(-1)).should == [2, 1, 0] + @array.send(@method, (2..).step(-2)).should == [2, 0] + @array.send(@method, (2..).step(-3)).should == [2] + @array.send(@method, (2..).step(-4)).should == [2] + + @array.send(@method, (-3..).step(-1)).should == [3, 2, 1, 0] + @array.send(@method, (-3..).step(-2)).should == [3, 1] + @array.send(@method, (-3..).step(-3)).should == [3, 0] + @array.send(@method, (-3..).step(-4)).should == [3] + @array.send(@method, (-3..).step(-5)).should == [3] + + @array.send(@method, (..0).step(-1)).should == [5, 4, 3, 2, 1, 0] + @array.send(@method, (..0).step(-2)).should == 
[5, 3, 1] + @array.send(@method, (..0).step(-3)).should == [5, 2] + @array.send(@method, (..0).step(-4)).should == [5, 1] + @array.send(@method, (..0).step(-5)).should == [5, 0] + @array.send(@method, (..0).step(-6)).should == [5] + @array.send(@method, (..0).step(-7)).should == [5] + + @array.send(@method, (...0).step(-1)).should == [5, 4, 3, 2, 1] + @array.send(@method, (...0).step(-2)).should == [5, 3, 1] + @array.send(@method, (...0).step(-3)).should == [5, 2] + @array.send(@method, (...0).step(-4)).should == [5, 1] + @array.send(@method, (...0).step(-5)).should == [5] + @array.send(@method, (...0).step(-6)).should == [5] + + @array.send(@method, (...1).step(-1)).should == [5, 4, 3, 2] + @array.send(@method, (...1).step(-2)).should == [5, 3] + @array.send(@method, (...1).step(-3)).should == [5, 2] + @array.send(@method, (...1).step(-4)).should == [5] + @array.send(@method, (...1).step(-5)).should == [5] + + @array.send(@method, (..-5).step(-1)).should == [5, 4, 3, 2, 1] + @array.send(@method, (..-5).step(-2)).should == [5, 3, 1] + @array.send(@method, (..-5).step(-3)).should == [5, 2] + @array.send(@method, (..-5).step(-4)).should == [5, 1] + @array.send(@method, (..-5).step(-5)).should == [5] + @array.send(@method, (..-5).step(-6)).should == [5] + + @array.send(@method, (...-5).step(-1)).should == [5, 4, 3, 2] + @array.send(@method, (...-5).step(-2)).should == [5, 3] + @array.send(@method, (...-5).step(-3)).should == [5, 2] + @array.send(@method, (...-5).step(-4)).should == [5] + @array.send(@method, (...-5).step(-5)).should == [5] + + @array.send(@method, (4..1).step(-1)).should == [4, 3, 2, 1] + @array.send(@method, (4..1).step(-2)).should == [4, 2] + @array.send(@method, (4..1).step(-3)).should == [4, 1] + @array.send(@method, (4..1).step(-4)).should == [4] + @array.send(@method, (4..1).step(-5)).should == [4] + + @array.send(@method, (4...1).step(-1)).should == [4, 3, 2] + @array.send(@method, (4...1).step(-2)).should == [4, 2] + @array.send(@method, 
(4...1).step(-3)).should == [4] + @array.send(@method, (4...1).step(-4)).should == [4] + + @array.send(@method, (-2..1).step(-1)).should == [4, 3, 2, 1] + @array.send(@method, (-2..1).step(-2)).should == [4, 2] + @array.send(@method, (-2..1).step(-3)).should == [4, 1] + @array.send(@method, (-2..1).step(-4)).should == [4] + @array.send(@method, (-2..1).step(-5)).should == [4] + + @array.send(@method, (-2...1).step(-1)).should == [4, 3, 2] + @array.send(@method, (-2...1).step(-2)).should == [4, 2] + @array.send(@method, (-2...1).step(-3)).should == [4] + @array.send(@method, (-2...1).step(-4)).should == [4] + + @array.send(@method, (4..-5).step(-1)).should == [4, 3, 2, 1] + @array.send(@method, (4..-5).step(-2)).should == [4, 2] + @array.send(@method, (4..-5).step(-3)).should == [4, 1] + @array.send(@method, (4..-5).step(-4)).should == [4] + @array.send(@method, (4..-5).step(-5)).should == [4] + + @array.send(@method, (4...-5).step(-1)).should == [4, 3, 2] + @array.send(@method, (4...-5).step(-2)).should == [4, 2] + @array.send(@method, (4...-5).step(-3)).should == [4] + @array.send(@method, (4...-5).step(-4)).should == [4] + + @array.send(@method, (-2..-5).step(-1)).should == [4, 3, 2, 1] + @array.send(@method, (-2..-5).step(-2)).should == [4, 2] + @array.send(@method, (-2..-5).step(-3)).should == [4, 1] + @array.send(@method, (-2..-5).step(-4)).should == [4] + @array.send(@method, (-2..-5).step(-5)).should == [4] + + @array.send(@method, (-2...-5).step(-1)).should == [4, 3, 2] + @array.send(@method, (-2...-5).step(-2)).should == [4, 2] + @array.send(@method, (-2...-5).step(-3)).should == [4] + @array.send(@method, (-2...-5).step(-4)).should == [4] + end + end + end + it "can accept nil...nil ranges" do a = [0, 1, 2, 3, 4, 5] a.send(@method, eval("(nil...nil)")).should == a diff --git a/spec/ruby/core/dir/fixtures/common.rb b/spec/ruby/core/dir/fixtures/common.rb index a8d6e69c4469d6..087f46b331e249 100644 --- a/spec/ruby/core/dir/fixtures/common.rb +++ 
b/spec/ruby/core/dir/fixtures/common.rb @@ -82,6 +82,7 @@ def self.mock_dir_files special/test{1}/file[1] special/{}/special + special/test\ +()[]{}/hello_world.erb ] platform_is_not :windows do diff --git a/spec/ruby/core/dir/glob_spec.rb b/spec/ruby/core/dir/glob_spec.rb index 43dac73eee805b..06b52b90fbcced 100644 --- a/spec/ruby/core/dir/glob_spec.rb +++ b/spec/ruby/core/dir/glob_spec.rb @@ -79,6 +79,7 @@ nested/ nested/.dotsubir/ special/ + special/test\ +()[]{}/ special/test{1}/ special/{}/ subdir_one/ @@ -130,6 +131,7 @@ ./nested/ ./nested/.dotsubir/ ./special/ + ./special/test\ +()[]{}/ ./special/test{1}/ ./special/{}/ ./subdir_one/ diff --git a/spec/ruby/core/dir/shared/chroot.rb b/spec/ruby/core/dir/shared/chroot.rb index b14a4336707601..8c0599fe3f175d 100644 --- a/spec/ruby/core/dir/shared/chroot.rb +++ b/spec/ruby/core/dir/shared/chroot.rb @@ -3,7 +3,7 @@ DirSpecs.create_mock_dirs @real_root = "../" * (File.dirname(__FILE__).count('/') - 1) - @ref_dir = File.join("/", Dir.new('/').entries.first) + @ref_dir = File.join("/", File.basename(Dir["/*"].first)) end after :all do @@ -14,10 +14,13 @@ DirSpecs.delete_mock_dirs end + # Pending until https://github.com/ruby/ruby/runs/8075149420 is fixed + compilations_ci = ENV["GITHUB_WORKFLOW"] == "Compilations" + it "can be used to change the process' root directory" do -> { Dir.send(@method, File.dirname(__FILE__)) }.should_not raise_error File.should.exist?("/#{File.basename(__FILE__)}") - end + end unless compilations_ci it "returns 0 if successful" do Dir.send(@method, '/').should == 0 @@ -31,7 +34,7 @@ Dir.send(@method, @real_root) File.should.exist?(@ref_dir) File.should_not.exist?("/#{File.basename(__FILE__)}") - end + end unless compilations_ci it "calls #to_path on non-String argument" do p = mock('path') diff --git a/spec/ruby/core/dir/shared/glob.rb b/spec/ruby/core/dir/shared/glob.rb index 60d4a8c97a6592..33b2828c276fec 100644 --- a/spec/ruby/core/dir/shared/glob.rb +++ 
b/spec/ruby/core/dir/shared/glob.rb @@ -111,6 +111,10 @@ it "matches files with backslashes in their name" do Dir.glob('special/\\\\{a,b}').should == ['special/\a'] end + + it "matches directory with special characters in their name in complex patterns" do + Dir.glob("special/test +()\\[\\]\\{\\}/hello_world{.{en},}{.{html},}{+{phone},}{.{erb},}").should == ['special/test +()[]{}/hello_world.erb'] + end end it "matches regexp special ^" do @@ -225,6 +229,7 @@ dir/ nested/ special/ + special/test\ +()[]{}/ special/test{1}/ special/{}/ subdir_one/ diff --git a/spec/ruby/core/env/shared/update.rb b/spec/ruby/core/env/shared/update.rb index 3101f9c561d8bf..7d4799955bdd52 100644 --- a/spec/ruby/core/env/shared/update.rb +++ b/spec/ruby/core/env/shared/update.rb @@ -17,10 +17,9 @@ ruby_version_is "3.2" do it "adds the multiple parameter hashes to ENV, returning ENV" do - ENV.send(@method, {"foo" => "0", "bar" => "1"}, {"baz" => "2"}).should equal(ENV) - ENV["foo"].should == "0" - ENV["bar"].should == "1" - ENV["baz"].should == "2" + ENV.send(@method, {"foo" => "multi1"}, {"bar" => "multi2"}).should equal(ENV) + ENV["foo"].should == "multi1" + ENV["bar"].should == "multi2" end end diff --git a/spec/ruby/core/io/fixtures/classes.rb b/spec/ruby/core/io/fixtures/classes.rb index 067ab59d930145..204a2a101b0033 100644 --- a/spec/ruby/core/io/fixtures/classes.rb +++ b/spec/ruby/core/io/fixtures/classes.rb @@ -7,6 +7,18 @@ module IOSpecs class SubIO < IO end + class SubIOWithRedefinedNew < IO + def self.new(...) + ScratchPad << :redefined_new_called + super + end + + def initialize(...) 
+ ScratchPad << :call_original_initialize + super + end + end + def self.collector Proc.new { |x| ScratchPad << x } end diff --git a/spec/ruby/core/io/pipe_spec.rb b/spec/ruby/core/io/pipe_spec.rb index 2f2cf06f4d32c1..aee0d9003f4e39 100644 --- a/spec/ruby/core/io/pipe_spec.rb +++ b/spec/ruby/core/io/pipe_spec.rb @@ -25,6 +25,17 @@ @r.should be_an_instance_of(IOSpecs::SubIO) @w.should be_an_instance_of(IOSpecs::SubIO) end + + it "does not use IO.new method to create pipes and allows its overriding" do + ScratchPad.record [] + + # so redefined .new is not called, but original #initialize is + @r, @w = IOSpecs::SubIOWithRedefinedNew.pipe + ScratchPad.recorded.should == [:call_original_initialize, :call_original_initialize] # called 2 times - for each pipe (r and w) + + @r.should be_an_instance_of(IOSpecs::SubIOWithRedefinedNew) + @w.should be_an_instance_of(IOSpecs::SubIOWithRedefinedNew) + end end describe "IO.pipe" do diff --git a/spec/ruby/core/io/read_spec.rb b/spec/ruby/core/io/read_spec.rb index 841e693f373c47..28cab13340def9 100644 --- a/spec/ruby/core/io/read_spec.rb +++ b/spec/ruby/core/io/read_spec.rb @@ -104,6 +104,14 @@ str = IO.read(@fname, encoding: Encoding::ISO_8859_1) str.encoding.should == Encoding::ISO_8859_1 end + + platform_is :windows do + it "reads the file in text mode" do + # 0x1A is CTRL+Z and is EOF in Windows text mode. + File.binwrite(@fname, "\x1Abbb") + IO.read(@fname).should.empty? 
+ end + end end describe "IO.read from a pipe" do diff --git a/spec/ruby/core/kernel/fixtures/warn_core_method.rb b/spec/ruby/core/kernel/fixtures/warn_core_method.rb index f5dee6b668aaed..fd825624047afa 100644 --- a/spec/ruby/core/kernel/fixtures/warn_core_method.rb +++ b/spec/ruby/core/kernel/fixtures/warn_core_method.rb @@ -1,6 +1,6 @@ raise 'should be run without RubyGems' if defined?(Gem) -def deprecated(n=1) +public def deprecated(n=1) # puts nil, caller(0), nil warn "use X instead", uplevel: n end diff --git a/spec/ruby/core/kernel/shared/sprintf.rb b/spec/ruby/core/kernel/shared/sprintf.rb index 84d472b0d1adaa..2db50bd686878a 100644 --- a/spec/ruby/core/kernel/shared/sprintf.rb +++ b/spec/ruby/core/kernel/shared/sprintf.rb @@ -289,21 +289,80 @@ def obj.to_i; 10; end @method.call("%c", "a").should == "a" end - it "raises ArgumentError if argument is a string of several characters" do + ruby_version_is ""..."3.2" do + it "raises ArgumentError if argument is a string of several characters" do + -> { + @method.call("%c", "abc") + }.should raise_error(ArgumentError, /%c requires a character/) + end + + it "raises ArgumentError if argument is an empty string" do + -> { + @method.call("%c", "") + }.should raise_error(ArgumentError, /%c requires a character/) + end + end + + ruby_version_is "3.2" do + it "displays only the first character if argument is a string of several characters" do + @method.call("%c", "abc").should == "a" + end + + it "displays no characters if argument is an empty string" do + @method.call("%c", "").should == "" + end + end + + it "raises TypeError if argument is not String or Integer and cannot be converted to them" do -> { - @method.call("%c", "abc") - }.should raise_error(ArgumentError) + @method.call("%c", []) + }.should raise_error(TypeError, /no implicit conversion of Array into Integer/) end - it "raises ArgumentError if argument is an empty string" do + it "raises TypeError if argument is nil" do -> { - @method.call("%c", "") - 
}.should raise_error(ArgumentError) + @method.call("%c", nil) + }.should raise_error(TypeError, /no implicit conversion from nil to integer/) end - it "supports Unicode characters" do - @method.call("%c", 1286).should == "Ԇ" - @method.call("%c", "ش").should == "ش" + it "tries to convert argument to String with to_str" do + obj = BasicObject.new + def obj.to_str + "a" + end + + @method.call("%c", obj).should == "a" + end + + it "tries to convert argument to Integer with to_int" do + obj = BasicObject.new + def obj.to_int + 90 + end + + @method.call("%c", obj).should == "Z" + end + + it "raises TypeError if converting to String with to_str returns non-String" do + obj = BasicObject.new + def obj.to_str + :foo + end + + -> { + @method.call("%c", obj) + }.should raise_error(TypeError, /can't convert BasicObject to String/) + end + + it "raises TypeError if converting to Integer with to_int returns non-Integer" do + obj = BasicObject.new + def obj.to_str + :foo + end + + -> { + @method.call("%c", obj) + }.should raise_error(TypeError, /can't convert BasicObject to String/) end end @@ -362,11 +421,11 @@ def obj.to_str @method.call("%4.6s", "abcdefg").should == "abcdef" end - it "formats nli with width" do + it "formats nil with width" do @method.call("%6s", nil).should == " " end - it "formats nli with precision" do + it "formats nil with precision" do @method.call("%.6s", nil).should == "" end @@ -927,4 +986,8 @@ def obj.to_str; end } end end + + it "does not raise error when passed more arguments than needed" do + sprintf("%s %d %c", "string", 2, "c", []).should == "string 2 c" + end end diff --git a/spec/ruby/core/kernel/shared/sprintf_encoding.rb b/spec/ruby/core/kernel/shared/sprintf_encoding.rb index 5ca66b9083bea3..9cedb8b662466b 100644 --- a/spec/ruby/core/kernel/shared/sprintf_encoding.rb +++ b/spec/ruby/core/kernel/shared/sprintf_encoding.rb @@ -1,3 +1,5 @@ +# Keep encoding-related specs in a separate shared example to be able to skip them in IO/File/StringIO 
specs. +# It's difficult to check result's encoding in the test after writing to a file/io buffer. describe :kernel_sprintf_encoding, shared: true do it "can produce a string with valid encoding" do string = @method.call("good day %{valid}", valid: "e") @@ -25,7 +27,7 @@ result.encoding.should equal(Encoding::UTF_8) end - it "raises Encoding::CompatibilityError if both encodings are ASCII compatible and there ano not ASCII characters" do + it "raises Encoding::CompatibilityError if both encodings are ASCII compatible and there are not ASCII characters" do string = "Ä %s".encode('windows-1252') argument = "Ђ".encode('windows-1251') @@ -33,4 +35,33 @@ @method.call(string, argument) }.should raise_error(Encoding::CompatibilityError) end + + describe "%c" do + it "supports Unicode characters" do + result = @method.call("%c", 1286) + result.should == "Ԇ" + result.bytes.should == [212, 134] + + result = @method.call("%c", "ش") + result.should == "ش" + result.bytes.should == [216, 180] + end + + it "raises error when a codepoint isn't representable in an encoding of a format string" do + format = "%c".encode("ASCII") + + -> { + @method.call(format, 1286) + }.should raise_error(RangeError, /out of char range/) + end + + it "uses the encoding of the format string to interpret codepoints" do + format = "%c".force_encoding("euc-jp") + result = @method.call(format, 9415601) + + result.encoding.should == Encoding::EUC_JP + result.should == "é".encode(Encoding::EUC_JP) + result.bytes.should == [143, 171, 177] + end + end end diff --git a/spec/ruby/core/regexp/compile_spec.rb b/spec/ruby/core/regexp/compile_spec.rb index 329cb4f753ce92..c41399cfbb3539 100644 --- a/spec/ruby/core/regexp/compile_spec.rb +++ b/spec/ruby/core/regexp/compile_spec.rb @@ -13,3 +13,7 @@ describe "Regexp.compile given a Regexp" do it_behaves_like :regexp_new_regexp, :compile end + +describe "Regexp.new given a non-String/Regexp" do + it_behaves_like :regexp_new_non_string_or_regexp, :compile +end diff 
--git a/spec/ruby/core/regexp/new_spec.rb b/spec/ruby/core/regexp/new_spec.rb index ce662b7a4f6b0b..65f612df55311f 100644 --- a/spec/ruby/core/regexp/new_spec.rb +++ b/spec/ruby/core/regexp/new_spec.rb @@ -11,17 +11,9 @@ describe "Regexp.new given a Regexp" do it_behaves_like :regexp_new_regexp, :new - it_behaves_like :regexp_new_string_binary, :compile + it_behaves_like :regexp_new_string_binary, :new end -describe "Regexp.new given an Integer" do - it "raises a TypeError" do - -> { Regexp.new(1) }.should raise_error(TypeError) - end -end - -describe "Regexp.new given a Float" do - it "raises a TypeError" do - -> { Regexp.new(1.0) }.should raise_error(TypeError) - end +describe "Regexp.new given a non-String/Regexp" do + it_behaves_like :regexp_new_non_string_or_regexp, :new end diff --git a/spec/ruby/core/regexp/shared/new.rb b/spec/ruby/core/regexp/shared/new.rb index a6d9c4811244d7..10c2d3d3909b96 100644 --- a/spec/ruby/core/regexp/shared/new.rb +++ b/spec/ruby/core/regexp/shared/new.rb @@ -24,6 +24,32 @@ class RegexpSpecsSubclassTwo < Regexp; end end end +describe :regexp_new_non_string_or_regexp, shared: true do + it "calls #to_str method for non-String/Regexp argument" do + obj = Object.new + def obj.to_str() "a" end + + Regexp.send(@method, obj).should == /a/ + end + + it "raises TypeError if there is no #to_str method for non-String/Regexp argument" do + obj = Object.new + -> { Regexp.send(@method, obj) }.should raise_error(TypeError, "no implicit conversion of Object into String") + + -> { Regexp.send(@method, 1) }.should raise_error(TypeError, "no implicit conversion of Integer into String") + -> { Regexp.send(@method, 1.0) }.should raise_error(TypeError, "no implicit conversion of Float into String") + -> { Regexp.send(@method, :symbol) }.should raise_error(TypeError, "no implicit conversion of Symbol into String") + -> { Regexp.send(@method, []) }.should raise_error(TypeError, "no implicit conversion of Array into String") + end + + it "raises 
TypeError if #to_str returns non-String value" do + obj = Object.new + def obj.to_str() [] end + + -> { Regexp.send(@method, obj) }.should raise_error(TypeError, /can't convert Object to String/) + end +end + describe :regexp_new_string, shared: true do it "uses the String argument as an unescaped literal to construct a Regexp object" do Regexp.send(@method, "^hi{2,3}fo.o$").should == /^hi{2,3}fo.o$/ @@ -97,6 +123,16 @@ class RegexpSpecsSubclassTwo < Regexp; end (r.options & Regexp::EXTENDED).should_not == 0 end + it "does not try to convert the second argument to Integer with #to_int method call" do + ScratchPad.clear + obj = Object.new + def obj.to_int() ScratchPad.record(:called) end + + Regexp.send(@method, "Hi", obj) + + ScratchPad.recorded.should == nil + end + ruby_version_is ""..."3.2" do it "treats any non-Integer, non-nil, non-false second argument as IGNORECASE" do r = Regexp.send(@method, 'Hi', Object.new) diff --git a/spec/ruby/core/string/modulo_spec.rb b/spec/ruby/core/string/modulo_spec.rb index 99c1694417de3a..bf96a8287462dc 100644 --- a/spec/ruby/core/string/modulo_spec.rb +++ b/spec/ruby/core/string/modulo_spec.rb @@ -368,8 +368,16 @@ def universal.to_f() 0.0 end ("%c" % 'A').should == "A" end - it "raises an exception for multiple character strings as argument for %c" do - -> { "%c" % 'AA' }.should raise_error(ArgumentError) + ruby_version_is ""..."3.2" do + it "raises an exception for multiple character strings as argument for %c" do + -> { "%c" % 'AA' }.should raise_error(ArgumentError) + end + end + + ruby_version_is "3.2" do + it "supports only the first character as argument for %c" do + ("%c" % 'AA').should == "A" + end end it "calls to_str on argument for %c formats" do diff --git a/spec/ruby/core/symbol/to_proc_spec.rb b/spec/ruby/core/symbol/to_proc_spec.rb index 47f2a939ab2d75..6d9c4bc622aa45 100644 --- a/spec/ruby/core/symbol/to_proc_spec.rb +++ b/spec/ruby/core/symbol/to_proc_spec.rb @@ -46,6 +46,33 @@ end end + ruby_version_is "3.2" 
do + it "only calls public methods" do + body = proc do + public def pub; @a << :pub end + protected def pro; @a << :pro end + private def pri; @a << :pri end + attr_reader :a + end + + @a = [] + singleton_class.class_eval(&body) + tap(&:pub) + proc{tap(&:pro)}.should raise_error(NoMethodError, /protected method `pro' called/) + proc{tap(&:pri)}.should raise_error(NoMethodError, /private method `pri' called/) + @a.should == [:pub] + + @a = [] + c = Class.new(&body) + o = c.new + o.instance_variable_set(:@a, []) + o.tap(&:pub) + proc{tap(&:pro)}.should raise_error(NoMethodError, /protected method `pro' called/) + proc{o.tap(&:pri)}.should raise_error(NoMethodError, /private method `pri' called/) + o.a.should == [:pub] + end + end + it "raises an ArgumentError when calling #call on the Proc without receiver" do -> { :object_id.to_proc.call diff --git a/spec/ruby/core/time/shared/local.rb b/spec/ruby/core/time/shared/local.rb index 43f331c4c1a073..2dba23dbd7ea23 100644 --- a/spec/ruby/core/time/shared/local.rb +++ b/spec/ruby/core/time/shared/local.rb @@ -6,16 +6,18 @@ end end +=begin platform_is_not :windows do describe "timezone changes" do - it "correctly adjusts the timezone change to 'CEST' on 'Europe/Amsterdam'" do + it "correctly adjusts the timezone change to 'CET' on 'Europe/Amsterdam'" do with_timezone("Europe/Amsterdam") do - Time.send(@method, 1940, 5, 16).to_a.should == - [0, 40, 1, 16, 5, 1940, 4, 137, true, "CEST"] + Time.send(@method, 1970, 5, 16).to_a.should == + [0, 0, 0, 16, 5, 1970, 6, 136, false, "CET"] end end end end +=end end describe :time_local_10_arg, shared: true do diff --git a/spec/ruby/core/unboundmethod/equal_value_spec.rb b/spec/ruby/core/unboundmethod/equal_value_spec.rb index 6242b048840d4b..b21677687ec655 100644 --- a/spec/ruby/core/unboundmethod/equal_value_spec.rb +++ b/spec/ruby/core/unboundmethod/equal_value_spec.rb @@ -98,4 +98,41 @@ def discard_1; :discard; end (@discard_1 == 
UnboundMethodSpecs::Methods.instance_method(:discard_1)).should == false end + + it "considers methods through aliasing equal" do + c = Class.new do + class << self + alias_method :n, :new + end + end + + c.method(:new).should == c.method(:n) + c.method(:n).should == Class.instance_method(:new).bind(c) + end + + # On CRuby < 3.2, the 2 specs below pass due to method/instance_method skipping zsuper methods. + # We are interested in the general pattern working, i.e. the combination of method/instance_method + # and #== exposes the wanted behavior. + it "considers methods through visibility change equal" do + c = Class.new do + class << self + private :new + end + end + + c.method(:new).should == Class.instance_method(:new).bind(c) + end + + it "considers methods through aliasing and visibility change equal" do + c = Class.new do + class << self + alias_method :n, :new + private :new + end + end + + c.method(:new).should == c.method(:n) + c.method(:n).should == Class.instance_method(:new).bind(c) + c.method(:new).should == Class.instance_method(:new).bind(c) + end end diff --git a/spec/ruby/language/block_spec.rb b/spec/ruby/language/block_spec.rb index 42652152a1bdae..d918c12beb2ec9 100644 --- a/spec/ruby/language/block_spec.rb +++ b/spec/ruby/language/block_spec.rb @@ -983,3 +983,77 @@ def a; 1; end end end end + +describe "Anonymous block forwarding" do + ruby_version_is "3.1" do + it "forwards blocks to other functions that formally declare anonymous blocks" do + eval <<-EOF + def b(&); c(&) end + def c(&); yield :non_null end + EOF + + b { |c| c }.should == :non_null + end + + it "requires the anonymous block parameter to be declared if directly passing a block" do + -> { eval "def a; b(&); end; def b; end" }.should raise_error(SyntaxError) + end + + it "works when it's the only declared parameter" do + eval <<-EOF + def inner; yield end + def block_only(&); inner(&) end + EOF + + block_only { 1 }.should == 1 + end + + it "works alongside positional parameters" 
do + eval <<-EOF + def inner; yield end + def pos(arg1, &); inner(&) end + EOF + + pos(:a) { 1 }.should == 1 + end + + it "works alongside positional arguments and splatted keyword arguments" do + eval <<-EOF + def inner; yield end + def pos_kwrest(arg1, **kw, &); inner(&) end + EOF + + pos_kwrest(:a, arg: 3) { 1 }.should == 1 + end + + it "works alongside positional arguments and disallowed keyword arguments" do + eval <<-EOF + def inner; yield end + def no_kw(arg1, **nil, &); inner(&) end + EOF + + no_kw(:a) { 1 }.should == 1 + end + end + + ruby_version_is "3.2" do + it "works alongside explicit keyword arguments" do + eval <<-EOF + def inner; yield end + def rest_kw(*a, kwarg: 1, &); inner(&) end + def kw(kwarg: 1, &); inner(&) end + def pos_kw_kwrest(arg1, kwarg: 1, **kw, &); inner(&) end + def pos_rkw(arg1, kwarg1:, &); inner(&) end + def all(arg1, arg2, *rest, post1, post2, kw1: 1, kw2: 2, okw1:, okw2:, &); inner(&) end + def all_kwrest(arg1, arg2, *rest, post1, post2, kw1: 1, kw2: 2, okw1:, okw2:, **kw, &); inner(&) end + EOF + + rest_kw { 1 }.should == 1 + kw { 1 }.should == 1 + pos_kw_kwrest(:a) { 1 }.should == 1 + pos_rkw(:a, kwarg1: 3) { 1 }.should == 1 + all(:a, :b, :c, :d, :e, okw1: 'x', okw2: 'y') { 1 }.should == 1 + all_kwrest(:a, :b, :c, :d, :e, okw1: 'x', okw2: 'y') { 1 }.should == 1 + end + end +end diff --git a/spec/ruby/language/keyword_arguments_spec.rb b/spec/ruby/language/keyword_arguments_spec.rb index 0c72f59d383984..8771c5806c0c36 100644 --- a/spec/ruby/language/keyword_arguments_spec.rb +++ b/spec/ruby/language/keyword_arguments_spec.rb @@ -58,6 +58,24 @@ def m(*a, kw:) m(kw: 1).should == [] -> { m(kw: 1, kw2: 2) }.should raise_error(ArgumentError, 'unknown keyword: :kw2') -> { m(kw: 1, true => false) }.should raise_error(ArgumentError, 'unknown keyword: true') + -> { m(kw: 1, a: 1, b: 2, c: 3) }.should raise_error(ArgumentError, 'unknown keywords: :a, :b, :c') + end + + it "raises ArgumentError exception when required keyword argument 
is not passed" do + def m(a:, b:, c:) + [a, b, c] + end + + -> { m(a: 1, b: 2) }.should raise_error(ArgumentError, /missing keyword: :c/) + -> { m() }.should raise_error(ArgumentError, /missing keywords: :a, :b, :c/) + end + + it "raises ArgumentError for missing keyword arguments even if there are extra ones" do + def m(a:) + a + end + + -> { m(b: 1) }.should raise_error(ArgumentError, /missing keyword: :a/) end it "handle * and ** at the same call site" do diff --git a/spec/ruby/language/method_spec.rb b/spec/ruby/language/method_spec.rb index d464e79403a382..acca074974743e 100644 --- a/spec/ruby/language/method_spec.rb +++ b/spec/ruby/language/method_spec.rb @@ -571,6 +571,13 @@ def m(a:) a end end end + evaluate <<-ruby do + def m(a:, **kw) [a, kw] end + ruby + + -> { m(b: 1) }.should raise_error(ArgumentError) + end + evaluate <<-ruby do def m(a: 1) a end ruby diff --git a/spec/ruby/language/proc_spec.rb b/spec/ruby/language/proc_spec.rb index 8360967ec8b38a..f8a29962b03ece 100644 --- a/spec/ruby/language/proc_spec.rb +++ b/spec/ruby/language/proc_spec.rb @@ -237,4 +237,11 @@ end end end + + describe "taking |required keyword arguments, **kw| arguments" do + it "raises ArgumentError for missing required argument" do + p = proc { |a:, **kw| [a, kw] } + -> { p.call() }.should raise_error(ArgumentError) + end + end end diff --git a/spec/ruby/library/bigdecimal/to_r_spec.rb b/spec/ruby/library/bigdecimal/to_r_spec.rb index 91d2b33993e6b1..c350beff08c765 100644 --- a/spec/ruby/library/bigdecimal/to_r_spec.rb +++ b/spec/ruby/library/bigdecimal/to_r_spec.rb @@ -13,4 +13,16 @@ r.denominator.should eql(1000000000000000000000000) end + it "returns a Rational from a BigDecimal with an exponent" do + r = BigDecimal("1E2").to_r + r.numerator.should eql(100) + r.denominator.should eql(1) + end + + it "returns a Rational from a negative BigDecimal with an exponent" do + r = BigDecimal("-1E2").to_r + r.numerator.should eql(-100) + r.denominator.should eql(1) + end + end diff 
--git a/spec/ruby/library/datetime/to_time_spec.rb b/spec/ruby/library/datetime/to_time_spec.rb index a11b6e30e1c62c..95eca864dadc6e 100644 --- a/spec/ruby/library/datetime/to_time_spec.rb +++ b/spec/ruby/library/datetime/to_time_spec.rb @@ -7,10 +7,10 @@ end it "returns a Time representing the same instant" do - datetime = DateTime.civil(3, 12, 31, 23, 58, 59) + datetime = DateTime.civil(2012, 12, 31, 23, 58, 59) time = datetime.to_time.utc - time.year.should == 3 + time.year.should == 2012 time.month.should == 12 time.day.should == 31 time.hour.should == 23 @@ -18,6 +18,20 @@ time.sec.should == 59 end + date_version = defined?(Date::VERSION) ? Date::VERSION : '0.0.0' + version_is(date_version, '3.2.3') do + it "returns a Time representing the same instant before Gregorian" do + datetime = DateTime.civil(1582, 10, 4, 23, 58, 59) + time = datetime.to_time.utc + time.year.should == 1582 + time.month.should == 10 + time.day.should == 14 + time.hour.should == 23 + time.min.should == 58 + time.sec.should == 59 + end + end + it "preserves the same time regardless of local time or zone" do date = DateTime.new(2012, 12, 24, 12, 23, 00, '+03:00') diff --git a/spec/ruby/library/stringio/open_spec.rb b/spec/ruby/library/stringio/open_spec.rb index acab6e9056299d..3068e19435903a 100644 --- a/spec/ruby/library/stringio/open_spec.rb +++ b/spec/ruby/library/stringio/open_spec.rb @@ -167,10 +167,14 @@ io.should equal(ret) end - it "sets the mode to read-write" do + it "sets the mode to read-write (r+)" do io = StringIO.open("example") io.closed_read?.should be_false io.closed_write?.should be_false + + io = StringIO.new("example") + io.printf("%d", 123) + io.string.should == "123mple" end it "tries to convert the passed Object to a String using #to_str" do @@ -195,10 +199,14 @@ io.should equal(ret) end - it "sets the mode to read-write" do + it "sets the mode to read-write (r+)" do io = StringIO.open io.closed_read?.should be_false io.closed_write?.should be_false + + io = 
StringIO.new("example") + io.printf("%d", 123) + io.string.should == "123mple" end it "uses an empty String as the StringIO backend" do diff --git a/spec/ruby/library/stringio/printf_spec.rb b/spec/ruby/library/stringio/printf_spec.rb index 9dd1a3b4104772..f3f669a1855266 100644 --- a/spec/ruby/library/stringio/printf_spec.rb +++ b/spec/ruby/library/stringio/printf_spec.rb @@ -4,7 +4,7 @@ describe "StringIO#printf" do before :each do - @io = StringIO.new('example') + @io = StringIO.new() end it "returns nil" do @@ -12,9 +12,9 @@ end it "pads self with \\000 when the current position is after the end" do - @io.pos = 10 + @io.pos = 3 @io.printf("%d", 123) - @io.string.should == "example\000\000\000123" + @io.string.should == "\000\000\000123" end it "performs format conversion" do @@ -39,6 +39,27 @@ end end +describe "StringIO#printf when in read-write mode" do + before :each do + @io = StringIO.new("example", "r+") + end + + it "starts from the beginning" do + @io.printf("%s", "abcdefghijk") + @io.string.should == "abcdefghijk" + end + + it "does not truncate existing string" do + @io.printf("%s", "abc") + @io.string.should == "abcmple" + end + + it "correctly updates self's position" do + @io.printf("%s", "abc") + @io.pos.should eql(3) + end +end + describe "StringIO#printf when in append mode" do before :each do @io = StringIO.new("example", "a") diff --git a/spec/ruby/library/stringio/read_nonblock_spec.rb b/spec/ruby/library/stringio/read_nonblock_spec.rb index 2a8f926bd06475..d4ec56d9aadafa 100644 --- a/spec/ruby/library/stringio/read_nonblock_spec.rb +++ b/spec/ruby/library/stringio/read_nonblock_spec.rb @@ -5,10 +5,21 @@ describe "StringIO#read_nonblock when passed length, buffer" do it_behaves_like :stringio_read, :read_nonblock + + it "accepts :exception option" do + io = StringIO.new("example") + io.read_nonblock(3, buffer = "", exception: true) + buffer.should == "exa" + end end describe "StringIO#read_nonblock when passed length" do it_behaves_like 
:stringio_read_length, :read_nonblock + + it "accepts :exception option" do + io = StringIO.new("example") + io.read_nonblock(3, exception: true).should == "exa" + end end describe "StringIO#read_nonblock when passed nil" do diff --git a/spec/ruby/library/stringio/write_nonblock_spec.rb b/spec/ruby/library/stringio/write_nonblock_spec.rb index 4f4c5039fe248c..a457b976679ee6 100644 --- a/spec/ruby/library/stringio/write_nonblock_spec.rb +++ b/spec/ruby/library/stringio/write_nonblock_spec.rb @@ -8,6 +8,12 @@ describe "StringIO#write_nonblock when passed [String]" do it_behaves_like :stringio_write_string, :write_nonblock + + it "accepts :exception option" do + io = StringIO.new("12345", "a") + io.write_nonblock("67890", exception: true) + io.string.should == "1234567890" + end end describe "StringIO#write_nonblock when self is not writable" do diff --git a/spec/ruby/library/time/to_datetime_spec.rb b/spec/ruby/library/time/to_datetime_spec.rb index 0e37a61108b9b9..6025950b5941b3 100644 --- a/spec/ruby/library/time/to_datetime_spec.rb +++ b/spec/ruby/library/time/to_datetime_spec.rb @@ -3,9 +3,9 @@ describe "Time#to_datetime" do it "returns a DateTime representing the same instant" do - time = Time.utc(3, 12, 31, 23, 58, 59) + time = Time.utc(2012, 12, 31, 23, 58, 59) datetime = time.to_datetime - datetime.year.should == 3 + datetime.year.should == 2012 datetime.month.should == 12 datetime.day.should == 31 datetime.hour.should == 23 @@ -13,6 +13,20 @@ datetime.sec.should == 59 end + date_version = defined?(Date::VERSION) ? 
Date::VERSION : '0.0.0' + version_is(date_version, '3.2.3') do + it "returns a DateTime representing the same instant before Gregorian" do + time = Time.utc(1582, 10, 14, 23, 58, 59) + datetime = time.to_datetime + datetime.year.should == 1582 + datetime.month.should == 10 + datetime.day.should == 4 + datetime.hour.should == 23 + datetime.min.should == 58 + datetime.sec.should == 59 + end + end + it "roundtrips" do time = Time.utc(3, 12, 31, 23, 58, 59) datetime = time.to_datetime diff --git a/spec/ruby/library/zlib/deflate/deflate_spec.rb b/spec/ruby/library/zlib/deflate/deflate_spec.rb index 828880f8d8daf0..50a563ef6f4bb8 100644 --- a/spec/ruby/library/zlib/deflate/deflate_spec.rb +++ b/spec/ruby/library/zlib/deflate/deflate_spec.rb @@ -58,6 +58,11 @@ Array.new(31, 0) + [24, 128, 0, 0, 1]).pack('C*') end + + it "has a binary encoding" do + @deflator.deflate("").encoding.should == Encoding::BINARY + @deflator.finish.encoding.should == Encoding::BINARY + end end describe "Zlib::Deflate#deflate" do diff --git a/spec/ruby/library/zlib/inflate/inflate_spec.rb b/spec/ruby/library/zlib/inflate/inflate_spec.rb index cc33bd4c325aaf..79b72bf91c821f 100644 --- a/spec/ruby/library/zlib/inflate/inflate_spec.rb +++ b/spec/ruby/library/zlib/inflate/inflate_spec.rb @@ -39,6 +39,13 @@ @inflator.finish.should == 'uncompressed_data' end + it "has a binary encoding" do + data = [120, 156, 99, 96, 128, 1, 0, 0, 10, 0, 1].pack('C*') + unzipped = @inflator.inflate data + @inflator.finish.encoding.should == Encoding::BINARY + unzipped.encoding.should == Encoding::BINARY + end + end describe "Zlib::Inflate.inflate" do diff --git a/spec/ruby/optional/capi/encoding_spec.rb b/spec/ruby/optional/capi/encoding_spec.rb index ae557b03d76a0a..aa632b963b8086 100644 --- a/spec/ruby/optional/capi/encoding_spec.rb +++ b/spec/ruby/optional/capi/encoding_spec.rb @@ -63,6 +63,48 @@ end end + describe "rb_enc_strlen" do + before :each do + @str = 'こにちわ' # Each codepoint in this string is 3 bytes in 
UTF-8 + end + + it "returns the correct string length for the encoding" do + @s.rb_enc_strlen(@str, @str.bytesize, Encoding::UTF_8).should == 4 + @s.rb_enc_strlen(@str, @str.bytesize, Encoding::BINARY).should == 12 + end + + it "returns the string length based on a fixed-width encoding's character length, even if the encoding is incompatible" do + @s.rb_enc_strlen(@str, @str.bytesize, Encoding::UTF_16BE).should == 6 + @s.rb_enc_strlen(@str, @str.bytesize, Encoding::UTF_16LE).should == 6 + @s.rb_enc_strlen(@str, @str.bytesize, Encoding::UTF_32BE).should == 3 + @s.rb_enc_strlen(@str, @str.bytesize, Encoding::UTF_32LE).should == 3 + end + + it "does not consider strings to be NUL-terminated" do + s = "abc\0def" + @s.rb_enc_strlen(s, s.bytesize, Encoding::US_ASCII).should == 7 + @s.rb_enc_strlen(s, s.bytesize, Encoding::UTF_8).should == 7 + end + + describe "handles broken strings" do + it "combines valid character and invalid character counts in UTF-8" do + # The result is 3 because `rb_enc_strlen` counts the first valid character and then adds + # the byte count for the invalid character that follows for 1 + 2. 
+ @s.rb_enc_strlen(@str, 5, Encoding::UTF_8).should == 3 + end + + it "combines valid character and invalid character counts in UTF-16" do + @s.rb_enc_strlen(@str, 5, Encoding::UTF_16BE).should == 3 + end + + it "rounds up for fixed-width encodings" do + @s.rb_enc_strlen(@str, 7, Encoding::UTF_32BE).should == 2 + @s.rb_enc_strlen(@str, 7, Encoding::UTF_32LE).should == 2 + @s.rb_enc_strlen(@str, 5, Encoding::BINARY).should == 5 + end + end + end + describe "rb_enc_find" do it "returns the encoding of an Encoding" do @s.rb_enc_find("UTF-8").should == "UTF-8" diff --git a/spec/ruby/optional/capi/ext/encoding_spec.c b/spec/ruby/optional/capi/ext/encoding_spec.c index c49f6cde7e6e00..865fc484be2f19 100644 --- a/spec/ruby/optional/capi/ext/encoding_spec.c +++ b/spec/ruby/optional/capi/ext/encoding_spec.c @@ -301,6 +301,14 @@ static VALUE encoding_spec_rb_enc_codelen(VALUE self, VALUE code, VALUE encoding return INT2FIX(rb_enc_codelen(c, enc)); } +static VALUE encoding_spec_rb_enc_strlen(VALUE self, VALUE str, VALUE length, VALUE encoding) { + int l = FIX2INT(length); + char *p = RSTRING_PTR(str); + char *e = p + l; + + return LONG2FIX(rb_enc_strlen(p, e, rb_to_encoding(encoding))); +} + void Init_encoding_spec(void) { VALUE cls; native_rb_encoding_pointer = (rb_encoding**) malloc(sizeof(rb_encoding*)); @@ -335,6 +343,7 @@ void Init_encoding_spec(void) { rb_define_method(cls, "rb_enc_compatible", encoding_spec_rb_enc_compatible, 2); rb_define_method(cls, "rb_enc_copy", encoding_spec_rb_enc_copy, 2); rb_define_method(cls, "rb_enc_codelen", encoding_spec_rb_enc_codelen, 2); + rb_define_method(cls, "rb_enc_strlen", encoding_spec_rb_enc_strlen, 3); rb_define_method(cls, "rb_enc_find", encoding_spec_rb_enc_find, 1); rb_define_method(cls, "rb_enc_find_index", encoding_spec_rb_enc_find_index, 1); rb_define_method(cls, "rb_enc_isalnum", encoding_spec_rb_enc_isalnum, 2); diff --git a/spec/ruby/optional/capi/ext/string_spec.c b/spec/ruby/optional/capi/ext/string_spec.c index 
b9a4a55853d419..9cbb50484df1d9 100644 --- a/spec/ruby/optional/capi/ext/string_spec.c +++ b/spec/ruby/optional/capi/ext/string_spec.c @@ -437,6 +437,12 @@ VALUE string_spec_RSTRING_PTR_read(VALUE self, VALUE str, VALUE path) { return capacities; } +VALUE string_spec_RSTRING_PTR_null_terminate(VALUE self, VALUE str, VALUE min_length) { + char* ptr = RSTRING_PTR(str); + char* end = ptr + RSTRING_LEN(str); + return rb_str_new(end, FIX2LONG(min_length)); +} + VALUE string_spec_StringValue(VALUE self, VALUE str) { return StringValue(str); } @@ -662,6 +668,7 @@ void Init_string_spec(void) { rb_define_method(cls, "RSTRING_PTR_after_funcall", string_spec_RSTRING_PTR_after_funcall, 2); rb_define_method(cls, "RSTRING_PTR_after_yield", string_spec_RSTRING_PTR_after_yield, 1); rb_define_method(cls, "RSTRING_PTR_read", string_spec_RSTRING_PTR_read, 2); + rb_define_method(cls, "RSTRING_PTR_null_terminate", string_spec_RSTRING_PTR_null_terminate, 2); rb_define_method(cls, "StringValue", string_spec_StringValue, 1); rb_define_method(cls, "SafeStringValue", string_spec_SafeStringValue, 1); rb_define_method(cls, "rb_str_hash", string_spec_rb_str_hash, 1); diff --git a/spec/ruby/optional/capi/ext/util_spec.c b/spec/ruby/optional/capi/ext/util_spec.c index a7269353c2901b..95ba71ea9dc11f 100644 --- a/spec/ruby/optional/capi/ext/util_spec.c +++ b/spec/ruby/optional/capi/ext/util_spec.c @@ -7,15 +7,18 @@ extern "C" { #endif VALUE util_spec_rb_scan_args(VALUE self, VALUE argv, VALUE fmt, VALUE expected, VALUE acc) { - int i, result, argc = (int)RARRAY_LEN(argv); - VALUE args[6], failed, a1, a2, a3, a4, a5, a6; - - failed = rb_intern("failed"); - a1 = a2 = a3 = a4 = a5 = a6 = failed; - - for(i = 0; i < argc; i++) { - args[i] = rb_ary_entry(argv, i); - } + int result, argc; + VALUE a1, a2, a3, a4, a5, a6; + + argc = (int) RARRAY_LEN(argv); + VALUE* args = RARRAY_PTR(argv); + /* the line above can be replaced with this for Ruby implementations which do not support RARRAY_PTR() yet + VALUE 
args[6]; + for(int i = 0; i < argc; i++) { + args[i] = rb_ary_entry(argv, i); + } */ + + a1 = a2 = a3 = a4 = a5 = a6 = INT2FIX(-1); #ifdef RB_SCAN_ARGS_KEYWORDS if (*RSTRING_PTR(fmt) == 'k') { diff --git a/spec/ruby/optional/capi/string_spec.rb b/spec/ruby/optional/capi/string_spec.rb index 7ad4d10ee4b1ec..0558fc9f7ddd41 100644 --- a/spec/ruby/optional/capi/string_spec.rb +++ b/spec/ruby/optional/capi/string_spec.rb @@ -97,6 +97,32 @@ def inspect end end + describe "rb_str_set_len on a UTF-16 String" do + before :each do + @str = "abcdefghij".force_encoding(Encoding::UTF_16BE) + # Make sure to unshare the string + @s.rb_str_modify(@str) + end + + it "inserts two NULL bytes at the length" do + @s.rb_str_set_len(@str, 4).b.should == "abcd".b + @s.rb_str_set_len(@str, 8).b.should == "abcd\x00\x00gh".b + end + end + + describe "rb_str_set_len on a UTF-32 String" do + before :each do + @str = "abcdefghijkl".force_encoding(Encoding::UTF_32BE) + # Make sure to unshare the string + @s.rb_str_modify(@str) + end + + it "inserts four NULL bytes at the length" do + @s.rb_str_set_len(@str, 4).b.should == "abcd".b + @s.rb_str_set_len(@str, 12).b.should == "abcd\x00\x00\x00\x00ijkl".b + end + end + describe "rb_str_buf_new" do it "returns the equivalent of an empty string" do buf = @s.rb_str_buf_new(10, nil) @@ -592,6 +618,12 @@ def inspect capacities[0].should < capacities[1] str.should == "fixture file contents to test read() with RSTRING_PTR" end + + it "terminates the string with at least (encoding min length) \\0 bytes" do + @s.RSTRING_PTR_null_terminate("abc", 1).should == "\x00" + @s.RSTRING_PTR_null_terminate("abc".encode("UTF-16BE"), 2).should == "\x00\x00" + @s.RSTRING_PTR_null_terminate("abc".encode("UTF-32BE"), 4).should == "\x00\x00\x00\x00" + end end describe "RSTRING_LEN" do diff --git a/spec/ruby/optional/capi/util_spec.rb b/spec/ruby/optional/capi/util_spec.rb index 64b08940875336..38f6f47b1a7e72 100644 --- a/spec/ruby/optional/capi/util_spec.rb +++ 
b/spec/ruby/optional/capi/util_spec.rb @@ -15,8 +15,9 @@ end it "assigns the required arguments scanned" do - @o.rb_scan_args([1, 2], "2", 2, @acc).should == 2 - ScratchPad.recorded.should == [1, 2] + obj = Object.new + @o.rb_scan_args([obj, 2], "2", 2, @acc).should == 2 + ScratchPad.recorded.should == [obj, 2] end it "raises an ArgumentError if there are insufficient arguments" do diff --git a/spec/ruby/security/cve_2019_8325_spec.rb b/spec/ruby/security/cve_2019_8325_spec.rb index 04692e01fec2ba..bbddb3a6cec304 100644 --- a/spec/ruby/security/cve_2019_8325_spec.rb +++ b/spec/ruby/security/cve_2019_8325_spec.rb @@ -5,8 +5,17 @@ describe "CVE-2019-8325 is resisted by" do describe "sanitising error message components" do + before :each do + @ui = Gem::SilentUI.new + end + + after :each do + @ui.close + end + it "for the 'while executing' message" do manager = Gem::CommandManager.new + manager.ui = @ui def manager.process_args(args, build_args) raise StandardError, "\e]2;nyan\a" end @@ -26,6 +35,7 @@ def manager.terminate_interaction(n) it "for the 'loading command' message" do manager = Gem::CommandManager.new + manager.ui = @ui def manager.require(x) raise 'foo' end diff --git a/spec/ruby/shared/sizedqueue/enque.rb b/spec/ruby/shared/sizedqueue/enque.rb index 6ef12349f899d3..126470594a43c4 100644 --- a/spec/ruby/shared/sizedqueue/enque.rb +++ b/spec/ruby/shared/sizedqueue/enque.rb @@ -47,4 +47,61 @@ t.join q.pop.should == 1 end + + describe "with a timeout" do + ruby_version_is "3.2" do + it "returns self if the item was pushed in time" do + q = @object.call(1) + q << 1 + + t = Thread.new { + q.send(@method, 2, timeout: 1).should == q + } + Thread.pass until t.status == "sleep" && q.num_waiting == 1 + q.pop + t.join + end + + it "does nothing if the timeout is nil" do + q = @object.call(1) + q << 1 + t = Thread.new { + q.send(@method, 2, timeout: nil).should == q + } + t.join(0.2).should == nil + q.pop + t.join + end + + it "returns nil if no item is available in 
time" do + q = @object.call(1) + q << 1 + t = Thread.new { + q.send(@method, 2, timeout: 0.1).should == nil + } + t.join + end + + it "raise TypeError if timeout is not a valid numeric" do + q = @object.call(1) + -> { q.send(@method, 2, timeout: "1") }.should raise_error( + TypeError, + "no implicit conversion to float from string", + ) + + -> { q.send(@method, 2, timeout: false) }.should raise_error( + TypeError, + "no implicit conversion to float from false", + ) + end + + it "raise ArgumentError if non_block = true is passed too" do + q = @object.call(1) + -> { q.send(@method, 2, true, timeout: 1) }.should raise_error( + ArgumentError, + "can't set a timeout if non_block is enabled", + ) + end + end + end end diff --git a/spec/ruby/shared/sizedqueue/new.rb b/spec/ruby/shared/sizedqueue/new.rb index 713785fb50da44..2573194efb21ae 100644 --- a/spec/ruby/shared/sizedqueue/new.rb +++ b/spec/ruby/shared/sizedqueue/new.rb @@ -1,7 +1,12 @@ describe :sizedqueue_new, shared: true do - it "raises a TypeError when the given argument is not Numeric" do - -> { @object.call("foo") }.should raise_error(TypeError) + it "raises a TypeError when the given argument doesn't respond to #to_int" do + -> { @object.call("12") }.should raise_error(TypeError) -> { @object.call(Object.new) }.should raise_error(TypeError) + + @object.call(12.9).max.should == 12 + object = Object.new + object.define_singleton_method(:to_int) { 42 } + @object.call(object).max.should == 42 end it "raises an argument error when no argument is given" do diff --git a/spec/syntax_suggest/fixtures/derailed_require_tree.rb.txt b/spec/syntax_suggest/fixtures/derailed_require_tree.rb.txt new file mode 100644 index 00000000000000..668ac4010ba2f8 --- /dev/null +++ b/spec/syntax_suggest/fixtures/derailed_require_tree.rb.txt @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +# Tree structure used to store and sort require memory costs +# RequireTree.new('get_process_mem') +module DerailedBenchmarks + class RequireTree + 
REQUIRED_BY = {} + + attr_reader :name + attr_writer :cost + attr_accessor :parent + + def initialize(name) + @name = name + @children = {} + @cost = 0 + + def self.reset! + REQUIRED_BY.clear + if defined?(Kernel::REQUIRE_STACK) + Kernel::REQUIRE_STACK.clear + + Kernel::REQUIRE_STACK.push(TOP_REQUIRE) + end + end + + def <<(tree) + @children[tree.name.to_s] = tree + tree.parent = self + (REQUIRED_BY[tree.name.to_s] ||= []) << self.name + end + + def [](name) + @children[name.to_s] + end + + # Returns array of child nodes + def children + @children.values + end + + def cost + @cost || 0 + end + + # Returns sorted array of child nodes from Largest to Smallest + def sorted_children + children.sort { |c1, c2| c2.cost <=> c1.cost } + end + + def to_string + str = String.new("#{name}: #{cost.round(4)} MiB") + if parent && REQUIRED_BY[self.name.to_s] + names = REQUIRED_BY[self.name.to_s].uniq - [parent.name.to_s] + if names.any? + str << " (Also required by: #{ names.first(2).join(", ") }" + str << ", and #{names.count - 2} others" if names.count > 3 + str << ")" + end + end + str + end + + # Recursively prints all child nodes + def print_sorted_children(level = 0, out = STDOUT) + return if cost < ENV['CUT_OFF'].to_f + out.puts " " * level + self.to_string + level += 1 + sorted_children.each do |child| + child.print_sorted_children(level, out) + end + end + end +end diff --git a/spec/syntax_suggest/fixtures/rexe.rb.txt b/spec/syntax_suggest/fixtures/rexe.rb.txt new file mode 100755 index 00000000000000..92e44d4d1ea4bf --- /dev/null +++ b/spec/syntax_suggest/fixtures/rexe.rb.txt @@ -0,0 +1,569 @@ +#!/usr/bin/env ruby +# +# rexe - Ruby Command Line Executor Filter +# +# Inspired by https://github.com/thisredone/rb + +# frozen_string_literal: true + + +require 'bundler' +require 'date' +require 'optparse' +require 'ostruct' +require 'shellwords' + +class Rexe + + VERSION = '1.5.1' + + PROJECT_URL = 'https://github.com/keithrbennett/rexe' + + + module Helpers + + # Try 
executing code. If error raised, print message (but not stack trace) & exit -1. + def try + begin + yield + rescue Exception => e + unless e.class == SystemExit + $stderr.puts("rexe: #{e}") + $stderr.puts("Use the -h option to get help.") + exit(-1) + end + end + end + end + + + class Options < Struct.new( + :input_filespec, + :input_format, + :input_mode, + :loads, + :output_format, + :output_format_tty, + :output_format_block, + :requires, + :log_format, + :noop) + + + def initialize + super + clear + end + + + def clear + self.input_filespec = nil + self.input_format = :none + self.input_mode = :none + self.output_format = :none + self.output_format_tty = :none + self.output_format_block = :none + self.loads = [] + self.requires = [] + self.log_format = :none + self.noop = false + end + end + + + + + + class Lookups + def input_modes + @input_modes ||= { + 'l' => :line, + 'e' => :enumerator, + 'b' => :one_big_string, + 'n' => :none + } + end + + + def input_formats + @input_formats ||= { + 'j' => :json, + 'm' => :marshal, + 'n' => :none, + 'y' => :yaml, + } + end + + + def input_parsers + @input_parsers ||= { + json: ->(string) { JSON.parse(string) }, + marshal: ->(string) { Marshal.load(string) }, + none: ->(string) { string }, + yaml: ->(string) { YAML.load(string) }, + } + end + + + def output_formats + @output_formats ||= { + 'a' => :amazing_print, + 'i' => :inspect, + 'j' => :json, + 'J' => :pretty_json, + 'm' => :marshal, + 'n' => :none, + 'p' => :puts, # default + 'P' => :pretty_print, + 's' => :to_s, + 'y' => :yaml, + } + end + + + def formatters + @formatters ||= { + amazing_print: ->(obj) { obj.ai + "\n" }, + inspect: ->(obj) { obj.inspect + "\n" }, + json: ->(obj) { obj.to_json }, + marshal: ->(obj) { Marshal.dump(obj) }, + none: ->(_obj) { nil }, + pretty_json: ->(obj) { JSON.pretty_generate(obj) }, + pretty_print: ->(obj) { obj.pretty_inspect }, + puts: ->(obj) { require 'stringio'; sio = StringIO.new; sio.puts(obj); sio.string }, + to_s: ->(obj) { 
obj.to_s + "\n" }, + yaml: ->(obj) { obj.to_yaml }, + } + end + + + def format_requires + @format_requires ||= { + json: 'json', + pretty_json: 'json', + amazing_print: 'amazing_print', + pretty_print: 'pp', + yaml: 'yaml' + } + end + end + + + + class CommandLineParser + + include Helpers + + attr_reader :lookups, :options + + def initialize + @lookups = Lookups.new + @options = Options.new + end + + + # Inserts contents of REXE_OPTIONS environment variable at the beginning of ARGV. + private def prepend_environment_options + env_opt_string = ENV['REXE_OPTIONS'] + if env_opt_string + args_to_prepend = Shellwords.shellsplit(env_opt_string) + ARGV.unshift(args_to_prepend).flatten! + end + end + + + private def add_format_requires_to_requires_list + formats = [options.input_format, options.output_format, options.log_format] + requires = formats.map { |format| lookups.format_requires[format] }.uniq.compact + requires.each { |r| options.requires << r } + end + + + private def help_text + unless @help_text + @help_text ||= <<~HEREDOC + + rexe -- Ruby Command Line Executor/Filter -- v#{VERSION} -- #{PROJECT_URL} + + Executes Ruby code on the command line, + optionally automating management of standard input and standard output, + and optionally parsing input and formatting output with YAML, JSON, etc. + + rexe [options] [Ruby source code] + + Options: + + -c --clear_options Clear all previous command line options specified up to now + -f --input_file Use this file instead of stdin for preprocessed input; + if filespec has a YAML and JSON file extension, + sets input format accordingly and sets input mode to -mb + -g --log_format FORMAT Log format, logs to stderr, defaults to -gn (none) + (see -o for format options) + -h, --help Print help and exit + -i, --input_format FORMAT Input format, defaults to -in (None) + -ij JSON + -im Marshal + -in None (default) + -iy YAML + -l, --load RUBY_FILE(S) Ruby file(s) to load, comma separated; + ! 
to clear all, or precede a name with '-' to remove + -m, --input_mode MODE Input preprocessing mode (determines what `self` will be) + defaults to -mn (none) + -ml line; each line is ingested as a separate string + -me enumerator (each_line on STDIN or File) + -mb big string; all lines combined into one string + -mn none (default); no input preprocessing; + self is an Object.new + -n, --[no-]noop Do not execute the code (useful with -g); + For true: yes, true, y, +; for false: no, false, n + -o, --output_format FORMAT Output format, defaults to -on (no output): + -oa Amazing Print + -oi Inspect + -oj JSON + -oJ Pretty JSON + -om Marshal + -on No Output (default) + -op Puts + -oP Pretty Print + -os to_s + -oy YAML + If 2 letters are provided, 1st is for tty devices, 2nd for block + --project-url Outputs project URL on Github, then exits + -r, --require REQUIRE(S) Gems and built-in libraries to require, comma separated; + ! to clear all, or precede a name with '-' to remove + -v, --version Prints version and exits + + --------------------------------------------------------------------------------------- + + In many cases you will need to enclose your source code in single or double quotes. + + If source code is not specified, it will default to 'self', + which is most likely useful only in a filter mode (-ml, -me, -mb). + + If there is a .rexerc file in your home directory, it will be run as Ruby code + before processing the input. + + If there is a REXE_OPTIONS environment variable, its content will be prepended + to the command line so that you can specify options implicitly + (e.g. `export REXE_OPTIONS="-r amazing_print,yaml"`) + + HEREDOC + + @help_text.freeze + end + + @help_text + end + + + # File file input mode; detects the input mode (JSON, YAML, or None) from the extension. 
+ private def autodetect_file_format(filespec) + extension = File.extname(filespec).downcase + if extension == '.json' + :json + elsif extension == '.yml' || extension == '.yaml' + :yaml + else + :none + end + end + + + private def open_resource(resource_identifier) + command = case (`uname`.chomp) + when 'Darwin' + 'open' + when 'Linux' + 'xdg-open' + else + 'start' + end + + `#{command} #{resource_identifier}` + end + + + # Using 'optparse', parses the command line. + # Settings go into this instance's properties (see Struct declaration). + def parse + + prepend_environment_options + + OptionParser.new do |parser| + + parser.on('-c', '--clear_options', "Clear all previous command line options") do |v| + options.clear + end + + parser.on('-f', '--input_file FILESPEC', + 'Use this file instead of stdin; autodetects YAML and JSON file extensions') do |v| + unless File.exist?(v) + raise "File #{v} does not exist." + end + options.input_filespec = v + options.input_format = autodetect_file_format(v) + if [:json, :yaml].include?(options.input_format) + options.input_mode = :one_big_string + end + end + + parser.on('-g', '--log_format FORMAT', 'Log format, logs to stderr, defaults to none (see -o for format options)') do |v| + options.log_format = lookups.output_formats[v] + if options.log_format.nil? + raise("Output mode was '#{v}' but must be one of #{lookups.output_formats.keys}.") + end + end + + parser.on("-h", "--help", "Show help") do |_help_requested| + puts help_text + exit + end + + parser.on('-i', '--input_format FORMAT', + 'Mode with which to parse input values (n = none (default), j = JSON, m = Marshal, y = YAML') do |v| + + options.input_format = lookups.input_formats[v] + if options.input_format.nil? + raise("Input mode was '#{v}' but must be one of #{lookups.input_formats.keys}.") + end + end + + parser.on('-l', '--load RUBY_FILE(S)', 'Ruby file(s) to load, comma separated, or ! to clear') do |v| + if v == '!' 
+ options.loads.clear + else + loadfiles = v.split(',').map(&:strip).map { |s| File.expand_path(s) } + removes, adds = loadfiles.partition { |filespec| filespec[0] == '-' } + + existent, nonexistent = adds.partition { |filespec| File.exists?(filespec) } + if nonexistent.any? + raise("\nDid not find the following files to load: #{nonexistent}\n\n") + else + existent.each { |filespec| options.loads << filespec } + end + + removes.each { |filespec| options.loads -= [filespec[1..-1]] } + end + end + + parser.on('-m', '--input_mode MODE', + 'Mode with which to handle input (-ml, -me, -mb, -mn (default)') do |v| + + options.input_mode = lookups.input_modes[v] + if options.input_mode.nil? + raise("Input mode was '#{v}' but must be one of #{lookups.input_modes.keys}.") + end + end + + # See https://stackoverflow.com/questions/54576873/ruby-optionparser-short-code-for-boolean-option + # for an excellent explanation of this optparse incantation. + # According to the answer, valid options are: + # -n no, -n yes, -n false, -n true, -n n, -n y, -n +, but not -n -. + parser.on('-n', '--[no-]noop [FLAG]', TrueClass, "Do not execute the code (useful with -g)") do |v| + options.noop = (v.nil? ? true : v) + end + + parser.on('-o', '--output_format FORMAT', + 'Mode with which to format values for output (`-o` + [aijJmnpsy])') do |v| + options.output_format_tty = lookups.output_formats[v[0]] + options.output_format_block = lookups.output_formats[v[-1]] + options.output_format = ($stdout.tty? ? options.output_format_tty : options.output_format_block) + if [options.output_format_tty, options.output_format_block].include?(nil) + raise("Bad output mode '#{v}'; each must be one of #{lookups.output_formats.keys}.") + end + end + + parser.on('-r', '--require REQUIRE(S)', + 'Gems and built-in libraries (e.g. shellwords, yaml) to require, comma separated, or ! to clear') do |v| + if v == '!' 
+ options.requires.clear + else + v.split(',').map(&:strip).each do |r| + if r[0] == '-' + options.requires -= [r[1..-1]] + else + options.requires << r + end + end + end + end + + parser.on('-v', '--version', 'Print version') do + puts VERSION + exit(0) + end + + # Undocumented feature: open Github project with default web browser on a Mac + parser.on('', '--open-project') do + open_resource(PROJECT_URL) + exit(0) + end + + parser.on('', '--project-url') do + puts PROJECT_URL + exit(0) + end + + end.parse! + + # We want to do this after all options have been processed because we don't want any clearing of the + # options (by '-c', etc.) to result in exclusion of these needed requires. + add_format_requires_to_requires_list + + options.requires = options.requires.sort.uniq + options.loads.uniq! + + options + + end + end + + + class Main + + include Helpers + + attr_reader :callable, :input_parser, :lookups, + :options, :output_formatter, + :log_formatter, :start_time, :user_source_code + + + def initialize + @lookups = Lookups.new + @start_time = DateTime.now + end + + + private def load_global_config_if_exists + filespec = File.join(Dir.home, '.rexerc') + load(filespec) if File.exists?(filespec) + end + + + private def init_parser_and_formatters + @input_parser = lookups.input_parsers[options.input_format] + @output_formatter = lookups.formatters[options.output_format] + @log_formatter = lookups.formatters[options.log_format] + end + + + # Executes the user specified code in the manner appropriate to the input mode. + # Performs any optionally specified parsing on input and formatting on output. 
+ private def execute(eval_context_object, code) + if options.input_format != :none && options.input_mode != :none + eval_context_object = input_parser.(eval_context_object) + end + + value = eval_context_object.instance_eval(&code) + + unless options.output_format == :none + print output_formatter.(value) + end + rescue Errno::EPIPE + exit(-13) + end + + + # The global $RC (Rexe Context) OpenStruct is available in your user code. + # In order to make it possible to access this object in your loaded files, we are not creating + # it here; instead we add properties to it. This way, you can initialize an OpenStruct yourself + # in your loaded code and it will still work. If you do that, beware, any properties you add will be + # included in the log output. If the to_s of your added objects is large, that might be a pain. + private def init_rexe_context + $RC ||= OpenStruct.new + $RC.count = 0 + $RC.rexe_version = VERSION + $RC.start_time = start_time.iso8601 + $RC.source_code = user_source_code + $RC.options = options.to_h + + def $RC.i; count end # `i` aliases `count` so you can more concisely get the count in your user code + end + + + private def create_callable + eval("Proc.new { #{user_source_code} }") + end + + + private def lookup_action(mode) + input = options.input_filespec ? 
File.open(options.input_filespec) : STDIN + { + line: -> { input.each { |l| execute(l.chomp, callable); $RC.count += 1 } }, + enumerator: -> { execute(input.each_line, callable); $RC.count += 1 }, + one_big_string: -> { big_string = input.read; execute(big_string, callable); $RC.count += 1 }, + none: -> { execute(Object.new, callable) } + }.fetch(mode) + end + + + private def output_log_entry + if options.log_format != :none + $RC.duration_secs = Time.now - start_time.to_time + STDERR.puts(log_formatter.($RC.to_h)) + end + end + + + # Bypasses Bundler's restriction on loading gems + # (see https://stackoverflow.com/questions/55144094/bundler-doesnt-permit-using-gems-in-project-home-directory) + private def require!(the_require) + begin + require the_require + rescue LoadError => error + gem_path = `gem which #{the_require}` + if gem_path.chomp.strip.empty? + raise error # re-raise the error, can't fix it + else + load_dir = File.dirname(gem_path) + $LOAD_PATH += load_dir + require the_require + end + end + end + + + # This class' entry point. + def call + + try do + + @options = CommandLineParser.new.parse + + options.requires.each { |r| require!(r) } + load_global_config_if_exists + options.loads.each { |file| load(file) } + + @user_source_code = ARGV.join(' ') + @user_source_code = 'self' if @user_source_code == '' + + @callable = create_callable + + init_rexe_context + init_parser_and_formatters + + # This is where the user's source code will be executed; the action will in turn call `execute`. + lookup_action(options.input_mode).call unless options.noop + + output_log_entry + end + end + end +end + + +def bundler_run(&block) + # This used to be an unconditional call to with_clean_env but that method is now deprecated: + # [DEPRECATED] `Bundler.with_clean_env` has been deprecated in favor of `Bundler.with_unbundled_env`. 
+ # If you instead want the environment before bundler was originally loaded, + # use `Bundler.with_original_env` + + if Bundler.respond_to?(:with_unbundled_env) + Bundler.with_unbundled_env { block.call } + else + Bundler.with_clean_env { block.call } + end +end + + +bundler_run { Rexe::Main.new.call } diff --git a/spec/syntax_suggest/fixtures/routes.rb.txt b/spec/syntax_suggest/fixtures/routes.rb.txt new file mode 100644 index 00000000000000..86733821c07374 --- /dev/null +++ b/spec/syntax_suggest/fixtures/routes.rb.txt @@ -0,0 +1,121 @@ +Rails.application.routes.draw do + constraints -> { Rails.application.config.non_production } do + namespace :foo do + resource :bar + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace 
:bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + constraints -> { Rails.application.config.non_production } do + namespace :bar do + resource :baz + end + end + + namespace :admin do + resource :session + + match "/foobar(*path)", via: :all, to: redirect { |_params, req| + uri = URI(req.path.gsub("foobar", "foobaz")) + uri.query = req.query_string.presence + uri.to_s + } +end diff --git a/spec/syntax_suggest/fixtures/ruby_buildpack.rb.txt b/spec/syntax_suggest/fixtures/ruby_buildpack.rb.txt new file mode 100644 index 00000000000000..9acdbf3a61d967 --- /dev/null +++ b/spec/syntax_suggest/fixtures/ruby_buildpack.rb.txt @@ -0,0 +1,1344 @@ +require "tmpdir" +require "digest/md5" +require "benchmark" +require "rubygems" +require "language_pack" +require "language_pack/base" +require "language_pack/ruby_version" +require "language_pack/helpers/nodebin" +require "language_pack/helpers/node_installer" +require "language_pack/helpers/yarn_installer" +require "language_pack/helpers/layer" +require "language_pack/helpers/binstub_check" +require "language_pack/version" + +# base Ruby Language Pack. 
This is for any base ruby app. +class LanguagePack::Ruby < LanguagePack::Base + NAME = "ruby" + LIBYAML_VERSION = "0.1.7" + LIBYAML_PATH = "libyaml-#{LIBYAML_VERSION}" + RBX_BASE_URL = "http://binaries.rubini.us/heroku" + NODE_BP_PATH = "vendor/node/bin" + + Layer = LanguagePack::Helpers::Layer + + # detects if this is a valid Ruby app + # @return [Boolean] true if it's a Ruby app + def self.use? + instrument "ruby.use" do + File.exist?("Gemfile") + end + end + + def self.bundler + @@bundler ||= LanguagePack::Helpers::BundlerWrapper.new.install + end + + def bundler + self.class.bundler + end + + def initialize(*args) + super(*args) + @fetchers[:mri] = LanguagePack::Fetcher.new(VENDOR_URL, @stack) + @fetchers[:rbx] = LanguagePack::Fetcher.new(RBX_BASE_URL, @stack) + @node_installer = LanguagePack::Helpers::NodeInstaller.new + @yarn_installer = LanguagePack::Helpers::YarnInstaller.new + end + + def name + "Ruby" + end + + def default_addons + instrument "ruby.default_addons" do + add_dev_database_addon + end + end + + def default_config_vars + instrument "ruby.default_config_vars" do + vars = { + "LANG" => env("LANG") || "en_US.UTF-8", + } + + ruby_version.jruby? ? vars.merge({ + "JRUBY_OPTS" => default_jruby_opts + }) : vars + end + end + + def default_process_types + instrument "ruby.default_process_types" do + { + "rake" => "bundle exec rake", + "console" => "bundle exec irb" + } + end + end + + def best_practice_warnings + if bundler.has_gem?("asset_sync") + warn(<<-WARNING) +You are using the `asset_sync` gem. +This is not recommended. +See https://devcenter.heroku.com/articles/please-do-not-use-asset-sync for more information. +WARNING + end + end + + def compile + instrument 'ruby.compile' do + # check for new app at the beginning of the compile + new_app? 
+ Dir.chdir(build_path) + remove_vendor_bundle + warn_bundler_upgrade + warn_bad_binstubs + install_ruby(slug_vendor_ruby, build_ruby_path) + setup_language_pack_environment( + ruby_layer_path: File.expand_path("."), + gem_layer_path: File.expand_path("."), + bundle_path: "vendor/bundle", + bundle_default_without: "development:test" + ) + allow_git do + install_bundler_in_app(slug_vendor_base) + load_bundler_cache + build_bundler + post_bundler + create_database_yml + install_binaries + run_assets_precompile_rake_task + end + config_detect + best_practice_warnings + warn_outdated_ruby + setup_profiled(ruby_layer_path: "$HOME", gem_layer_path: "$HOME") # $HOME is set to /app at run time + setup_export + cleanup + super + end + rescue => e + warn_outdated_ruby + raise e + end + + + def build + new_app? + remove_vendor_bundle + warn_bad_binstubs + ruby_layer = Layer.new(@layer_dir, "ruby", launch: true) + install_ruby("#{ruby_layer.path}/#{slug_vendor_ruby}") + ruby_layer.metadata[:version] = ruby_version.version + ruby_layer.metadata[:patchlevel] = ruby_version.patchlevel if ruby_version.patchlevel + ruby_layer.metadata[:engine] = ruby_version.engine.to_s + ruby_layer.metadata[:engine_version] = ruby_version.engine_version + ruby_layer.write + + gem_layer = Layer.new(@layer_dir, "gems", launch: true, cache: true, build: true) + setup_language_pack_environment( + ruby_layer_path: ruby_layer.path, + gem_layer_path: gem_layer.path, + bundle_path: "#{gem_layer.path}/vendor/bundle", + bundle_default_without: "development:test" + ) + allow_git do + # TODO install bundler in separate layer + topic "Loading Bundler Cache" + gem_layer.validate! 
do |metadata| + valid_bundler_cache?(gem_layer.path, gem_layer.metadata) + end + install_bundler_in_app("#{gem_layer.path}/#{slug_vendor_base}") + build_bundler + # TODO post_bundler might need to be done in a new layer + bundler.clean + gem_layer.metadata[:gems] = Digest::SHA2.hexdigest(File.read("Gemfile.lock")) + gem_layer.metadata[:stack] = @stack + gem_layer.metadata[:ruby_version] = run_stdout(%q(ruby -v)).strip + gem_layer.metadata[:rubygems_version] = run_stdout(%q(gem -v)).strip + gem_layer.metadata[:buildpack_version] = BUILDPACK_VERSION + gem_layer.write + + create_database_yml + # TODO replace this with multibuildpack stuff? put binaries in their own layer? + install_binaries + run_assets_precompile_rake_task + end + setup_profiled(ruby_layer_path: ruby_layer.path, gem_layer_path: gem_layer.path) + setup_export(gem_layer) + config_detect + best_practice_warnings + cleanup + + super + end + + def cleanup + end + + def config_detect + end + +private + + # A bad shebang line looks like this: + # + # ``` + # #!/usr/bin/env ruby2.5 + # ``` + # + # Since `ruby2.5` is not a valid binary name + # + def warn_bad_binstubs + check = LanguagePack::Helpers::BinstubCheck.new(app_root_dir: Dir.pwd, warn_object: self) + check.call + end + + def default_malloc_arena_max? + return true if @metadata.exists?("default_malloc_arena_max") + return @metadata.touch("default_malloc_arena_max") if new_app? + + return false + end + + def warn_bundler_upgrade + old_bundler_version = @metadata.read("bundler_version").strip if @metadata.exists?("bundler_version") + + if old_bundler_version && old_bundler_version != bundler.version + warn(<<-WARNING, inline: true) +Your app was upgraded to bundler #{ bundler.version }. +Previously you had a successful deploy with bundler #{ old_bundler_version }. 
# Resolves (once) the Ruby version requested through bundler.
#
# The previously deployed version is read from the "buildpack_ruby_version"
# metadata entry when it exists, and the app counts as new when there is no
# `vendor/heroku` directory.
#
# @return [LanguagePack::RubyVersion] the memoized version object
def ruby_version
  instrument 'ruby.ruby_version' do
    return @ruby_version if @ruby_version

    is_new_app = !File.exist?("vendor/heroku")
    version_key = "buildpack_ruby_version"
    previous_version =
      if @metadata.exists?(version_key)
        @metadata.read(version_key).strip
      end

    @ruby_version = LanguagePack::RubyVersion.new(
      bundler.ruby_version,
      is_new: is_new_app,
      last_version: previous_version
    )
    return @ruby_version
  end
end
# Default JRUBY_OPTS value exported when the app runs on JRuby.
# @return [String] the JRUBY_OPTS string
def default_jruby_opts
  opts = "-Xcompile.invokedynamic=false"
  opts
end
# Sets up the environment variables for subsequent processes run by
# multibuildpack. We can't use profile.d because $HOME isn't set up.
#
# @param layer [Object, nil] when given, PATH is exported as-is and GEM_PATH
#   is rooted at `layer.path`; when nil, relative PATH entries and GEM_PATH
#   are rooted at `build_path`. The layer is forwarded to every
#   `set_export_*` call.
def setup_export(layer = nil)
  instrument 'ruby.setup_export' do
    paths =
      if layer
        ENV["PATH"]
      else
        # Relative entries are made absolute against the build directory.
        ENV["PATH"].split(":").map do |path|
          path.start_with?("/") ? path : "#{build_path}/#{path}"
        end.join(":")
      end

    # TODO ensure path exported is correct
    set_export_path "PATH", paths, layer

    gem_root = layer ? layer.path : build_path
    set_export_path "GEM_PATH", "#{gem_root}/#{slug_vendor_base}", layer
    set_export_default "LANG", "en_US.UTF-8", layer

    # TODO handle jruby
    if ruby_version.jruby?
      # Pass the layer like every other export below; previously JRUBY_OPTS
      # was the only variable exported without it.
      set_export_default "JRUBY_OPTS", default_jruby_opts, layer
    end

    set_export_default "BUNDLE_PATH", ENV["BUNDLE_PATH"], layer
    set_export_default "BUNDLE_WITHOUT", ENV["BUNDLE_WITHOUT"], layer
    set_export_default "BUNDLE_BIN", ENV["BUNDLE_BIN"], layer
    set_export_default "BUNDLE_GLOBAL_PATH_APPENDS_RUBY_SCOPE", ENV["BUNDLE_GLOBAL_PATH_APPENDS_RUBY_SCOPE"], layer if bundler.needs_ruby_global_append_path?
    set_export_default "BUNDLE_DEPLOYMENT", ENV["BUNDLE_DEPLOYMENT"], layer if ENV["BUNDLE_DEPLOYMENT"] # Unset on windows since we delete the Gemfile.lock
  end
end
# Emits the outdated-Ruby warnings (minor version, EOL, next stack) at most
# once per build. Does nothing until `@outdated_version_check` has been set.
#
# @return [true, nil] true once the warnings have run, nil when no check exists
def warn_outdated_ruby
  return unless defined?(@outdated_version_check)
  return @warn_outdated if @warn_outdated

  # Wait for the version check to finish before reading its results.
  @outdated_version_check.join

  warn_outdated_minor
  warn_outdated_eol
  warn_stack_upgrade
  @warn_outdated = true
end
+ + For a list of supported Ruby versions see: + https://devcenter.heroku.com/articles/ruby-support#supported-runtimes + + For a list of the oldest Ruby versions present on a given stack see: + https://devcenter.heroku.com/articles/ruby-support#oldest-available-runtimes + WARNING + end + + def warn_outdated_eol + return unless @outdated_version_check.maybe_eol? + + if @outdated_version_check.eol? + warn(<<~WARNING) + EOL Ruby Version + + You are using a Ruby version that has reached its End of Life (EOL) + + We strongly suggest you upgrade to Ruby #{@outdated_version_check.suggest_ruby_eol_version} or later + + Your current Ruby version no longer receives security updates from + Ruby Core and may have serious vulnerabilities. While you will continue + to be able to deploy on Heroku with this Ruby version you must upgrade + to a non-EOL version to be eligible to receive support. + + Upgrade your Ruby version as soon as possible. + + For a list of supported Ruby versions see: + https://devcenter.heroku.com/articles/ruby-support#supported-runtimes + WARNING + else + # Maybe EOL + warn(<<~WARNING) + Potential EOL Ruby Version + + You are using a Ruby version that has either reached its End of Life (EOL) + or will reach its End of Life on December 25th of this year. + + We suggest you upgrade to Ruby #{@outdated_version_check.suggest_ruby_eol_version} or later + + Once a Ruby version becomes EOL, it will no longer receive + security updates from Ruby core and may have serious vulnerabilities. + + Please upgrade your Ruby version. + + For a list of supported Ruby versions see: + https://devcenter.heroku.com/articles/ruby-support#supported-runtimes + WARNING + end + end + + def warn_outdated_minor + return if @outdated_version_check.latest_minor_version? + + warn(<<~WARNING) + There is a more recent Ruby version available for you to use: + + #{@outdated_version_check.suggested_ruby_minor_version} + + The latest version will include security and bug fixes. 
We always recommend + running the latest version of your minor release. + + Please upgrade your Ruby version. + + For all available Ruby versions see: + https://devcenter.heroku.com/articles/ruby-support#supported-runtimes + WARNING + end + + # install the vendored ruby + # @return [Boolean] true if it installs the vendored ruby and false otherwise + def install_ruby(install_path, build_ruby_path = nil) + instrument 'ruby.install_ruby' do + # Could do a compare operation to avoid re-downloading ruby + return false unless ruby_version + installer = LanguagePack::Installers::RubyInstaller.installer(ruby_version).new(@stack) + + @ruby_download_check = LanguagePack::Helpers::DownloadPresence.new(ruby_version.file_name) + @ruby_download_check.call + + if ruby_version.build? + installer.fetch_unpack(ruby_version, build_ruby_path, true) + end + + installer.install(ruby_version, install_path) + + @outdated_version_check = LanguagePack::Helpers::OutdatedRubyVersion.new( + current_ruby_version: ruby_version, + fetcher: installer.fetcher + ) + @outdated_version_check.call + + @metadata.write("buildpack_ruby_version", ruby_version.version_for_download) + + topic "Using Ruby version: #{ruby_version.version_for_download}" + if !ruby_version.set + warn(<<~WARNING) + You have not declared a Ruby version in your Gemfile. + + To declare a Ruby version add this line to your Gemfile: + + ``` + ruby "#{LanguagePack::RubyVersion::DEFAULT_VERSION_NUMBER}" + ``` + + For more information see: + https://devcenter.heroku.com/articles/ruby-versions + WARNING + end + + if ruby_version.warn_ruby_26_bundler? + warn(<<~WARNING, inline: true) + There is a known bundler bug with your version of Ruby + + Your version of Ruby contains a problem with the built-in integration of bundler. If + you encounter a bundler error you need to upgrade your Ruby version. 
# Whether this is the first build of the app, i.e. no `vendor/heroku`
# directory exists yet.
#
# The answer is computed once and then frozen for the rest of the build:
# `||=` would re-check the filesystem on every call whenever the first answer
# was `false`, letting the result flip if `vendor/heroku` is removed mid-build.
#
# TODO make this compatible with CNB
# @return [Boolean]
def new_app?
  return @new_app if defined?(@new_app)

  @new_app = !File.exist?("vendor/heroku")
end
# Vendors binaries into the slug: installs every entry returned by `binaries`,
# then marks everything under `bin/` as executable.
def install_binaries
  instrument 'ruby.install_binaries' do
    binaries.each { |binary| install_binary(binary) }
    # The path is interpolated into a shell command, so escape it: file names
    # with spaces or shell metacharacters would otherwise break the command.
    Dir["bin/*"].each { |path| run("chmod +x #{path.shellescape}") }
  end
end
# Removes a `vendor/bundle` directory that was checked into the git repo, in
# case it contains native extensions; users should be using `bundle pack`
# instead. Warns before deleting and is a no-op when the directory is absent.
# https://github.com/heroku/heroku-buildpack-ruby/issues/21
def remove_vendor_bundle
  # File.exist? rather than the removed File.exists? alias (gone in Ruby 3.2).
  if File.exist?("vendor/bundle")
    warn(<<-WARNING)
Removing `vendor/bundle`.
Checking in `vendor/bundle` is not supported. Please remove this directory
and add it to your .gitignore. To vendor your gems with Bundler, use
`bundle pack` instead.
WARNING
    FileUtils.rm_rf("vendor/bundle")
  end
end
+ + https://devcenter.heroku.com/articles/bundler-windows-gemfile + WARNING + end + + bundle_command = String.new("") + bundle_command << "BUNDLE_WITHOUT='#{ENV["BUNDLE_WITHOUT"]}' " + bundle_command << "BUNDLE_PATH=#{ENV["BUNDLE_PATH"]} " + bundle_command << "BUNDLE_BIN=#{ENV["BUNDLE_BIN"]} " + bundle_command << "BUNDLE_DEPLOYMENT=#{ENV["BUNDLE_DEPLOYMENT"]} " if ENV["BUNDLE_DEPLOYMENT"] # Unset on windows since we delete the Gemfile.lock + bundle_command << "BUNDLE_GLOBAL_PATH_APPENDS_RUBY_SCOPE=#{ENV["BUNDLE_GLOBAL_PATH_APPENDS_RUBY_SCOPE"]} " if bundler.needs_ruby_global_append_path? + bundle_command << "bundle install -j4" + + topic("Installing dependencies using bundler #{bundler.version}") + + bundler_output = String.new("") + bundle_time = nil + env_vars = {} + Dir.mktmpdir("libyaml-") do |tmpdir| + libyaml_dir = "#{tmpdir}/#{LIBYAML_PATH}" + + # need to setup compile environment for the psych gem + yaml_include = File.expand_path("#{libyaml_dir}/include").shellescape + yaml_lib = File.expand_path("#{libyaml_dir}/lib").shellescape + pwd = Dir.pwd + bundler_path = "#{pwd}/#{slug_vendor_base}/gems/#{bundler.dir_name}/lib" + + # we need to set BUNDLE_CONFIG and BUNDLE_GEMFILE for + # codon since it uses bundler. 
+ env_vars["BUNDLE_GEMFILE"] = "#{pwd}/Gemfile" + env_vars["BUNDLE_CONFIG"] = "#{pwd}/.bundle/config" + env_vars["CPATH"] = noshellescape("#{yaml_include}:$CPATH") + env_vars["CPPATH"] = noshellescape("#{yaml_include}:$CPPATH") + env_vars["LIBRARY_PATH"] = noshellescape("#{yaml_lib}:$LIBRARY_PATH") + env_vars["RUBYOPT"] = syck_hack + env_vars["NOKOGIRI_USE_SYSTEM_LIBRARIES"] = "true" + env_vars["BUNDLE_DISABLE_VERSION_CHECK"] = "true" + env_vars["BUNDLER_LIB_PATH"] = "#{bundler_path}" if ruby_version.ruby_version == "1.8.7" + env_vars["BUNDLE_DISABLE_VERSION_CHECK"] = "true" + + puts "Running: #{bundle_command}" + instrument "ruby.bundle_install" do + bundle_time = Benchmark.realtime do + bundler_output << pipe("#{bundle_command} --no-clean", out: "2>&1", env: env_vars, user_env: true) + end + end + end + + if $?.success? + puts "Bundle completed (#{"%.2f" % bundle_time}s)" + log "bundle", :status => "success" + puts "Cleaning up the bundler cache." + instrument "ruby.bundle_clean" do + # Only show bundle clean output when not using default cache + if load_default_cache? + run("bundle clean > /dev/null", user_env: true, env: env_vars) + else + pipe("bundle clean", out: "2> /dev/null", user_env: true, env: env_vars) + end + end + @bundler_cache.store + + # Keep gem cache out of the slug + FileUtils.rm_rf("#{slug_vendor_base}/cache") + else + mcount "fail.bundle.install" + log "bundle", :status => "failure" + error_message = "Failed to install gems via Bundler." 
# Builds the RUBYOPT fragment that requires the vendored syck_hack file.
# Rubies older than 1.9.3 include syck, so only they need the hack.
#
# @return [String] "-r<path to vendor/syck_hack>" for Ruby < 1.9.3, else ""
def syck_hack
  instrument "ruby.syck_hack" do
    hack_path = File.expand_path(File.join(File.dirname(__FILE__), "../../vendor/syck_hack"))
    detected = Gem::Version.new(run_stdout('ruby -e "puts RUBY_VERSION"').strip)
    needs_syck = detected < Gem::Version.new("1.9.3")

    needs_syck ? "-r#{hack_path}" : ""
  end
end
+ def create_database_yml + instrument 'ruby.create_database_yml' do + return false unless File.directory?("config") + return false if bundler.has_gem?('activerecord') && bundler.gem_version('activerecord') >= Gem::Version.new('4.1.0.beta1') + + log("create_database_yml") do + topic("Writing config/database.yml to read from DATABASE_URL") + File.open("config/database.yml", "w") do |file| + file.puts <<-DATABASE_YML +<% + +require 'cgi' +require 'uri' + +begin + uri = URI.parse(ENV["DATABASE_URL"]) +rescue URI::InvalidURIError + raise "Invalid DATABASE_URL" +end + +raise "No RACK_ENV or RAILS_ENV found" unless ENV["RAILS_ENV"] || ENV["RACK_ENV"] + +def attribute(name, value, force_string = false) + if value + value_string = + if force_string + '"' + value + '"' + else + value + end + "\#{name}: \#{value_string}" + else + "" + end +end + +adapter = uri.scheme +adapter = "postgresql" if adapter == "postgres" + +database = (uri.path || "").split("/")[1] + +username = uri.user +password = uri.password + +host = uri.host +port = uri.port + +params = CGI.parse(uri.query || "") + +%> + +<%= ENV["RAILS_ENV"] || ENV["RACK_ENV"] %>: + <%= attribute "adapter", adapter %> + <%= attribute "database", database %> + <%= attribute "username", username %> + <%= attribute "password", password, true %> + <%= attribute "host", host %> + <%= attribute "port", port %> + +<% params.each do |key, value| %> + <%= key %>: <%= value.first %> +<% end %> + DATABASE_YML + end + end + end + end + + def rake + @rake ||= begin + rake_gem_available = bundler.has_gem?("rake") || ruby_version.rake_is_vendored? 
# Executes the block with the GIT_DIR environment variable removed, since it
# can mess with the working directory git (and bundler) thinks it is in.
#
# GIT_DIR is restored in an `ensure`, so it comes back even when the block
# raises; previously an exception left it deleted for the rest of the process.
#
# @param [block] blk block to be executed in the GIT_DIR free context
# @return the block's return value
def allow_git(&blk)
  git_dir = ENV.delete("GIT_DIR") # can mess with bundler
  blk.call
ensure
  # Assigning nil is a no-op delete, so an originally unset GIT_DIR stays unset.
  ENV["GIT_DIR"] = git_dir
end
# Detects a node binary that is already available to the build: first through
# `which node`, then at the legacy `NODE_BP_PATH` location below the current
# directory. The outcome, including `false`, is memoized.
#
# @return [String, false] the `which node` output or the legacy directory
#   when node runs, false otherwise
def node_preinstall_bin_path
  return @node_preinstall_bin_path if defined?(@node_preinstall_bin_path)

  legacy_path = "#{Dir.pwd}/#{NODE_BP_PATH}"
  path = run("which node").strip
  @node_preinstall_bin_path =
    if path && $?.success?
      path
    elsif run("#{legacy_path}/node -v") && $?.success?
      legacy_path
    else
      false
    end
end
alias :node_js_preinstalled? :node_preinstall_bin_path
# Decides whether a previously cached bundle can be reused.
#
# Fixes over the previous version:
# * uses File.exist? (File.exists? was removed in Ruby 3.2)
# * reads the cached rubygems version from `:rubygems_version`; it was read
#   from `:ruby_version`, so the `== "2.0.0"` comparison could never match
# * parses the cached buildpack version once instead of three times
# * corrects the malformed CVE id in the libyaml message
#
# @param path [String] root directory of the cache being validated
# @param metadata [#include?, #[]] cached values keyed by :stack,
#   :ruby_version, :rubygems_version, :buildpack_version, :bundler_version
# @return [true, Array(false, String)] true when the cache is usable,
#   otherwise `[false, reason]`
def valid_bundler_cache?(path, metadata)
  full_ruby_version = run_stdout(%q(ruby -v)).strip
  rubygems_version = run_stdout(%q(gem -v)).strip

  old_rubygems_version = metadata[:rubygems_version]
  old_stack = metadata[:stack] || DEFAULT_LEGACY_STACK

  stack_change = old_stack != @stack
  if !new_app? && stack_change
    return [false, "Purging Cache. Changing stack from #{old_stack} to #{@stack}"]
  end

  # fix bug from v37 deploy
  if File.exist?("#{path}/vendor/ruby_version")
    puts "Broken cache detected. Purging build cache."
    cache.clear("vendor")
    FileUtils.rm_rf("#{path}/vendor/ruby_version")
    return [false, "Broken cache detected. Purging build cache."]
  # fix bug introduced in v38
  elsif !metadata.include?(:buildpack_version) && metadata.include?(:ruby_version)
    puts "Broken cache detected. Purging build cache."
    return [false, "Broken cache detected. Purging build cache."]
  elsif (@bundler_cache.exists? || @bundler_cache.old?) && full_ruby_version != metadata[:ruby_version]
    return [false, <<-MESSAGE]
Ruby version change detected. Clearing bundler cache.
Old: #{metadata[:ruby_version]}
New: #{full_ruby_version}
MESSAGE
  end

  # fix git gemspec bug from Bundler 1.3.0+ upgrade
  if File.exist?(bundler_cache) && !metadata.include?(:bundler_version) && !run("find #{path}/vendor/bundle/*/*/bundler/gems/*/ -name *.gemspec").include?("No such file or directory")
    return [false, "Old bundler cache detected. Clearing bundler cache."]
  end

  # fix for https://github.com/heroku/heroku-buildpack-ruby/issues/86
  if (!metadata.include?(:rubygems_version) ||
      (old_rubygems_version == "2.0.0" && old_rubygems_version != rubygems_version)) &&
      metadata.include?(:ruby_version) && metadata[:ruby_version].strip.include?("ruby 2.0.0p0")
    return [false, "Updating to rubygems #{rubygems_version}. Clearing bundler cache."]
  end

  # Numeric buildpack version that wrote the cache ("v76" => 76); 0 when the
  # entry is missing or not numeric, which disables the checks below.
  cached_buildpack = metadata.include?(:buildpack_version) ? metadata[:buildpack_version].sub('v', '').to_i : 0

  # fix for https://github.com/sparklemotion/nokogiri/issues/923
  if cached_buildpack != 0 && cached_buildpack <= 76
    return [false, <<-MESSAGE]
Fixing nokogiri install. Clearing bundler cache.
See https://github.com/sparklemotion/nokogiri/issues/923.
MESSAGE
  end

  # recompile psych to use new libyaml
  if cached_buildpack != 0 && cached_buildpack <= 99 && bundler.has_gem?("psych")
    return [false, <<-MESSAGE]
Need to recompile psych for CVE-2013-6393. Clearing bundler cache.
See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=737076.
MESSAGE
  end

  # recompile gems for libyaml 0.1.7 update
  if cached_buildpack != 0 && cached_buildpack <= 147 &&
      (metadata.include?(:ruby_version) && metadata[:ruby_version].match(/ruby 2\.1\.(9|10)/) ||
       bundler.has_gem?("psych"))
    return [false, <<-MESSAGE]
Need to recompile gems for CVE-2014-9130. Clearing bundler cache.
See https://devcenter.heroku.com/changelog-items/1016.
MESSAGE
  end

  true
end
+ purge_bundler_cache + elsif (@bundler_cache.exists? || @bundler_cache.old?) && @metadata.exists?(ruby_version_cache) && full_ruby_version != @metadata.read(ruby_version_cache).strip + puts "Ruby version change detected. Clearing bundler cache." + puts "Old: #{@metadata.read(ruby_version_cache).strip}" + puts "New: #{full_ruby_version}" + purge_bundler_cache + end + + # fix git gemspec bug from Bundler 1.3.0+ upgrade + if File.exists?(bundler_cache) && !@metadata.exists?(bundler_version_cache) && !run("find vendor/bundle/*/*/bundler/gems/*/ -name *.gemspec").include?("No such file or directory") + puts "Old bundler cache detected. Clearing bundler cache." + purge_bundler_cache + end + + # fix for https://github.com/heroku/heroku-buildpack-ruby/issues/86 + if (!@metadata.exists?(rubygems_version_cache) || + (old_rubygems_version == "2.0.0" && old_rubygems_version != rubygems_version)) && + @metadata.exists?(ruby_version_cache) && @metadata.read(ruby_version_cache).strip.include?("ruby 2.0.0p0") + puts "Updating to rubygems #{rubygems_version}. Clearing bundler cache." + purge_bundler_cache + end + + # fix for https://github.com/sparklemotion/nokogiri/issues/923 + if @metadata.exists?(buildpack_version_cache) && (bv = @metadata.read(buildpack_version_cache).sub('v', '').to_i) && bv != 0 && bv <= 76 + puts "Fixing nokogiri install. Clearing bundler cache." + puts "See https://github.com/sparklemotion/nokogiri/issues/923." + purge_bundler_cache + end + + # recompile nokogiri to use new libyaml + if @metadata.exists?(buildpack_version_cache) && (bv = @metadata.read(buildpack_version_cache).sub('v', '').to_i) && bv != 0 && bv <= 99 && bundler.has_gem?("psych") + puts "Need to recompile psych for CVE-2013-6393. Clearing bundler cache." + puts "See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=737076." 
+ purge_bundler_cache + end + + # recompile gems for libyaml 0.1.7 update + if @metadata.exists?(buildpack_version_cache) && (bv = @metadata.read(buildpack_version_cache).sub('v', '').to_i) && bv != 0 && bv <= 147 && + (@metadata.exists?(ruby_version_cache) && @metadata.read(ruby_version_cache).strip.match(/ruby 2\.1\.(9|10)/) || + bundler.has_gem?("psych") + ) + puts "Need to recompile gems for CVE-2014-2014-9130. Clearing bundler cache." + puts "See https://devcenter.heroku.com/changelog-items/1016." + purge_bundler_cache + end + + FileUtils.mkdir_p(heroku_metadata) + @metadata.write(ruby_version_cache, full_ruby_version, false) + @metadata.write(buildpack_version_cache, BUILDPACK_VERSION, false) + @metadata.write(bundler_version_cache, bundler.version, false) + @metadata.write(rubygems_version_cache, rubygems_version, false) + @metadata.write(stack_cache, @stack, false) + @metadata.save + end + end + + def purge_bundler_cache(stack = nil) + instrument "ruby.purge_bundler_cache" do + @bundler_cache.clear(stack) + # need to reinstall language pack gems + install_bundler_in_app(slug_vendor_base) + end + end +end diff --git a/spec/syntax_suggest/fixtures/syntax_tree.rb.txt b/spec/syntax_suggest/fixtures/syntax_tree.rb.txt new file mode 100644 index 00000000000000..1c110783f97e57 --- /dev/null +++ b/spec/syntax_suggest/fixtures/syntax_tree.rb.txt @@ -0,0 +1,9234 @@ +# frozen_string_literal: true + +require 'ripper' +require_relative 'syntax_tree/version' + +class SyntaxTree < Ripper + # Represents a line in the source. If this class is being used, it means that + # every character in the string is 1 byte in length, so we can just return the + # start of the line + the index. + class SingleByteString + def initialize(start) + @start = start + end + + def [](byteindex) + @start + byteindex + end + end + + # Represents a line in the source. 
If this class is being used, it means that + # there are characters in the string that are multi-byte, so we will build up + # an array of indices, such that array[byteindex] will be equal to the index + # of the character within the string. + class MultiByteString + def initialize(start, line) + @indices = [] + + line + .each_char + .with_index(start) do |char, index| + char.bytesize.times { @indices << index } + end + end + + def [](byteindex) + @indices[byteindex] + end + end + + # Represents the location of a node in the tree from the source code. + class Location + attr_reader :start_line, :start_char, :end_line, :end_char + + def initialize(start_line:, start_char:, end_line:, end_char:) + @start_line = start_line + @start_char = start_char + @end_line = end_line + @end_char = end_char + end + + def ==(other) + other.is_a?(Location) && start_line == other.start_line && + start_char == other.start_char && end_line == other.end_line && + end_char == other.end_char + end + + def to(other) + Location.new( + start_line: start_line, + start_char: start_char, + end_line: other.end_line, + end_char: other.end_char + ) + end + + def to_json(*opts) + [start_line, start_char, end_line, end_char].to_json(*opts) + end + + def self.token(line:, char:, size:) + new( + start_line: line, + start_char: char, + end_line: line, + end_char: char + size + ) + end + + def self.fixed(line:, char:) + new(start_line: line, start_char: char, end_line: line, end_char: char) + end + end + + # A special parser error so that we can get nice syntax displays on the error + # message when prettier prints out the results. + class ParseError < StandardError + attr_reader :lineno, :column + + def initialize(error, lineno, column) + super(error) + @lineno = lineno + @column = column + end + end + + attr_reader :source, :lines, :tokens + + # This is an attr_accessor so Stmts objects can grab comments out of this + # array and attach them to themselves. 
+ attr_accessor :comments + + def initialize(source, *) + super + + # We keep the source around so that we can refer back to it when we're + # generating the AST. Sometimes it's easier to just reference the source + # string when you want to check if it contains a certain character, for + # example. + @source = source + + # Similarly, we keep the lines of the source string around to be able to + # check if certain lines contain certain characters. For example, we'll use + # this to generate the content that goes after the __END__ keyword. Or we'll + # use this to check if a comment has other content on its line. + @lines = source.split("\n") + + # This is the full set of comments that have been found by the parser. It's + # a running list. At the end of every block of statements, they will go in + # and attempt to grab any comments that are on their own line and turn them + # into regular statements. So at the end of parsing the only comments left + # in here will be comments on lines that also contain code. + @comments = [] + + # This is the current embdoc (comments that start with =begin and end with + # =end). Since they can't be nested, there's no need for a stack here, as + # there can only be one active. These end up getting dumped into the + # comments list before getting picked up by the statements that surround + # them. + @embdoc = nil + + # This is an optional node that can be present if the __END__ keyword is + # used in the file. In that case, this will represent the content after that + # keyword. + @__end__ = nil + + # Heredocs can actually be nested together if you're using interpolation, so + # this is a stack of heredoc nodes that are currently being created. When we + # get to the token that finishes off a heredoc node, we pop the top + # one off. If there are others surrounding it, then the body events will now + # be added to the correct nodes. + @heredocs = [] + + # This is a running list of tokens that have fired. 
It's useful + # mostly for maintaining location information. For example, if you're inside + # the handle of a def event, then in order to determine where the AST node + # started, you need to look backward in the tokens to find a def + # keyword. Most of the time, when a parser event consumes one of these + # events, it will be deleted from the list. So ideally, this list stays + # pretty short over the course of parsing a source string. + @tokens = [] + + # Here we're going to build up a list of SingleByteString or MultiByteString + # objects. They're each going to represent a string in the source. They are + # used by the `char_pos` method to determine where we are in the source + # string. + @line_counts = [] + last_index = 0 + + @source.lines.each do |line| + if line.size == line.bytesize + @line_counts << SingleByteString.new(last_index) + else + @line_counts << MultiByteString.new(last_index, line) + end + + last_index += line.size + end + end + + def self.parse(source) + parser = new(source) + response = parser.parse + response unless parser.error? + end + + private + + # ---------------------------------------------------------------------------- + # :section: Helper methods + # The following methods are used by the ripper event handlers to either + # determine their bounds or query other nodes. + # ---------------------------------------------------------------------------- + + # This represents the current place in the source string that we've gotten to + # so far. We have a memoized line_counts object that we can use to get the + # number of characters that we've had to go through to get to the beginning of + # this line, then we add the number of columns into this line that we've gone + # through. + def char_pos + @line_counts[lineno - 1][column] + end + + # As we build up a list of tokens, we'll periodically need to go backwards and + # find the ones that we've already hit in order to determine the location + # information for nodes that use them. 
For example, if you have a module node + # then you'll look backward for a kw token to determine your start location. + # + # This works with nesting since we're deleting tokens from the list once + # they've been used up. For example if you had nested module declarations then + # the innermost declaration would grab the last kw node that matches "module" + # (which would happen to be the innermost keyword). Then the outer one would + # only be able to grab the first one. In this way all of the tokens act as + # their own stack. + def find_token(type, value = :any, consume: true) + index = + tokens.rindex do |token| + token.is_a?(type) && (value == :any || (token.value == value)) + end + + if consume + # If we're expecting to be able to find a token and consume it, + # but can't actually find it, then we need to raise an error. This is + # _usually_ caused by a syntax error in the source that we're printing. It + # could also be caused by accidentally attempting to consume a token twice + # by two different parser event handlers. + unless index + message = "Cannot find expected #{value == :any ? type : value}" + raise ParseError.new(message, lineno, column) + end + + tokens.delete_at(index) + elsif index + tokens[index] + end + end + + # A helper function to find a :: operator. We do special handling instead of + # using find_token here because we don't pop off all of the :: + # operators so you could end up getting the wrong information if you have for + # instance ::X::Y::Z. + def find_colon2_before(const) + index = + tokens.rindex do |token| + token.is_a?(Op) && token.value == '::' && + token.location.start_char < const.location.start_char + end + + tokens[index] + end + + # Finds the next position in the source string that begins a statement. This + # is used to bind statements lists and make sure they don't include a + # preceding comment. 
For example, we want the following comment to be attached + # to the class node and not the statement node: + # + # class Foo # :nodoc: + # ... + # end + # + # By finding the next non-space character, we can make sure that the bounds of + # the statement list are correct. + def find_next_statement_start(position) + remaining = source[position..-1] + + if remaining.sub(/\A +/, '')[0] == '#' + return position + remaining.index("\n") + end + + position + end + + # ---------------------------------------------------------------------------- + # :section: Ripper event handlers + # The following methods all handle a dispatched ripper event. + # ---------------------------------------------------------------------------- + + # BEGINBlock represents the use of the +BEGIN+ keyword, which hooks into the + # lifecycle of the interpreter. Whatever is inside the block will get executed + # when the program starts. + # + # BEGIN { + # } + # + # Interestingly, the BEGIN keyword doesn't allow the do and end keywords for + # the block. Only braces are permitted. 
+ class BEGINBlock + # [LBrace] the left brace that is seen after the keyword + attr_reader :lbrace + + # [Statements] the expressions to be executed + attr_reader :statements + + # [Location] the location of this node + attr_reader :location + + def initialize(lbrace:, statements:, location:) + @lbrace = lbrace + @statements = statements + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('BEGIN') + q.breakable + q.pp(statements) + end + end + + def to_json(*opts) + { + type: :BEGIN, + lbrace: lbrace, + stmts: statements, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_BEGIN: (Statements statements) -> BEGINBlock + def on_BEGIN(statements) + lbrace = find_token(LBrace) + rbrace = find_token(RBrace) + + statements.bind( + find_next_statement_start(lbrace.location.end_char), + rbrace.location.start_char + ) + + keyword = find_token(Kw, 'BEGIN') + + BEGINBlock.new( + lbrace: lbrace, + statements: statements, + location: keyword.location.to(rbrace.location) + ) + end + + # CHAR irepresents a single codepoint in the script encoding. + # + # ?a + # + # In the example above, the CHAR node represents the string literal "a". You + # can use control characters with this as well, as in ?\C-a. 
+ class CHAR + # [String] the value of the character literal + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('CHAR') + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :CHAR, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_CHAR: (String value) -> CHAR + def on_CHAR(value) + node = + CHAR.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # ENDBlock represents the use of the +END+ keyword, which hooks into the + # lifecycle of the interpreter. Whatever is inside the block will get executed + # when the program ends. + # + # END { + # } + # + # Interestingly, the END keyword doesn't allow the do and end keywords for the + # block. Only braces are permitted. + class ENDBlock + # [LBrace] the left brace that is seen after the keyword + attr_reader :lbrace + + # [Statements] the expressions to be executed + attr_reader :statements + + # [Location] the location of this node + attr_reader :location + + def initialize(lbrace:, statements:, location:) + @lbrace = lbrace + @statements = statements + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('END') + q.breakable + q.pp(statements) + end + end + + def to_json(*opts) + { type: :END, lbrace: lbrace, stmts: statements, loc: location }.to_json( + *opts + ) + end + end + + # :call-seq: + # on_END: (Statements statements) -> ENDBlock + def on_END(statements) + lbrace = find_token(LBrace) + rbrace = find_token(RBrace) + + statements.bind( + find_next_statement_start(lbrace.location.end_char), + rbrace.location.start_char + ) + + keyword = find_token(Kw, 'END') + + ENDBlock.new( + lbrace: lbrace, + statements: statements, + location: keyword.location.to(rbrace.location) + ) 
+ end + + # EndContent represents the use of __END__ syntax, which allows individual + # scripts to keep content after the main ruby code that can be read through + # the DATA constant. + # + # puts DATA.read + # + # __END__ + # some other content that is not executed by the program + # + class EndContent + # [String] the content after the script + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('__end__') + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :__end__, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on___end__: (String value) -> EndContent + def on___end__(value) + @__end__ = + EndContent.new( + value: lines[lineno..-1].join("\n"), + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + end + + # Alias represents the use of the +alias+ keyword with regular arguments (not + # global variables). The +alias+ keyword is used to make a method respond to + # another name as well as the current one. + # + # alias aliased_name name + # + # For the example above, in the current context you can now call aliased_name + # and it will execute the name method. When you're aliasing two methods, you + # can either provide bare words (like the example above) or you can provide + # symbols (note that this includes dynamic symbols like + # :"left-#{middle}-right"). 
+ class Alias + # [DynaSymbol | SymbolLiteral] the new name of the method + attr_reader :left + + # [DynaSymbol | SymbolLiteral] the old name of the method + attr_reader :right + + # [Location] the location of this node + attr_reader :location + + def initialize(left:, right:, location:) + @left = left + @right = right + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('alias') + q.breakable + q.pp(left) + q.breakable + q.pp(right) + end + end + + def to_json(*opts) + { type: :alias, left: left, right: right, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_alias: ( + # (DynaSymbol | SymbolLiteral) left, + # (DynaSymbol | SymbolLiteral) right + # ) -> Alias + def on_alias(left, right) + keyword = find_token(Kw, 'alias') + + Alias.new( + left: left, + right: right, + location: keyword.location.to(right.location) + ) + end + + # ARef represents when you're pulling a value out of a collection at a + # specific index. Put another way, it's any time you're calling the method + # #[]. + # + # collection[index] + # + # The nodes usually contains two children, the collection and the index. In + # some cases, you don't necessarily have the second child node, because you + # can call procs with a pretty esoteric syntax. 
In the following example, you + # wouldn't have a second child node: + # + # collection[] + # + class ARef + # [untyped] the value being indexed + attr_reader :collection + + # [nil | Args | ArgsAddBlock] the value being passed within the brackets + attr_reader :index + + # [Location] the location of this node + attr_reader :location + + def initialize(collection:, index:, location:) + @collection = collection + @index = index + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('aref') + q.breakable + q.pp(collection) + q.breakable + q.pp(index) + end + end + + def to_json(*opts) + { + type: :aref, + collection: collection, + index: index, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_aref: (untyped collection, (nil | Args | ArgsAddBlock) index) -> ARef + def on_aref(collection, index) + find_token(LBracket) + rbracket = find_token(RBracket) + + ARef.new( + collection: collection, + index: index, + location: collection.location.to(rbracket.location) + ) + end + + # ARefField represents assigning values into collections at specific indices. + # Put another way, it's any time you're calling the method #[]=. The + # ARefField node itself is just the left side of the assignment, and they're + # always wrapped in assign nodes. 
+ # + # collection[index] = value + # + class ARefField + # [untyped] the value being indexed + attr_reader :collection + + # [nil | ArgsAddBlock] the value being passed within the brackets + attr_reader :index + + # [Location] the location of this node + attr_reader :location + + def initialize(collection:, index:, location:) + @collection = collection + @index = index + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('aref_field') + q.breakable + q.pp(collection) + q.breakable + q.pp(index) + end + end + + def to_json(*opts) + { + type: :aref_field, + collection: collection, + index: index, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_aref_field: ( + # untyped collection, + # (nil | ArgsAddBlock) index + # ) -> ARefField + def on_aref_field(collection, index) + find_token(LBracket) + rbracket = find_token(RBracket) + + ARefField.new( + collection: collection, + index: index, + location: collection.location.to(rbracket.location) + ) + end + + # def on_arg_ambiguous(value) + # value + # end + + # ArgParen represents wrapping arguments to a method inside a set of + # parentheses. + # + # method(argument) + # + # In the example above, there would be an ArgParen node around the + # ArgsAddBlock node that represents the set of arguments being sent to the + # method method. 
The argument child node can be +nil+ if no arguments were + # passed, as in: + # + # method() + # + class ArgParen + # [nil | Args | ArgsAddBlock | ArgsForward] the arguments inside the + # parentheses + attr_reader :arguments + + # [Location] the location of this node + attr_reader :location + + def initialize(arguments:, location:) + @arguments = arguments + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('arg_paren') + q.breakable + q.pp(arguments) + end + end + + def to_json(*opts) + { type: :arg_paren, args: arguments, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_arg_paren: ( + # (nil | Args | ArgsAddBlock | ArgsForward) arguments + # ) -> ArgParen + def on_arg_paren(arguments) + lparen = find_token(LParen) + rparen = find_token(RParen) + + # If the arguments exceed the ending of the parentheses, then we know we + # have a heredoc in the arguments, and we need to use the bounds of the + # arguments to determine how large the arg_paren is. + ending = + if arguments && arguments.location.end_line > rparen.location.end_line + arguments + else + rparen + end + + ArgParen.new( + arguments: arguments, + location: lparen.location.to(ending.location) + ) + end + + # Args represents a list of arguments being passed to a method call or array + # literal. 
+ # + # method(first, second, third) + # + class Args + # [Array[ untyped ]] the arguments that this node wraps + attr_reader :parts + + # [Location] the location of this node + attr_reader :location + + def initialize(parts:, location:) + @parts = parts + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('args') + q.breakable + q.group(2, '(', ')') { q.seplist(parts) { |part| q.pp(part) } } + end + end + + def to_json(*opts) + { type: :args, parts: parts, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_args_add: (Args arguments, untyped argument) -> Args + def on_args_add(arguments, argument) + if arguments.parts.empty? + # If this is the first argument being passed into the list of arguments, + # then we're going to use the bounds of the argument to override the + # parent node's location since this will be more accurate. + Args.new(parts: [argument], location: argument.location) + else + # Otherwise we're going to update the existing list with the argument + # being added as well as the new end bounds. + Args.new( + parts: arguments.parts << argument, + location: arguments.location.to(argument.location) + ) + end + end + + # ArgsAddBlock represents a list of arguments and potentially a block + # argument. ArgsAddBlock is commonly seen being passed to any method where you + # use parentheses (wrapped in an ArgParen node). It’s also used to pass + # arguments to the various control-flow keywords like +return+. 
+ # + # method(argument, &block) + # + class ArgsAddBlock + # [Args] the arguments before the optional block + attr_reader :arguments + + # [nil | untyped] the optional block argument + attr_reader :block + + # [Location] the location of this node + attr_reader :location + + def initialize(arguments:, block:, location:) + @arguments = arguments + @block = block + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('args_add_block') + q.breakable + q.pp(arguments) + q.breakable + q.pp(block) + end + end + + def to_json(*opts) + { + type: :args_add_block, + args: arguments, + block: block, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_args_add_block: ( + # Args arguments, + # (false | untyped) block + # ) -> ArgsAddBlock + def on_args_add_block(arguments, block) + ending = block || arguments + + ArgsAddBlock.new( + arguments: arguments, + block: block || nil, + location: arguments.location.to(ending.location) + ) + end + + # Star represents using a splat operator on an expression. + # + # method(*arguments) + # + class ArgStar + # [untyped] the expression being splatted + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('arg_star') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :arg_star, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_args_add_star: (Args arguments, untyped star) -> Args + def on_args_add_star(arguments, argument) + beginning = find_token(Op, '*') + ending = argument || beginning + + location = + if arguments.parts.empty? 
+ ending.location + else + arguments.location.to(ending.location) + end + + arg_star = + ArgStar.new( + value: argument, + location: beginning.location.to(ending.location) + ) + + Args.new(parts: arguments.parts << arg_star, location: location) + end + + # ArgsForward represents forwarding all kinds of arguments onto another method + # call. + # + # def request(method, path, **headers, &block); end + # + # def get(...) + # request(:GET, ...) + # end + # + # def post(...) + # request(:POST, ...) + # end + # + # In the example above, both the get and post methods are forwarding all of + # their arguments (positional, keyword, and block) on to the request method. + # The ArgsForward node appears in both the caller (the request method calls) + # and the callee (the get and post definitions). + class ArgsForward + # [String] the value of the operator + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('args_forward') + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :args_forward, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_args_forward: () -> ArgsForward + def on_args_forward + op = find_token(Op, '...') + + ArgsForward.new(value: op.value, location: op.location) + end + + # :call-seq: + # on_args_new: () -> Args + def on_args_new + Args.new(parts: [], location: Location.fixed(line: lineno, char: char_pos)) + end + + # ArrayLiteral represents any form of an array literal, and contains myriad + # child nodes because of the special array literal syntax like %w and %i. + # + # [] + # [one, two, three] + # [*one_two_three] + # %i[one two three] + # %w[one two three] + # %I[one two three] + # %W[one two three] + # + # Every line in the example above produces an ArrayLiteral node. 
In order, the + # child contents node of this ArrayLiteral node would be nil, Args, QSymbols, + # QWords, Symbols, and Words. + class ArrayLiteral + # [nil | Args | QSymbols | QWords | Symbols | Words] the + # contents of the array + attr_reader :contents + + # [Location] the location of this node + attr_reader :location + + def initialize(contents:, location:) + @contents = contents + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('array') + q.breakable + q.pp(contents) + end + end + + def to_json(*opts) + { type: :array, cnts: contents, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_array: ( + # (nil | Args | QSymbols | QWords | Symbols | Words) contents + # ) -> ArrayLiteral + def on_array(contents) + if !contents || contents.is_a?(Args) + lbracket = find_token(LBracket) + rbracket = find_token(RBracket) + + ArrayLiteral.new( + contents: contents, + location: lbracket.location.to(rbracket.location) + ) + else + tstring_end = find_token(TStringEnd) + contents = + contents.class.new( + elements: contents.elements, + location: contents.location.to(tstring_end.location) + ) + + ArrayLiteral.new(contents: contents, location: contents.location) + end + end + + # AryPtn represents matching against an array pattern using the Ruby 2.7+ + # pattern matching syntax. It’s one of the more complicated nodes, because + # the four parameters that it accepts can almost all be nil. + # + # case [1, 2, 3] + # in [Integer, Integer] + # "matched" + # in Container[Integer, Integer] + # "matched" + # in [Integer, *, Integer] + # "matched" + # end + # + # An AryPtn node is created with four parameters: an optional constant + # wrapper, an array of positional matches, an optional splat with identifier, + # and an optional array of positional matches that occur after the splat. + # All of the in clauses above would create an AryPtn node. 
+ class AryPtn + # [nil | VarRef] the optional constant wrapper + attr_reader :constant + + # [Array[ untyped ]] the regular positional arguments that this array + # pattern is matching against + attr_reader :requireds + + # [nil | VarField] the optional starred identifier that grabs up a list of + # positional arguments + attr_reader :rest + + # [Array[ untyped ]] the list of positional arguments occurring after the + # optional star if there is one + attr_reader :posts + + # [Location] the location of this node + attr_reader :location + + def initialize(constant:, requireds:, rest:, posts:, location:) + @constant = constant + @requireds = requireds + @rest = rest + @posts = posts + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('aryptn') + + if constant + q.breakable + q.pp(constant) + end + + if requireds.any? + q.breakable + q.group(2, '(', ')') do + q.seplist(requireds) { |required| q.pp(required) } + end + end + + if rest + q.breakable + q.pp(rest) + end + + if posts.any? + q.breakable + q.group(2, '(', ')') { q.seplist(posts) { |post| q.pp(post) } } + end + end + end + + def to_json(*opts) + { + type: :aryptn, + constant: constant, + reqs: requireds, + rest: rest, + posts: posts, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_aryptn: ( + # (nil | VarRef) constant, + # (nil | Array[untyped]) requireds, + # (nil | VarField) rest, + # (nil | Array[untyped]) posts + # ) -> AryPtn + def on_aryptn(constant, requireds, rest, posts) + parts = [constant, *requireds, rest, *posts].compact + + AryPtn.new( + constant: constant, + requireds: requireds || [], + rest: rest, + posts: posts || [], + location: parts[0].location.to(parts[-1].location) + ) + end + + # Assign represents assigning something to a variable or constant. Generally, + # the left side of the assignment is going to be any node that ends with the + # name "Field". 
+ # + # variable = value + # + class Assign + # [ARefField | ConstPathField | Field | TopConstField | VarField] the target + # to assign the result of the expression to + attr_reader :target + + # [untyped] the expression to be assigned + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(target:, value:, location:) + @target = target + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('assign') + q.breakable + q.pp(target) + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :assign, target: target, value: value, loc: location }.to_json( + *opts + ) + end + end + + # :call-seq: + # on_assign: ( + # (ARefField | ConstPathField | Field | TopConstField | VarField) target, + # untyped value + # ) -> Assign + def on_assign(target, value) + Assign.new( + target: target, + value: value, + location: target.location.to(value.location) + ) + end + + # Assoc represents a key-value pair within a hash. It is a child node of + # either an AssocListFromArgs or a BareAssocHash. + # + # { key1: value1, key2: value2 } + # + # In the above example, the would be two AssocNew nodes. 
+ class Assoc + # [untyped] the key of this pair + attr_reader :key + + # [untyped] the value of this pair + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(key:, value:, location:) + @key = key + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('assoc') + q.breakable + q.pp(key) + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :assoc, key: key, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_assoc_new: (untyped key, untyped value) -> Assoc + def on_assoc_new(key, value) + Assoc.new( + key: key, + value: value, + location: key.location.to(value.location) + ) + end + + # AssocSplat represents double-splatting a value into a hash (either a hash + # literal or a bare hash in a method call). + # + # { **pairs } + # + class AssocSplat + # [untyped] the expression that is being splatted + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('assoc_splat') + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :assoc_splat, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_assoc_splat: (untyped value) -> AssocSplat + def on_assoc_splat(value) + operator = find_token(Op, '**') + + AssocSplat.new(value: value, location: operator.location.to(value.location)) + end + + # AssocListFromArgs represents the key-value pairs of a hash literal. Its + # parent node is always a hash. 
+ # + # { key1: value1, key2: value2 } + # + class AssocListFromArgs + # [Array[ AssocNew | AssocSplat ]] + attr_reader :assocs + + # [Location] the location of this node + attr_reader :location + + def initialize(assocs:, location:) + @assocs = assocs + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('assoclist_from_args') + q.breakable + q.group(2, '(', ')') { q.seplist(assocs) { |assoc| q.pp(assoc) } } + end + end + + def to_json(*opts) + { type: :assoclist_from_args, assocs: assocs, loc: location }.to_json( + *opts + ) + end + end + + # :call-seq: + # on_assoclist_from_args: ( + # Array[AssocNew | AssocSplat] assocs + # ) -> AssocListFromArgs + def on_assoclist_from_args(assocs) + AssocListFromArgs.new( + assocs: assocs, + location: assocs[0].location.to(assocs[-1].location) + ) + end + + # Backref represents a global variable referencing a matched value. It comes + # in the form of a $ followed by a positive integer. + # + # $1 + # + class Backref + # [String] the name of the global backreference variable + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('backref') + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :backref, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_backref: (String value) -> Backref + def on_backref(value) + node = + Backref.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # Backtick represents the use of the ` operator. It's usually found being used + # for an XStringLiteral, but could also be found as the name of a method being + # defined. 
+ class Backtick + # [String] the backtick in the string + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('backtick') + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :backtick, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_backtick: (String value) -> Backtick + def on_backtick(value) + node = + Backtick.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # BareAssocHash represents a hash of contents being passed as a method + # argument (and therefore has omitted braces). It's very similar to an + # AssocListFromArgs node. + # + # method(key1: value1, key2: value2) + # + class BareAssocHash + # [Array[ AssocNew | AssocSplat ]] + attr_reader :assocs + + # [Location] the location of this node + attr_reader :location + + def initialize(assocs:, location:) + @assocs = assocs + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('bare_assoc_hash') + q.breakable + q.group(2, '(', ')') { q.seplist(assocs) { |assoc| q.pp(assoc) } } + end + end + + def to_json(*opts) + { type: :bare_assoc_hash, assocs: assocs, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_bare_assoc_hash: (Array[AssocNew | AssocSplat] assocs) -> BareAssocHash + def on_bare_assoc_hash(assocs) + BareAssocHash.new( + assocs: assocs, + location: assocs[0].location.to(assocs[-1].location) + ) + end + + # Begin represents a begin..end chain. 
+ # + # begin + # value + # end + # + class Begin + # [BodyStmt] the bodystmt that contains the contents of this begin block + attr_reader :bodystmt + + # [Location] the location of this node + attr_reader :location + + def initialize(bodystmt:, location:) + @bodystmt = bodystmt + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('begin') + q.breakable + q.pp(bodystmt) + end + end + + def to_json(*opts) + { type: :begin, bodystmt: bodystmt, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_begin: (BodyStmt bodystmt) -> Begin + def on_begin(bodystmt) + keyword = find_token(Kw, 'begin') + end_char = + if bodystmt.rescue_clause || bodystmt.ensure_clause || + bodystmt.else_clause + bodystmt.location.end_char + else + find_token(Kw, 'end').location.end_char + end + + bodystmt.bind(keyword.location.end_char, end_char) + + Begin.new( + bodystmt: bodystmt, + location: keyword.location.to(bodystmt.location) + ) + end + + # Binary represents any expression that involves two sub-expressions with an + # operator in between. 
This can be something that looks like a mathematical + # operation: + # + # 1 + 1 + # + # but can also be something like pushing a value onto an array: + # + # array << value + # + class Binary + # [untyped] the left-hand side of the expression + attr_reader :left + + # [String] the operator used between the two expressions + attr_reader :operator + + # [untyped] the right-hand side of the expression + attr_reader :right + + # [Location] the location of this node + attr_reader :location + + def initialize(left:, operator:, right:, location:) + @left = left + @operator = operator + @right = right + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('binary') + q.breakable + q.pp(left) + q.breakable + q.text(operator) + q.breakable + q.pp(right) + end + end + + def to_json(*opts) + { + type: :binary, + left: left, + op: operator, + right: right, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_binary: (untyped left, (Op | Symbol) operator, untyped right) -> Binary + def on_binary(left, operator, right) + # On most Ruby implementations, operator is a Symbol that represents that + # operation being performed. For instance in the example `1 < 2`, the + # `operator` object would be `:<`. However, on JRuby, it's an `@op` node, + # so here we're going to explicitly convert it into the same normalized + # form. + operator = tokens.delete(operator).value unless operator.is_a?(Symbol) + + Binary.new( + left: left, + operator: operator, + right: right, + location: left.location.to(right.location) + ) + end + + # BlockVar represents the parameters being declared for a block. Effectively + # this node is everything contained within the pipes. This includes all of the + # various parameter types, as well as block-local variable declarations. 
+ # + # method do |positional, optional = value, keyword:, █ local| + # end + # + class BlockVar + # [Params] the parameters being declared with the block + attr_reader :params + + # [Array[ Ident ]] the list of block-local variable declarations + attr_reader :locals + + # [Location] the location of this node + attr_reader :location + + def initialize(params:, locals:, location:) + @params = params + @locals = locals + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('block_var') + q.breakable + q.pp(params) + + if locals.any? + q.breakable + q.group(2, '(', ')') { q.seplist(locals) { |local| q.pp(local) } } + end + end + end + + def to_json(*opts) + { + type: :block_var, + params: params, + locals: locals, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_block_var: (Params params, (nil | Array[Ident]) locals) -> BlockVar + def on_block_var(params, locals) + index = + tokens.rindex do |node| + node.is_a?(Op) && %w[| ||].include?(node.value) && + node.location.start_char < params.location.start_char + end + + beginning = tokens[index] + ending = tokens[-1] + + BlockVar.new( + params: params, + locals: locals || [], + location: beginning.location.to(ending.location) + ) + end + + # BlockArg represents declaring a block parameter on a method definition. 
+ # + # def method(&block); end + # + class BlockArg + # [Ident] the name of the block argument + attr_reader :name + + # [Location] the location of this node + attr_reader :location + + def initialize(name:, location:) + @name = name + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('blockarg') + q.breakable + q.pp(name) + end + end + + def to_json(*opts) + { type: :blockarg, name: name, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_blockarg: (Ident name) -> BlockArg + def on_blockarg(name) + operator = find_token(Op, '&') + + BlockArg.new(name: name, location: operator.location.to(name.location)) + end + + # bodystmt can't actually determine its bounds appropriately because it + # doesn't necessarily know where it started. So the parent node needs to + # report back down into this one where it goes. + class BodyStmt + # [Statements] the list of statements inside the begin clause + attr_reader :statements + + # [nil | Rescue] the optional rescue chain attached to the begin clause + attr_reader :rescue_clause + + # [nil | Statements] the optional set of statements inside the else clause + attr_reader :else_clause + + # [nil | Ensure] the optional ensure clause + attr_reader :ensure_clause + + # [Location] the location of this node + attr_reader :location + + def initialize( + statements:, + rescue_clause:, + else_clause:, + ensure_clause:, + location: + ) + @statements = statements + @rescue_clause = rescue_clause + @else_clause = else_clause + @ensure_clause = ensure_clause + @location = location + end + + def bind(start_char, end_char) + @location = + Location.new( + start_line: location.start_line, + start_char: start_char, + end_line: location.end_line, + end_char: end_char + ) + + parts = [rescue_clause, else_clause, ensure_clause] + + # Here we're going to determine the bounds for the statements + consequent = parts.compact.first + statements.bind( + start_char, + consequent ? 
consequent.location.start_char : end_char + ) + + # Next we're going to determine the rescue clause if there is one + if rescue_clause + consequent = parts.drop(1).compact.first + rescue_clause.bind_end( + consequent ? consequent.location.start_char : end_char + ) + end + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('bodystmt') + q.breakable + q.pp(statements) + + if rescue_clause + q.breakable + q.pp(rescue_clause) + end + + if else_clause + q.breakable + q.pp(else_clause) + end + + if ensure_clause + q.breakable + q.pp(ensure_clause) + end + end + end + + def to_json(*opts) + { + type: :bodystmt, + stmts: statements, + rsc: rescue_clause, + els: else_clause, + ens: ensure_clause, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_bodystmt: ( + # Statements statements, + # (nil | Rescue) rescue_clause, + # (nil | Statements) else_clause, + # (nil | Ensure) ensure_clause + # ) -> BodyStmt + def on_bodystmt(statements, rescue_clause, else_clause, ensure_clause) + BodyStmt.new( + statements: statements, + rescue_clause: rescue_clause, + else_clause: else_clause, + ensure_clause: ensure_clause, + location: Location.fixed(line: lineno, char: char_pos) + ) + end + + # BraceBlock represents passing a block to a method call using the { } + # operators. 
+ # + # method { |variable| variable + 1 } + # + class BraceBlock + # [LBrace] the left brace that opens this block + attr_reader :lbrace + + # [nil | BlockVar] the optional set of parameters to the block + attr_reader :block_var + + # [Statements] the list of expressions to evaluate within the block + attr_reader :statements + + # [Location] the location of this node + attr_reader :location + + def initialize(lbrace:, block_var:, statements:, location:) + @lbrace = lbrace + @block_var = block_var + @statements = statements + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('brace_block') + + if block_var + q.breakable + q.pp(block_var) + end + + q.breakable + q.pp(statements) + end + end + + def to_json(*opts) + { + type: :brace_block, + lbrace: lbrace, + block_var: block_var, + stmts: statements, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_brace_block: ( + # (nil | BlockVar) block_var, + # Statements statements + # ) -> BraceBlock + def on_brace_block(block_var, statements) + lbrace = find_token(LBrace) + rbrace = find_token(RBrace) + + statements.bind( + find_next_statement_start((block_var || lbrace).location.end_char), + rbrace.location.start_char + ) + + location = + Location.new( + start_line: lbrace.location.start_line, + start_char: lbrace.location.start_char, + end_line: [rbrace.location.end_line, statements.location.end_line].max, + end_char: rbrace.location.end_char + ) + + BraceBlock.new( + lbrace: lbrace, + block_var: block_var, + statements: statements, + location: location + ) + end + + # Break represents using the +break+ keyword. 
+ # + # break + # + # It can also optionally accept arguments, as in: + # + # break 1 + # + class Break + # [Args | ArgsAddBlock] the arguments being sent to the keyword + attr_reader :arguments + + # [Location] the location of this node + attr_reader :location + + def initialize(arguments:, location:) + @arguments = arguments + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('break') + q.breakable + q.pp(arguments) + end + end + + def to_json(*opts) + { type: :break, args: arguments, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_break: ((Args | ArgsAddBlock) arguments) -> Break + def on_break(arguments) + keyword = find_token(Kw, 'break') + + location = keyword.location + location = location.to(arguments.location) unless arguments.is_a?(Args) + + Break.new(arguments: arguments, location: location) + end + + # Call represents a method call. This node doesn't contain the arguments being + # passed (if arguments are passed, this node will get nested under a + # MethodAddArg node). 
+ # + # receiver.message + # + class Call + # [untyped] the receiver of the method call + attr_reader :receiver + + # [:"::" | Op | Period] the operator being used to send the message + attr_reader :operator + + # [:call | Backtick | Const | Ident | Op] the message being sent + attr_reader :message + + # [Location] the location of this node + attr_reader :location + + def initialize(receiver:, operator:, message:, location:) + @receiver = receiver + @operator = operator + @message = message + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('call') + q.breakable + q.pp(receiver) + q.breakable + q.pp(operator) + q.breakable + q.pp(message) + end + end + + def to_json(*opts) + { + type: :call, + receiver: receiver, + op: operator, + message: message, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_call: ( + # untyped receiver, + # (:"::" | Op | Period) operator, + # (:call | Backtick | Const | Ident | Op) message + # ) -> Call + def on_call(receiver, operator, message) + ending = message + ending = operator if message == :call + + Call.new( + receiver: receiver, + operator: operator, + message: message, + location: + Location.new( + start_line: receiver.location.start_line, + start_char: receiver.location.start_char, + end_line: [ending.location.end_line, receiver.location.end_line].max, + end_char: ending.location.end_char + ) + ) + end + + # Case represents the beginning of a case chain. 
+ # + # case value + # when 1 + # "one" + # when 2 + # "two" + # else + # "number" + # end + # + class Case + # [nil | untyped] optional value being switched on + attr_reader :value + + # [In | When] the next clause in the chain + attr_reader :consequent + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, consequent:, location:) + @value = value + @consequent = consequent + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('case') + + if value + q.breakable + q.pp(value) + end + + q.breakable + q.pp(consequent) + end + end + + def to_json(*opts) + { type: :case, value: value, cons: consequent, loc: location }.to_json( + *opts + ) + end + end + + # RAssign represents a single-line pattern match. + # + # value in pattern + # value => pattern + # + class RAssign + # [untyped] the left-hand expression + attr_reader :value + + # [Kw | Op] the operator being used to match against the pattern, which is + # either => or in + attr_reader :operator + + # [untyped] the pattern on the right-hand side of the expression + attr_reader :pattern + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, operator:, pattern:, location:) + @value = value + @operator = operator + @pattern = pattern + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('rassign') + + q.breakable + q.pp(value) + + q.breakable + q.pp(operator) + + q.breakable + q.pp(pattern) + end + end + + def to_json(*opts) + { + type: :rassign, + value: value, + op: operator, + pattern: pattern, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_case: (untyped value, untyped consequent) -> Case | RAssign + def on_case(value, consequent) + if keyword = find_token(Kw, 'case', consume: false) + tokens.delete(keyword) + + Case.new( + value: value, + consequent: consequent, + location: keyword.location.to(consequent.location) + ) + else + operator = 
find_token(Kw, 'in', consume: false) || find_token(Op, '=>') + + RAssign.new( + value: value, + operator: operator, + pattern: consequent, + location: value.location.to(consequent.location) + ) + end + end + + # Class represents defining a class using the +class+ keyword. + # + # class Container + # end + # + # Classes can have path names as their class name in case it's being nested + # under a namespace, as in: + # + # class Namespace::Container + # end + # + # Classes can also be defined as a top-level path, in the case that it's + # already in a namespace but you want to define it at the top-level instead, + # as in: + # + # module OtherNamespace + # class ::Namespace::Container + # end + # end + # + # All of these declarations can also have an optional superclass reference, as + # in: + # + # class Child < Parent + # end + # + # That superclass can actually be any Ruby expression, it doesn't necessarily + # need to be a constant, as in: + # + # class Child < method + # end + # + class ClassDeclaration + # [ConstPathRef | ConstRef | TopConstRef] the name of the class being + # defined + attr_reader :constant + + # [nil | untyped] the optional superclass declaration + attr_reader :superclass + + # [BodyStmt] the expressions to execute within the context of the class + attr_reader :bodystmt + + # [Location] the location of this node + attr_reader :location + + def initialize(constant:, superclass:, bodystmt:, location:) + @constant = constant + @superclass = superclass + @bodystmt = bodystmt + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('class') + + q.breakable + q.pp(constant) + + if superclass + q.breakable + q.pp(superclass) + end + + q.breakable + q.pp(bodystmt) + end + end + + def to_json(*opts) + { + type: :class, + constant: constant, + superclass: superclass, + bodystmt: bodystmt, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_class: ( + # (ConstPathRef | ConstRef | TopConstRef) constant, + 
# untyped superclass, + # BodyStmt bodystmt + # ) -> ClassDeclaration + def on_class(constant, superclass, bodystmt) + beginning = find_token(Kw, 'class') + ending = find_token(Kw, 'end') + + bodystmt.bind( + find_next_statement_start((superclass || constant).location.end_char), + ending.location.start_char + ) + + ClassDeclaration.new( + constant: constant, + superclass: superclass, + bodystmt: bodystmt, + location: beginning.location.to(ending.location) + ) + end + + # Comma represents the use of the , operator. + class Comma + # [String] the comma in the string + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + end + + # :call-seq: + # on_comma: (String value) -> Comma + def on_comma(value) + node = + Comma.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # Command represents a method call with arguments and no parentheses. Note + # that Command nodes only happen when there is no explicit receiver for this + # method. 
+ # + # method argument + # + class Command + # [Const | Ident] the message being sent to the implicit receiver + attr_reader :message + + # [Args | ArgsAddBlock] the arguments being sent with the message + attr_reader :arguments + + # [Location] the location of this node + attr_reader :location + + def initialize(message:, arguments:, location:) + @message = message + @arguments = arguments + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('command') + + q.breakable + q.pp(message) + + q.breakable + q.pp(arguments) + end + end + + def to_json(*opts) + { + type: :command, + message: message, + args: arguments, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_command: ( + # (Const | Ident) message, + # (Args | ArgsAddBlock) arguments + # ) -> Command + def on_command(message, arguments) + Command.new( + message: message, + arguments: arguments, + location: message.location.to(arguments.location) + ) + end + + # CommandCall represents a method call on an object with arguments and no + # parentheses. 
+ # + # object.method argument + # + class CommandCall + # [untyped] the receiver of the message + attr_reader :receiver + + # [:"::" | Op | Period] the operator used to send the message + attr_reader :operator + + # [Const | Ident | Op] the message being send + attr_reader :message + + # [Args | ArgsAddBlock] the arguments going along with the message + attr_reader :arguments + + # [Location] the location of this node + attr_reader :location + + def initialize(receiver:, operator:, message:, arguments:, location:) + @receiver = receiver + @operator = operator + @message = message + @arguments = arguments + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('command_call') + + q.breakable + q.pp(receiver) + + q.breakable + q.pp(operator) + + q.breakable + q.pp(message) + + q.breakable + q.pp(arguments) + end + end + + def to_json(*opts) + { + type: :command_call, + receiver: receiver, + op: operator, + message: message, + args: arguments, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_command_call: ( + # untyped receiver, + # (:"::" | Op | Period) operator, + # (Const | Ident | Op) message, + # (Args | ArgsAddBlock) arguments + # ) -> CommandCall + def on_command_call(receiver, operator, message, arguments) + ending = arguments || message + + CommandCall.new( + receiver: receiver, + operator: operator, + message: message, + arguments: arguments, + location: receiver.location.to(ending.location) + ) + end + + # Comment represents a comment in the source. + # + # # comment + # + class Comment + # [String] the contents of the comment + attr_reader :value + + # [boolean] whether or not there is code on the same line as this comment. + # If there is, then inline will be true. + attr_reader :inline + alias inline? 
inline + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, inline:, location:) + @value = value + @inline = inline + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('comment') + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { + type: :comment, + value: value.force_encoding('UTF-8'), + inline: inline, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_comment: (String value) -> Comment + def on_comment(value) + line = lineno + comment = + Comment.new( + value: value[1..-1].chomp, + inline: value.strip != lines[line - 1], + location: + Location.token(line: line, char: char_pos, size: value.size - 1) + ) + + @comments << comment + comment + end + + # Const represents a literal value that _looks_ like a constant. This could + # actually be a reference to a constant: + # + # Constant + # + # It could also be something that looks like a constant in another context, as + # in a method call to a capitalized method: + # + # object.Constant + # + # or a symbol that starts with a capital letter: + # + # :Constant + # + class Const + # [String] the name of the constant + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('const') + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :const, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_const: (String value) -> Const + def on_const(value) + node = + Const.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # ConstPathField represents the child node of some kind of assignment. It + # represents when you're assigning to a constant that is being referenced as + # a child of another variable. 
+ # + # object::Const = value + # + class ConstPathField + # [untyped] the source of the constant + attr_reader :parent + + # [Const] the constant itself + attr_reader :constant + + # [Location] the location of this node + attr_reader :location + + def initialize(parent:, constant:, location:) + @parent = parent + @constant = constant + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('const_path_field') + + q.breakable + q.pp(parent) + + q.breakable + q.pp(constant) + end + end + + def to_json(*opts) + { + type: :const_path_field, + parent: parent, + constant: constant, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_const_path_field: (untyped parent, Const constant) -> ConstPathField + def on_const_path_field(parent, constant) + ConstPathField.new( + parent: parent, + constant: constant, + location: parent.location.to(constant.location) + ) + end + + # ConstPathRef represents referencing a constant by a path. + # + # object::Const + # + class ConstPathRef + # [untyped] the source of the constant + attr_reader :parent + + # [Const] the constant itself + attr_reader :constant + + # [Location] the location of this node + attr_reader :location + + def initialize(parent:, constant:, location:) + @parent = parent + @constant = constant + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('const_path_ref') + + q.breakable + q.pp(parent) + + q.breakable + q.pp(constant) + end + end + + def to_json(*opts) + { + type: :const_path_ref, + parent: parent, + constant: constant, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_const_path_ref: (untyped parent, Const constant) -> ConstPathRef + def on_const_path_ref(parent, constant) + ConstPathRef.new( + parent: parent, + constant: constant, + location: parent.location.to(constant.location) + ) + end + + # ConstRef represents the name of the constant being used in a class or module + # declaration. 
+ # + # class Container + # end + # + class ConstRef + # [Const] the constant itself + attr_reader :constant + + # [Location] the location of this node + attr_reader :location + + def initialize(constant:, location:) + @constant = constant + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('const_ref') + + q.breakable + q.pp(constant) + end + end + + def to_json(*opts) + { type: :const_ref, constant: constant, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_const_ref: (Const constant) -> ConstRef + def on_const_ref(constant) + ConstRef.new(constant: constant, location: constant.location) + end + + # CVar represents the use of a class variable. + # + # @@variable + # + class CVar + # [String] the name of the class variable + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('cvar') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :cvar, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_cvar: (String value) -> CVar + def on_cvar(value) + node = + CVar.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # Def represents defining a regular method on the current self object. 
#
#     def method(param) result end
#
class Def
  # [Backtick | Const | Ident | Kw | Op] the name of the method
  attr_reader :name

  # [Params | Paren] the parameter declaration for the method
  attr_reader :params

  # [BodyStmt] the expressions to be executed by the method
  attr_reader :bodystmt

  # [Location] the location of this node
  attr_reader :location

  def initialize(name:, params:, bodystmt:, location:)
    @name, @params, @bodystmt, @location = name, params, bodystmt, location
  end

  # Prints the node as an s-expression: name, params, then body.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('def')

      [name, params, bodystmt].each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end

  # Serializes the node as a hash keyed by :type, :name, :params, :bodystmt
  # and :loc.
  def to_json(*opts)
    fields = {
      type: :def,
      name: name,
      params: params,
      bodystmt: bodystmt,
      loc: location
    }

    fields.to_json(*opts)
  end
end

# DefEndless represents defining a single-line method since Ruby 3.0+.
#
#     def method = result
#
class DefEndless
  # [Backtick | Const | Ident | Kw | Op] the name of the method
  attr_reader :name

  # [Paren] the parameter declaration for the method
  attr_reader :paren

  # [untyped] the expression to be executed by the method
  attr_reader :statement

  # [Location] the location of this node
  attr_reader :location

  def initialize(name:, paren:, statement:, location:)
    @name, @paren, @statement, @location = name, paren, statement, location
  end

  # Prints the node as an s-expression: name, paren, then statement.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('def_endless')

      [name, paren, statement].each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end

  # Serializes the node; the statement is emitted under the :stmt key.
  def to_json(*opts)
    fields = {
      type: :def_endless,
      name: name,
      paren: paren,
      stmt: statement,
      loc: location
    }

    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_def: (
#     (Backtick | Const | Ident | Kw | Op) name,
#     (Params | Paren) params,
#     untyped bodystmt
#   ) -> Def | DefEndless
def on_def(name, params, bodystmt)
  # The method name may itself be a keyword token (as in `def class`); drop it
  # from the stack so it is never mistaken for a real keyword later on.
  tokens.delete(name)

  # The `def` keyword opens both the single-line and the regular form.
  def_keyword = find_token(Kw, 'def')

  # Anything other than a BodyStmt means this is a single-line method.
  unless bodystmt.is_a?(BodyStmt)
    return(
      DefEndless.new(
        name: name,
        paren: params,
        statement: bodystmt,
        location: def_keyword.location.to(bodystmt.location)
      )
    )
  end

  # An empty Params node gets a zero-width location placed right after the
  # method name.
  if params.is_a?(Params) && params.empty?
    anchor = name.location.end_char
    params =
      Params.new(
        location:
          Location.new(
            start_line: params.location.start_line,
            start_char: anchor,
            end_line: params.location.end_line,
            end_char: anchor
          )
      )
  end

  end_keyword = find_token(Kw, 'end')
  body_start = find_next_statement_start(params.location.end_char)
  bodystmt.bind(body_start, end_keyword.location.start_char)

  Def.new(
    name: name,
    params: params,
    bodystmt: bodystmt,
    location: def_keyword.location.to(end_keyword.location)
  )
end

# Defined represents the use of the +defined?+ operator. It can be used with
# and without parentheses.
#
#     defined?(variable)
#
class Defined
  # [untyped] the value being sent to the keyword
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  # Prints the node as an s-expression: its name followed by the value.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('defined')
      q.breakable
      q.pp(value)
    end
  end

  # Serializes the node as a hash with :type, :value and :loc keys.
  def to_json(*opts)
    fields = { type: :defined, value: value, loc: location }
    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_defined: (untyped value) -> Defined
def on_defined(value)
  keyword = find_token(Kw, 'defined?')
  between = source[keyword.location.end_char...value.location.start_char]

  # When an opening parenthesis sits between the keyword and the value, both
  # parentheses are consumed and the node ends at the closing one; otherwise
  # it ends at the value.
  last =
    if between.include?('(')
      find_token(LParen)
      find_token(RParen)
    else
      value
    end

  Defined.new(value: value, location: keyword.location.to(last.location))
end

# Defs represents defining a singleton method on an object.
#
#     def object.method(param) result end
#
class Defs
  # [untyped] the target where the method is being defined
  attr_reader :target

  # [Op | Period] the operator being used to declare the method
  attr_reader :operator

  # [Backtick | Const | Ident | Kw | Op] the name of the method
  attr_reader :name

  # [Params | Paren] the parameter declaration for the method
  attr_reader :params

  # [BodyStmt] the expressions to be executed by the method
  attr_reader :bodystmt

  # [Location] the location of this node
  attr_reader :location

  def initialize(target:, operator:, name:, params:, bodystmt:, location:)
    @target, @operator, @name = target, operator, name
    @params, @bodystmt, @location = params, bodystmt, location
  end

  # Prints the node as an s-expression: target, operator, name, params, body.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('defs')

      [target, operator, name, params, bodystmt].each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end

  # Serializes the node; the operator is emitted under the :op key.
  def to_json(*opts)
    fields = {
      type: :defs,
      target: target,
      op: operator,
      name: name,
      params: params,
      bodystmt: bodystmt,
      loc: location
    }

    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_defs: (
#     untyped target,
#     (Op | Period) operator,
#     (Backtick | Const | Ident | Kw | Op) name,
#     (Params | Paren) params,
#     BodyStmt bodystmt
#   ) -> Defs
def on_defs(target, operator, name, params, bodystmt)
  # The method name may itself be a keyword token (as in `def class`); drop it
  # from the stack so it is never mistaken for a real keyword later on.
  tokens.delete(name)

  # An empty Params node gets a zero-width location placed right after the
  # method name.
  if params.is_a?(Params) && params.empty?
    anchor = name.location.end_char
    params =
      Params.new(
        location:
          Location.new(
            start_line: params.location.start_line,
            start_char: anchor,
            end_line: params.location.end_line,
            end_char: anchor
          )
      )
  end

  def_keyword = find_token(Kw, 'def')
  end_keyword = find_token(Kw, 'end')

  body_start = find_next_statement_start(params.location.end_char)
  bodystmt.bind(body_start, end_keyword.location.start_char)

  Defs.new(
    target: target,
    operator: operator,
    name: name,
    params: params,
    bodystmt: bodystmt,
    location: def_keyword.location.to(end_keyword.location)
  )
end

# DoBlock represents passing a block to a method call using the +do+ and +end+
# keywords.
#
#     method do |value|
#     end
#
class DoBlock
  # [Kw] the do keyword that opens this block
  attr_reader :keyword

  # [nil | BlockVar] the optional variable declaration within this block
  attr_reader :block_var

  # [BodyStmt] the expressions to be executed within this block
  attr_reader :bodystmt

  # [Location] the location of this node
  attr_reader :location

  def initialize(keyword:, block_var:, bodystmt:, location:)
    @keyword, @block_var, @bodystmt, @location =
      keyword, block_var, bodystmt, location
  end

  # Prints the node as an s-expression; the block variable is only printed
  # when one is present, and the keyword is never printed.
  def pretty_print(q)
    children = block_var ? [block_var, bodystmt] : [bodystmt]

    q.group(2, '(', ')') do
      q.text('do_block')

      children.each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end

  # Serializes the node as a hash keyed by :type, :keyword, :block_var,
  # :bodystmt and :loc.
  def to_json(*opts)
    fields = {
      type: :do_block,
      keyword: keyword,
      block_var: block_var,
      bodystmt: bodystmt,
      loc: location
    }

    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_do_block: (BlockVar block_var, BodyStmt bodystmt) -> DoBlock
def on_do_block(block_var, bodystmt)
  do_keyword = find_token(Kw, 'do')
  end_keyword = find_token(Kw, 'end')

  # The body starts after the block variable when there is one, otherwise
  # after the `do` keyword.
  opener = block_var || do_keyword
  body_start = find_next_statement_start(opener.location.end_char)
  bodystmt.bind(body_start, end_keyword.location.start_char)

  DoBlock.new(
    keyword: do_keyword,
    block_var: block_var,
    bodystmt: bodystmt,
    location: do_keyword.location.to(end_keyword.location)
  )
end

# Dot2 represents using the .. operator between two expressions. Usually this
# is to create a range object.
#
#     1..2
#
# Sometimes this operator is used to create a flip-flop.
#
#     if value == 5 .. value == 10
#     end
#
# One of the sides of the expression may be nil, but not both.
class Dot2
  # [nil | untyped] the left side of the expression
  attr_reader :left

  # [nil | untyped] the right side of the expression
  attr_reader :right

  # [Location] the location of this node
  attr_reader :location

  def initialize(left:, right:, location:)
    @left, @right, @location = left, right, location
  end

  # Prints the node as an s-expression, skipping whichever side is missing.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('dot2')

      [left, right].each do |side|
        next unless side

        q.breakable
        q.pp(side)
      end
    end
  end

  # Serializes the node as a hash with :type, :left, :right and :loc keys.
  def to_json(*opts)
    fields = { type: :dot2, left: left, right: right, loc: location }
    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_dot2: ((nil | untyped) left, (nil | untyped) right) -> Dot2
#
# The operator token stands in for whichever side is missing when the
# location of the node is computed.
def on_dot2(left, right)
  operator = find_token(Op, '..')
  first = left || operator
  last = right || operator

  Dot2.new(left: left, right: right, location: first.location.to(last.location))
end

# Dot3 represents using the ... operator between two expressions. Usually this
# is to create a range object. It's effectively the same event as the Dot2
# node but with this operator you're asking Ruby to omit the final value.
#
#     1...2
#
# Like Dot2 it can also be used to create a flip-flop.
#
#     if value == 5 ... value == 10
#     end
#
# One of the sides of the expression may be nil, but not both.
class Dot3
  # [nil | untyped] the left side of the expression
  attr_reader :left

  # [nil | untyped] the right side of the expression
  attr_reader :right

  # [Location] the location of this node
  attr_reader :location

  def initialize(left:, right:, location:)
    @left, @right, @location = left, right, location
  end

  # Prints the node as an s-expression, skipping whichever side is missing.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('dot3')

      [left, right].each do |side|
        next unless side

        q.breakable
        q.pp(side)
      end
    end
  end

  # Serializes the node as a hash with :type, :left, :right and :loc keys.
  def to_json(*opts)
    fields = { type: :dot3, left: left, right: right, loc: location }
    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_dot3: ((nil | untyped) left, (nil | untyped) right) -> Dot3
#
# The operator token stands in for whichever side is missing when the
# location of the node is computed.
def on_dot3(left, right)
  operator = find_token(Op, '...')
  first = left || operator
  last = right || operator

  Dot3.new(left: left, right: right, location: first.location.to(last.location))
end

# DynaSymbol represents a symbol literal that uses quotes to dynamically
# define its value.
#
#     :"#{variable}"
#
# They can also be used as a special kind of dynamic hash key, as in:
#
#     { "#{key}": value }
#
class DynaSymbol
  # [Array[ StringDVar | StringEmbExpr | TStringContent ]] the parts of the
  # dynamic symbol
  attr_reader :parts

  # [String] the quote used to delimit the dynamic symbol
  attr_reader :quote

  # [Location] the location of this node
  attr_reader :location

  def initialize(parts:, quote:, location:)
    @parts, @quote, @location = parts, quote, location
  end

  # Prints the node as an s-expression whose single child is the
  # parenthesized, comma-separated list of parts.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('dyna_symbol')
      q.breakable

      q.group(2, '(', ')') do
        q.seplist(parts) { |part| q.pp(part) }
      end
    end
  end

  # Serializes the node as a hash with :type, :parts, :quote and :loc keys.
  def to_json(*opts)
    fields = { type: :dyna_symbol, parts: parts, quote: quote, loc: location }
    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_dyna_symbol: (StringContent string_content) -> DynaSymbol
def on_dyna_symbol(string_content)
  # Without a pending symbol-beginning token this is a dynamic symbol used as
  # a hash key: it runs from the string opening to the label end, and the
  # quote is the first character of the label end.
  unless find_token(SymBeg, consume: false)
    opening = find_token(TStringBeg)
    closing = find_token(LabelEnd)

    return(
      DynaSymbol.new(
        parts: string_content.parts,
        quote: closing.value[0],
        location: opening.location.to(closing.location)
      )
    )
  end

  # Otherwise it is a regular dynamic symbol delimited by the symbol
  # beginning and the string ending tokens.
  opening = find_token(SymBeg)
  closing = find_token(TStringEnd)

  DynaSymbol.new(
    quote: opening.value,
    parts: string_content.parts,
    location: opening.location.to(closing.location)
  )
end

# Else represents the end of an +if+, +unless+, or +case+ chain.
#
#     if variable
#     else
#     end
#
class Else
  # [Statements] the expressions to be executed
  attr_reader :statements

  # [Location] the location of this node
  attr_reader :location

  def initialize(statements:, location:)
    @statements, @location = statements, location
  end

  # Prints the node as an s-expression: its name followed by the statements.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('else')
      q.breakable
      q.pp(statements)
    end
  end

  # Serializes the node; the statements are emitted under the :stmts key.
  def to_json(*opts)
    fields = { type: :else, stmts: statements, loc: location }
    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_else: (Statements statements) -> Else
def on_else(statements)
  else_keyword = find_token(Kw, 'else')

  # The clause is closed by the most recent `end` or `ensure` keyword. An
  # `end` is consumed here, whereas an `ensure` is left on the stack for the
  # ensure handler.
  position =
    tokens.rindex do |token|
      token.is_a?(Kw) && %w[end ensure].include?(token.value)
    end

  candidate = tokens[position]
  closer =
    if candidate.value == 'end'
      tokens.delete_at(position)
    else
      candidate
    end

  statements.bind(else_keyword.location.end_char, closer.location.start_char)

  Else.new(
    statements: statements,
    location: else_keyword.location.to(closer.location)
  )
end

# Elsif represents another clause in an +if+ or +unless+ chain.
#
#     if variable
#     elsif other_variable
#     end
#
class Elsif
  # [untyped] the expression to be checked
  attr_reader :predicate

  # [Statements] the expressions to be executed
  attr_reader :statements

  # [nil | Elsif | Else] the next clause in the chain
  attr_reader :consequent

  # [Location] the location of this node
  attr_reader :location

  def initialize(predicate:, statements:, consequent:, location:)
    @predicate, @statements, @consequent, @location =
      predicate, statements, consequent, location
  end

  # Prints the node as an s-expression; the consequent is only printed when
  # one is present.
  def pretty_print(q)
    children = [predicate, statements]
    children << consequent if consequent

    q.group(2, '(', ')') do
      q.text('elsif')

      children.each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end

  # Serializes the node using the abbreviated :pred, :stmts and :cons keys.
  def to_json(*opts)
    fields = {
      type: :elsif,
      pred: predicate,
      stmts: statements,
      cons: consequent,
      loc: location
    }

    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_elsif: (
#     untyped predicate,
#     Statements statements,
#     (nil | Elsif | Else) consequent
#   ) -> Elsif
def on_elsif(predicate, statements, consequent)
  elsif_keyword = find_token(Kw, 'elsif')

  # The clause ends at the next clause in the chain when there is one,
  # otherwise at the `end` keyword (which is consumed).
  closer = consequent || find_token(Kw, 'end')

  statements.bind(predicate.location.end_char, closer.location.start_char)

  Elsif.new(
    predicate: predicate,
    statements: statements,
    consequent: consequent,
    location: elsif_keyword.location.to(closer.location)
  )
end

# EmbDoc represents a multi-line comment.
#
#     =begin
#     first line
#     second line
#     =end
#
class EmbDoc
  # [String] the contents of the comment
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  # Always false for this node.
  def inline?
    false
  end

  # Prints the node as an s-expression: its name followed by the value.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('embdoc')
      q.breakable
      q.pp(value)
    end
  end

  # Serializes the node as a hash with :type, :value and :loc keys.
  def to_json(*opts)
    fields = { type: :embdoc, value: value, loc: location }
    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_embdoc: (String value) -> EmbDoc
#
# Appends the given text to the comment currently being accumulated.
def on_embdoc(value)
  @embdoc.value << value
  @embdoc
end

# :call-seq:
#   on_embdoc_beg: (String value) -> EmbDoc
#
# Starts accumulating a new comment at the current scanner position.
def on_embdoc_beg(value)
  start = Location.fixed(line: lineno, char: char_pos)
  @embdoc = EmbDoc.new(value: value, location: start)
end

# :call-seq:
#   on_embdoc_end: (String value) -> EmbDoc
#
# Closes the comment being accumulated: the chomped closing text is appended
# to its value, the finished node is recorded in the list of comments and the
# accumulator is reset.
def on_embdoc_end(value)
  opening = @embdoc.location

  finished =
    EmbDoc.new(
      value: @embdoc.value << value.chomp,
      location:
        Location.new(
          start_line: opening.start_line,
          start_char: opening.start_char,
          end_line: lineno,
          end_char: char_pos + value.length - 1
        )
    )

  @comments << finished
  @embdoc = nil

  finished
end

# EmbExprBeg represents the beginning token for using interpolation inside of
# a parent node that accepts string content (like a string or regular
# expression).
#
#     "Hello, #{person}!"
#
class EmbExprBeg
  # [String] the #{ used in the string
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end
end

# :call-seq:
#   on_embexpr_beg: (String value) -> EmbExprBeg
#
# Builds the token at the current scanner position, pushes it onto the token
# stack and returns it.
def on_embexpr_beg(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  EmbExprBeg.new(value: value, location: location).tap { |node| tokens << node }
end

# EmbExprEnd represents the ending token for using interpolation inside of a
# parent node that accepts string content (like a string or regular
# expression).
#
#     "Hello, #{person}!"
#
class EmbExprEnd
  # [String] the } used in the string
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end
end

# :call-seq:
#   on_embexpr_end: (String value) -> EmbExprEnd
#
# Builds the token at the current scanner position, pushes it onto the token
# stack and returns it.
def on_embexpr_end(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  EmbExprEnd.new(value: value, location: location).tap { |node| tokens << node }
end

# EmbVar represents the use of shorthand interpolation for an instance, class,
# or global variable into a parent node that accepts string content (like a
# string or regular expression).
#
#     "#@variable"
#
# In the example above, an EmbVar node represents the # because it forces
# @variable to be interpolated.
class EmbVar
  # [String] the # used in the string
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end
end

# :call-seq:
#   on_embvar: (String value) -> EmbVar
#
# Builds the token at the current scanner position, pushes it onto the token
# stack and returns it.
def on_embvar(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  EmbVar.new(value: value, location: location).tap { |node| tokens << node }
end

# Ensure represents the use of the +ensure+ keyword and its subsequent
# statements.
#
#     begin
#     ensure
#     end
#
class Ensure
  # [Kw] the ensure keyword that began this node
  attr_reader :keyword

  # [Statements] the expressions to be executed
  attr_reader :statements

  # [Location] the location of this node
  attr_reader :location

  def initialize(keyword:, statements:, location:)
    @keyword, @statements, @location = keyword, statements, location
  end

  # Prints the node as an s-expression: its name followed by the statements
  # (the keyword itself is not printed).
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('ensure')
      q.breakable
      q.pp(statements)
    end
  end

  # Serializes the node; the statements are emitted under the :stmts key.
  def to_json(*opts)
    fields = {
      type: :ensure,
      keyword: keyword,
      stmts: statements,
      loc: location
    }

    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_ensure: (Statements statements) -> Ensure
def on_ensure(statements)
  ensure_keyword = find_token(Kw, 'ensure')

  # The `end` keyword is only peeked at, not consumed, because consuming it
  # would break def..ensure..end chains.
  end_keyword = find_token(Kw, 'end', consume: false)

  body_start = find_next_statement_start(ensure_keyword.location.end_char)
  statements.bind(body_start, end_keyword.location.start_char)

  Ensure.new(
    keyword: ensure_keyword,
    statements: statements,
    location: ensure_keyword.location.to(end_keyword.location)
  )
end

# ExcessedComma represents a trailing comma in a list of block parameters. It
# changes the block parameters such that they will destructure.
#
#     [[1, 2, 3], [2, 3, 4]].each do |first, second,|
#     end
#
# In the above example, an ExcessedComma node would appear in the third
# position of the Params node that is used to declare that block. The third
# position typically represents a rest-type parameter, but in this case is
# used to indicate that a trailing comma was used.
class ExcessedComma
  # [String] the comma
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  # Prints the node as an s-expression: its name followed by the value.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('excessed_comma')
      q.breakable
      q.pp(value)
    end
  end

  # Serializes the node as a hash with :type, :value and :loc keys.
  def to_json(*opts)
    fields = { type: :excessed_comma, value: value, loc: location }
    fields.to_json(*opts)
  end
end

# The handler for this event accepts no parameters (though in previous
# versions of Ruby it accepted a string literal with a value of ",").
#
# :call-seq:
#   on_excessed_comma: () -> ExcessedComma
def on_excessed_comma(*)
  # The node takes both its value and its location from the consumed comma.
  find_token(Comma).then do |comma|
    ExcessedComma.new(value: comma.value, location: comma.location)
  end
end

# FCall represents the piece of a method call that comes before any arguments
# (i.e., just the name of the method). It is used in places where the parser
# is sure that it is a method call and not potentially a local variable.
#
#     method(argument)
#
# In the above example, it's referring to the +method+ segment.
class FCall
  # [Const | Ident] the name of the method
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  # Prints the node as an s-expression: its name followed by the value.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('fcall')
      q.breakable
      q.pp(value)
    end
  end

  # Serializes the node as a hash with :type, :value and :loc keys.
  def to_json(*opts)
    fields = { type: :fcall, value: value, loc: location }
    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_fcall: ((Const | Ident) value) -> FCall
#
# The resulting node reuses the location of the wrapped name.
def on_fcall(value)
  FCall.new(value: value, location: value.location)
end

# Field is always the child of an assignment. It represents assigning to a
# “field” on an object.
#
#     object.variable = value
#
class Field
  # [untyped] the parent object that owns the field being assigned
  attr_reader :parent

  # [:"::" | Op | Period] the operator being used for the assignment
  attr_reader :operator

  # [Const | Ident] the name of the field being assigned
  attr_reader :name

  # [Location] the location of this node
  attr_reader :location

  def initialize(parent:, operator:, name:, location:)
    @parent, @operator, @name, @location = parent, operator, name, location
  end

  # Prints the node as an s-expression: parent, operator, then name.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('field')

      [parent, operator, name].each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end

  # Serializes the node; the operator is emitted under the :op key.
  def to_json(*opts)
    fields = {
      type: :field,
      parent: parent,
      op: operator,
      name: name,
      loc: location
    }

    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_field: (
#     untyped parent,
#     (:"::" | Op | Period) operator,
#     (Const | Ident) name
#   ) -> Field
#
# The node spans from the start of the parent to the end of the name.
def on_field(parent, operator, name)
  span = parent.location.to(name.location)
  Field.new(parent: parent, operator: operator, name: name, location: span)
end

# FloatLiteral represents a floating point number literal.
#
#     1.0
#
class FloatLiteral
  # [String] the value of the floating point number literal
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  # Prints the node as an s-expression: (float <value>).
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('float')
      q.breakable
      q.pp(value)
    end
  end

  # Serializes the node as a hash with :type, :value and :loc keys.
  def to_json(*opts)
    fields = { type: :float, value: value, loc: location }
    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_float: (String value) -> FloatLiteral
#
# Builds the node at the current scanner position, pushes it onto the token
# stack and returns it.
def on_float(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  FloatLiteral.new(value: value, location: location).tap { |node| tokens << node }
end

# FndPtn represents matching against a pattern where you find a pattern in an
# array using the Ruby 3.0+ pattern matching syntax.
#
#     case value
#     in [*, 7, *]
#     end
#
class FndPtn
  # [nil | untyped] the optional constant wrapper
  attr_reader :constant

  # [VarField] the splat on the left-hand side
  attr_reader :left

  # [Array[ untyped ]] the list of positional expressions in the pattern that
  # are being matched
  attr_reader :values

  # [VarField] the splat on the right-hand side
  attr_reader :right

  # [Location] the location of this node
  attr_reader :location

  def initialize(constant:, left:, values:, right:, location:)
    @constant, @left, @values = constant, left, values
    @right, @location = right, location
  end

  # Prints the node as an s-expression: the constant (only when present), the
  # left splat, the parenthesized list of values, then the right splat.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('fndptn')

      [constant, left].each_with_index do |child, position|
        # Only the constant (position 0) is optional.
        next if position.zero? && !child

        q.breakable
        q.pp(child)
      end

      q.breakable
      q.group(2, '(', ')') do
        q.seplist(values) { |value| q.pp(value) }
      end

      q.breakable
      q.pp(right)
    end
  end

  # Serializes the node as a hash keyed by :type, :constant, :left, :values,
  # :right and :loc.
  def to_json(*opts)
    fields = {
      type: :fndptn,
      constant: constant,
      left: left,
      values: values,
      right: right,
      loc: location
    }

    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_fndptn: (
#     (nil | untyped) constant,
#     VarField left,
#     Array[untyped] values,
#     VarField right
#   ) -> FndPtn
#
# The pattern starts at the constant when there is one, otherwise at the
# opening bracket, and ends at the closing bracket.
def on_fndptn(constant, left, values, right)
  beginning = constant || find_token(LBracket)
  ending = find_token(RBracket)

  FndPtn.new(
    constant: constant,
    left: left,
    values: values,
    right: right,
    location: beginning.location.to(ending.location)
  )
end

# For represents using a +for+ loop.
#
#     for value in list do
#     end
#
class For
  # [MLHS | MLHSAddStar | VarField] the variable declaration being used to
  # pull values out of the object being enumerated
  attr_reader :index

  # [untyped] the object being enumerated in the loop
  attr_reader :collection

  # [Statements] the statements to be executed
  attr_reader :statements

  # [Location] the location of this node
  attr_reader :location

  def initialize(index:, collection:, statements:, location:)
    @index = index
    @collection = collection
    @statements = statements
    @location = location
  end

  # Prints the node as an s-expression: index, collection, then statements.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('for')

      q.breakable
      q.pp(index)

      q.breakable
      q.pp(collection)

      q.breakable
      q.pp(statements)
    end
  end

  # Serializes the node; the statements are emitted under the :stmts key.
  def to_json(*opts)
    {
      type: :for,
      index: index,
      collection: collection,
      stmts: statements,
      loc: location
    }.to_json(*opts)
  end
end

# :call-seq:
#   on_for: (
#     (MLHS | MLHSAddStar | VarField) index,
#     untyped collection,
#     Statements statements
#   ) -> For
def on_for(index, collection, statements)
  beginning = find_token(Kw, 'for')
  ending = find_token(Kw, 'end')

  # Consume the do keyword if it exists so that it doesn't get confused for
  # some other block. A `do` that does not sit between the collection and the
  # closing `end` belongs to some other construct (for example an enclosing
  # block): it must stay on the token stack AND must not be used to decide
  # where the body of this loop starts.
  keyword = find_token(Kw, 'do', consume: false)
  if keyword && keyword.location.start_char > collection.location.end_char &&
       keyword.location.end_char < ending.location.start_char
    tokens.delete(keyword)
  else
    keyword = nil
  end

  statements.bind(
    (keyword || collection).location.end_char,
    ending.location.start_char
  )

  For.new(
    index: index,
    collection: collection,
    statements: statements,
    location: beginning.location.to(ending.location)
  )
end

# GVar represents a global variable literal.
#
#     $variable
#
class GVar
  # [String] the name of the global variable
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value = value
    @location = location
  end

  # Prints the node as an s-expression: its name followed by the value.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('gvar')

      q.breakable
      q.pp(value)
    end
  end

  # Serializes the node as a hash with :type, :value and :loc keys.
  def to_json(*opts)
    { type: :gvar, value: value, loc: location }.to_json(*opts)
  end
end

# :call-seq:
#   on_gvar: (String value) -> GVar
#
# Builds the node at the current scanner position, pushes it onto the token
# stack and returns it.
def on_gvar(value)
  node =
    GVar.new(
      value: value,
      location: Location.token(line: lineno, char: char_pos, size: value.size)
    )

  tokens << node
  node
end

# HashLiteral represents a hash literal.
#
#     { key => value }
#
class HashLiteral
  # [nil | AssocListFromArgs] the contents of the hash
  attr_reader :contents

  # [Location] the location of this node
  attr_reader :location

  def initialize(contents:, location:)
    @contents = contents
    @location = location
  end

  # Prints the node as an s-expression: (hash <contents>).
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('hash')

      q.breakable
      q.pp(contents)
    end
  end

  # Serializes the node; the contents are emitted under the :cnts key.
  def to_json(*opts)
    { type: :hash, cnts: contents, loc: location }.to_json(*opts)
  end
end

# :call-seq:
#   on_hash: ((nil | AssocListFromArgs) contents) -> HashLiteral
def on_hash(contents)
  lbrace = find_token(LBrace)
  rbrace = find_token(RBrace)

  if contents
    # Here we're going to expand out the location information for the contents
    # node so that it can grab up any remaining comments inside the hash.
    location =
      Location.new(
        start_line: contents.location.start_line,
        start_char: lbrace.location.end_char,
        end_line: contents.location.end_line,
        end_char: rbrace.location.start_char
      )

    contents = contents.class.new(assocs: contents.assocs, location: location)
  end

  HashLiteral.new(
    contents: contents,
    location: lbrace.location.to(rbrace.location)
  )
end

# Heredoc represents a heredoc string literal.
#
#     <<~DOC
#       contents
#     DOC
#
class Heredoc
  # [HeredocBeg] the opening of the heredoc
  attr_reader :beginning

  # [String] the ending of the heredoc
  attr_reader :ending

  # [Array[ StringEmbExpr | StringDVar | TStringContent ]] the parts of the
  # heredoc string literal
  attr_reader :parts

  # [Location] the location of this node
  attr_reader :location

  def initialize(beginning:, ending: nil, parts: [], location:)
    @beginning = beginning
    @ending = ending
    @parts = parts
    @location = location
  end

  # Prints the node as an s-expression whose single child is the
  # parenthesized, comma-separated list of parts.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('heredoc')

      q.breakable
      q.group(2, '(', ')') { q.seplist(parts) { |part| q.pp(part) } }
    end
  end

  # Serializes the node. The opening is emitted under the :beging key; that
  # spelling is part of the emitted format and is kept as is.
  def to_json(*opts)
    {
      type: :heredoc,
      beging: beginning,
      ending: ending,
      parts: parts,
      loc: location
    }.to_json(*opts)
  end
end

# HeredocBeg represents the beginning declaration of a heredoc.
#
#     <<~DOC
#       contents
#     DOC
#
# In the example above the HeredocBeg node represents <<~DOC.
class HeredocBeg
  # [String] the opening declaration of the heredoc
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  # Prints the node as an s-expression: its name followed by the value.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('heredoc_beg')
      q.breakable
      q.pp(value)
    end
  end

  # Serializes the node as a hash with :type, :value and :loc keys.
  def to_json(*opts)
    fields = { type: :heredoc_beg, value: value, loc: location }
    fields.to_json(*opts)
  end
end

# :call-seq:
#   on_heredoc_beg: (String value) -> HeredocBeg
def on_heredoc_beg(value)
  span = Location.token(line: lineno, char: char_pos, size: value.size + 1)

  # A dedicated node is created for the declaration so that comments placed
  # after the declaration of a heredoc still get printed. A fresh Heredoc
  # sharing the same location is pushed onto the heredoc stack.
  HeredocBeg.new(value: value, location: span).tap do |opening|
    @heredocs << Heredoc.new(beginning: opening, location: span)
  end
end

# :call-seq:
#   on_heredoc_dedent: (StringContent string, Integer width) -> Heredoc
#
# Replaces the most recent heredoc with a copy that carries the parts of the
# given string. The width is not used.
def on_heredoc_dedent(string, width)
  current = @heredocs[-1]

  @heredocs[-1] =
    Heredoc.new(
      beginning: current.beginning,
      ending: current.ending,
      parts: string.parts,
      location: current.location
    )
end

# :call-seq:
#   on_heredoc_end: (String value) -> Heredoc
#
# Replaces the most recent heredoc with a copy that records the chomped
# ending and whose location extends to the current scanner position.
def on_heredoc_end(value)
  current = @heredocs[-1]
  opened_at = current.location

  @heredocs[-1] =
    Heredoc.new(
      beginning: current.beginning,
      ending: value.chomp,
      parts: current.parts,
      location:
        Location.new(
          start_line: opened_at.start_line,
          start_char: opened_at.start_char,
          end_line: lineno,
          end_char: char_pos
        )
    )
end

# HshPtn represents matching against a hash pattern using the Ruby 2.7+
# pattern matching syntax.
#
#     case value
#     in { key: }
#     end
#
class HshPtn
  # [nil | untyped] the optional constant wrapper
  attr_reader :constant

  # [Array[ [Label, untyped] ]] the set of tuples representing the keywords
  # that should be matched against in the pattern
  attr_reader :keywords

  # [nil | VarField] an optional parameter to gather up all remaining keywords
  attr_reader :keyword_rest

  # [Location] the location of this node
  attr_reader :location

  def initialize(constant:, keywords:, keyword_rest:, location:)
    @constant, @keywords, @keyword_rest, @location =
      constant, keywords, keyword_rest, location
  end

  # Prints the node as an s-expression. Each of the three children is only
  # printed when present; the keywords are printed as a parenthesized,
  # comma-separated list.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('hshptn')

      print_child(q, constant) if constant

      if keywords.any?
        q.breakable
        q.group(2, '(', ')') do
          q.seplist(keywords) { |keyword| q.pp(keyword) }
        end
      end

      print_child(q, keyword_rest) if keyword_rest
    end
  end

  # Serializes the node; the rest parameter is emitted under the :kwrest key.
  def to_json(*opts)
    fields = {
      type: :hshptn,
      constant: constant,
      keywords: keywords,
      kwrest: keyword_rest,
      loc: location
    }

    fields.to_json(*opts)
  end

  private

  # Emits a break followed by the pretty-printed child.
  def print_child(q, child)
    q.breakable
    q.pp(child)
  end
end

# :call-seq:
#   on_hshptn: (
#     (nil | untyped) constant,
#     Array[[Label, untyped]] keywords,
#     (nil | VarField) keyword_rest
#   ) -> HshPtn
#
# The node spans from the first to the last non-nil piece among the constant,
# the flattened keyword tuples and the rest parameter.
def on_hshptn(constant, keywords, keyword_rest)
  pieces = [constant, keywords, keyword_rest].flatten(2).compact
  span = pieces.first.location.to(pieces.last.location)

  HshPtn.new(
    constant: constant,
    keywords: keywords,
    keyword_rest: keyword_rest,
    location: span
  )
end

# Ident represents an identifier anywhere in code. It can represent a very
# large number of things, depending on where it is in the syntax tree.
#
#     value
#
class Ident
  # [String] the value of the identifier
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value = value
    @location = location
  end

  # Prints the node as an s-expression: its name followed by the value.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('ident')
      q.breakable
      q.pp(value)
    end
  end

  # Serializes the node with its value tagged as UTF-8.
  #
  # The encoding is forced on a copy: String#force_encoding modifies its
  # receiver, so applying it to the stored value would raise FrozenError for a
  # frozen string and would otherwise change the node as a side effect of
  # serializing it.
  def to_json(*opts)
    {
      type: :ident,
      value: value.dup.force_encoding('UTF-8'),
      loc: location
    }.to_json(*opts)
  end
end

# :call-seq:
#   on_ident: (String value) -> Ident
#
# Builds the node at the current scanner position, pushes it onto the token
# stack and returns it.
def on_ident(value)
  node =
    Ident.new(
      value: value,
      location: Location.token(line: lineno, char: char_pos, size: value.size)
    )

  tokens << node
  node
end

# If represents the first clause in an +if+ chain.
#
#     if predicate
#     end
#
class If
  # [untyped] the expression to be checked
  attr_reader :predicate

  # [Statements] the expressions to be executed
  attr_reader :statements

  # [nil | Elsif | Else] the next clause in the chain
  attr_reader :consequent

  # [Location] the location of this node
  attr_reader :location

  def initialize(predicate:, statements:, consequent:, location:)
    @predicate = predicate
    @statements = statements
    @consequent = consequent
    @location = location
  end

  # Prints the node as an s-expression; the consequent is only printed when
  # one is present.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('if')

      q.breakable
      q.pp(predicate)

      q.breakable
      q.pp(statements)

      if consequent
        q.breakable
        q.pp(consequent)
      end
    end
  end

  # Serializes the node using the abbreviated :pred, :stmts and :cons keys.
  def to_json(*opts)
    {
      type: :if,
      pred: predicate,
      stmts: statements,
      cons: consequent,
      loc: location
    }.to_json(*opts)
  end
end

# :call-seq:
#   on_if: (
#     untyped predicate,
#     Statements statements,
#     (nil | Elsif | Else) consequent
#   ) -> If
def on_if(predicate, statements, consequent)
  beginning = find_token(Kw, 'if')

  # The clause ends at the next clause in the chain when there is one,
  # otherwise at the `end` keyword (which is consumed).
  ending = consequent || find_token(Kw, 'end')

  statements.bind(predicate.location.end_char, ending.location.start_char)

  If.new(
    predicate: predicate,
    statements: statements,
    consequent: consequent,
    location: beginning.location.to(ending.location)
  )
end

# IfOp represents a ternary clause.
#
#     predicate ? truthy : falsy
#
class IfOp
  # [untyped] the expression to be checked
  attr_reader :predicate

  # [untyped] the expression to be executed if the predicate is truthy
  attr_reader :truthy

  # [untyped] the expression to be executed if the predicate is falsy
  attr_reader :falsy

  # [Location] the location of this node
  attr_reader :location

  def initialize(predicate:, truthy:, falsy:, location:)
    @predicate = predicate
    @truthy = truthy
    @falsy = falsy
    @location = location
  end

  # Prints the node as an s-expression: predicate, truthy, then falsy.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('ifop')

      q.breakable
      q.pp(predicate)

      q.breakable
      q.pp(truthy)

      q.breakable
      q.pp(falsy)
    end
  end

  # Serializes the node using the abbreviated :pred, :tthy and :flsy keys.
  def to_json(*opts)
    {
      type: :ifop,
      pred: predicate,
      tthy: truthy,
      flsy: falsy,
      loc: location
    }.to_json(*opts)
  end
end

# :call-seq:
#   on_ifop: (untyped predicate, untyped truthy, untyped falsy) -> IfOp
#
# The node spans from the start of the predicate to the end of the falsy
# branch.
def on_ifop(predicate, truthy, falsy)
  IfOp.new(
    predicate: predicate,
    truthy: truthy,
    falsy: falsy,
    location: predicate.location.to(falsy.location)
  )
end

# IfMod represents the modifier form of an +if+ statement.
#
#   expression if predicate
#
class IfMod
  # statement - [untyped] the expression to be executed
  # predicate - [untyped] the expression to be checked
  # location  - [Location] the location of this node
  attr_reader :statement, :predicate, :location

  def initialize(statement:, predicate:, location:)
    @statement, @predicate, @location = statement, predicate, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('if_mod')

      [statement, predicate].each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end

  def to_json(*opts)
    payload = { type: :if_mod, stmt: statement, pred: predicate, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_if_mod: (untyped predicate, untyped statement) -> IfMod
def on_if_mod(predicate, statement)
  # The return value is intentionally unused.
  # NOTE(review): presumably this call removes the +if+ keyword from the
  # pending token list; find_token is defined outside this chunk - confirm.
  find_token(Kw, 'if')

  span = statement.location.to(predicate.location)
  IfMod.new(statement: statement, predicate: predicate, location: span)
end

# def on_ignored_nl(value)
#   value
# end

# def on_ignored_sp(value)
#   value
# end

# Imaginary represents an imaginary number literal.
#
#   1i
#
class Imaginary
  # value    - [String] the value of the imaginary number literal
  # location - [Location] the location of this node
  attr_reader :value, :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('imaginary')
      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    payload = { type: :imaginary, value: value, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_imaginary: (String value) -> Imaginary
def on_imaginary(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  Imaginary.new(value: value, location: location).tap { |node| tokens << node }
end

# In represents using the +in+ keyword within the Ruby 2.7+ pattern matching
# syntax.
#
#   case value
#   in pattern
#   end
#
class In
  # pattern    - [untyped] the pattern to check against
  # statements - [Statements] the expressions to execute if the pattern matched
  # consequent - [nil | In | Else] the next clause in the chain
  # location   - [Location] the location of this node
  attr_reader :pattern, :statements, :consequent, :location

  def initialize(pattern:, statements:, consequent:, location:)
    @pattern, @statements = pattern, statements
    @consequent, @location = consequent, location
  end

  # Prints the pattern and statements, then the consequent if there is one.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('in')

      [pattern, statements].each do |child|
        q.breakable
        q.pp(child)
      end

      next unless consequent

      q.breakable
      q.pp(consequent)
    end
  end

  def to_json(*opts)
    payload = {
      type: :in,
      pattern: pattern,
      stmts: statements,
      cons: consequent,
      loc: location
    }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_in: (RAssign pattern, nil statements, nil consequent) -> RAssign
#        | (
#            untyped pattern,
#            Statements statements,
#            (nil | In | Else) consequent
#          ) -> In
def on_in(pattern, statements, consequent)
  # Without statements this is a rightward assignment, so the pattern is
  # handed back untouched.
  return pattern if !statements

  keyword = find_token(Kw, 'in')
  closer = consequent || find_token(Kw, 'end')

  statements.bind(keyword.location.end_char, closer.location.start_char)

  In.new(
    pattern: pattern,
    statements: statements,
    consequent: consequent,
    location: keyword.location.to(closer.location)
  )
end

# Int represents an integer number literal.
#
#   1
#
class Int
  # value    - [String] the value of the integer
  # location - [Location] the location of this node
  attr_reader :value, :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('int')
      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    payload = { type: :int, value: value, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_int: (String value) -> Int
def on_int(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  Int.new(value: value, location: location).tap { |node| tokens << node }
end

# IVar represents an instance variable literal.
#
#   @variable
#
class IVar
  # value    - [String] the name of the instance variable
  # location - [Location] the location of this node
  attr_reader :value, :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('ivar')
      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    payload = { type: :ivar, value: value, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_ivar: (String value) -> IVar
def on_ivar(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  IVar.new(value: value, location: location).tap { |node| tokens << node }
end

# Kw represents the use of a keyword. It can be almost anywhere in the syntax
# tree, so you end up seeing it quite a lot.
#
#   if value
#   end
#
# In the above example, there would be two Kw nodes: one for the if and one
# for the end. Note that anything that matches the list of keywords in Ruby
# will use a Kw, so if you use a keyword in a symbol literal for instance:
#
#   :if
#
# then the contents of the symbol node will contain a Kw node.
class Kw
  # value    - [String] the value of the keyword
  # location - [Location] the location of this node
  attr_reader :value, :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('kw')
      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    payload = { type: :kw, value: value, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_kw: (String value) -> Kw
def on_kw(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  Kw.new(value: value, location: location).tap { |node| tokens << node }
end

# KwRestParam represents defining a parameter in a method definition that
# accepts all remaining keyword parameters.
#
#   def method(**kwargs) end
#
class KwRestParam
  # name     - [nil | Ident] the name of the parameter
  # location - [Location] the location of this node
  attr_reader :name, :location

  def initialize(name:, location:)
    @name, @location = name, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('kwrest_param')
      q.breakable
      q.pp(name)
    end
  end

  def to_json(*opts)
    payload = { type: :kwrest_param, name: name, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_kwrest_param: ((nil | Ident) name) -> KwRestParam
def on_kwrest_param(name)
  operator = find_token(Op, '**').location
  # An anonymous ** covers just the operator; a named one extends to the name.
  span = name ? operator.to(name.location) : operator

  KwRestParam.new(name: name, location: span)
end

# Label represents the use of an identifier to associate with an object. You
# can find it in a hash key, as in:
#
#   { key: value }
#
# In this case "key:" would be the body of the label. You can also find it in
# pattern matching, as in:
#
#   case value
#   in key:
#   end
#
# In this case "key:" would be the body of the label.
class Label
  # value    - [String] the value of the label
  # location - [Location] the location of this node
  attr_reader :value, :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  # Prints the label as a symbol-like token: a leading colon followed by the
  # value with its final character dropped.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('label')
      q.breakable
      q.text(':')
      q.text(value[0...-1])
    end
  end

  def to_json(*opts)
    payload = { type: :label, value: value, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_label: (String value) -> Label
def on_label(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  Label.new(value: value, location: location).tap { |node| tokens << node }
end

# LabelEnd represents the end of a dynamic symbol.
#
#   { "key": value }
#
# In the example above, LabelEnd represents the "\":" token at the end of the
# hash key. This node is important for determining the type of quote being
# used by the label.
class LabelEnd
  # value    - [String] the end of the label
  # location - [Location] the location of this node
  attr_reader :value, :location

  def initialize(value:, location:)
    @value, @location = value, location
  end
end

# :call-seq:
#   on_label_end: (String value) -> LabelEnd
def on_label_end(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  LabelEnd.new(value: value, location: location).tap { |node| tokens << node }
end

# Lambda represents using a lambda literal (not the lambda method call).
#
#   ->(value) { value * 2 }
#
class Lambda
  # params     - [Params | Paren] the parameter declaration for this lambda
  # statements - [BodyStmt | Statements] the expressions to be executed in this lambda
  # location   - [Location] the location of this node
  attr_reader :params, :statements, :location

  def initialize(params:, statements:, location:)
    @params, @statements, @location = params, statements, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('lambda')

      [params, statements].each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end

  def to_json(*opts)
    payload = { type: :lambda, params: params, stmts: statements, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_lambda: (
#     (Params | Paren) params,
#     (BodyStmt | Statements) statements
#   ) -> Lambda
def on_lambda(params, statements)
  arrow = find_token(TLambda)

  # A pending TLamBeg token selects the braces form; it is looked up without
  # being consumed and then removed from the token list by hand. Otherwise
  # the body is delimited by the do and end keywords.
  opening, closing =
    if (brace = find_token(TLamBeg, consume: false))
      [tokens.delete(brace), find_token(RBrace)]
    else
      [find_token(Kw, 'do'), find_token(Kw, 'end')]
    end

  statements.bind(opening.location.end_char, closing.location.start_char)

  Lambda.new(
    params: params,
    statements: statements,
    location: arrow.location.to(closing.location)
  )
end

# LBrace represents the use of a left brace, i.e., {.
class LBrace
  # value    - [String] the left brace
  # location - [Location] the location of this node
  attr_reader :value, :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('lbrace')
      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    payload = { type: :lbrace, value: value, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_lbrace: (String value) -> LBrace
def on_lbrace(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  LBrace.new(value: value, location: location).tap { |node| tokens << node }
end

# LBracket represents the use of a left bracket, i.e., [.
class LBracket
  # value    - [String] the left bracket
  # location - [Location] the location of this node
  attr_reader :value, :location

  def initialize(value:, location:)
    @value, @location = value, location
  end
end

# :call-seq:
#   on_lbracket: (String value) -> LBracket
def on_lbracket(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  LBracket.new(value: value, location: location).tap { |node| tokens << node }
end

# LParen represents the use of a left parenthesis, i.e., (.
class LParen
  # value    - [String] the left parenthesis
  # location - [Location] the location of this node
  attr_reader :value, :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('lparen')
      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    payload = { type: :lparen, value: value, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_lparen: (String value) -> LParen
def on_lparen(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  LParen.new(value: value, location: location).tap { |node| tokens << node }
end

# def on_magic_comment(key, value)
#   [key, value]
# end

# MAssign is a parent node of any kind of multiple assignment. This includes
# splitting out variables on the left like:
#
#   first, second, third = value
#
# as well as splitting out variables on the right, as in:
#
#   value = first, second, third
#
# Both sides support splats, as well as variables following them. There's also
# destructuring behavior that you can achieve with the following:
#
#   first, = value
#
class MAssign
  # target   - [MLHS | MLHSAddPost | MLHSAddStar | MLHSParen] the target of the
  #            multiple assignment
  # value    - [untyped] the value being assigned
  # location - [Location] the location of this node
  attr_reader :target, :value, :location

  def initialize(target:, value:, location:)
    @target, @value, @location = target, value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('massign')

      [target, value].each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end

  def to_json(*opts)
    payload = { type: :massign, target: target, value: value, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_massign: (
#     (MLHS | MLHSAddPost | MLHSAddStar | MLHSParen) target,
#     untyped value
#   ) -> MAssign
def on_massign(target, value)
  # Look at the raw source between the target and the value: if it begins
  # with a comma, the target was written with a trailing comma.
  between = source[target.location.end_char...value.location.start_char]
  target.comma = true if between.strip.start_with?(',')

  span = target.location.to(value.location)
  MAssign.new(target: target, value: value, location: span)
end

# MethodAddArg represents a method call with arguments and parentheses.
#
#   method(argument)
#
# MethodAddArg can also represent with a method on an object, as in:
#
#   object.method(argument)
#
# Finally, MethodAddArg can represent calling a method with no receiver that
# ends in a ?. In this case, the parser knows it's a method call and not a
# local variable, so it uses a MethodAddArg node as opposed to a VCall node,
# as in:
#
#   method?
#
class MethodAddArg
  # call      - [Call | FCall] the method call
  # arguments - [ArgParen | Args | ArgsAddBlock] the arguments to the method call
  # location  - [Location] the location of this node
  attr_reader :call, :arguments, :location

  def initialize(call:, arguments:, location:)
    @call, @arguments, @location = call, arguments, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('method_add_arg')

      [call, arguments].each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end

  def to_json(*opts)
    payload = { type: :method_add_arg, call: call, args: arguments, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_method_add_arg: (
#     (Call | FCall) call,
#     (ArgParen | Args | ArgsAddBlock) arguments
#   ) -> MethodAddArg
def on_method_add_arg(call, arguments)
  # A bare Args node does not extend the location; anything else does.
  span =
    if arguments.is_a?(Args)
      call.location
    else
      call.location.to(arguments.location)
    end

  MethodAddArg.new(call: call, arguments: arguments, location: span)
end

# MethodAddBlock represents a method call with a block argument.
#
#   method {}
#
class MethodAddBlock
  # call     - [Call | Command | CommandCall | FCall | MethodAddArg] the method call
  # block    - [BraceBlock | DoBlock] the block being sent with the method call
  # location - [Location] the location of this node
  attr_reader :call, :block, :location

  def initialize(call:, block:, location:)
    @call, @block, @location = call, block, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('method_add_block')

      [call, block].each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end

  def to_json(*opts)
    payload = { type: :method_add_block, call: call, block: block, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_method_add_block: (
#     (Call | Command | CommandCall | FCall | MethodAddArg) call,
#     (BraceBlock | DoBlock) block
#   ) -> MethodAddBlock
def on_method_add_block(call, block)
  span = call.location.to(block.location)
  MethodAddBlock.new(call: call, block: block, location: span)
end

# MLHS represents a list of values being destructured on the left-hand side
# of a multiple assignment.
#
#   first, second, third = value
#
class MLHS
  # parts    - Array[ARefField | Field | Ident | MLHSParen | VarField] the parts
  #            of the left-hand side of a multiple assignment
  # location - [Location] the location of this node
  attr_reader :parts, :location

  # [boolean] whether or not there is a trailing comma at the end of this
  # list, which impacts destructuring. It's an attr_accessor so that while
  # the syntax tree is being built it can be set by its parent node
  attr_accessor :comma

  def initialize(parts:, comma: false, location:)
    @parts, @comma, @location = parts, comma, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('mlhs')
      q.breakable
      q.group(2, '(', ')') do
        q.seplist(parts) { |part| q.pp(part) }
      end
    end
  end

  def to_json(*opts)
    payload = { type: :mlhs, parts: parts, comma: comma, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_mlhs_add: (
#     MLHS mlhs,
#     (ARefField | Field | Ident | MLHSParen | VarField) part
#   ) -> MLHS
def on_mlhs_add(mlhs, part)
  # The first part defines the location on its own.
  return MLHS.new(parts: [part], location: part.location) if mlhs.parts.empty?

  # Later parts are appended to the existing list, and the location is
  # widened to reach the new part.
  MLHS.new(parts: mlhs.parts << part, location: mlhs.location.to(part.location))
end

# MLHSAddPost represents adding another set of variables onto a list of
# assignments after a splat variable within a multiple assignment.
#
#   left, *middle, right = values
#
class MLHSAddPost
  # star     - [MLHSAddStar] the value being starred
  # mlhs     - [MLHS] the values after the star
  # location - [Location] the location of this node
  attr_reader :star, :mlhs, :location

  def initialize(star:, mlhs:, location:)
    @star, @mlhs, @location = star, mlhs, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('mlhs_add_post')

      [star, mlhs].each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end

  def to_json(*opts)
    payload = { type: :mlhs_add_post, star: star, mlhs: mlhs, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_mlhs_add_post: (MLHSAddStar star, MLHS mlhs) -> MLHSAddPost
def on_mlhs_add_post(star, mlhs)
  span = star.location.to(mlhs.location)
  MLHSAddPost.new(star: star, mlhs: mlhs, location: span)
end

# MLHSAddStar represents a splatted variable inside of a multiple assignment
# on the left hand side.
#
#   first, *rest = values
#
class MLHSAddStar
  # mlhs     - [MLHS] the values before the starred expression
  # star     - [nil | ARefField | Field | Ident | VarField] the expression being
  #            splatted
  # location - [Location] the location of this node
  attr_reader :mlhs, :star, :location

  def initialize(mlhs:, star:, location:)
    @mlhs, @star, @location = mlhs, star, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('mlhs_add_star')

      [mlhs, star].each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end

  def to_json(*opts)
    payload = { type: :mlhs_add_star, mlhs: mlhs, star: star, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_mlhs_add_star: (
#     MLHS mlhs,
#     (nil | ARefField | Field | Ident | VarField) part
#   ) -> MLHSAddStar
def on_mlhs_add_star(mlhs, part)
  operator = find_token(Op, '*')
  # An anonymous splat ends at the operator itself.
  last = part || operator

  MLHSAddStar.new(
    mlhs: mlhs,
    star: part,
    location: operator.location.to(last.location)
  )
end

# :call-seq:
#   on_mlhs_new: () -> MLHS
def on_mlhs_new
  here = Location.fixed(line: lineno, char: char_pos)
  MLHS.new(parts: [], location: here)
end

# MLHSParen represents parentheses being used to destruct values in a multiple
# assignment on the left hand side.
#
#   (left, right) = value
#
class MLHSParen
  # contents - [MLHS | MLHSAddPost | MLHSAddStar | MLHSParen] the contents
  #            inside of the parentheses
  # location - [Location] the location of this node
  attr_reader :contents, :location

  def initialize(contents:, location:)
    @contents, @location = contents, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('mlhs_paren')
      q.breakable
      q.pp(contents)
    end
  end

  def to_json(*opts)
    payload = { type: :mlhs_paren, cnts: contents, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_mlhs_paren: (
#     (MLHS | MLHSAddPost | MLHSAddStar | MLHSParen) contents
#   ) -> MLHSParen
def on_mlhs_paren(contents)
  opening = find_token(LParen)
  closing = find_token(RParen)

  # Inspect the raw source between the parentheses: if it ends with a comma,
  # the contents were written with a trailing comma.
  inner = source[opening.location.end_char...closing.location.start_char]
  contents.comma = true if inner.strip.end_with?(',')

  MLHSParen.new(contents: contents, location: opening.location.to(closing.location))
end

# ModuleDeclaration represents defining a module using the +module+ keyword.
#
#   module Namespace
#   end
#
class ModuleDeclaration
  # constant - [ConstPathRef | ConstRef | TopConstRef] the name of the module
  # bodystmt - [BodyStmt] the expressions to be executed in the context of the module
  # location - [Location] the location of this node
  attr_reader :constant, :bodystmt, :location

  def initialize(constant:, bodystmt:, location:)
    @constant, @bodystmt, @location = constant, bodystmt, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('module')

      [constant, bodystmt].each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end

  def to_json(*opts)
    payload = { type: :module, constant: constant, bodystmt: bodystmt, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_module: (
#     (ConstPathRef | ConstRef | TopConstRef) constant,
#     BodyStmt bodystmt
#   ) -> ModuleDeclaration
def on_module(constant, bodystmt)
  opening = find_token(Kw, 'module')
  closing = find_token(Kw, 'end')

  # The body runs from the first statement after the module name up to the
  # closing keyword.
  body_start = find_next_statement_start(constant.location.end_char)
  bodystmt.bind(body_start, closing.location.start_char)

  ModuleDeclaration.new(
    constant: constant,
    bodystmt: bodystmt,
    location: opening.location.to(closing.location)
  )
end

# MRHS represents the values that are being assigned on the right-hand side of
# a multiple assignment.
#
#   values = first, second, third
#
class MRHS
  # Array[untyped] the parts that are being assigned
  attr_reader :parts

  # [Location] the location of this node
  attr_reader :location

  def initialize(parts:, location:)
    @parts = parts
    @location = location
  end

  # Prints the node as an s-expression containing the list of parts.
  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('mrhs')

      q.breakable
      q.group(2, '(', ')') { q.seplist(parts) { |part| q.pp(part) } }
    end
  end

  def to_json(*opts)
    { type: :mrhs, parts: parts, loc: location }.to_json(*opts)
  end
end

# :call-seq:
#   on_mrhs_new: () -> MRHS
def on_mrhs_new
  MRHS.new(parts: [], location: Location.fixed(line: lineno, char: char_pos))
end

# :call-seq:
#   on_mrhs_add: ((MRHS | MRHSNewFromArgs) mrhs, untyped part) -> MRHS
def on_mrhs_add(mrhs, part)
  if mrhs.is_a?(MRHSNewFromArgs)
    # Unwrap the argument list so the result is a flat MRHS.
    MRHS.new(
      parts: [*mrhs.arguments.parts, part],
      location: mrhs.location.to(part.location)
    )
  elsif mrhs.parts.empty?
    MRHS.new(parts: [part], location: mrhs.location)
  else
    # MRHS#initialize only accepts the location: keyword, so that is the
    # keyword used here (this call previously passed an unknown keyword and
    # raised ArgumentError).
    MRHS.new(
      parts: mrhs.parts << part,
      location: mrhs.location.to(part.location)
    )
  end
end

# MRHSAddStar represents using the splat operator to expand out a value on the
# right hand side of a multiple assignment.
#
#   values = first, *rest
#
class MRHSAddStar
  # mrhs     - [MRHS | MRHSNewFromArgs] the values before the splatted expression
  # star     - [untyped] the splatted expression
  # location - [Location] the location of this node
  attr_reader :mrhs, :star, :location

  def initialize(mrhs:, star:, location:)
    @mrhs, @star, @location = mrhs, star, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('mrhs_add_star')

      [mrhs, star].each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end

  def to_json(*opts)
    payload = { type: :mrhs_add_star, mrhs: mrhs, star: star, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_mrhs_add_star: (
#     (MRHS | MRHSNewFromArgs) mrhs,
#     untyped star
#   ) -> MRHSAddStar
def on_mrhs_add_star(mrhs, star)
  operator = find_token(Op, '*')
  # Without a splatted expression the node ends at the operator itself.
  last = star || operator

  MRHSAddStar.new(
    mrhs: mrhs,
    star: star,
    location: operator.location.to(last.location)
  )
end

# MRHSNewFromArgs represents the shorthand of a multiple assignment that
# allows you to assign values using just commas as opposed to assigning from
# an array.
#
#   values = first, second, third
#
class MRHSNewFromArgs
  # arguments - [Args] the arguments being used in the assignment
  # location  - [Location] the location of this node
  attr_reader :arguments, :location

  def initialize(arguments:, location:)
    @arguments, @location = arguments, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('mrhs_new_from_args')
      q.breakable
      q.pp(arguments)
    end
  end

  def to_json(*opts)
    payload = { type: :mrhs_new_from_args, args: arguments, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_mrhs_new_from_args: (Args arguments) -> MRHSNewFromArgs
def on_mrhs_new_from_args(arguments)
  # The wrapper shares the location of the arguments it wraps.
  MRHSNewFromArgs.new(location: arguments.location, arguments: arguments)
end

# Next represents using the +next+ keyword.
#
#   next
#
# The +next+ keyword can also optionally be called with an argument:
#
#   next value
#
# +next+ can even be called with multiple arguments, but only if parentheses
# are omitted, as in:
#
#   next first, second, third
#
# If a single value is being given, parentheses can be used, as in:
#
#   next(value)
#
class Next
  # arguments - [Args | ArgsAddBlock] the arguments passed to the next keyword
  # location  - [Location] the location of this node
  attr_reader :arguments, :location

  def initialize(arguments:, location:)
    @arguments, @location = arguments, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('next')
      q.breakable
      q.pp(arguments)
    end
  end

  def to_json(*opts)
    payload = { type: :next, args: arguments, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_next: ((Args | ArgsAddBlock) arguments) -> Next
def on_next(arguments)
  keyword = find_token(Kw, 'next')

  # A bare Args node does not extend the location past the keyword; anything
  # else does.
  span =
    if arguments.is_a?(Args)
      keyword.location
    else
      keyword.location.to(arguments.location)
    end

  Next.new(arguments: arguments, location: span)
end

# def on_nl(value)
#   value
# end

# def on_nokw_param(value)
#   value
# end

# Op represents an operator literal in the source.
#
#   1 + 2
#
# In the example above, the Op node represents the + operator.
class Op
  # value    - [String] the operator
  # location - [Location] the location of this node
  attr_reader :value, :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('op')
      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    payload = { type: :op, value: value, loc: location }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_op: (String value) -> Op
def on_op(value)
  location = Location.token(line: lineno, char: char_pos, size: value.size)
  Op.new(value: value, location: location).tap { |node| tokens << node }
end

# OpAssign represents assigning a value to a variable or constant using an
# operator like += or ||=.
#
#   variable += value
#
class OpAssign
  # target   - [ARefField | ConstPathField | Field | TopConstField | VarField]
  #            the target to assign the result of the expression to
  # operator - [Op] the operator being used for the assignment
  # value    - [untyped] the expression to be assigned
  # location - [Location] the location of this node
  attr_reader :target, :operator, :value, :location

  def initialize(target:, operator:, value:, location:)
    @target, @operator = target, operator
    @value, @location = value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('opassign')

      [target, operator, value].each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end

  def to_json(*opts)
    payload = {
      type: :opassign,
      target: target,
      op: operator,
      value: value,
      loc: location
    }
    payload.to_json(*opts)
  end
end

# :call-seq:
#   on_opassign: (
#     (ARefField | ConstPathField | Field | TopConstField | VarField) target,
#     Op operator,
#     untyped value
#   ) -> OpAssign
def on_opassign(target, operator, value)
  span = target.location.to(value.location)
  OpAssign.new(target: target, operator: operator, value: value, location: span)
end

# def on_operator_ambiguous(value)
#   value
# end
+ + # Params represents defining parameters on a method or lambda. + # + # def method(param) end + # + class Params + # [Array[ Ident ]] any required parameters + attr_reader :requireds + + # [Array[ [ Ident, untyped ] ]] any optional parameters and their default + # values + attr_reader :optionals + + # [nil | ArgsForward | ExcessedComma | RestParam] the optional rest + # parameter + attr_reader :rest + + # [Array[ Ident ]] any positional parameters that exist after a rest + # parameter + attr_reader :posts + + # [Array[ [ Ident, nil | untyped ] ]] any keyword parameters and their + # optional default values + attr_reader :keywords + + # [nil | :nil | KwRestParam] the optional keyword rest parameter + attr_reader :keyword_rest + + # [nil | BlockArg] the optional block parameter + attr_reader :block + + # [Location] the location of this node + attr_reader :location + + def initialize( + requireds: [], + optionals: [], + rest: nil, + posts: [], + keywords: [], + keyword_rest: nil, + block: nil, + location: + ) + @requireds = requireds + @optionals = optionals + @rest = rest + @posts = posts + @keywords = keywords + @keyword_rest = keyword_rest + @block = block + @location = location + end + + # Params nodes are the most complicated in the tree. Occasionally you want + # to know if they are "empty", which means not having any parameters + # declared. This logic accesses every kind of parameter and determines if + # it's missing. + def empty? + requireds.empty? && optionals.empty? && !rest && posts.empty? && + keywords.empty? && !keyword_rest && !block + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('params') + + if requireds.any? + q.breakable + q.group(2, '(', ')') { q.seplist(requireds) { |name| q.pp(name) } } + end + + if optionals.any? 
+ q.breakable + q.group(2, '(', ')') do + q.seplist(optionals) do |(name, default)| + q.pp(name) + q.text('=') + q.group(2) do + q.breakable('') + q.pp(default) + end + end + end + end + + if rest + q.breakable + q.pp(rest) + end + + if posts.any? + q.breakable + q.group(2, '(', ')') { q.seplist(posts) { |value| q.pp(value) } } + end + + if keywords.any? + q.breakable + q.group(2, '(', ')') do + q.seplist(keywords) do |(name, default)| + q.pp(name) + + if default + q.text('=') + q.group(2) do + q.breakable('') + q.pp(default) + end + end + end + end + end + + if keyword_rest + q.breakable + q.pp(keyword_rest) + end + + if block + q.breakable + q.pp(block) + end + end + end + + def to_json(*opts) + { + type: :params, + reqs: requireds, + opts: optionals, + rest: rest, + posts: posts, + keywords: keywords, + kwrest: keyword_rest, + block: block, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_params: ( + # (nil | Array[Ident]) requireds, + # (nil | Array[[Ident, untyped]]) optionals, + # (nil | ArgsForward | ExcessedComma | RestParam) rest, + # (nil | Array[Ident]) posts, + # (nil | Array[[Ident, nil | untyped]]) keywords, + # (nil | :nil | KwRestParam) keyword_rest, + # (nil | BlockArg) block + # ) -> Params + def on_params( + requireds, + optionals, + rest, + posts, + keywords, + keyword_rest, + block + ) + parts = [ + *requireds, + *optionals&.flatten(1), + rest, + *posts, + *keywords&.flat_map { |(key, value)| [key, value || nil] }, + (keyword_rest if keyword_rest != :nil), + block + ].compact + + location = + if parts.any? + parts[0].location.to(parts[-1].location) + else + Location.fixed(line: lineno, char: char_pos) + end + + Params.new( + requireds: requireds || [], + optionals: optionals || [], + rest: rest, + posts: posts || [], + keywords: keywords || [], + keyword_rest: keyword_rest, + block: block, + location: location + ) + end + + # Paren represents using balanced parentheses in a couple places in a Ruby + # program. 
# In general parentheses can be used anywhere a Ruby expression can
# be used.
#
#     (1 + 2)
#
class Paren
  # [LParen] the left parenthesis that opened this statement
  attr_reader :lparen

  # [untyped] the expression wrapped by the parentheses
  attr_reader :contents

  # [Location] the location of this node
  attr_reader :location

  def initialize(lparen:, contents:, location:)
    @lparen, @contents, @location = lparen, contents, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('paren')
      q.breakable
      q.pp(contents)
    end
  end

  def to_json(*opts)
    serialized = { type: :paren, lparen: lparen, cnts: contents, loc: location }
    serialized.to_json(*opts)
  end
end

# :call-seq:
#   on_paren: (untyped contents) -> Paren
def on_paren(contents)
  lparen = find_token(LParen)
  rparen = find_token(RParen)

  # Parenthesized parameter lists are rebuilt so that their location runs
  # from the first statement start after the opening parenthesis up to the
  # closing parenthesis.
  if contents.is_a?(Params)
    inner = contents.location
    widened =
      Location.new(
        start_line: inner.start_line,
        start_char: find_next_statement_start(lparen.location.end_char),
        end_line: inner.end_line,
        end_char: rparen.location.start_char
      )

    contents =
      Params.new(
        requireds: contents.requireds,
        optionals: contents.optionals,
        rest: contents.rest,
        posts: contents.posts,
        keywords: contents.keywords,
        keyword_rest: contents.keyword_rest,
        block: contents.block,
        location: widened
      )
  end

  outer = lparen.location.to(rparen.location)
  Paren.new(lparen: lparen, contents: contents, location: outer)
end

# A parse error is raised straight away so that the runner driving the parser
# can catch it.
def on_parse_error(error, *)
  failure = ParseError.new(error, lineno, column)
  raise failure
end
alias on_alias_error on_parse_error
alias on_assign_error on_parse_error
alias on_class_name_error on_parse_error
alias on_param_error on_parse_error

# Period represents the use of the +.+ operator. It is usually found in method
# calls.
class Period
  # [String] the text of the period token
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('period')
      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    serialized = { type: :period, value: value, loc: location }
    serialized.to_json(*opts)
  end
end

# :call-seq:
#   on_period: (String value) -> Period
def on_period(value)
  token_location =
    Location.token(line: lineno, char: char_pos, size: value.size)

  Period.new(value: value, location: token_location)
end

# Program is the root node of the syntax tree.
class Program
  # [Statements] the top-level expressions of the program
  attr_reader :statements

  # [Array[ Comment | EmbDoc ]] the comments found inside the program
  attr_reader :comments

  # [Location] the location of this node
  attr_reader :location

  def initialize(statements:, comments:, location:)
    @statements, @comments, @location = statements, comments, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('program')
      q.breakable
      q.pp(statements)
    end
  end

  def to_json(*opts)
    serialized = {
      type: :program,
      stmts: statements,
      comments: comments,
      loc: location
    }

    serialized.to_json(*opts)
  end
end

# :call-seq:
#   on_program: (Statements statements) -> Program
def on_program(statements)
  source_size = source.length

  # The program always spans the entire source.
  whole_source =
    Location.new(
      start_line: 1,
      start_char: 0,
      end_line: lines.length,
      end_char: source_size
    )

  # A recorded __END__ node, when there is one, becomes the last statement.
  statements.body << @__end__ if @__end__
  statements.bind(0, source_size)

  Program.new(
    statements: statements,
    comments: @comments,
    location: whole_source
  )
end

# QSymbols represents a symbol literal array without interpolation.
#
#     %i[one two three]
#
class QSymbols
  # [Array[ TStringContent ]] the elements of the array
  attr_reader :elements

  # [Location] the location of this node
  attr_reader :location

  def initialize(elements:, location:)
    @elements, @location = elements, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('qsymbols')
      q.breakable
      q.group(2, '(', ')') do
        q.seplist(elements) { |item| q.pp(item) }
      end
    end
  end

  def to_json(*opts)
    serialized = { type: :qsymbols, elems: elements, loc: location }
    serialized.to_json(*opts)
  end
end

# :call-seq:
#   on_qsymbols_add: (QSymbols qsymbols, TStringContent element) -> QSymbols
def on_qsymbols_add(qsymbols, element)
  # The existing elements array is appended to in place and reused.
  grown = qsymbols.elements << element
  span = qsymbols.location.to(element.location)

  QSymbols.new(elements: grown, location: span)
end

# QSymbolsBeg is the token that opens a symbol literal array.
#
#     %i[one two three]
#
# In the snippet above, QSymbolsBeg represents the "%i[" token. Note that
# these kinds of arrays can start with a lot of different delimiter types
# (e.g., %i| or %i<).
class QSymbolsBeg
  # [String] the beginning of the array literal
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end
end

# :call-seq:
#   on_qsymbols_beg: (String value) -> QSymbolsBeg
def on_qsymbols_beg(value)
  token_location =
    Location.token(line: lineno, char: char_pos, size: value.size)

  # The token is recorded on the token list and also returned.
  QSymbolsBeg
    .new(value: value, location: token_location)
    .tap { |token| tokens << token }
end

# :call-seq:
#   on_qsymbols_new: () -> QSymbols
def on_qsymbols_new
  opening = find_token(QSymbolsBeg)

  QSymbols.new(elements: [], location: opening.location)
end

# QWords represents a string literal array without interpolation.
#
#     %w[one two three]
#
class QWords
  # [Array[ TStringContent ]] the elements of the array
  attr_reader :elements

  # [Location] the location of this node
  attr_reader :location

  def initialize(elements:, location:)
    @elements, @location = elements, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('qwords')
      q.breakable
      q.group(2, '(', ')') do
        q.seplist(elements) { |item| q.pp(item) }
      end
    end
  end

  def to_json(*opts)
    serialized = { type: :qwords, elems: elements, loc: location }
    serialized.to_json(*opts)
  end
end

# :call-seq:
#   on_qwords_add: (QWords qwords, TStringContent element) -> QWords
def on_qwords_add(qwords, element)
  # The existing elements array is appended to in place and reused.
  grown = qwords.elements << element
  span = qwords.location.to(element.location)

  QWords.new(elements: grown, location: span)
end

# QWordsBeg is the token that opens a string literal array.
#
#     %w[one two three]
#
# In the snippet above, QWordsBeg represents the "%w[" token. Note that these
# kinds of arrays can start with a lot of different delimiter types (e.g.,
# %w| or %w<).
class QWordsBeg
  # [String] the beginning of the array literal
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end
end

# :call-seq:
#   on_qwords_beg: (String value) -> QWordsBeg
def on_qwords_beg(value)
  token_location =
    Location.token(line: lineno, char: char_pos, size: value.size)

  # The token is recorded on the token list and also returned.
  QWordsBeg
    .new(value: value, location: token_location)
    .tap { |token| tokens << token }
end

# :call-seq:
#   on_qwords_new: () -> QWords
def on_qwords_new
  opening = find_token(QWordsBeg)

  QWords.new(elements: [], location: opening.location)
end

# RationalLiteral represents the use of a rational number literal.
#
#     1r
#
class RationalLiteral
  # [String] the source text of the rational number literal
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('rational')
      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    serialized = { type: :rational, value: value, loc: location }
    serialized.to_json(*opts)
  end
end

# :call-seq:
#   on_rational: (String value) -> RationalLiteral
def on_rational(value)
  token_location =
    Location.token(line: lineno, char: char_pos, size: value.size)

  # The literal is recorded on the token list and also returned.
  RationalLiteral
    .new(value: value, location: token_location)
    .tap { |literal| tokens << literal }
end

# RBrace represents the use of a right brace, i.e., +}+.
class RBrace
  # [String] the right brace
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end
end

# :call-seq:
#   on_rbrace: (String value) -> RBrace
def on_rbrace(value)
  token_location =
    Location.token(line: lineno, char: char_pos, size: value.size)

  RBrace
    .new(value: value, location: token_location)
    .tap { |token| tokens << token }
end

# RBracket represents the use of a right bracket, i.e., +]+.
class RBracket
  # [String] the right bracket
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end
end

# :call-seq:
#   on_rbracket: (String value) -> RBracket
def on_rbracket(value)
  token_location =
    Location.token(line: lineno, char: char_pos, size: value.size)

  RBracket
    .new(value: value, location: token_location)
    .tap { |token| tokens << token }
end

# Redo represents the use of the +redo+ keyword.
#
#     redo
#
class Redo
  # [String] the text of the keyword
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('redo')
      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    serialized = { type: :redo, value: value, loc: location }
    serialized.to_json(*opts)
  end
end

# :call-seq:
#   on_redo: () -> Redo
def on_redo
  redo_keyword = find_token(Kw, 'redo')

  Redo.new(value: redo_keyword.value, location: redo_keyword.location)
end

# RegexpContent represents the body of a regular expression.
#
#     /.+ #{pattern} .+/
#
# In the example above, a RegexpContent node represents everything contained
# within the forward slashes.
class RegexpContent
  # [String] the opening of the regular expression
  attr_reader :beginning

  # [Array[ StringDVar | StringEmbExpr | TStringContent ]] the parts of the
  # regular expression
  attr_reader :parts

  # [Location] the location of this node
  attr_reader :location

  def initialize(beginning:, parts:, location:)
    @beginning, @parts, @location = beginning, parts, location
  end
end

# :call-seq:
#   on_regexp_add: (
#     RegexpContent regexp_content,
#     (StringDVar | StringEmbExpr | TStringContent) part
#   ) -> RegexpContent
def on_regexp_add(regexp_content, part)
  # The existing parts array is appended to in place and reused.
  grown = regexp_content.parts << part
  span = regexp_content.location.to(part.location)

  RegexpContent.new(
    beginning: regexp_content.beginning,
    parts: grown,
    location: span
  )
end

# RegexpBeg represents the start of a regular expression literal.
#
#     /.+/
#
# In the example above, RegexpBeg represents the first / token.
# Regular expression literals can also be declared using the %r syntax, as in:
#
#     %r{.+}
#
class RegexpBeg
  # [String] the beginning of the regular expression
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end
end

# :call-seq:
#   on_regexp_beg: (String value) -> RegexpBeg
def on_regexp_beg(value)
  token_location =
    Location.token(line: lineno, char: char_pos, size: value.size)

  # The token is recorded on the token list and also returned.
  RegexpBeg
    .new(value: value, location: token_location)
    .tap { |token| tokens << token }
end

# RegexpEnd represents the end of a regular expression literal.
#
#     /.+/m
#
# In the example above, the RegexpEnd event represents the /m at the end of
# the regular expression literal. You can also declare regular expression
# literals using %r, as in:
#
#     %r{.+}m
#
class RegexpEnd
  # [String] the end of the regular expression
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end
end

# :call-seq:
#   on_regexp_end: (String value) -> RegexpEnd
def on_regexp_end(value)
  token_location =
    Location.token(line: lineno, char: char_pos, size: value.size)

  # Unlike the opening token, the closing token is returned without being
  # recorded on the token list.
  RegexpEnd.new(value: value, location: token_location)
end

# RegexpLiteral represents a regular expression literal.
#
#     /.+/
#
class RegexpLiteral
  # [String] the beginning of the regular expression literal
  attr_reader :beginning

  # [String] the ending of the regular expression literal
  attr_reader :ending

  # [Array[ StringEmbExpr | StringDVar | TStringContent ]] the parts of the
  # regular expression literal
  attr_reader :parts

  # [Location] the location of this node
  attr_reader :location

  def initialize(beginning:, ending:, parts:, location:)
    @beginning, @ending = beginning, ending
    @parts, @location = parts, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('regexp_literal')
      q.breakable
      q.group(2, '(', ')') do
        q.seplist(parts) { |piece| q.pp(piece) }
      end
    end
  end

  def to_json(*opts)
    serialized = {
      type: :regexp_literal,
      beging: beginning,
      ending: ending,
      parts: parts,
      loc: location
    }

    serialized.to_json(*opts)
  end
end

# :call-seq:
#   on_regexp_literal: (
#     RegexpContent regexp_content,
#     RegexpEnd ending
#   ) -> RegexpLiteral
def on_regexp_literal(regexp_content, ending)
  span = regexp_content.location.to(ending.location)

  RegexpLiteral.new(
    beginning: regexp_content.beginning,
    ending: ending.value,
    parts: regexp_content.parts,
    location: span
  )
end

# :call-seq:
#   on_regexp_new: () -> RegexpContent
def on_regexp_new
  opening = find_token(RegexpBeg)

  # The content starts with no parts, located at the opening token.
  RegexpContent.new(
    beginning: opening.value,
    parts: [],
    location: opening.location
  )
end

# RescueEx represents the list of exceptions being rescued in a rescue clause.
#
#     begin
#     rescue Exception => exception
#     end
#
class RescueEx
  # [untyped] the list of exceptions being rescued
  attr_reader :exceptions

  # [nil | Field | VarField] the expression that captures the raised
  # exception
  attr_reader :variable

  # [Location] the location of this node
  attr_reader :location

  def initialize(exceptions:, variable:, location:)
    @exceptions, @variable, @location = exceptions, variable, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('rescue_ex')

      q.breakable
      q.pp(exceptions)

      q.breakable
      q.pp(variable)
    end
  end

  def to_json(*opts)
    serialized = {
      type: :rescue_ex,
      extns: exceptions,
      var: variable,
      loc: location
    }

    serialized.to_json(*opts)
  end
end

# Rescue represents the use of the rescue keyword inside of a BodyStmt node.
#
#     begin
#     rescue
#     end
#
class Rescue
  # [RescueEx] the exceptions being rescued
  attr_reader :exception

  # [Statements] the expressions to evaluate when an error is rescued
  attr_reader :statements

  # [nil | Rescue] the next rescue clause in the chain, if any
  attr_reader :consequent

  # [Location] the location of this node
  attr_reader :location

  def initialize(exception:, statements:, consequent:, location:)
    @exception, @statements = exception, statements
    @consequent, @location = consequent, location
  end

  # Moves the end of this clause to end_char. When another clause follows,
  # it is re-bound to the same end first, and this clause's statements then
  # end where that next clause starts.
  def bind_end(end_char)
    previous = location
    @location =
      Location.new(
        start_line: previous.start_line,
        start_char: previous.start_char,
        end_line: previous.end_line,
        end_char: end_char
      )

    consequent.bind_end(end_char) if consequent
    statements.bind_end(
      consequent ? consequent.location.start_char : end_char
    )
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('rescue')

      if exception
        q.breakable
        q.pp(exception)
      end

      q.breakable
      q.pp(statements)

      if consequent
        q.breakable
        q.pp(consequent)
      end
    end
  end

  def to_json(*opts)
    serialized = {
      type: :rescue,
      extn: exception,
      stmts: statements,
      cons: consequent,
      loc: location
    }

    serialized.to_json(*opts)
  end
end

# :call-seq:
#   on_rescue: (
#     (nil | [untyped] | MRHS | MRHSAddStar) exceptions,
#     (nil | Field | VarField) variable,
#     Statements statements,
#     (nil | Rescue) consequent
#   ) -> Rescue
def on_rescue(exceptions, variable, statements, consequent)
  keyword = find_token(Kw, 'rescue')
  exceptions = exceptions.first if exceptions.is_a?(Array)

  # The statements start after whichever of these comes last in the clause
  # header: the capture variable, the exception list, or the keyword itself.
  anchor = variable || exceptions || keyword
  statements.bind(
    find_next_statement_start(anchor.location.end_char),
    char_pos
  )

  # Ripper provides no node for the exception list, so one is added here as a
  # place to attach inline comments. It is only built when the clause names
  # an exception or a capture variable.
  rescue_ex = nil
  if exceptions || variable
    header_location =
      Location.new(
        start_line: keyword.location.start_line,
        start_char: keyword.location.end_char + 1,
        end_line: anchor.location.end_line,
        end_char: anchor.location.end_char
      )

    rescue_ex =
      RescueEx.new(
        exceptions: exceptions,
        variable: variable,
        location: header_location
      )
  end

  clause_location =
    Location.new(
      start_line: keyword.location.start_line,
      start_char: keyword.location.start_char,
      end_line: lineno,
      end_char: char_pos
    )

  Rescue.new(
    exception: rescue_ex,
    statements: statements,
    consequent: consequent,
    location: clause_location
  )
end

# RescueMod represents the use of the modifier form of a +rescue+ clause.
#
#     expression rescue value
#
class RescueMod
  # [untyped] the expression to execute
  attr_reader :statement

  # [untyped] the value to use if the executed expression raises an error
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(statement:, value:, location:)
    @statement, @value, @location = statement, value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('rescue_mod')

      q.breakable
      q.pp(statement)

      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    serialized = {
      type: :rescue_mod,
      stmt: statement,
      value: value,
      loc: location
    }

    serialized.to_json(*opts)
  end
end

# :call-seq:
#   on_rescue_mod: (untyped statement, untyped value) -> RescueMod
def on_rescue_mod(statement, value)
  # The keyword token is looked up but its result is not used here.
  find_token(Kw, 'rescue')

  span = statement.location.to(value.location)
  RescueMod.new(statement: statement, value: value, location: span)
end

# RestParam represents defining a parameter in a method definition that
# accepts all remaining positional parameters.
#
#     def method(*rest) end
#
class RestParam
  # [nil | Ident] the name of the parameter
  attr_reader :name

  # [Location] the location of this node
  attr_reader :location

  def initialize(name:, location:)
    @name, @location = name, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('rest_param')
      q.breakable
      q.pp(name)
    end
  end

  def to_json(*opts)
    serialized = { type: :rest_param, name: name, loc: location }
    serialized.to_json(*opts)
  end
end

# :call-seq:
#   on_rest_param: ((nil | Ident) name) -> RestParam
def on_rest_param(name)
  star_location = find_token(Op, '*').location

  # With a name the node spans the star through the name; without one it
  # covers only the star.
  location = name ? star_location.to(name.location) : star_location

  RestParam.new(name: name, location: location)
end

# Retry represents the use of the +retry+ keyword.
#
#     retry
#
class Retry
  # [String] the text of the keyword
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('retry')
      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    serialized = { type: :retry, value: value, loc: location }
    serialized.to_json(*opts)
  end
end

# :call-seq:
#   on_retry: () -> Retry
def on_retry
  retry_keyword = find_token(Kw, 'retry')

  Retry.new(value: retry_keyword.value, location: retry_keyword.location)
end

# Return represents using the +return+ keyword with arguments.
#
#     return value
#
class Return
  # [Args | ArgsAddBlock] the arguments being passed to the keyword
  attr_reader :arguments

  # [Location] the location of this node
  attr_reader :location

  def initialize(arguments:, location:)
    @arguments, @location = arguments, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('return')
      q.breakable
      q.pp(arguments)
    end
  end

  def to_json(*opts)
    serialized = { type: :return, args: arguments, loc: location }
    serialized.to_json(*opts)
  end
end

# :call-seq:
#   on_return: ((Args | ArgsAddBlock) arguments) -> Return
def on_return(arguments)
  return_keyword = find_token(Kw, 'return')
  span = return_keyword.location.to(arguments.location)

  Return.new(arguments: arguments, location: span)
end

# Return0 represents the bare +return+ keyword with no arguments.
#
#     return
#
class Return0
  # [String] the text of the keyword
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('return0')
      q.breakable
      q.pp(value)
    end
  end

  def to_json(*opts)
    serialized = { type: :return0, value: value, loc: location }
    serialized.to_json(*opts)
  end
end

# :call-seq:
#   on_return0: () -> Return0
def on_return0
  return_keyword = find_token(Kw, 'return')

  Return0.new(value: return_keyword.value, location: return_keyword.location)
end

# RParen represents the use of a right parenthesis, i.e., +)+.
class RParen
  # [String] the parenthesis
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end
end

# :call-seq:
#   on_rparen: (String value) -> RParen
def on_rparen(value)
  token_location =
    Location.token(line: lineno, char: char_pos, size: value.size)

  # The token is recorded on the token list and also returned.
  RParen
    .new(value: value, location: token_location)
    .tap { |token| tokens << token }
end

# SClass represents a block of statements that should be evaluated within the
# context of the singleton class of an object. It's frequently used to define
# singleton methods.
#
#     class << self
#     end
#
class SClass
  # [untyped] the target of the singleton class to enter
  attr_reader :target

  # [BodyStmt] the expressions to be executed
  attr_reader :bodystmt

  # [Location] the location of this node
  attr_reader :location

  def initialize(target:, bodystmt:, location:)
    @target, @bodystmt, @location = target, bodystmt, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('sclass')

      q.breakable
      q.pp(target)

      q.breakable
      q.pp(bodystmt)
    end
  end

  def to_json(*opts)
    serialized = {
      type: :sclass,
      target: target,
      bodystmt: bodystmt,
      loc: location
    }

    serialized.to_json(*opts)
  end
end

# :call-seq:
#   on_sclass: (untyped target, BodyStmt bodystmt) -> SClass
def on_sclass(target, bodystmt)
  class_keyword = find_token(Kw, 'class')
  end_keyword = find_token(Kw, 'end')

  # The body runs from the first statement start after the target up to the
  # closing +end+ keyword.
  body_start = find_next_statement_start(target.location.end_char)
  bodystmt.bind(body_start, end_keyword.location.start_char)

  SClass.new(
    target: target,
    bodystmt: bodystmt,
    location: class_keyword.location.to(end_keyword.location)
  )
end

# def on_semicolon(value)
#   value
# end

# def on_sp(value)
#   value
# end

# stmts_add is a parser event that represents a single statement inside a
# list of statements within any lexical block. It accepts as arguments the
# parent stmts node as well as a stmt which can be any expression in
# Ruby.
def on_stmts_add(statements, statement)
  statements << statement
end

# Everything that has a block of code inside of it has a list of statements.
# Normally we would just track those as a node that has an array body, but we
# have some special handling in order to handle empty statement lists. They
# need to have the right location information, so all of the parent nodes of
# stmts nodes will report back down the location information. We then
# propagate that onto void_stmt nodes inside the stmts in order to make sure
# all comments get printed appropriately.
class Statements
  # [SyntaxTree] the parser that created this node
  attr_reader :parser

  # [Array[ untyped ]] the list of expressions contained within this node
  attr_reader :body

  # [Location] the location of this node
  attr_reader :location

  def initialize(parser:, body:, location:)
    @parser, @body, @location = parser, body, location
  end

  # Sets the character range of this list to [start_char, end_char]. A leading
  # VoidStmt is replaced by a zero-width one at start_char, and comments that
  # fall inside the range are then merged into the body.
  def bind(start_char, end_char)
    current = location
    @location =
      Location.new(
        start_line: current.start_line,
        start_char: start_char,
        end_line: current.end_line,
        end_char: end_char
      )

    first_statement = body[0]
    if first_statement.is_a?(VoidStmt)
      void_location = first_statement.location
      body[0] =
        VoidStmt.new(
          location:
            Location.new(
              start_line: void_location.start_line,
              start_char: start_char,
              end_line: void_location.end_line,
              end_char: start_char
            )
        )
    end

    attach_comments(start_char, end_char)
  end

  # Moves only the end of this list's location to end_char.
  def bind_end(end_char)
    current = location
    @location =
      Location.new(
        start_line: current.start_line,
        start_char: current.start_char,
        end_line: current.end_line,
        end_char: end_char
      )
  end

  # Appends a statement and returns self. The first statement sets the
  # location outright; later ones extend it.
  def <<(statement)
    @location =
      if body.any?
        location.to(statement.location)
      else
        statement.location
      end

    body << statement
    self
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('statements')
      q.breakable
      q.seplist(body) { |child| q.pp(child) }
    end
  end

  def to_json(*opts)
    serialized = { type: :statements, body: body, loc: location }
    serialized.to_json(*opts)
  end

  private

  # Moves every non-inline comment that lies fully inside the given range, and
  # is not a prettier-ignore marker, from the parser's comment list into this
  # body. The body is then ordered by starting character.
  def attach_comments(start_char, end_char)
    attachable =
      parser.comments.select do |comment|
        next false if comment.inline?
        next false unless start_char <= comment.location.start_char
        next false unless end_char >= comment.location.end_char

        !comment.value.include?('prettier-ignore')
      end

    return if attachable.empty?

    parser.comments -= attachable
    @body = (body + attachable).sort_by { |node| node.location.start_char }
  end
end

# :call-seq:
#   on_stmts_new: () -> Statements
def on_stmts_new
  start = Location.fixed(line: lineno, char: char_pos)

  Statements.new(parser: self, body: [], location: start)
end

# StringContent represents the contents of a string-like value.
#
#     "string"
#
class StringContent
  # [Array[ StringEmbExpr | StringDVar | TStringContent ]] the parts of the
  # string
  attr_reader :parts

  # [Location] the location of this node
  attr_reader :location

  def initialize(parts:, location:)
    @parts, @location = parts, location
  end
end

# :call-seq:
#   on_string_add: (
#     StringContent string,
#     (StringEmbExpr | StringDVar | TStringContent) part
#   ) -> StringContent
def on_string_add(string, part)
  # The first part sets the location outright; later parts extend it.
  location =
    if string.parts.any?
      string.location.to(part.location)
    else
      part.location
    end

  # The existing parts array is appended to in place and reused.
  StringContent.new(parts: string.parts << part, location: location)
end

# StringConcat represents concatenating two strings together using a backward
# slash.
#
#     "first" \
#       "second"
#
class StringConcat
  # [StringConcat | StringLiteral] the left side of the concatenation
  attr_reader :left

  # [StringLiteral] the right side of the concatenation
  attr_reader :right

  # [Location] the location of this node
  attr_reader :location

  def initialize(left:, right:, location:)
    @left, @right, @location = left, right, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('string_concat')

      q.breakable
      q.pp(left)

      q.breakable
      q.pp(right)
    end
  end

  def to_json(*opts)
    serialized = {
      type: :string_concat,
      left: left,
      right: right,
      loc: location
    }

    serialized.to_json(*opts)
  end
end

# :call-seq:
#   on_string_concat: (
#     (StringConcat | StringLiteral) left,
#     StringLiteral right
#   ) -> StringConcat
def on_string_concat(left, right)
  span = left.location.to(right.location)

  StringConcat.new(left: left, right: right, location: span)
end

# :call-seq:
#   on_string_content: () -> StringContent
def on_string_content
  start = Location.fixed(line: lineno, char: char_pos)

  StringContent.new(parts: [], location: start)
end

# StringDVar represents shorthand interpolation of a variable into a string.
# It allows you to take an instance variable, class variable, or global
# variable and omit the braces when interpolating.
#
#     "#@variable"
#
class StringDVar
  # [Backref | VarRef] the variable being interpolated
  attr_reader :variable

  # [Location] the location of this node
  attr_reader :location

  def initialize(variable:, location:)
    @variable, @location = variable, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('string_dvar')
      q.breakable
      q.pp(variable)
    end
  end

  def to_json(*opts)
    serialized = { type: :string_dvar, var: variable, loc: location }
    serialized.to_json(*opts)
  end
end

# :call-seq:
#   on_string_dvar: ((Backref | VarRef) variable) -> StringDVar
def on_string_dvar(variable)
  marker = find_token(EmbVar)
  span = marker.location.to(variable.location)

  StringDVar.new(variable: variable, location: span)
end

# StringEmbExpr represents interpolated content. It can be contained within a
# couple of different parent nodes, including regular expressions, strings,
# and dynamic symbols.
#
#     "string #{expression}"
#
class StringEmbExpr
  # [Statements] the expressions to be interpolated
  attr_reader :statements

  # [Location] the location of this node
  attr_reader :location

  def initialize(statements:, location:)
    @statements, @location = statements, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('string_embexpr')
      q.breakable
      q.pp(statements)
    end
  end

  def to_json(*opts)
    serialized = { type: :string_embexpr, stmts: statements, loc: location }
    serialized.to_json(*opts)
  end
end

# :call-seq:
#   on_string_embexpr: (Statements statements) -> StringEmbExpr
def on_string_embexpr(statements)
  opening = find_token(EmbExprBeg)
  closing = find_token(EmbExprEnd)

  # The statements cover everything between the two delimiter tokens.
  statements.bind(opening.location.end_char, closing.location.start_char)

  StringEmbExpr.new(
    statements: statements,
    location: opening.location.to(closing.location)
  )
end

# StringLiteral represents a string literal.
#
#     "string"
#
class StringLiteral
  # [Array[ StringEmbExpr | StringDVar | TStringContent ]] the parts of the
  # string literal
  attr_reader :parts

  # [String] which quote was used by the string literal
  attr_reader :quote

  # [Location] the location of this node
  attr_reader :location

  def initialize(parts:, quote:, location:)
    @parts, @quote, @location = parts, quote, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('string_literal')
      q.breakable
      q.group(2, '(', ')') do
        q.seplist(parts) { |piece| q.pp(piece) }
      end
    end
  end

  def to_json(*opts)
    serialized = {
      type: :string_literal,
      parts: parts,
      quote: quote,
      loc: location
    }

    serialized.to_json(*opts)
  end
end

# :call-seq:
#   on_string_literal: (StringContent string) -> Heredoc | StringLiteral
def on_string_literal(string)
  pending = @heredocs[-1]

  # Without a heredoc that already has its ending, this is an ordinary quoted
  # string delimited by its begin and end tokens.
  unless pending && pending.ending
    opening = find_token(TStringBeg)
    closing = find_token(TStringEnd)

    return(
      StringLiteral.new(
        parts: string.parts,
        quote: opening.value,
        location: opening.location.to(closing.location)
      )
    )
  end

  # Otherwise the most recent heredoc is taken off the stack and rebuilt with
  # the parts of this string as its contents.
  finished = @heredocs.pop

  Heredoc.new(
    beginning: finished.beginning,
    ending: finished.ending,
    parts: string.parts,
    location: finished.location
  )
end

# Super represents using the +super+ keyword with arguments. It can optionally
# use parentheses.
#
#     super(value)
#
class Super
  # [ArgParen | Args | ArgsAddBlock] the arguments to the keyword
  attr_reader :arguments

  # [Location] the location of this node
  attr_reader :location

  def initialize(arguments:, location:)
    @arguments, @location = arguments, location
  end

  def pretty_print(q)
    q.group(2, '(', ')') do
      q.text('super')
      q.breakable
      q.pp(arguments)
    end
  end

  def to_json(*opts)
    serialized = { type: :super, args: arguments, loc: location }
    serialized.to_json(*opts)
  end
end

# :call-seq:
#   on_super: ((ArgParen | Args | ArgsAddBlock) arguments) -> Super
def on_super(arguments)
  super_keyword = find_token(Kw, 'super')
  span = super_keyword.location.to(arguments.location)

  Super.new(arguments: arguments, location: span)
end

# SymBeg represents the beginning of a symbol literal.
#
#     :symbol
#
# SymBeg is also used for dynamic symbols, as in:
#
#     :"symbol"
#
# Finally, SymBeg is also used for symbols using the %s syntax, as in:
#
#     %s[symbol]
#
# The value of this node is a string. In most cases (as in the first example
# above) it will contain just ":". In the case of dynamic symbols it will
# contain ":'" or ":\"". In the case of %s symbols, it will contain the start
# of the symbol including the %s and the delimiter.
class SymBeg
  # [String] the beginning of the symbol
  attr_reader :value

  # [Location] the location of this node
  attr_reader :location

  def initialize(value:, location:)
    @value, @location = value, location
  end
end

# symbeg is a token that represents the beginning of a symbol literal.
# In most cases it will contain just ":" as in the value, but if it's a dynamic
# symbol being defined it will contain ":'" or ":\"".
+ def on_symbeg(value) + node = + SymBeg.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # SymbolContent represents symbol contents and is always the child of a + # SymbolLiteral node. + # + # :symbol + # + class SymbolContent + # [Backtick | Const | CVar | GVar | Ident | IVar | Kw | Op] the value of the + # symbol + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + end + + # :call-seq: + # on_symbol: ( + # (Backtick | Const | CVar | GVar | Ident | IVar | Kw | Op) value + # ) -> SymbolContent + def on_symbol(value) + tokens.pop + + SymbolContent.new(value: value, location: value.location) + end + + # SymbolLiteral represents a symbol in the system with no interpolation + # (as opposed to a DynaSymbol which has interpolation). + # + # :symbol + # + class SymbolLiteral + # [Backtick | Const | CVar | GVar | Ident | IVar | Kw | Op] the value of the + # symbol + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('symbol_literal') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :symbol_literal, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_symbol_literal: ( + # ( + # Backtick | Const | CVar | GVar | Ident | + # IVar | Kw | Op | SymbolContent + # ) value + # ) -> SymbolLiteral + def on_symbol_literal(value) + if tokens[-1] == value + SymbolLiteral.new(value: tokens.pop, location: value.location) + else + symbeg = find_token(SymBeg) + + SymbolLiteral.new( + value: value.value, + location: symbeg.location.to(value.location) + ) + end + end + + # Symbols represents a symbol array literal with interpolation. 
+ # + # %I[one two three] + # + class Symbols + # [Array[ Word ]] the words in the symbol array literal + attr_reader :elements + + # [Location] the location of this node + attr_reader :location + + def initialize(elements:, location:) + @elements = elements + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('symbols') + + q.breakable + q.group(2, '(', ')') { q.seplist(elements) { |element| q.pp(element) } } + end + end + + def to_json(*opts) + { type: :symbols, elems: elements, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_symbols_add: (Symbols symbols, Word word) -> Symbols + def on_symbols_add(symbols, word) + Symbols.new( + elements: symbols.elements << word, + location: symbols.location.to(word.location) + ) + end + + # SymbolsBeg represents the start of a symbol array literal with + # interpolation. + # + # %I[one two three] + # + # In the snippet above, SymbolsBeg represents the "%I[" token. Note that these + # kinds of arrays can start with a lot of different delimiter types + # (e.g., %I| or %I<). + class SymbolsBeg + # [String] the beginning of the symbol literal array + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + end + + # :call-seq: + # on_symbols_beg: (String value) -> SymbolsBeg + def on_symbols_beg(value) + node = + SymbolsBeg.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # :call-seq: + # on_symbols_new: () -> Symbols + def on_symbols_new + symbols_beg = find_token(SymbolsBeg) + + Symbols.new(elements: [], location: symbols_beg.location) + end + + # TLambda represents the beginning of a lambda literal. + # + # -> { value } + # + # In the example above the TLambda represents the +->+ operator. 
+ class TLambda + # [String] the beginning of the lambda literal + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + end + + # :call-seq: + # on_tlambda: (String value) -> TLambda + def on_tlambda(value) + node = + TLambda.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # TLamBeg represents the beginning of the body of a lambda literal using + # braces. + # + # -> { value } + # + # In the example above the TLamBeg represents the +{+ operator. + class TLamBeg + # [String] the beginning of the body of the lambda literal + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + end + + # :call-seq: + # on_tlambeg: (String value) -> TLamBeg + def on_tlambeg(value) + node = + TLamBeg.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # TopConstField is always the child node of some kind of assignment. It + # represents when you're assigning to a constant that is being referenced at + # the top level. 
+ # + # ::Constant = value + # + class TopConstField + # [Const] the constant being assigned + attr_reader :constant + + # [Location] the location of this node + attr_reader :location + + def initialize(constant:, location:) + @constant = constant + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('top_const_field') + + q.breakable + q.pp(constant) + end + end + + def to_json(*opts) + { type: :top_const_field, constant: constant, loc: location }.to_json( + *opts + ) + end + end + + # :call-seq: + # on_top_const_field: (Const constant) -> TopConstField + def on_top_const_field(constant) + operator = find_colon2_before(constant) + + TopConstField.new( + constant: constant, + location: operator.location.to(constant.location) + ) + end + + # TopConstRef is very similar to TopConstField except that it is not involved + # in an assignment. + # + # ::Constant + # + class TopConstRef + # [Const] the constant being referenced + attr_reader :constant + + # [Location] the location of this node + attr_reader :location + + def initialize(constant:, location:) + @constant = constant + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('top_const_ref') + + q.breakable + q.pp(constant) + end + end + + def to_json(*opts) + { type: :top_const_ref, constant: constant, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_top_const_ref: (Const constant) -> TopConstRef + def on_top_const_ref(constant) + operator = find_colon2_before(constant) + + TopConstRef.new( + constant: constant, + location: operator.location.to(constant.location) + ) + end + + # TStringBeg represents the beginning of a string literal. + # + # "string" + # + # In the example above, TStringBeg represents the first set of quotes. Strings + # can also use single quotes. 
They can also be declared using the +%q+ and + # +%Q+ syntax, as in: + # + # %q{string} + # + class TStringBeg + # [String] the beginning of the string + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + end + + # :call-seq: + # on_tstring_beg: (String value) -> TStringBeg + def on_tstring_beg(value) + node = + TStringBeg.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # TStringContent represents plain characters inside of an entity that accepts + # string content like a string, heredoc, command string, or regular + # expression. + # + # "string" + # + # In the example above, TStringContent represents the +string+ token contained + # within the string. + class TStringContent + # [String] the content of the string + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('tstring_content') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { + type: :tstring_content, + value: value.force_encoding('UTF-8'), + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_tstring_content: (String value) -> TStringContent + def on_tstring_content(value) + TStringContent.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + end + + # TStringEnd represents the end of a string literal. + # + # "string" + # + # In the example above, TStringEnd represents the second set of quotes. + # Strings can also use single quotes. 
They can also be declared using the +%q+ + # and +%Q+ syntax, as in: + # + # %q{string} + # + class TStringEnd + # [String] the end of the string + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + end + + # :call-seq: + # on_tstring_end: (String value) -> TStringEnd + def on_tstring_end(value) + node = + TStringEnd.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # Not represents the unary +not+ method being called on an expression. + # + # not value + # + class Not + # [untyped] the statement on which to operate + attr_reader :statement + + # [boolean] whether or not parentheses were used + attr_reader :parentheses + + # [Location] the location of this node + attr_reader :location + + def initialize(statement:, parentheses:, location:) + @statement = statement + @parentheses = parentheses + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('not') + + q.breakable + q.pp(statement) + end + end + + def to_json(*opts) + { + type: :not, + value: statement, + paren: parentheses, + loc: location + }.to_json(*opts) + end + end + + # Unary represents a unary method being called on an expression, as in +!+ or + # +~+. 
+ # + # !value + # + class Unary + # [String] the operator being used + attr_reader :operator + + # [untyped] the statement on which to operate + attr_reader :statement + + # [Location] the location of this node + attr_reader :location + + def initialize(operator:, statement:, location:) + @operator = operator + @statement = statement + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('unary') + + q.breakable + q.pp(operator) + + q.breakable + q.pp(statement) + end + end + + def to_json(*opts) + { type: :unary, op: operator, value: statement, loc: location }.to_json( + *opts + ) + end + end + + # :call-seq: + # on_unary: (:not operator, untyped statement) -> Not + # | (Symbol operator, untyped statement) -> Unary + def on_unary(operator, statement) + if operator == :not + # We have somewhat special handling of the not operator since if it has + # parentheses they don't get reported as a paren node for some reason. + + beginning = find_token(Kw, 'not') + ending = statement + + range = beginning.location.end_char...statement.location.start_char + paren = source[range].include?('(') + + if paren + find_token(LParen) + ending = find_token(RParen) + end + + Not.new( + statement: statement, + parentheses: paren, + location: beginning.location.to(ending.location) + ) + else + # Special case instead of using find_token here. It turns out that + # if you have a range that goes from a negative number to a negative + # number then you can end up with a .. or a ... that's higher in the + # stack. So we need to explicitly disallow those operators. + index = + tokens.rindex do |token| + token.is_a?(Op) && + token.location.start_char < statement.location.start_char && + !%w[.. 
...].include?(token.value) + end + + beginning = tokens.delete_at(index) + + Unary.new( + operator: operator[0], # :+@ -> "+" + statement: statement, + location: beginning.location.to(statement.location) + ) + end + end + + # Undef represents the use of the +undef+ keyword. + # + # undef method + # + class Undef + # [Array[ DynaSymbol | SymbolLiteral ]] the symbols to undefine + attr_reader :symbols + + # [Location] the location of this node + attr_reader :location + + def initialize(symbols:, location:) + @symbols = symbols + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('undef') + + q.breakable + q.group(2, '(', ')') { q.seplist(symbols) { |symbol| q.pp(symbol) } } + end + end + + def to_json(*opts) + { type: :undef, syms: symbols, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_undef: (Array[DynaSymbol | SymbolLiteral] symbols) -> Undef + def on_undef(symbols) + keyword = find_token(Kw, 'undef') + + Undef.new( + symbols: symbols, + location: keyword.location.to(symbols.last.location) + ) + end + + # Unless represents the first clause in an +unless+ chain. 
+ # + # unless predicate + # end + # + class Unless + # [untyped] the expression to be checked + attr_reader :predicate + + # [Statements] the expressions to be executed + attr_reader :statements + + # [nil, Elsif, Else] the next clause in the chain + attr_reader :consequent + + # [Location] the location of this node + attr_reader :location + + def initialize(predicate:, statements:, consequent:, location:) + @predicate = predicate + @statements = statements + @consequent = consequent + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('unless') + + q.breakable + q.pp(predicate) + + q.breakable + q.pp(statements) + + if consequent + q.breakable + q.pp(consequent) + end + end + end + + def to_json(*opts) + { + type: :unless, + pred: predicate, + stmts: statements, + cons: consequent, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_unless: ( + # untyped predicate, + # Statements statements, + # ((nil | Elsif | Else) consequent) + # ) -> Unless + def on_unless(predicate, statements, consequent) + beginning = find_token(Kw, 'unless') + ending = consequent || find_token(Kw, 'end') + + statements.bind(predicate.location.end_char, ending.location.start_char) + + Unless.new( + predicate: predicate, + statements: statements, + consequent: consequent, + location: beginning.location.to(ending.location) + ) + end + + # UnlessMod represents the modifier form of an +unless+ statement. 
+ # + # expression unless predicate + # + class UnlessMod + # [untyped] the expression to be executed + attr_reader :statement + + # [untyped] the expression to be checked + attr_reader :predicate + + # [Location] the location of this node + attr_reader :location + + def initialize(statement:, predicate:, location:) + @statement = statement + @predicate = predicate + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('unless_mod') + + q.breakable + q.pp(statement) + + q.breakable + q.pp(predicate) + end + end + + def to_json(*opts) + { + type: :unless_mod, + stmt: statement, + pred: predicate, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_unless_mod: (untyped predicate, untyped statement) -> UnlessMod + def on_unless_mod(predicate, statement) + find_token(Kw, 'unless') + + UnlessMod.new( + statement: statement, + predicate: predicate, + location: statement.location.to(predicate.location) + ) + end + + # Until represents an +until+ loop. 
+ # + # until predicate + # end + # + class Until + # [untyped] the expression to be checked + attr_reader :predicate + + # [Statements] the expressions to be executed + attr_reader :statements + + # [Location] the location of this node + attr_reader :location + + def initialize(predicate:, statements:, location:) + @predicate = predicate + @statements = statements + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('until') + + q.breakable + q.pp(predicate) + + q.breakable + q.pp(statements) + end + end + + def to_json(*opts) + { + type: :until, + pred: predicate, + stmts: statements, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_until: (untyped predicate, Statements statements) -> Until + def on_until(predicate, statements) + beginning = find_token(Kw, 'until') + ending = find_token(Kw, 'end') + + # Consume the do keyword if it exists so that it doesn't get confused for + # some other block + keyword = find_token(Kw, 'do', consume: false) + if keyword && keyword.location.start_char > predicate.location.end_char && + keyword.location.end_char < ending.location.start_char + tokens.delete(keyword) + end + + # Update the Statements location information + statements.bind(predicate.location.end_char, ending.location.start_char) + + Until.new( + predicate: predicate, + statements: statements, + location: beginning.location.to(ending.location) + ) + end + + # UntilMod represents the modifier form of a +until+ loop. 
+ # + # expression until predicate + # + class UntilMod + # [untyped] the expression to be executed + attr_reader :statement + + # [untyped] the expression to be checked + attr_reader :predicate + + # [Location] the location of this node + attr_reader :location + + def initialize(statement:, predicate:, location:) + @statement = statement + @predicate = predicate + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('until_mod') + + q.breakable + q.pp(statement) + + q.breakable + q.pp(predicate) + end + end + + def to_json(*opts) + { + type: :until_mod, + stmt: statement, + pred: predicate, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_until_mod: (untyped predicate, untyped statement) -> UntilMod + def on_until_mod(predicate, statement) + find_token(Kw, 'until') + + UntilMod.new( + statement: statement, + predicate: predicate, + location: statement.location.to(predicate.location) + ) + end + + # VarAlias represents when you're using the +alias+ keyword with global + # variable arguments. 
+ # + # alias $new $old + # + class VarAlias + # [GVar] the new alias of the variable + attr_reader :left + + # [Backref | GVar] the current name of the variable to be aliased + attr_reader :right + + # [Location] the location of this node + attr_reader :location + + def initialize(left:, right:, location:) + @left = left + @right = right + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('var_alias') + + q.breakable + q.pp(left) + + q.breakable + q.pp(right) + end + end + + def to_json(*opts) + { type: :var_alias, left: left, right: right, loc: location }.to_json( + *opts + ) + end + end + + # :call-seq: + # on_var_alias: (GVar left, (Backref | GVar) right) -> VarAlias + def on_var_alias(left, right) + keyword = find_token(Kw, 'alias') + + VarAlias.new( + left: left, + right: right, + location: keyword.location.to(right.location) + ) + end + + # VarField represents a variable that is being assigned a value. As such, it + # is always a child of an assignment type node. + # + # variable = value + # + # In the example above, the VarField node represents the +variable+ token. 
+ class VarField + # [nil | Const | CVar | GVar | Ident | IVar] the target of this node + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('var_field') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :var_field, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_var_field: ( + # (nil | Const | CVar | GVar | Ident | IVar) value + # ) -> VarField + def on_var_field(value) + location = + if value + value.location + else + # You can hit this pattern if you're assigning to a splat using pattern + # matching syntax in Ruby 2.7+ + Location.fixed(line: lineno, char: char_pos) + end + + VarField.new(value: value, location: location) + end + + # VarRef represents a variable reference. + # + # true + # + # This can be a plain local variable like the example above. It can also be a + # constant, a class variable, a global variable, an instance variable, a + # keyword (like +self+, +nil+, +true+, or +false+), or a numbered block + # variable. + class VarRef + # [Const | CVar | GVar | Ident | IVar | Kw] the value of this node + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('var_ref') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :var_ref, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_var_ref: ((Const | CVar | GVar | Ident | IVar | Kw) value) -> VarRef + def on_var_ref(value) + VarRef.new(value: value, location: value.location) + end + + # AccessCtrl represents a call to a method visibility control, i.e., +public+, + # +protected+, or +private+. 
+ # + # private + # + class AccessCtrl + # [Ident] the value of this expression + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('access_ctrl') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :access_ctrl, value: value, loc: location }.to_json(*opts) + end + end + + # VCall represents any plain named object within Ruby that could be either a + # local variable or a method call. + # + # variable + # + class VCall + # [Ident] the value of this expression + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('vcall') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :vcall, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_vcall: (Ident ident) -> AccessCtrl | VCall + def on_vcall(ident) + @controls ||= %w[private protected public].freeze + + if @controls.include?(ident.value) && ident.value == lines[lineno - 1].strip + # Access controls like private, protected, and public are reported as + # vcall nodes since they're technically method calls. We want to be able + # to add new lines around them as necessary, so here we're going to + # explicitly track those as a different node type. + AccessCtrl.new(value: ident, location: ident.location) + else + VCall.new(value: ident, location: ident.location) + end + end + + # VoidStmt represents an empty lexical block of code. 
+ # + # ;; + # + class VoidStmt + # [Location] the location of this node + attr_reader :location + + def initialize(location:) + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') { q.text('void_stmt') } + end + + def to_json(*opts) + { type: :void_stmt, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_void_stmt: () -> VoidStmt + def on_void_stmt + VoidStmt.new(location: Location.fixed(line: lineno, char: char_pos)) + end + + # When represents a +when+ clause in a +case+ chain. + # + # case value + # when predicate + # end + # + class When + # [untyped] the arguments to the when clause + attr_reader :arguments + + # [Statements] the expressions to be executed + attr_reader :statements + + # [nil | Else | When] the next clause in the chain + attr_reader :consequent + + # [Location] the location of this node + attr_reader :location + + def initialize(arguments:, statements:, consequent:, location:) + @arguments = arguments + @statements = statements + @consequent = consequent + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('when') + + q.breakable + q.pp(arguments) + + q.breakable + q.pp(statements) + + if consequent + q.breakable + q.pp(consequent) + end + end + end + + def to_json(*opts) + { + type: :when, + args: arguments, + stmts: statements, + cons: consequent, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_when: ( + # untyped arguments, + # Statements statements, + # (nil | Else | When) consequent + # ) -> When + def on_when(arguments, statements, consequent) + beginning = find_token(Kw, 'when') + ending = consequent || find_token(Kw, 'end') + + statements.bind(arguments.location.end_char, ending.location.start_char) + + When.new( + arguments: arguments, + statements: statements, + consequent: consequent, + location: beginning.location.to(ending.location) + ) + end + + # While represents a +while+ loop. 
+ # + # while predicate + # end + # + class While + # [untyped] the expression to be checked + attr_reader :predicate + + # [Statements] the expressions to be executed + attr_reader :statements + + # [Location] the location of this node + attr_reader :location + + def initialize(predicate:, statements:, location:) + @predicate = predicate + @statements = statements + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('while') + + q.breakable + q.pp(predicate) + + q.breakable + q.pp(statements) + end + end + + def to_json(*opts) + { + type: :while, + pred: predicate, + stmts: statements, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_while: (untyped predicate, Statements statements) -> While + def on_while(predicate, statements) + beginning = find_token(Kw, 'while') + ending = find_token(Kw, 'end') + + # Consume the do keyword if it exists so that it doesn't get confused for + # some other block + keyword = find_token(Kw, 'do', consume: false) + if keyword && keyword.location.start_char > predicate.location.end_char && + keyword.location.end_char < ending.location.start_char + tokens.delete(keyword) + end + + # Update the Statements location information + statements.bind(predicate.location.end_char, ending.location.start_char) + + While.new( + predicate: predicate, + statements: statements, + location: beginning.location.to(ending.location) + ) + end + + # WhileMod represents the modifier form of a +while+ loop. 
+ # + # expression while predicate + # + class WhileMod + # [untyped] the expression to be executed + attr_reader :statement + + # [untyped] the expression to be checked + attr_reader :predicate + + # [Location] the location of this node + attr_reader :location + + def initialize(statement:, predicate:, location:) + @statement = statement + @predicate = predicate + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('while_mod') + + q.breakable + q.pp(statement) + + q.breakable + q.pp(predicate) + end + end + + def to_json(*opts) + { + type: :while_mod, + stmt: statement, + pred: predicate, + loc: location + }.to_json(*opts) + end + end + + # :call-seq: + # on_while_mod: (untyped predicate, untyped statement) -> WhileMod + def on_while_mod(predicate, statement) + find_token(Kw, 'while') + + WhileMod.new( + statement: statement, + predicate: predicate, + location: statement.location.to(predicate.location) + ) + end + + # Word represents an element within a special array literal that accepts + # interpolation. + # + # %W[a#{b}c xyz] + # + # In the example above, there would be two Word nodes within a parent Words + # node. + class Word + # [Array[ StringEmbExpr | StringDVar | TStringContent ]] the parts of the + # word + attr_reader :parts + + # [Location] the location of this node + attr_reader :location + + def initialize(parts:, location:) + @parts = parts + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('word') + + q.breakable + q.group(2, '(', ')') { q.seplist(parts) { |part| q.pp(part) } } + end + end + + def to_json(*opts) + { type: :word, parts: parts, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_word_add: ( + # Word word, + # (StringEmbExpr | StringDVar | TStringContent) part + # ) -> Word + def on_word_add(word, part) + location = + word.parts.empty? ? 
part.location : word.location.to(part.location) + + Word.new(parts: word.parts << part, location: location) + end + + # :call-seq: + # on_word_new: () -> Word + def on_word_new + Word.new(parts: [], location: Location.fixed(line: lineno, char: char_pos)) + end + + # Words represents a string literal array with interpolation. + # + # %W[one two three] + # + class Words + # [Array[ Word ]] the elements of this array + attr_reader :elements + + # [Location] the location of this node + attr_reader :location + + def initialize(elements:, location:) + @elements = elements + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('words') + + q.breakable + q.group(2, '(', ')') { q.seplist(elements) { |element| q.pp(element) } } + end + end + + def to_json(*opts) + { type: :words, elems: elements, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_words_add: (Words words, Word word) -> Words + def on_words_add(words, word) + Words.new( + elements: words.elements << word, + location: words.location.to(word.location) + ) + end + + # WordsBeg represents the beginning of a string literal array with + # interpolation. + # + # %W[one two three] + # + # In the snippet above, a WordsBeg would be created with the value of "%W[". + # Note that these kinds of arrays can start with a lot of different delimiter + # types (e.g., %W| or %W<). 
+ class WordsBeg + # [String] the start of the word literal array + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + end + + # :call-seq: + # on_words_beg: (String value) -> WordsBeg + def on_words_beg(value) + node = + WordsBeg.new( + value: value, + location: Location.token(line: lineno, char: char_pos, size: value.size) + ) + + tokens << node + node + end + + # :call-seq: + # on_words_new: () -> Words + def on_words_new + words_beg = find_token(WordsBeg) + + Words.new(elements: [], location: words_beg.location) + end + + # def on_words_sep(value) + # value + # end + + # XString represents the contents of an XStringLiteral. + # + # `ls` + # + class XString + # [Array[ StringEmbExpr | StringDVar | TStringContent ]] the parts of the + # xstring + attr_reader :parts + + # [Location] the location of this node + attr_reader :location + + def initialize(parts:, location:) + @parts = parts + @location = location + end + end + + # :call-seq: + # on_xstring_add: ( + # XString xstring, + # (StringEmbExpr | StringDVar | TStringContent) part + # ) -> XString + def on_xstring_add(xstring, part) + XString.new( + parts: xstring.parts << part, + location: xstring.location.to(part.location) + ) + end + + # :call-seq: + # on_xstring_new: () -> XString + def on_xstring_new + heredoc = @heredocs[-1] + + location = + if heredoc && heredoc.beginning.value.include?('`') + heredoc.location + else + find_token(Backtick).location + end + + XString.new(parts: [], location: location) + end + + # XStringLiteral represents a string that gets executed. 
+ # + # `ls` + # + class XStringLiteral + # [Array[ StringEmbExpr | StringDVar | TStringContent ]] the parts of the + # xstring + attr_reader :parts + + # [Location] the location of this node + attr_reader :location + + def initialize(parts:, location:) + @parts = parts + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('xstring_literal') + + q.breakable + q.group(2, '(', ')') { q.seplist(parts) { |part| q.pp(part) } } + end + end + + def to_json(*opts) + { type: :xstring_literal, parts: parts, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_xstring_literal: (XString xstring) -> Heredoc | XStringLiteral + def on_xstring_literal(xstring) + heredoc = @heredocs[-1] + + if heredoc && heredoc.beginning.value.include?('`') + Heredoc.new( + beginning: heredoc.beginning, + ending: heredoc.ending, + parts: xstring.parts, + location: heredoc.location + ) + else + ending = find_token(TStringEnd) + + XStringLiteral.new( + parts: xstring.parts, + location: xstring.location.to(ending.location) + ) + end + end + + # Yield represents using the +yield+ keyword with arguments. + # + # yield value + # + class Yield + # [ArgsAddBlock | Paren] the arguments passed to the yield + attr_reader :arguments + + # [Location] the location of this node + attr_reader :location + + def initialize(arguments:, location:) + @arguments = arguments + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('yield') + + q.breakable + q.pp(arguments) + end + end + + def to_json(*opts) + { type: :yield, args: arguments, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_yield: ((ArgsAddBlock | Paren) arguments) -> Yield + def on_yield(arguments) + keyword = find_token(Kw, 'yield') + + Yield.new( + arguments: arguments, + location: keyword.location.to(arguments.location) + ) + end + + # Yield0 represents the bare +yield+ keyword with no arguments. 
+ # + # yield + # + class Yield0 + # [String] the value of the keyword + attr_reader :value + + # [Location] the location of this node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('yield0') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :yield0, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_yield0: () -> Yield0 + def on_yield0 + keyword = find_token(Kw, 'yield') + + Yield0.new(value: keyword.value, location: keyword.location) + end + + # ZSuper represents the bare +super+ keyword with no arguments. + # + # super + # + class ZSuper + # [String] the value of the keyword + attr_reader :value + + # [Location] the location of the node + attr_reader :location + + def initialize(value:, location:) + @value = value + @location = location + end + + def pretty_print(q) + q.group(2, '(', ')') do + q.text('zsuper') + + q.breakable + q.pp(value) + end + end + + def to_json(*opts) + { type: :zsuper, value: value, loc: location }.to_json(*opts) + end + end + + # :call-seq: + # on_zsuper: () -> ZSuper + def on_zsuper + keyword = find_token(Kw, 'super') + + ZSuper.new(value: keyword.value, location: keyword.location) + end +end diff --git a/spec/syntax_suggest/fixtures/this_project_extra_def.rb.txt b/spec/syntax_suggest/fixtures/this_project_extra_def.rb.txt new file mode 100644 index 00000000000000..e62fd3fa6632ad --- /dev/null +++ b/spec/syntax_suggest/fixtures/this_project_extra_def.rb.txt @@ -0,0 +1,64 @@ +module SyntaxErrorSearch + # Used for formatting invalid blocks + class DisplayInvalidBlocks + attr_reader :filename + + def initialize(block_array, io: $stderr, filename: nil) + @filename = filename + @io = io + @blocks = block_array + @lines = @blocks.map(&:lines).flatten + @digit_count = @lines.last.line_number.to_s.length + @code_lines = @blocks.first.code_lines + + @invalid_line_hash = 
@lines.each_with_object({}) {|line, h| h[line] = true} + end + + def call + @io.puts <<~EOM + + SyntaxSuggest: A syntax error was detected + + This code has an unmatched `end` this is caused by either + missing a syntax keyword (`def`, `do`, etc.) or inclusion + of an extra `end` line: + EOM + + @io.puts(<<~EOM) if filename + file: #{filename} + EOM + + @io.puts <<~EOM + #{code_with_filename} + EOM + end + + def filename + + def code_with_filename + string = String.new("") + string << "```\n" + string << "#".rjust(@digit_count) + " filename: #{filename}\n\n" if filename + string << code_with_lines + string << "```\n" + string + end + + def code_with_lines + @code_lines.map do |line| + next if line.hidden? + number = line.line_number.to_s.rjust(@digit_count) + if line.empty? + "#{number.to_s}#{line}" + else + string = String.new + string << "\e[1;3m" if @invalid_line_hash[line] # Bold, italics + string << "#{number.to_s} " + string << line.to_s + string << "\e[0m" + string + end + end.join + end + end +end diff --git a/spec/syntax_suggest/fixtures/webmock.rb.txt b/spec/syntax_suggest/fixtures/webmock.rb.txt new file mode 100644 index 00000000000000..16da0d2ac0701c --- /dev/null +++ b/spec/syntax_suggest/fixtures/webmock.rb.txt @@ -0,0 +1,35 @@ +describe "webmock tests" do + before(:each) do + WebMock.enable! + end + + after(:each) do + WebMock.disable! + end + + it "port" do + port = rand(1000...9999) + stub_request(:any, "localhost:#{port}") + + query = Cutlass::FunctionQuery.new( + port: port + ).call + + expect(WebMock).to have_requested(:post, "localhost:#{port}"). + with(body: "{}") + end + + it "body" do + body = { lol: "hi" } + port = 8080 + stub_request(:any, "localhost:#{port}") + + query = Cutlass::FunctionQuery.new( + port: port + body: body + ).call + + expect(WebMock).to have_requested(:post, "localhost:#{port}"). 
+ with(body: body.to_json) + end +end diff --git a/spec/syntax_suggest/integration/exe_cli_spec.rb b/spec/syntax_suggest/integration/exe_cli_spec.rb new file mode 100644 index 00000000000000..f0b49b4386f100 --- /dev/null +++ b/spec/syntax_suggest/integration/exe_cli_spec.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe "exe" do + def exe_path + if ruby_core? + root_dir.join("../libexec").join("syntax_suggest") + else + root_dir.join("exe").join("syntax_suggest") + end + end + + def exe(cmd) + out = run!("#{exe_path} #{cmd}", raise_on_nonzero_exit: false) + puts out if ENV["SYNTAX_SUGGEST_DEBUG"] + out + end + + it "prints the version" do + out = exe("-v") + expect(out.strip).to include(SyntaxSuggest::VERSION) + end + end +end diff --git a/spec/syntax_suggest/integration/ruby_command_line_spec.rb b/spec/syntax_suggest/integration/ruby_command_line_spec.rb new file mode 100644 index 00000000000000..6ed1bf0bf728c0 --- /dev/null +++ b/spec/syntax_suggest/integration/ruby_command_line_spec.rb @@ -0,0 +1,154 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe "Requires with ruby cli" do + it "namespaces all monkeypatched methods" do + Dir.mktmpdir do |dir| + tmpdir = Pathname(dir) + script = tmpdir.join("script.rb") + script.write <<~'EOM' + puts Kernel.private_methods + EOM + + syntax_suggest_methods_file = tmpdir.join("syntax_suggest_methods.txt") + api_only_methods_file = tmpdir.join("api_only_methods.txt") + kernel_methods_file = tmpdir.join("kernel_methods.txt") + + d_pid = Process.spawn("ruby -I#{lib_dir} -rsyntax_suggest #{script} 2>&1 > #{syntax_suggest_methods_file}") + k_pid = Process.spawn("ruby #{script} 2>&1 >> #{kernel_methods_file}") + r_pid = Process.spawn("ruby -I#{lib_dir} -rsyntax_suggest/api #{script} 2>&1 > #{api_only_methods_file}") + + Process.wait(k_pid) + Process.wait(d_pid) + Process.wait(r_pid) + + 
kernel_methods_array = kernel_methods_file.read.strip.lines.map(&:strip) + syntax_suggest_methods_array = syntax_suggest_methods_file.read.strip.lines.map(&:strip) + api_only_methods_array = api_only_methods_file.read.strip.lines.map(&:strip) + + # In ruby 3.1.0-preview1 the `timeout` file is already required + # we can remove it if it exists to normalize the output for + # all ruby versions + [syntax_suggest_methods_array, kernel_methods_array, api_only_methods_array].each do |array| + array.delete("timeout") + end + + methods = (syntax_suggest_methods_array - kernel_methods_array).sort + if methods.any? + expect(methods).to eq(["syntax_suggest_original_load", "syntax_suggest_original_require", "syntax_suggest_original_require_relative"]) + end + + methods = (api_only_methods_array - kernel_methods_array).sort + expect(methods).to eq([]) + end + end + + it "detects require error and adds a message with auto mode" do + skip if ruby_core? + + Dir.mktmpdir do |dir| + tmpdir = Pathname(dir) + script = tmpdir.join("script.rb") + script.write <<~EOM + describe "things" do + it "blerg" do + end + + it "flerg" + end + + it "zlerg" do + end + end + EOM + + require_rb = tmpdir.join("require.rb") + require_rb.write <<~EOM + load "#{script.expand_path}" + EOM + + out = `ruby -I#{lib_dir} -rsyntax_suggest #{require_rb} 2>&1` + + expect($?.success?).to be_falsey + expect(out).to include('❯ 5 it "flerg"').once + end + end + + it "annotates a syntax error in Ruby 3.2+ when require is not used" do + pending("Support for SyntaxError#detailed_message monkeypatch needed https://gist.github.com/schneems/09f45cc23b9a8c46e9af6acbb6e6840d?permalink_comment_id=4172585#gistcomment-4172585") + + skip if ruby_core? 
+ skip if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("3.2") + + Dir.mktmpdir do |dir| + tmpdir = Pathname(dir) + script = tmpdir.join("script.rb") + script.write <<~EOM + describe "things" do + it "blerg" do + end + + it "flerg" + end + + it "zlerg" do + end + end + EOM + + out = `ruby -I#{lib_dir} -rsyntax_suggest #{script} 2>&1` + + expect($?.success?).to be_falsey + expect(out).to include('❯ 5 it "flerg"').once + end + end + + it "does not load internals into memory if no syntax error" do + Dir.mktmpdir do |dir| + tmpdir = Pathname(dir) + script = tmpdir.join("script.rb") + script.write <<~EOM + class Dog + end + + if defined?(SyntaxSuggest::DEFAULT_VALUE) + puts "SyntaxSuggest is loaded" + else + puts "SyntaxSuggest is NOT loaded" + end + EOM + + require_rb = tmpdir.join("require.rb") + require_rb.write <<~EOM + load "#{script.expand_path}" + EOM + + out = `ruby -I#{lib_dir} -rsyntax_suggest #{require_rb} 2>&1` + + expect($?.success?).to be_truthy + expect(out).to include("SyntaxSuggest is NOT loaded").once + end + end + + it "ignores eval" do + Dir.mktmpdir do |dir| + tmpdir = Pathname(dir) + script = tmpdir.join("script.rb") + script.write <<~'EOM' + $stderr = STDOUT + eval("def lol") + EOM + + out = `ruby -I#{lib_dir} -rsyntax_suggest #{script} 2>&1` + + expect($?.success?).to be_falsey + expect(out).to include("(eval):1") + + expect(out).to_not include("SyntaxSuggest") + expect(out).to_not include("Could not find filename") + end + end + end +end diff --git a/spec/syntax_suggest/integration/syntax_suggest_spec.rb b/spec/syntax_suggest/integration/syntax_suggest_spec.rb new file mode 100644 index 00000000000000..a7287ff64e02a6 --- /dev/null +++ b/spec/syntax_suggest/integration/syntax_suggest_spec.rb @@ -0,0 +1,211 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe "Integration tests that don't spawn a process (like using the cli)" do + it "does not timeout on massive files" do + next unless 
ENV["SYNTAX_SUGGEST_TIMEOUT"] + + file = fixtures_dir.join("syntax_tree.rb.txt") + lines = file.read.lines + lines.delete_at(768 - 1) + + io = StringIO.new + + benchmark = Benchmark.measure do + debug_perf do + SyntaxSuggest.call( + io: io, + source: lines.join, + filename: file + ) + end + debug_display(io.string) + debug_display(benchmark) + end + + expect(io.string).to include(<<~'EOM') + 6 class SyntaxTree < Ripper + 170 def self.parse(source) + 174 end + ❯ 754 def on_args_add(arguments, argument) + ❯ 776 class ArgsAddBlock + ❯ 810 end + 9233 end + EOM + end + + it "re-checks all block code, not just what's visible issues/95" do + file = fixtures_dir.join("ruby_buildpack.rb.txt") + io = StringIO.new + + debug_perf do + benchmark = Benchmark.measure do + SyntaxSuggest.call( + io: io, + source: file.read, + filename: file + ) + end + debug_display(io.string) + debug_display(benchmark) + end + + expect(io.string).to_not include("def ruby_install_binstub_path") + expect(io.string).to include(<<~'EOM') + ❯ 1067 def add_yarn_binary + ❯ 1068 return [] if yarn_preinstalled? 
+ ❯ 1069 | + ❯ 1075 end + EOM + end + + it "returns good results on routes.rb" do + source = fixtures_dir.join("routes.rb.txt").read + + io = StringIO.new + SyntaxSuggest.call( + io: io, + source: source + ) + debug_display(io.string) + + expect(io.string).to include(<<~'EOM') + 1 Rails.application.routes.draw do + ❯ 113 namespace :admin do + ❯ 116 match "/foobar(*path)", via: :all, to: redirect { |_params, req| + ❯ 120 } + 121 end + EOM + end + + it "handles multi-line-methods issues/64" do + source = fixtures_dir.join("webmock.rb.txt").read + + io = StringIO.new + SyntaxSuggest.call( + io: io, + source: source + ) + debug_display(io.string) + + expect(io.string).to include(<<~'EOM') + 1 describe "webmock tests" do + 22 it "body" do + 27 query = Cutlass::FunctionQuery.new( + ❯ 28 port: port + ❯ 29 body: body + 30 ).call + 34 end + 35 end + EOM + end + + it "handles derailed output issues/50" do + source = fixtures_dir.join("derailed_require_tree.rb.txt").read + + io = StringIO.new + SyntaxSuggest.call( + io: io, + source: source + ) + debug_display(io.string) + + expect(io.string).to include(<<~'EOM') + 5 module DerailedBenchmarks + 6 class RequireTree + 7 REQUIRED_BY = {} + 9 attr_reader :name + 10 attr_writer :cost + ❯ 13 def initialize(name) + ❯ 18 def self.reset! 
+ ❯ 25 end + 73 end + 74 end + EOM + end + + it "handles heredocs" do + lines = fixtures_dir.join("rexe.rb.txt").read.lines + lines.delete_at(85 - 1) + io = StringIO.new + SyntaxSuggest.call( + io: io, + source: lines.join + ) + + out = io.string + debug_display(out) + + expect(out).to include(<<~EOM) + 16 class Rexe + ❯ 77 class Lookups + ❯ 78 def input_modes + ❯ 148 end + 551 end + EOM + end + + it "rexe" do + lines = fixtures_dir.join("rexe.rb.txt").read.lines + lines.delete_at(148 - 1) + source = lines.join + + io = StringIO.new + SyntaxSuggest.call( + io: io, + source: source + ) + out = io.string + expect(out).to include(<<~EOM) + 16 class Rexe + 18 VERSION = '1.5.1' + ❯ 77 class Lookups + ❯ 140 def format_requires + ❯ 148 end + 551 end + EOM + end + + it "ambiguous end" do + source = <<~'EOM' + def call # 0 + print "lol" # 1 + end # one # 2 + end # two # 3 + EOM + io = StringIO.new + SyntaxSuggest.call( + io: io, + source: source + ) + out = io.string + expect(out).to include(<<~EOM) + ❯ 1 def call # 0 + ❯ 3 end # one # 2 + ❯ 4 end # two # 3 + EOM + end + + it "simple regression" do + source = <<~'EOM' + class Dog + def bark + puts "woof" + end + EOM + io = StringIO.new + SyntaxSuggest.call( + io: io, + source: source + ) + out = io.string + expect(out).to include(<<~EOM) + ❯ 1 class Dog + ❯ 2 def bark + ❯ 4 end + EOM + end + end +end diff --git a/spec/syntax_suggest/spec_helper.rb b/spec/syntax_suggest/spec_helper.rb new file mode 100644 index 00000000000000..e78dee76bc1298 --- /dev/null +++ b/spec/syntax_suggest/spec_helper.rb @@ -0,0 +1,94 @@ +# frozen_string_literal: true + +require "bundler/setup" +require "syntax_suggest/api" + +require "benchmark" +require "tempfile" + +RSpec.configure do |config| + # Enable flags like --only-failures and --next-failure + config.example_status_persistence_file_path = ".rspec_status" + + # Disable RSpec exposing methods globally on `Module` and `main` + config.disable_monkey_patching! 
+ + config.expect_with :rspec do |c| + c.syntax = :expect + end +end + +# Used for debugging modifications to +# display output +def debug_display(output) + return unless ENV["DEBUG_DISPLAY"] + puts + puts output + puts +end + +def spec_dir + Pathname(__dir__) +end + +def lib_dir + root_dir.join("lib") +end + +def root_dir + spec_dir.join("..") +end + +def fixtures_dir + spec_dir.join("fixtures") +end + +def ruby_core? + !root_dir.join("syntax_suggest.gemspec").exist? +end + +def code_line_array(source) + SyntaxSuggest::CleanDocument.new(source: source).call.lines +end + +autoload :RubyProf, "ruby-prof" + +def debug_perf + raise "No block given" unless block_given? + + if ENV["DEBUG_PERF"] + out = nil + result = RubyProf.profile do + out = yield + end + + dir = SyntaxSuggest.record_dir("tmp") + printer = RubyProf::MultiPrinter.new(result, [:flat, :graph, :graph_html, :tree, :call_tree, :stack, :dot]) + printer.print(path: dir, profile: "profile") + + out + else + yield + end +end + +def run!(cmd, raise_on_nonzero_exit: true) + out = `#{cmd} 2>&1` + raise "Command: #{cmd} failed: #{out}" if !$?.success? && raise_on_nonzero_exit + out +end + +# Allows us to write cleaner tests since <<~EOM block quotes +# strip off all leading indentation and we need it to be preserved +# sometimes. +class String + def indent(number) + lines.map do |line| + if line.chomp.empty? 
+ line + else + " " * number + line + end + end.join + end +end diff --git a/spec/syntax_suggest/unit/api_spec.rb b/spec/syntax_suggest/unit/api_spec.rb new file mode 100644 index 00000000000000..21df86bb3e7c8f --- /dev/null +++ b/spec/syntax_suggest/unit/api_spec.rb @@ -0,0 +1,86 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" +begin + require "ruby-prof" +rescue LoadError +end + +module SyntaxSuggest + RSpec.describe "Top level SyntaxSuggest api" do + it "has a `handle_error` interface" do + fake_error = Object.new + def fake_error.message + "#{__FILE__}:216: unterminated string meets end of file " + end + + def fake_error.is_a?(v) + true + end + + io = StringIO.new + SyntaxSuggest.handle_error( + fake_error, + re_raise: false, + io: io + ) + + expect(io.string.strip).to eq("Syntax OK") + end + + it "raises original error with warning if a non-syntax error is passed" do + error = NameError.new("blerg") + io = StringIO.new + expect { + SyntaxSuggest.handle_error( + error, + re_raise: false, + io: io + ) + }.to raise_error { |e| + expect(io.string).to include("Must pass a SyntaxError") + expect(e).to eq(error) + } + end + + it "raises original error with warning if file is not found" do + fake_error = SyntaxError.new + def fake_error.message + "#does/not/exist/lol/doesnotexist:216: unterminated string meets end of file " + end + + io = StringIO.new + expect { + SyntaxSuggest.handle_error( + fake_error, + re_raise: false, + io: io + ) + }.to raise_error { |e| + expect(io.string).to include("Could not find filename") + expect(e).to eq(fake_error) + } + end + + it "respects highlight API" do + skip if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("3.2") + + error = SyntaxError.new("#{fixtures_dir.join("this_project_extra_def.rb.txt")}:1 ") + + require "syntax_suggest/core_ext" + + expect(error.detailed_message(highlight: true)).to include(SyntaxSuggest::DisplayCodeWithLineNumbers::TERMINAL_HIGHLIGHT) + 
expect(error.detailed_message(highlight: false)).to_not include(SyntaxSuggest::DisplayCodeWithLineNumbers::TERMINAL_HIGHLIGHT) + end + + it "can be disabled via falsey kwarg" do + skip if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("3.2") + + error = SyntaxError.new("#{fixtures_dir.join("this_project_extra_def.rb.txt")}:1 ") + + require "syntax_suggest/core_ext" + + expect(error.detailed_message(syntax_suggest: true)).to_not eq(error.detailed_message(syntax_suggest: false)) + end + end +end diff --git a/spec/syntax_suggest/unit/around_block_scan_spec.rb b/spec/syntax_suggest/unit/around_block_scan_spec.rb new file mode 100644 index 00000000000000..6053c3947e4351 --- /dev/null +++ b/spec/syntax_suggest/unit/around_block_scan_spec.rb @@ -0,0 +1,165 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe AroundBlockScan do + it "continues scan from last location even if scan is false" do + source = <<~'EOM' + print 'omg' + print 'lol' + print 'haha' + EOM + code_lines = CodeLine.from_source(source) + block = CodeBlock.new(lines: code_lines[1]) + expand = AroundBlockScan.new(code_lines: code_lines, block: block) + .scan_neighbors + + expect(expand.code_block.to_s).to eq(source) + expand.scan_while { |line| false } + + expect(expand.code_block.to_s).to eq(source) + end + + it "scan_adjacent_indent works on first or last line" do + source_string = <<~EOM + def foo + if [options.output_format_tty, options.output_format_block].include?(nil) + raise("Bad output mode '\#{v}'; each must be one of \#{lookups.output_formats.keys}.") + end + end + EOM + + code_lines = code_line_array(source_string) + block = CodeBlock.new(lines: code_lines[4]) + expand = AroundBlockScan.new(code_lines: code_lines, block: block) + .scan_adjacent_indent + + expect(expand.code_block.to_s).to eq(<<~EOM) + def foo + if [options.output_format_tty, options.output_format_block].include?(nil) + raise("Bad output mode '\#{v}'; each must be one 
of \#{lookups.output_formats.keys}.") + end + end + EOM + end + + it "expands indentation" do + source_string = <<~EOM + def foo + if [options.output_format_tty, options.output_format_block].include?(nil) + raise("Bad output mode '\#{v}'; each must be one of \#{lookups.output_formats.keys}.") + end + end + EOM + + code_lines = code_line_array(source_string) + block = CodeBlock.new(lines: code_lines[2]) + expand = AroundBlockScan.new(code_lines: code_lines, block: block) + .stop_after_kw + .scan_adjacent_indent + + expect(expand.code_block.to_s).to eq(<<~EOM.indent(2)) + if [options.output_format_tty, options.output_format_block].include?(nil) + raise("Bad output mode '\#{v}'; each must be one of \#{lookups.output_formats.keys}.") + end + EOM + end + + it "can stop before hitting another end" do + source_string = <<~EOM + def lol + end + def foo + puts "lol" + end + EOM + + code_lines = code_line_array(source_string) + block = CodeBlock.new(lines: code_lines[3]) + expand = AroundBlockScan.new(code_lines: code_lines, block: block) + expand.stop_after_kw + expand.scan_while { true } + + expect(expand.code_block.to_s).to eq(<<~EOM) + def foo + puts "lol" + end + EOM + end + + it "captures multiple empty and hidden lines" do + source_string = <<~EOM + def foo + Foo.call + + puts "lol" + + end + end + EOM + + code_lines = code_line_array(source_string) + block = CodeBlock.new(lines: code_lines[3]) + expand = AroundBlockScan.new(code_lines: code_lines, block: block) + expand.scan_while { true } + + expect(expand.before_index).to eq(0) + expect(expand.after_index).to eq(6) + expect(expand.code_block.to_s).to eq(source_string) + end + + it "only takes what you ask" do + source_string = <<~EOM + def foo + Foo.call + + puts "lol" + + end + end + EOM + + code_lines = code_line_array(source_string) + block = CodeBlock.new(lines: code_lines[3]) + expand = AroundBlockScan.new(code_lines: code_lines, block: block) + expand.scan_while { |line| line.not_empty? 
} + + expect(expand.code_block.to_s).to eq(<<~EOM.indent(4)) + puts "lol" + EOM + end + + it "skips what you want" do + source_string = <<~EOM + def foo + Foo.call + + puts "haha" + # hide me + + puts "lol" + + end + end + EOM + + code_lines = code_line_array(source_string) + code_lines[4].mark_invisible + + block = CodeBlock.new(lines: code_lines[3]) + expand = AroundBlockScan.new(code_lines: code_lines, block: block) + expand.skip(:empty?) + expand.skip(:hidden?) + expand.scan_neighbors + + expect(expand.code_block.to_s).to eq(<<~EOM.indent(4)) + + puts "haha" + + puts "lol" + + EOM + end + end +end diff --git a/spec/syntax_suggest/unit/block_expand_spec.rb b/spec/syntax_suggest/unit/block_expand_spec.rb new file mode 100644 index 00000000000000..ba0b0457a1d613 --- /dev/null +++ b/spec/syntax_suggest/unit/block_expand_spec.rb @@ -0,0 +1,200 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe BlockExpand do + it "captures multiple empty and hidden lines" do + source_string = <<~EOM + def foo + Foo.call + + + puts "lol" + + # hidden + end + end + EOM + + code_lines = code_line_array(source_string) + + code_lines[6].mark_invisible + + block = CodeBlock.new(lines: [code_lines[3]]) + expansion = BlockExpand.new(code_lines: code_lines) + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM.indent(4)) + + + puts "lol" + + EOM + end + + it "captures multiple empty lines" do + source_string = <<~EOM + def foo + Foo.call + + + puts "lol" + + end + end + EOM + + code_lines = code_line_array(source_string) + block = CodeBlock.new(lines: [code_lines[3]]) + expansion = BlockExpand.new(code_lines: code_lines) + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM.indent(4)) + + + puts "lol" + + EOM + end + + it "expands neighbors then indentation" do + source_string = <<~EOM + def foo + Foo.call + puts "hey" + puts "lol" + puts "sup" + end + end + EOM + + code_lines = 
code_line_array(source_string) + block = CodeBlock.new(lines: [code_lines[3]]) + expansion = BlockExpand.new(code_lines: code_lines) + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM.indent(4)) + puts "hey" + puts "lol" + puts "sup" + EOM + + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM.indent(2)) + Foo.call + puts "hey" + puts "lol" + puts "sup" + end + EOM + end + + it "handles else code" do + source_string = <<~EOM + Foo.call + if blerg + puts "lol" + else + puts "haha" + end + end + EOM + + code_lines = code_line_array(source_string) + block = CodeBlock.new(lines: [code_lines[2]]) + expansion = BlockExpand.new(code_lines: code_lines) + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM.indent(2)) + if blerg + puts "lol" + else + puts "haha" + end + EOM + end + + it "expand until next boundry (indentation)" do + source_string = <<~EOM + describe "what" do + Foo.call + end + + describe "hi" + Bar.call do + Foo.call + end + end + + it "blerg" do + end + EOM + + code_lines = code_line_array(source_string) + + block = CodeBlock.new( + lines: code_lines[6] + ) + + expansion = BlockExpand.new(code_lines: code_lines) + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM.indent(2)) + Bar.call do + Foo.call + end + EOM + + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM) + describe "hi" + Bar.call do + Foo.call + end + end + EOM + end + + it "expand until next boundry (empty lines)" do + source_string = <<~EOM + describe "what" do + end + + describe "hi" + end + + it "blerg" do + end + EOM + + code_lines = code_line_array(source_string) + expansion = BlockExpand.new(code_lines: code_lines) + + block = CodeBlock.new(lines: code_lines[3]) + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM) + + describe "hi" + end + + EOM + + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM) + describe "what" do + end + + describe "hi" + end + + it "blerg" do + end + EOM + 
end + end +end diff --git a/spec/syntax_suggest/unit/capture_code_context_spec.rb b/spec/syntax_suggest/unit/capture_code_context_spec.rb new file mode 100644 index 00000000000000..e1bc281c13edd3 --- /dev/null +++ b/spec/syntax_suggest/unit/capture_code_context_spec.rb @@ -0,0 +1,202 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe CaptureCodeContext do + it "capture_before_after_kws" do + source = <<~'EOM' + def sit + end + + def bark + + def eat + end + EOM + + code_lines = CleanDocument.new(source: source).call.lines + block = CodeBlock.new(lines: code_lines[0]) + + display = CaptureCodeContext.new( + blocks: [block], + code_lines: code_lines + ) + lines = display.call + expect(lines.join).to eq(<<~'EOM') + def sit + end + def bark + def eat + end + EOM + end + + it "handles ambiguous end" do + source = <<~'EOM' + def call # 0 + print "lol" # 1 + end # one # 2 + end # two # 3 + EOM + + code_lines = CleanDocument.new(source: source).call.lines + code_lines[0..2].each(&:mark_invisible) + block = CodeBlock.new(lines: code_lines) + + display = CaptureCodeContext.new( + blocks: [block], + code_lines: code_lines + ) + lines = display.call + + lines = lines.sort.map(&:original) + + expect(lines.join).to eq(<<~'EOM') + def call # 0 + end # one # 2 + end # two # 3 + EOM + end + + it "shows ends of captured block" do + lines = fixtures_dir.join("rexe.rb.txt").read.lines + lines.delete_at(148 - 1) + source = lines.join + + code_lines = CleanDocument.new(source: source).call.lines + + code_lines[0..75].each(&:mark_invisible) + code_lines[77..-1].each(&:mark_invisible) + expect(code_lines.join.strip).to eq("class Lookups") + + block = CodeBlock.new(lines: code_lines[76..149]) + + display = CaptureCodeContext.new( + blocks: [block], + code_lines: code_lines + ) + lines = display.call + + lines = lines.sort.map(&:original) + expect(lines.join).to include(<<~'EOM'.indent(2)) + class Lookups + def format_requires + 
end + EOM + end + + it "shows ends of captured block" do + source = <<~'EOM' + class Dog + def bark + puts "woof" + end + EOM + + code_lines = CleanDocument.new(source: source).call.lines + block = CodeBlock.new(lines: code_lines) + code_lines[1..-1].each(&:mark_invisible) + + expect(block.to_s.strip).to eq("class Dog") + + display = CaptureCodeContext.new( + blocks: [block], + code_lines: code_lines + ) + lines = display.call.sort.map(&:original) + expect(lines.join).to eq(<<~'EOM') + class Dog + def bark + end + EOM + end + + it "captures surrounding context on falling indent" do + source = <<~'EOM' + class Blerg + end + + class OH + + def hello + it "foo" do + end + end + + class Zerg + end + EOM + code_lines = CleanDocument.new(source: source).call.lines + block = CodeBlock.new(lines: code_lines[6]) + + expect(block.to_s.strip).to eq('it "foo" do') + + display = CaptureCodeContext.new( + blocks: [block], + code_lines: code_lines + ) + lines = display.call.sort.map(&:original) + expect(lines.join).to eq(<<~'EOM') + class OH + def hello + it "foo" do + end + end + EOM + end + + it "captures surrounding context on same indent" do + source = <<~'EOM' + class Blerg + end + class OH + + def nope + end + + def lol + end + + end # here + + def haha + end + + def nope + end + end + + class Zerg + end + EOM + + code_lines = CleanDocument.new(source: source).call.lines + block = CodeBlock.new(lines: code_lines[7..10]) + expect(block.to_s).to eq(<<~'EOM'.indent(2)) + def lol + end + + end # here + EOM + + code_context = CaptureCodeContext.new( + blocks: [block], + code_lines: code_lines + ) + + lines = code_context.call + out = DisplayCodeWithLineNumbers.new( + lines: lines + ).call + + expect(out).to eq(<<~'EOM'.indent(2)) + 3 class OH + 8 def lol + 9 end + 11 end # here + 18 end + EOM + end + end +end diff --git a/spec/syntax_suggest/unit/clean_document_spec.rb b/spec/syntax_suggest/unit/clean_document_spec.rb new file mode 100644 index 00000000000000..fa049ad8df5773 --- 
/dev/null +++ b/spec/syntax_suggest/unit/clean_document_spec.rb @@ -0,0 +1,259 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe CleanDocument do + it "heredocs" do + source = fixtures_dir.join("this_project_extra_def.rb.txt").read + code_lines = CleanDocument.new(source: source).call.lines + + expect(code_lines[18 - 1].to_s).to eq(<<-'EOL') + @io.puts <<~EOM + + SyntaxSuggest: A syntax error was detected + + This code has an unmatched `end` this is caused by either + missing a syntax keyword (`def`, `do`, etc.) or inclusion + of an extra `end` line: + EOM + EOL + expect(code_lines[18].to_s).to eq("") + + expect(code_lines[27 - 1].to_s).to eq(<<-'EOL') + @io.puts(<<~EOM) if filename + file: #{filename} + EOM + EOL + expect(code_lines[27].to_s).to eq("") + + expect(code_lines[31 - 1].to_s).to eq(<<-'EOL') + @io.puts <<~EOM + #{code_with_filename} + EOM + EOL + expect(code_lines[31].to_s).to eq("") + end + + it "joins: multi line methods" do + source = <<~EOM + User + .where(name: 'schneems') + .first + EOM + + doc = CleanDocument.new(source: source).join_consecutive! 
+ + expect(doc.lines[0].to_s).to eq(source) + expect(doc.lines[1].to_s).to eq("") + expect(doc.lines[2].to_s).to eq("") + expect(doc.lines[3]).to eq(nil) + + lines = doc.lines + expect( + DisplayCodeWithLineNumbers.new( + lines: lines + ).call + ).to eq(<<~'EOM'.indent(2)) + 1 User + 2 .where(name: 'schneems') + 3 .first + EOM + + expect( + DisplayCodeWithLineNumbers.new( + lines: lines, + highlight_lines: lines[0] + ).call + ).to eq(<<~'EOM') + ❯ 1 User + ❯ 2 .where(name: 'schneems') + ❯ 3 .first + EOM + end + + it "helper method: take_while_including" do + source = <<~EOM + User + .where(name: 'schneems') + .first + EOM + + doc = CleanDocument.new(source: source) + + lines = doc.take_while_including { |line| !line.to_s.include?("where") } + expect(lines.count).to eq(2) + end + + it "comments: removes comments" do + source = <<~EOM + # lol + puts "what" + # yolo + EOM + + out = CleanDocument.new(source: source).lines.join + expect(out.to_s).to eq(<<~EOM) + + puts "what" + + EOM + end + + it "whitespace: removes whitespace" do + source = " \n" + <<~EOM + puts "what" + EOM + + out = CleanDocument.new(source: source).lines.join + expect(out.to_s).to eq(<<~EOM) + + puts "what" + EOM + + expect(source.lines.first.to_s).to_not eq("\n") + expect(out.lines.first.to_s).to eq("\n") + end + + it "trailing slash: does not join trailing do" do + # Some keywords and syntaxes trigger the "ignored line" + # lex output, we ignore them by filtering by BEG + # + # The `do` keyword is one of these: + # https://gist.github.com/schneems/6a7d7f988d3329fb3bd4b5be3e2efc0c + source = <<~EOM + foo do + puts "lol" + end + EOM + + doc = CleanDocument.new(source: source).join_consecutive! 
+ + expect(doc.lines[0].to_s).to eq(source.lines[0]) + expect(doc.lines[1].to_s).to eq(source.lines[1]) + expect(doc.lines[2].to_s).to eq(source.lines[2]) + end + + it "trailing slash: formats output" do + source = <<~'EOM' + context "timezones workaround" do + it "should receive a time in UTC format and return the time with the"\ + "office's UTC offset substracted from it" do + travel_to DateTime.new(2020, 10, 1, 10, 0, 0) do + office = build(:office) + end + end + end + EOM + + code_lines = CleanDocument.new(source: source).call.lines + expect( + DisplayCodeWithLineNumbers.new( + lines: code_lines.select(&:visible?) + ).call + ).to eq(<<~'EOM'.indent(2)) + 1 context "timezones workaround" do + 2 it "should receive a time in UTC format and return the time with the"\ + 3 "office's UTC offset substracted from it" do + 4 travel_to DateTime.new(2020, 10, 1, 10, 0, 0) do + 5 office = build(:office) + 6 end + 7 end + 8 end + EOM + + expect( + DisplayCodeWithLineNumbers.new( + lines: code_lines.select(&:visible?), + highlight_lines: code_lines[1] + ).call + ).to eq(<<~'EOM') + 1 context "timezones workaround" do + ❯ 2 it "should receive a time in UTC format and return the time with the"\ + ❯ 3 "office's UTC offset substracted from it" do + 4 travel_to DateTime.new(2020, 10, 1, 10, 0, 0) do + 5 office = build(:office) + 6 end + 7 end + 8 end + EOM + end + + it "trailing slash: basic detection" do + source = <<~'EOM' + it "trailing s" \ + "lash" do + EOM + + code_lines = CleanDocument.new(source: source).call.lines + + expect(code_lines[0]).to_not be_hidden + expect(code_lines[1]).to be_hidden + + expect( + code_lines.join + ).to eq(code_lines.map(&:original).join) + end + + it "trailing slash: joins multiple lines" do + source = <<~'EOM' + it "should " \ + "keep " \ + "going " do + end + EOM + + doc = CleanDocument.new(source: source).join_trailing_slash! 
+ expect(doc.lines[0].to_s).to eq(source.lines[0..2].join) + expect(doc.lines[1].to_s).to eq("") + expect(doc.lines[2].to_s).to eq("") + expect(doc.lines[3].to_s).to eq(source.lines[3]) + + lines = doc.lines + expect( + DisplayCodeWithLineNumbers.new( + lines: lines + ).call + ).to eq(<<~'EOM'.indent(2)) + 1 it "should " \ + 2 "keep " \ + 3 "going " do + 4 end + EOM + + expect( + DisplayCodeWithLineNumbers.new( + lines: lines, + highlight_lines: lines[0] + ).call + ).to eq(<<~'EOM') + ❯ 1 it "should " \ + ❯ 2 "keep " \ + ❯ 3 "going " do + 4 end + EOM + end + + it "trailing slash: no false positives" do + source = <<~'EOM' + def formatters + @formatters ||= { + amazing_print: ->(obj) { obj.ai + "\n" }, + inspect: ->(obj) { obj.inspect + "\n" }, + json: ->(obj) { obj.to_json }, + marshal: ->(obj) { Marshal.dump(obj) }, + none: ->(_obj) { nil }, + pretty_json: ->(obj) { JSON.pretty_generate(obj) }, + pretty_print: ->(obj) { obj.pretty_inspect }, + puts: ->(obj) { require 'stringio'; sio = StringIO.new; sio.puts(obj); sio.string }, + to_s: ->(obj) { obj.to_s + "\n" }, + yaml: ->(obj) { obj.to_yaml }, + } + end + EOM + + code_lines = CleanDocument.new(source: source).call.lines + expect(code_lines.join).to eq(code_lines.join) + end + end +end diff --git a/spec/syntax_suggest/unit/cli_spec.rb b/spec/syntax_suggest/unit/cli_spec.rb new file mode 100644 index 00000000000000..fecf3e304c9361 --- /dev/null +++ b/spec/syntax_suggest/unit/cli_spec.rb @@ -0,0 +1,224 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + class FakeExit + def initialize + @called = false + @value = nil + end + + def exit(value = nil) + @called = true + @value = value + end + + def called? 
+ @called + end + + attr_reader :value + end + + RSpec.describe Cli do + it "parses valid code" do + Dir.mktmpdir do |dir| + dir = Pathname(dir) + file = dir.join("script.rb") + file.write("puts 'lol'") + + io = StringIO.new + exit_obj = FakeExit.new + Cli.new( + io: io, + argv: [file.to_s], + exit_obj: exit_obj + ).call + + expect(exit_obj.called?).to be_truthy + expect(exit_obj.value).to eq(0) + expect(io.string.strip).to eq("Syntax OK") + end + end + + it "parses invalid code" do + file = fixtures_dir.join("this_project_extra_def.rb.txt") + + io = StringIO.new + exit_obj = FakeExit.new + Cli.new( + io: io, + argv: [file.to_s], + exit_obj: exit_obj + ).call + + out = io.string + debug_display(out) + + expect(exit_obj.called?).to be_truthy + expect(exit_obj.value).to eq(1) + expect(out.strip).to include("❯ 36 def filename") + end + + it "parses valid code with flags" do + Dir.mktmpdir do |dir| + dir = Pathname(dir) + file = dir.join("script.rb") + file.write("puts 'lol'") + + io = StringIO.new + exit_obj = FakeExit.new + cli = Cli.new( + io: io, + argv: ["--terminal", file.to_s], + exit_obj: exit_obj + ) + cli.call + + expect(exit_obj.called?).to be_truthy + expect(exit_obj.value).to eq(0) + expect(cli.options[:terminal]).to be_truthy + expect(io.string.strip).to eq("Syntax OK") + end + end + + it "errors when no file given" do + io = StringIO.new + exit_obj = FakeExit.new + cli = Cli.new( + io: io, + argv: ["--terminal"], + exit_obj: exit_obj + ) + cli.call + + expect(exit_obj.called?).to be_truthy + expect(exit_obj.value).to eq(1) + expect(io.string.strip).to eq("No file given") + end + + it "errors when file does not exist" do + io = StringIO.new + exit_obj = FakeExit.new + cli = Cli.new( + io: io, + argv: ["lol-i-d-o-not-ex-ist-yololo.txtblerglol"], + exit_obj: exit_obj + ) + cli.call + + expect(exit_obj.called?).to be_truthy + expect(exit_obj.value).to eq(1) + expect(io.string.strip).to include("file not found:") + end + + # We cannot execute the parser here 
+ # because it calls `exit` and it will exit + # our tests, however we can assert that the + # parser has the right value for version + it "-v version" do + io = StringIO.new + exit_obj = FakeExit.new + parser = Cli.new( + io: io, + argv: ["-v"], + exit_obj: exit_obj + ).parser + + expect(parser.version).to include(SyntaxSuggest::VERSION.to_s) + end + + it "SYNTAX_SUGGEST_RECORD_DIR" do + io = StringIO.new + exit_obj = FakeExit.new + cli = Cli.new( + io: io, + argv: [], + env: {"SYNTAX_SUGGEST_RECORD_DIR" => "hahaha"}, + exit_obj: exit_obj + ).parse + + expect(exit_obj.called?).to be_falsey + expect(cli.options[:record_dir]).to eq("hahaha") + end + + it "--record-dir=" do + io = StringIO.new + exit_obj = FakeExit.new + cli = Cli.new( + io: io, + argv: ["--record=lol"], + exit_obj: exit_obj + ).parse + + expect(exit_obj.called?).to be_falsey + expect(cli.options[:record_dir]).to eq("lol") + end + + it "terminal default to respecting TTY" do + io = StringIO.new + exit_obj = FakeExit.new + cli = Cli.new( + io: io, + argv: [], + exit_obj: exit_obj + ).parse + + expect(exit_obj.called?).to be_falsey + expect(cli.options[:terminal]).to eq(SyntaxSuggest::DEFAULT_VALUE) + end + + it "--terminal" do + io = StringIO.new + exit_obj = FakeExit.new + cli = Cli.new( + io: io, + argv: ["--terminal"], + exit_obj: exit_obj + ).parse + + expect(exit_obj.called?).to be_falsey + expect(cli.options[:terminal]).to be_truthy + end + + it "--no-terminal" do + io = StringIO.new + exit_obj = FakeExit.new + cli = Cli.new( + io: io, + argv: ["--no-terminal"], + exit_obj: exit_obj + ).parse + + expect(exit_obj.called?).to be_falsey + expect(cli.options[:terminal]).to be_falsey + end + + it "--help outputs help" do + io = StringIO.new + exit_obj = FakeExit.new + Cli.new( + io: io, + argv: ["--help"], + exit_obj: exit_obj + ).call + + expect(exit_obj.called?).to be_truthy + expect(io.string).to include("Usage: syntax_suggest [options]") + end + + it " outputs help" do + io = StringIO.new + 
exit_obj = FakeExit.new + Cli.new( + io: io, + argv: [], + exit_obj: exit_obj + ).call + + expect(exit_obj.called?).to be_truthy + expect(io.string).to include("Usage: syntax_suggest [options]") + end + end +end diff --git a/spec/syntax_suggest/unit/code_block_spec.rb b/spec/syntax_suggest/unit/code_block_spec.rb new file mode 100644 index 00000000000000..3ab2751b271597 --- /dev/null +++ b/spec/syntax_suggest/unit/code_block_spec.rb @@ -0,0 +1,77 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe CodeBlock do + it "can detect if it's valid or not" do + code_lines = code_line_array(<<~EOM) + def foo + puts 'lol' + end + EOM + + block = CodeBlock.new(lines: code_lines[1]) + expect(block.valid?).to be_truthy + end + + it "can be sorted in indentation order" do + code_lines = code_line_array(<<~EOM) + def foo + puts 'lol' + end + EOM + + block_0 = CodeBlock.new(lines: code_lines[0]) + block_1 = CodeBlock.new(lines: code_lines[1]) + block_2 = CodeBlock.new(lines: code_lines[2]) + + expect(block_0 <=> block_0.dup).to eq(0) + expect(block_1 <=> block_0).to eq(1) + expect(block_1 <=> block_2).to eq(-1) + + array = [block_2, block_1, block_0].sort + expect(array.last).to eq(block_2) + + block = CodeBlock.new(lines: CodeLine.new(line: " " * 8 + "foo", index: 4, lex: [])) + array.prepend(block) + expect(array.max).to eq(block) + end + + it "knows it's current indentation level" do + code_lines = code_line_array(<<~EOM) + def foo + puts 'lol' + end + EOM + + block = CodeBlock.new(lines: code_lines[1]) + expect(block.current_indent).to eq(2) + + block = CodeBlock.new(lines: code_lines[0]) + expect(block.current_indent).to eq(0) + end + + it "knows it's current indentation level when mismatched indents" do + code_lines = code_line_array(<<~EOM) + def foo + puts 'lol' + end + EOM + + block = CodeBlock.new(lines: [code_lines[1], code_lines[2]]) + expect(block.current_indent).to eq(1) + end + + it "before lines and after 
lines" do + code_lines = code_line_array(<<~EOM) + def foo + bar; end + end + EOM + + block = CodeBlock.new(lines: code_lines[1]) + expect(block.valid?).to be_falsey + end + end +end diff --git a/spec/syntax_suggest/unit/code_frontier_spec.rb b/spec/syntax_suggest/unit/code_frontier_spec.rb new file mode 100644 index 00000000000000..c9aba7c8d80a42 --- /dev/null +++ b/spec/syntax_suggest/unit/code_frontier_spec.rb @@ -0,0 +1,135 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe CodeFrontier do + it "detect_bad_blocks" do + code_lines = code_line_array(<<~EOM) + describe "lol" do + end + end + + it "lol" do + end + end + EOM + + frontier = CodeFrontier.new(code_lines: code_lines) + blocks = [] + blocks << CodeBlock.new(lines: code_lines[1]) + blocks << CodeBlock.new(lines: code_lines[5]) + blocks.each do |b| + frontier << b + end + + expect(frontier.detect_invalid_blocks.sort).to eq(blocks.sort) + end + + it "self.combination" do + expect( + CodeFrontier.combination([:a, :b, :c, :d]) + ).to eq( + [ + [:a], [:b], [:c], [:d], + [:a, :b], + [:a, :c], + [:a, :d], + [:b, :c], + [:b, :d], + [:c, :d], + [:a, :b, :c], + [:a, :b, :d], + [:a, :c, :d], + [:b, :c, :d], + [:a, :b, :c, :d] + ] + ) + end + + it "doesn't duplicate blocks" do + code_lines = code_line_array(<<~EOM) + def foo + puts "lol" + puts "lol" + puts "lol" + end + EOM + + frontier = CodeFrontier.new(code_lines: code_lines) + frontier << CodeBlock.new(lines: [code_lines[2]]) + expect(frontier.count).to eq(1) + + frontier << CodeBlock.new(lines: [code_lines[1], code_lines[2], code_lines[3]]) + # expect(frontier.count).to eq(1) + expect(frontier.pop.to_s).to eq(<<~EOM.indent(2)) + puts "lol" + puts "lol" + puts "lol" + EOM + + expect(frontier.pop).to be_nil + + code_lines = code_line_array(<<~EOM) + def foo + puts "lol" + puts "lol" + puts "lol" + end + EOM + + frontier = CodeFrontier.new(code_lines: code_lines) + frontier << CodeBlock.new(lines: 
[code_lines[2]]) + expect(frontier.count).to eq(1) + + frontier << CodeBlock.new(lines: [code_lines[3]]) + expect(frontier.count).to eq(2) + expect(frontier.pop.to_s).to eq(<<~EOM.indent(2)) + puts "lol" + EOM + end + + it "detects if multiple syntax errors are found" do + code_lines = code_line_array(<<~EOM) + def foo + end + end + EOM + + frontier = CodeFrontier.new(code_lines: code_lines) + + frontier << CodeBlock.new(lines: code_lines[1]) + block = frontier.pop + expect(block.to_s).to eq(<<~EOM.indent(2)) + end + EOM + frontier << block + + expect(frontier.holds_all_syntax_errors?).to be_truthy + end + + it "detects if it has not captured all syntax errors" do + code_lines = code_line_array(<<~EOM) + def foo + puts "lol" + end + + describe "lol" + end + + it "lol" + end + EOM + + frontier = CodeFrontier.new(code_lines: code_lines) + frontier << CodeBlock.new(lines: [code_lines[1]]) + block = frontier.pop + expect(block.to_s).to eq(<<~EOM.indent(2)) + puts "lol" + EOM + frontier << block + + expect(frontier.holds_all_syntax_errors?).to be_falsey + end + end +end diff --git a/spec/syntax_suggest/unit/code_line_spec.rb b/spec/syntax_suggest/unit/code_line_spec.rb new file mode 100644 index 00000000000000..cc4fa48bc9efea --- /dev/null +++ b/spec/syntax_suggest/unit/code_line_spec.rb @@ -0,0 +1,164 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe CodeLine do + it "bug in keyword detection" do + lines = CodeLine.from_source(<<~'EOM') + def to_json(*opts) + { + type: :module, + }.to_json(*opts) + end + EOM + expect(lines.count(&:is_kw?)).to eq(1) + expect(lines.count(&:is_end?)).to eq(1) + end + + it "supports endless method definitions" do + skip("Unsupported ruby version") unless Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("3") + + line = CodeLine.from_source(<<~'EOM').first + def square(x) = x * x + EOM + + expect(line.is_kw?).to be_falsey + expect(line.is_end?).to be_falsey + end + + it "retains 
original line value, after being marked invisible" do + line = CodeLine.from_source(<<~'EOM').first + puts "lol" + EOM + expect(line.line).to match('puts "lol"') + line.mark_invisible + expect(line.line).to eq("") + expect(line.original).to match('puts "lol"') + end + + it "knows which lines can be joined" do + code_lines = CodeLine.from_source(<<~'EOM') + user = User. + where(name: 'schneems'). + first + puts user.name + EOM + + # Indicates line 1 can join 2, 2 can join 3, but 3 won't join it's next line + expect(code_lines.map(&:ignore_newline_not_beg?)).to eq([true, true, false, false]) + end + it "trailing if" do + code_lines = CodeLine.from_source(<<~'EOM') + puts "lol" if foo + if foo + end + EOM + + expect(code_lines.map(&:is_kw?)).to eq([false, true, false]) + end + + it "trailing unless" do + code_lines = CodeLine.from_source(<<~'EOM') + puts "lol" unless foo + unless foo + end + EOM + + expect(code_lines.map(&:is_kw?)).to eq([false, true, false]) + end + + it "trailing slash" do + code_lines = CodeLine.from_source(<<~'EOM') + it "trailing s" \ + "lash" do + EOM + + expect(code_lines.map(&:trailing_slash?)).to eq([true, false]) + + code_lines = CodeLine.from_source(<<~'EOM') + amazing_print: ->(obj) { obj.ai + "\n" }, + EOM + expect(code_lines.map(&:trailing_slash?)).to eq([false]) + end + + it "knows it's got an end" do + line = CodeLine.from_source(" end").first + + expect(line.is_end?).to be_truthy + expect(line.is_kw?).to be_falsey + end + + it "knows it's got a keyword" do + line = CodeLine.from_source(" if").first + + expect(line.is_end?).to be_falsey + expect(line.is_kw?).to be_truthy + end + + it "ignores marked lines" do + code_lines = CodeLine.from_source(<<~EOM) + def foo + Array(value) |x| + end + end + EOM + + expect(SyntaxSuggest.valid?(code_lines)).to be_falsey + expect(code_lines.join).to eq(<<~EOM) + def foo + Array(value) |x| + end + end + EOM + + expect(code_lines[0].visible?).to be_truthy + expect(code_lines[3].visible?).to be_truthy + 
+ code_lines[0].mark_invisible + code_lines[3].mark_invisible + + expect(code_lines[0].visible?).to be_falsey + expect(code_lines[3].visible?).to be_falsey + + expect(code_lines.join).to eq(<<~EOM.indent(2)) + Array(value) |x| + end + EOM + expect(SyntaxSuggest.valid?(code_lines)).to be_falsey + end + + it "knows empty lines" do + code_lines = CodeLine.from_source(<<~EOM) + # Not empty + + # Not empty + EOM + + expect(code_lines.map(&:empty?)).to eq([false, true, false]) + expect(code_lines.map(&:not_empty?)).to eq([true, false, true]) + expect(code_lines.map { |l| SyntaxSuggest.valid?(l) }).to eq([true, true, true]) + end + + it "counts indentations" do + code_lines = CodeLine.from_source(<<~EOM) + def foo + Array(value) |x| + puts 'lol' + end + end + EOM + + expect(code_lines.map(&:indent)).to eq([0, 2, 4, 2, 0]) + end + + it "doesn't count empty lines as having an indentation" do + code_lines = CodeLine.from_source(<<~EOM) + + + EOM + + expect(code_lines.map(&:indent)).to eq([0, 0]) + end + end +end diff --git a/spec/syntax_suggest/unit/code_search_spec.rb b/spec/syntax_suggest/unit/code_search_spec.rb new file mode 100644 index 00000000000000..b62b2c0a3c9b03 --- /dev/null +++ b/spec/syntax_suggest/unit/code_search_spec.rb @@ -0,0 +1,505 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe CodeSearch do + it "rexe regression" do + lines = fixtures_dir.join("rexe.rb.txt").read.lines + lines.delete_at(148 - 1) + source = lines.join + + search = CodeSearch.new(source) + search.call + + expect(search.invalid_blocks.join.strip).to eq(<<~'EOM'.strip) + class Lookups + EOM + end + + it "squished do regression" do + source = <<~'EOM' + def call + trydo + + @options = CommandLineParser.new.parse + + options.requires.each { |r| require!(r) } + load_global_config_if_exists + options.loads.each { |file| load(file) } + + @user_source_code = ARGV.join(' ') + @user_source_code = 'self' if @user_source_code == '' + + 
@callable = create_callable + + init_rexe_context + init_parser_and_formatters + + # This is where the user's source code will be executed; the action will in turn call `execute`. + lookup_action(options.input_mode).call unless options.noop + + output_log_entry + end # one + end # two + EOM + + search = CodeSearch.new(source) + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM'.indent(2)) + trydo + end # one + EOM + end + + it "regression test ambiguous end" do + source = <<~'EOM' + def call # 0 + print "lol" # 1 + end # one # 2 + end # two # 3 + EOM + + search = CodeSearch.new(source) + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM') + end # two # 3 + EOM + end + + it "regression dog test" do + source = <<~'EOM' + class Dog + def bark + puts "woof" + end + EOM + search = CodeSearch.new(source) + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM') + class Dog + EOM + expect(search.invalid_blocks.first.lines.length).to eq(4) + end + + it "handles mismatched |" do + source = <<~EOM + class Blerg + Foo.call do |a + end # one + + puts lol + class Foo + end # two + end # three + EOM + search = CodeSearch.new(source) + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM'.indent(2)) + Foo.call do |a + end # one + EOM + end + + it "handles mismatched }" do + source = <<~EOM + class Blerg + Foo.call do { + + puts lol + class Foo + end # two + end # three + EOM + search = CodeSearch.new(source) + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM'.indent(2)) + Foo.call do { + EOM + end + + it "handles no spaces between blocks and trailing slash" do + source = <<~'EOM' + require "rails_helper" + RSpec.describe Foo, type: :model do + describe "#bar" do + context "context" do + it "foos the bar with a foo and then bazes the foo with a bar to"\ + "fooify the barred bar" do + travel_to DateTime.new(2020, 10, 1, 10, 0, 0) do + foo = build(:foo) + end + end + end + end + describe "#baz?" 
do + context "baz has barred the foo" do + it "returns true" do # <== HERE + end + end + end + EOM + + search = CodeSearch.new(source) + search.call + + expect(search.invalid_blocks.join.strip).to eq('it "returns true" do # <== HERE') + end + + it "handles no spaces between blocks" do + source = <<~'EOM' + context "foo bar" do + it "bars the foo" do + travel_to DateTime.new(2020, 10, 1, 10, 0, 0) do + end + end + end + context "test" do + it "should" do + end + EOM + search = CodeSearch.new(source) + search.call + + expect(search.invalid_blocks.join.strip).to eq('it "should" do') + end + + it "records debugging steps to a directory" do + Dir.mktmpdir do |dir| + dir = Pathname(dir) + search = CodeSearch.new(<<~'EOM', record_dir: dir) + class OH + def hello + def hai + end + end + EOM + search.call + + expect(search.record_dir.entries.map(&:to_s)).to include("1-add-1-(3__4).txt") + expect(search.record_dir.join("1-add-1-(3__4).txt").read).to include(<<~EOM) + 1 class OH + 2 def hello + ❯ 3 def hai + ❯ 4 end + 5 end + EOM + end + end + + it "def with missing end" do + search = CodeSearch.new(<<~'EOM') + class OH + def hello + + def hai + puts "lol" + end + end + EOM + search.call + + expect(search.invalid_blocks.join.strip).to eq("def hello") + + search = CodeSearch.new(<<~'EOM') + class OH + def hello + + def hai + end + end + EOM + search.call + + expect(search.invalid_blocks.join.strip).to eq("def hello") + + search = CodeSearch.new(<<~'EOM') + class OH + def hello + def hai + end + end + EOM + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM'.indent(2)) + def hello + EOM + end + + describe "real world cases" do + it "finds hanging def in this project" do + source_string = fixtures_dir.join("this_project_extra_def.rb.txt").read + search = CodeSearch.new(source_string) + search.call + + document = DisplayCodeWithLineNumbers.new( + lines: search.code_lines.select(&:visible?), + terminal: false, + highlight_lines: 
search.invalid_blocks.flat_map(&:lines) + ).call + + expect(document).to include(<<~'EOM') + ❯ 36 def filename + EOM + end + + it "Format Code blocks real world example" do + search = CodeSearch.new(<<~'EOM') + require 'rails_helper' + + RSpec.describe AclassNameHere, type: :worker do + describe "thing" do + context "when" do + let(:thing) { stuff } + let(:another_thing) { moarstuff } + subject { foo.new.perform(foo.id, true) } + + it "stuff" do + subject + + expect(foo.foo.foo).to eq(true) + end + end + end # line 16 accidental end, but valid block + + context "stuff" do + let(:thing) { create(:foo, foo: stuff) } + let(:another_thing) { create(:stuff) } + + subject { described_class.new.perform(foo.id, false) } + + it "more stuff" do + subject + + expect(foo.foo.foo).to eq(false) + end + end + end # mismatched due to 16 + end + EOM + search.call + + document = DisplayCodeWithLineNumbers.new( + lines: search.code_lines.select(&:visible?), + terminal: false, + highlight_lines: search.invalid_blocks.flat_map(&:lines) + ).call + + expect(document).to include(<<~'EOM') + 1 require 'rails_helper' + 2 + 3 RSpec.describe AclassNameHere, type: :worker do + ❯ 4 describe "thing" do + ❯ 16 end # line 16 accidental end, but valid block + ❯ 30 end # mismatched due to 16 + 31 end + EOM + end + end + + # For code that's not perfectly formatted, we ideally want to do our best + # These examples represent the results that exist today, but I would like to improve upon them + describe "needs improvement" do + describe "mis-matched-indentation" do + it "extra space before end" do + search = CodeSearch.new(<<~'EOM') + Foo.call + def foo + puts "lol" + puts "lol" + end # one + end # two + EOM + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM') + Foo.call + end # two + EOM + end + + it "stacked ends 2" do + search = CodeSearch.new(<<~'EOM') + def cat + blerg + end + + Foo.call do + end # one + end # two + + def dog + end + EOM + search.call + + 
expect(search.invalid_blocks.join).to eq(<<~'EOM') + Foo.call do + end # one + end # two + + EOM + end + + it "stacked ends " do + search = CodeSearch.new(<<~'EOM') + Foo.call + def foo + puts "lol" + puts "lol" + end + end + EOM + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM') + Foo.call + end + EOM + end + + it "missing space before end" do + search = CodeSearch.new(<<~'EOM') + Foo.call + + def foo + puts "lol" + puts "lol" + end + end + EOM + search.call + + # expand-1 and expand-2 seem to be broken? + expect(search.invalid_blocks.join).to eq(<<~'EOM') + Foo.call + end + EOM + end + end + end + + it "returns syntax error in outer block without inner block" do + search = CodeSearch.new(<<~'EOM') + Foo.call + def foo + puts "lol" + puts "lol" + end # one + end # two + EOM + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM') + Foo.call + end # two + EOM + end + + it "doesn't just return an empty `end`" do + search = CodeSearch.new(<<~'EOM') + Foo.call + end + EOM + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM') + Foo.call + end + EOM + end + + it "finds multiple syntax errors" do + search = CodeSearch.new(<<~'EOM') + describe "hi" do + Foo.call + end + end + + it "blerg" do + Bar.call + end + end + EOM + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM'.indent(2)) + Foo.call + end + Bar.call + end + EOM + end + + it "finds a typo def" do + search = CodeSearch.new(<<~'EOM') + defzfoo + puts "lol" + end + EOM + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM') + defzfoo + end + EOM + end + + it "finds a mis-matched def" do + search = CodeSearch.new(<<~'EOM') + def foo + def blerg + end + EOM + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM'.indent(2)) + def blerg + EOM + end + + it "finds a naked end" do + search = CodeSearch.new(<<~'EOM') + def foo + end # one + end # two + EOM + search.call + + expect(search.invalid_blocks.join).to eq(<<~'EOM'.indent(2)) + end 
# one + EOM + end + + it "returns when no invalid blocks are found" do + search = CodeSearch.new(<<~'EOM') + def foo + puts 'lol' + end + EOM + search.call + + expect(search.invalid_blocks).to eq([]) + end + + it "expands frontier by eliminating valid lines" do + search = CodeSearch.new(<<~'EOM') + def foo + puts 'lol' + end + EOM + search.create_blocks_from_untracked_lines + + expect(search.code_lines.join).to eq(<<~'EOM') + def foo + end + EOM + end + end +end diff --git a/spec/syntax_suggest/unit/display_invalid_blocks_spec.rb b/spec/syntax_suggest/unit/display_invalid_blocks_spec.rb new file mode 100644 index 00000000000000..c696132782cb26 --- /dev/null +++ b/spec/syntax_suggest/unit/display_invalid_blocks_spec.rb @@ -0,0 +1,172 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe DisplayInvalidBlocks do + it "works with valid code" do + syntax_string = <<~EOM + class OH + def hello + end + def hai + end + end + EOM + + search = CodeSearch.new(syntax_string) + search.call + + io = StringIO.new + display = DisplayInvalidBlocks.new( + io: io, + blocks: search.invalid_blocks, + terminal: false, + code_lines: search.code_lines + ) + display.call + expect(io.string).to include("Syntax OK") + end + + it "selectively prints to terminal if input is a tty by default" do + source = <<~EOM + class OH + def hello + def hai + end + end + EOM + + code_lines = CleanDocument.new(source: source).call.lines + + io = StringIO.new + def io.isatty + true + end + + block = CodeBlock.new(lines: code_lines[1]) + display = DisplayInvalidBlocks.new( + io: io, + blocks: block, + code_lines: code_lines + ) + display.call + expect(io.string).to include([ + "❯ 2 ", + DisplayCodeWithLineNumbers::TERMINAL_HIGHLIGHT, + " def hello" + ].join) + + io = StringIO.new + def io.isatty + false + end + + block = CodeBlock.new(lines: code_lines[1]) + display = DisplayInvalidBlocks.new( + io: io, + blocks: block, + code_lines: code_lines + ) + 
display.call + expect(io.string).to include("❯ 2 def hello") + end + + it "outputs to io when using `call`" do + source = <<~EOM + class OH + def hello + def hai + end + end + EOM + + code_lines = CleanDocument.new(source: source).call.lines + + io = StringIO.new + block = CodeBlock.new(lines: code_lines[1]) + display = DisplayInvalidBlocks.new( + io: io, + blocks: block, + terminal: false, + code_lines: code_lines + ) + display.call + expect(io.string).to include("❯ 2 def hello") + end + + it " wraps code with github style codeblocks" do + source = <<~EOM + class OH + def hello + + def hai + end + end + EOM + + code_lines = CleanDocument.new(source: source).call.lines + block = CodeBlock.new(lines: code_lines[1]) + io = StringIO.new + DisplayInvalidBlocks.new( + io: io, + blocks: block, + terminal: false, + code_lines: code_lines + ).call + expect(io.string).to include(<<~EOM) + 1 class OH + ❯ 2 def hello + 4 def hai + 5 end + 6 end + EOM + end + + it "shows terminal characters" do + code_lines = code_line_array(<<~EOM) + class OH + def hello + def hai + end + end + EOM + + io = StringIO.new + block = CodeBlock.new(lines: code_lines[1]) + DisplayInvalidBlocks.new( + io: io, + blocks: block, + terminal: false, + code_lines: code_lines + ).call + + expect(io.string).to include([ + " 1 class OH", + "❯ 2 def hello", + " 4 end", + " 5 end", + "" + ].join($/)) + + block = CodeBlock.new(lines: code_lines[1]) + io = StringIO.new + DisplayInvalidBlocks.new( + io: io, + blocks: block, + terminal: true, + code_lines: code_lines + ).call + + expect(io.string).to include( + [ + " 1 class OH", + ["❯ 2 ", DisplayCodeWithLineNumbers::TERMINAL_HIGHLIGHT, " def hello"].join, + " 4 end", + " 5 end", + "" + ].join($/ + DisplayCodeWithLineNumbers::TERMINAL_END) + ) + end + end +end diff --git a/spec/syntax_suggest/unit/explain_syntax_spec.rb b/spec/syntax_suggest/unit/explain_syntax_spec.rb new file mode 100644 index 00000000000000..394981dcf662cd --- /dev/null +++ 
b/spec/syntax_suggest/unit/explain_syntax_spec.rb @@ -0,0 +1,255 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe "ExplainSyntax" do + it "handles shorthand syntaxes with non-bracket characters" do + source = <<~EOM + %Q* lol + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq([]) + expect(explain.errors.join).to include("unterminated string") + end + + it "handles %w[]" do + source = <<~EOM + node.is_a?(Op) && %w[| ||].include?(node.value) && + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq([]) + end + + it "doesn't falsely identify strings or symbols as critical chars" do + source = <<~EOM + a = ['(', '{', '[', '|'] + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq([]) + + source = <<~EOM + a = [:'(', :'{', :'[', :'|'] + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq([]) + end + + it "finds missing |" do + source = <<~EOM + Foo.call do | + end + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq(["|"]) + expect(explain.errors).to eq([explain.why("|")]) + end + + it "finds missing {" do + source = <<~EOM + class Cat + lol = { + end + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq(["}"]) + expect(explain.errors).to eq([explain.why("}")]) + end + + it "finds missing }" do + source = <<~EOM + def foo + lol = "foo" => :bar } + end + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq(["{"]) + expect(explain.errors).to eq([explain.why("{")]) + end + + it "finds missing [" do + source = <<~EOM + 
class Cat + lol = [ + end + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq(["]"]) + expect(explain.errors).to eq([explain.why("]")]) + end + + it "finds missing ]" do + source = <<~EOM + def foo + lol = ] + end + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq(["["]) + expect(explain.errors).to eq([explain.why("[")]) + end + + it "finds missing (" do + source = "def initialize; ); end" + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq(["("]) + expect(explain.errors).to eq([explain.why("(")]) + end + + it "finds missing )" do + source = "def initialize; (; end" + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq([")"]) + expect(explain.errors).to eq([explain.why(")")]) + end + + it "finds missing keyword" do + source = <<~EOM + class Cat + end + end + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq(["keyword"]) + expect(explain.errors).to eq([explain.why("keyword")]) + end + + it "finds missing end" do + source = <<~EOM + class Cat + def meow + end + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq(["end"]) + expect(explain.errors).to eq([explain.why("end")]) + end + + it "falls back to ripper on unknown errors" do + source = <<~EOM + class Cat + def meow + 1 * + end + end + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq([]) + expect(explain.errors).to eq(RipperErrors.new(source).call.errors) + end + + it "handles an unexpected rescue" do + source = <<~EOM + def foo + if bar + "baz" + else + "foo" + rescue FooBar + nil + end + EOM + + explain = ExplainSyntax.new( 
+ code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq(["end"]) + end + + # String embeds are `"#{foo} <-- here` + # + # We need to count a `#{` as a `{` + # otherwise it will report that we are + # missing a curly when we are using valid + # string embed syntax + it "is not confused by valid string embed" do + source = <<~'EOM' + foo = "#{hello}" + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + expect(explain.missing).to eq([]) + end + + # Missing string embed beginnings are not a + # syntax error. i.e. `"foo}"` or `"{foo}` or "#foo}" + # would just be strings with extra characters. + # + # However missing the end curly will trigger + # an error: i.e. `"#{foo` + # + # String embed beginning is a `#{` rather than + # a `{`, make sure we handle that case and + # report the correct missing `}` diagnosis + it "finds missing string embed end" do + source = <<~'EOM' + "#{foo + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq(["}"]) + end + end +end diff --git a/spec/syntax_suggest/unit/lex_all_spec.rb b/spec/syntax_suggest/unit/lex_all_spec.rb new file mode 100644 index 00000000000000..0c0df7cfaa297e --- /dev/null +++ b/spec/syntax_suggest/unit/lex_all_spec.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe "EndBlockParse" do + it "finds blocks based on `end` keyword" do + source = <<~EOM + describe "cat" # 1 + Cat.call do # 2 + end # 3 + end # 4 + # 5 + it "dog" do # 6 + Dog.call do # 7 + end # 8 + end # 9 + EOM + + # raw_lex = Ripper.lex(source) + # expect(raw_lex.to_s).to_not include("dog") + + lex = LexAll.new(source: source) + expect(lex.map(&:token).to_s).to include("dog") + expect(lex.first.line).to eq(1) + expect(lex.last.line).to eq(9) + end + end +end diff --git a/spec/syntax_suggest/unit/pathname_from_message_spec.rb 
b/spec/syntax_suggest/unit/pathname_from_message_spec.rb new file mode 100644 index 00000000000000..76756efda9df2d --- /dev/null +++ b/spec/syntax_suggest/unit/pathname_from_message_spec.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + RSpec.describe "PathnameFromMessage" do + it "handles filenames with colons in them" do + Dir.mktmpdir do |dir| + dir = Pathname(dir) + + file = dir.join("scr:atch.rb").tap { |p| FileUtils.touch(p) } + + message = "#{file}:2:in `require_relative': /private/tmp/bad.rb:1: syntax error, unexpected `end' (SyntaxError)" + file = PathnameFromMessage.new(message).call.name + + expect(file).to be_truthy + end + end + + it "checks if the file exists" do + Dir.mktmpdir do |dir| + dir = Pathname(dir) + + file = dir.join("scratch.rb") + # No touch, file does not exist + expect(file.exist?).to be_falsey + + message = "#{file}:2:in `require_relative': /private/tmp/bad.rb:1: syntax error, unexpected `end' (SyntaxError)" + io = StringIO.new + file = PathnameFromMessage.new(message, io: io).call.name + + expect(io.string).to include(file.to_s) + expect(file).to be_falsey + end + end + + it "does not output error message on syntax error inside of an (eval)" do + message = "(eval):1: invalid multibyte char (UTF-8) (SyntaxError)\n" + io = StringIO.new + file = PathnameFromMessage.new(message, io: io).call.name + + expect(io.string).to eq("") + expect(file).to be_falsey + end + + it "does not output error message on syntax error inside of streamed code" do + # An example of streamed code is: $ echo "def foo" | ruby + message = "-:1: syntax error, unexpected end-of-input\n" + io = StringIO.new + file = PathnameFromMessage.new(message, io: io).call.name + + expect(io.string).to eq("") + expect(file).to be_falsey + end + end +end diff --git a/spec/syntax_suggest/unit/priority_queue_spec.rb b/spec/syntax_suggest/unit/priority_queue_spec.rb new file mode 100644 index 
00000000000000..17361833e509c4 --- /dev/null +++ b/spec/syntax_suggest/unit/priority_queue_spec.rb @@ -0,0 +1,95 @@ +# frozen_string_literal: true + +require_relative "../spec_helper" + +module SyntaxSuggest + class CurrentIndex + attr_reader :current_indent + + def initialize(value) + @current_indent = value + end + + def <=>(other) + @current_indent <=> other.current_indent + end + + def inspect + @current_indent + end + end + + RSpec.describe CodeFrontier do + it "works" do + q = PriorityQueue.new + q << 1 + q << 2 + expect(q.elements).to eq([2, 1]) + + q << 3 + expect(q.elements).to eq([3, 1, 2]) + + expect(q.pop).to eq(3) + expect(q.pop).to eq(2) + expect(q.pop).to eq(1) + expect(q.pop).to eq(nil) + + array = [] + q = PriorityQueue.new + array.reverse_each do |v| + q << v + end + expect(q.elements).to eq(array) + + array = [100, 36, 17, 19, 25, 0, 3, 1, 7, 2] + array.reverse_each do |v| + q << v + end + + expect(q.pop).to eq(100) + expect(q.elements).to eq([36, 25, 19, 17, 0, 1, 7, 2, 3]) + + # expected [36, 25, 19, 17, 0, 1, 7, 2, 3] + expect(q.pop).to eq(36) + expect(q.pop).to eq(25) + expect(q.pop).to eq(19) + expect(q.pop).to eq(17) + expect(q.pop).to eq(7) + expect(q.pop).to eq(3) + expect(q.pop).to eq(2) + expect(q.pop).to eq(1) + expect(q.pop).to eq(0) + expect(q.pop).to eq(nil) + end + + it "priority queue" do + frontier = PriorityQueue.new + frontier << CurrentIndex.new(0) + frontier << CurrentIndex.new(1) + + expect(frontier.sorted.map(&:current_indent)).to eq([0, 1]) + + frontier << CurrentIndex.new(1) + expect(frontier.sorted.map(&:current_indent)).to eq([0, 1, 1]) + + frontier << CurrentIndex.new(0) + expect(frontier.sorted.map(&:current_indent)).to eq([0, 0, 1, 1]) + + frontier << CurrentIndex.new(10) + expect(frontier.sorted.map(&:current_indent)).to eq([0, 0, 1, 1, 10]) + + frontier << CurrentIndex.new(2) + expect(frontier.sorted.map(&:current_indent)).to eq([0, 0, 1, 1, 2, 10]) + + frontier = PriorityQueue.new + values = [18, 18, 0, 18, 0, 18, 
18, 18, 18, 16, 18, 8, 18, 8, 8, 8, 16, 6, 0, 0, 16, 16, 4, 14, 14, 12, 12, 12, 10, 12, 12, 12, 12, 8, 10, 10, 8, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 8, 10, 6, 6, 6, 6, 6, 6, 8, 10, 8, 8, 10, 8, 10, 8, 10, 8, 6, 8, 8, 6, 8, 6, 6, 8, 0, 8, 0, 0, 8, 8, 0, 8, 0, 8, 8, 0, 8, 8, 8, 0, 8, 0, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 6, 8, 6, 6, 6, 6, 8, 6, 8, 6, 6, 4, 4, 6, 6, 4, 6, 4, 6, 6, 4, 6, 4, 4, 6, 6, 6, 6, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, 0, 6, 6, 2] + + values.each do |v| + value = CurrentIndex.new(v) + frontier << value # CurrentIndex.new(v) + end + + expect(frontier.sorted.map(&:current_indent)).to eq(values.sort) + end + end +end diff --git a/sprintf.c b/sprintf.c index 09b9bf15d2d60a..bfe25e1d3c8b05 100644 --- a/sprintf.c +++ b/sprintf.c @@ -221,7 +221,7 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) VALUE result; long scanned = 0; - int coderange = ENC_CODERANGE_7BIT; + enum ruby_coderange_type coderange = ENC_CODERANGE_7BIT; int width, prec, flags = FNONE; int nextarg = 1; int posarg = 0; @@ -246,10 +246,21 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) rb_raise(rb_eArgError, "flag after precision"); \ } +#define update_coderange(partial) do { \ + if (coderange != ENC_CODERANGE_BROKEN && scanned < blen \ + && rb_enc_to_index(enc) /* != ENCINDEX_ASCII_8BIT */) { \ + int cr = coderange; \ + scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr); \ + ENC_CODERANGE_SET(result, \ + (partial && cr == ENC_CODERANGE_UNKNOWN ? 
\ + ENC_CODERANGE_BROKEN : (coderange = cr))); \ + } \ + } while (0) ++argc; --argv; StringValue(fmt); enc = rb_enc_get(fmt); + rb_must_asciicompat(fmt); orig = fmt; fmt = rb_str_tmp_frozen_acquire(fmt); p = RSTRING_PTR(fmt); @@ -272,10 +283,7 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) rb_raise(rb_eArgError, "incomplete format specifier; use %%%% (double %%) instead"); } PUSH(p, t - p); - if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) { - scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &coderange); - ENC_CODERANGE_SET(result, coderange); - } + update_coderange(FALSE); if (t >= end) { /* end of fmt string */ goto sprint_exit; @@ -441,33 +449,39 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) tmp = rb_check_string_type(val); if (!NIL_P(tmp)) { - if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) { - rb_raise(rb_eArgError, "%%c requires a character"); - } - c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc); - RB_GC_GUARD(tmp); - } - else { - c = NUM2INT(val); - n = rb_enc_codelen(c, enc); + flags |= FPREC; + prec = 1; + str = tmp; + goto format_s1; } + n = NUM2INT(val); + if (n >= 0) n = rb_enc_codelen((c = n), enc); if (n <= 0) { rb_raise(rb_eArgError, "invalid character"); } + int encidx = rb_ascii8bit_appendable_encoding_index(enc, c); + if (encidx >= 0 && encidx != rb_enc_to_index(enc)) { + /* special case */ + rb_enc_associate_index(result, encidx); + enc = rb_enc_from_index(encidx); + coderange = ENC_CODERANGE_VALID; + } if (!(flags & FWIDTH)) { CHECK(n); rb_enc_mbcput(c, &buf[blen], enc); blen += n; } else if ((flags & FMINUS)) { - CHECK(n); + --width; + CHECK(n + (width > 0 ? width : 0)); rb_enc_mbcput(c, &buf[blen], enc); blen += n; - if (width > 1) FILL(' ', width-1); + if (width > 0) FILL_(' ', width); } else { - if (width > 1) FILL(' ', width-1); - CHECK(n); + --width; + CHECK(n + (width > 0 ? 
width : 0)); + if (width > 0) FILL_(' ', width); rb_enc_mbcput(c, &buf[blen], enc); blen += n; } @@ -487,15 +501,10 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) else { str = rb_obj_as_string(arg); } + format_s1: len = RSTRING_LEN(str); rb_str_set_len(result, blen); - if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) { - int cr = coderange; - scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr); - ENC_CODERANGE_SET(result, - (cr == ENC_CODERANGE_UNKNOWN ? - ENC_CODERANGE_BROKEN : (coderange = cr))); - } + update_coderange(TRUE); enc = rb_enc_check(result, str); if (flags&(FPREC|FWIDTH)) { slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc); @@ -511,16 +520,16 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) /* need to adjust multi-byte string pos */ if ((flags&FWIDTH) && (width > slen)) { width -= (int)slen; + CHECK(len + width); if (!(flags&FMINUS)) { - FILL(' ', width); + FILL_(' ', width); width = 0; } - CHECK(len); memcpy(&buf[blen], RSTRING_PTR(str), len); RB_GC_GUARD(str); blen += len; if (flags&FMINUS) { - FILL(' ', width); + FILL_(' ', width); } rb_enc_associate(result, enc); break; @@ -927,6 +936,7 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) flags = FNONE; } + update_coderange(FALSE); sprint_exit: rb_str_tmp_frozen_release(orig, fmt); /* XXX - We cannot validate the number of arguments if (digit)$ style used. 
@@ -1145,36 +1155,54 @@ ruby__sfvextra(rb_printf_buffer *fp, size_t valsize, void *valp, long *sz, int s return cp; } -VALUE -rb_enc_vsprintf(rb_encoding *enc, const char *fmt, va_list ap) +static void +ruby_vsprintf0(VALUE result, char *p, const char *fmt, va_list ap) { rb_printf_buffer_extra buffer; #define f buffer.base - VALUE result; + VALUE klass = RBASIC(result)->klass; + int coderange = ENC_CODERANGE(result); + long scanned = 0; + + if (coderange != ENC_CODERANGE_UNKNOWN) scanned = p - RSTRING_PTR(result); f._flags = __SWR | __SSTR; f._bf._size = 0; - f._w = 120; - result = rb_str_buf_new(f._w); - if (enc) { - if (rb_enc_mbminlen(enc) > 1) { - /* the implementation deeply depends on plain char */ - rb_raise(rb_eArgError, "cannot construct wchar_t based encoding string: %s", - rb_enc_name(enc)); - } - rb_enc_associate(result, enc); - } + f._w = rb_str_capacity(result); f._bf._base = (unsigned char *)result; - f._p = (unsigned char *)RSTRING_PTR(result); + f._p = (unsigned char *)p; RBASIC_CLEAR_CLASS(result); f.vwrite = ruby__sfvwrite; f.vextra = ruby__sfvextra; buffer.value = 0; BSD_vfprintf(&f, fmt, ap); - RBASIC_SET_CLASS_RAW(result, rb_cString); - rb_str_resize(result, (char *)f._p - RSTRING_PTR(result)); + RBASIC_SET_CLASS_RAW(result, klass); + p = RSTRING_PTR(result); + long blen = (char *)f._p - p; + if (scanned < blen) { + rb_str_coderange_scan_restartable(p + scanned, p + blen, rb_enc_get(result), &coderange); + ENC_CODERANGE_SET(result, coderange); + } + rb_str_resize(result, blen); #undef f +} + +VALUE +rb_enc_vsprintf(rb_encoding *enc, const char *fmt, va_list ap) +{ + const int initial_len = 120; + VALUE result; + result = rb_str_buf_new(initial_len); + if (enc) { + if (rb_enc_mbminlen(enc) > 1) { + /* the implementation deeply depends on plain char */ + rb_raise(rb_eArgError, "cannot construct wchar_t based encoding string: %s", + rb_enc_name(enc)); + } + rb_enc_associate(result, enc); + } + ruby_vsprintf0(result, RSTRING_PTR(result), fmt, ap); 
return result; } @@ -1213,26 +1241,9 @@ rb_sprintf(const char *format, ...) VALUE rb_str_vcatf(VALUE str, const char *fmt, va_list ap) { - rb_printf_buffer_extra buffer; -#define f buffer.base - VALUE klass; - StringValue(str); rb_str_modify(str); - f._flags = __SWR | __SSTR; - f._bf._size = 0; - f._w = rb_str_capacity(str); - f._bf._base = (unsigned char *)str; - f._p = (unsigned char *)RSTRING_END(str); - klass = RBASIC(str)->klass; - RBASIC_CLEAR_CLASS(str); - f.vwrite = ruby__sfvwrite; - f.vextra = ruby__sfvextra; - buffer.value = 0; - BSD_vfprintf(&f, fmt, ap); - RBASIC_SET_CLASS_RAW(str, klass); - rb_str_resize(str, (char *)f._p - RSTRING_PTR(str)); -#undef f + ruby_vsprintf0(str, RSTRING_END(str), fmt, ap); return str; } diff --git a/string.c b/string.c index 538104e67e0624..8ad5863094d750 100644 --- a/string.c +++ b/string.c @@ -2502,7 +2502,6 @@ rb_str_modify_expand(VALUE str, long expand) else if (expand > 0) { RESIZE_CAPA_TERM(str, len + expand, termlen); } - ENC_CODERANGE_CLEAR(str); } /* As rb_str_modify(), but don't clear coderange */ @@ -2531,6 +2530,9 @@ void rb_must_asciicompat(VALUE str) { rb_encoding *enc = rb_enc_get(str); + if (!enc) { + rb_raise(rb_eTypeError, "not encoding capable object"); + } if (!rb_enc_asciicompat(enc)) { rb_raise(rb_eEncCompatError, "ASCII incompatible encoding: %s", rb_enc_name(enc)); } @@ -3077,16 +3079,16 @@ rb_str_set_len(VALUE str, long len) VALUE rb_str_resize(VALUE str, long len) { - long slen; - int independent; - if (len < 0) { rb_raise(rb_eArgError, "negative string size (or size too big)"); } - independent = str_independent(str); - ENC_CODERANGE_CLEAR(str); - slen = RSTRING_LEN(str); + int independent = str_independent(str); + long slen = RSTRING_LEN(str); + + if (slen > len && ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) { + ENC_CODERANGE_CLEAR(str); + } { long capa; @@ -3483,17 +3485,13 @@ rb_str_concat(VALUE str1, VALUE str2) return rb_str_append(str1, str2); } - encidx = rb_enc_to_index(enc); - if (encidx == 
ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) { - /* US-ASCII automatically extended to ASCII-8BIT */ + encidx = rb_ascii8bit_appendable_encoding_index(enc, code); + if (encidx >= 0) { char buf[1]; buf[0] = (char)code; - if (code > 0xFF) { - rb_raise(rb_eRangeError, "%u out of char range", code); - } rb_str_cat(str1, buf, 1); - if (encidx == ENCINDEX_US_ASCII && code > 127) { - rb_enc_associate_index(str1, ENCINDEX_ASCII_8BIT); + if (encidx != rb_enc_to_index(enc)) { + rb_enc_associate_index(str1, encidx); ENC_CODERANGE_SET(str1, ENC_CODERANGE_VALID); } } @@ -3526,6 +3524,26 @@ rb_str_concat(VALUE str1, VALUE str2) return str1; } +int +rb_ascii8bit_appendable_encoding_index(rb_encoding *enc, unsigned int code) +{ + int encidx = rb_enc_to_index(enc); + + if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) { + /* US-ASCII automatically extended to ASCII-8BIT */ + if (code > 0xFF) { + rb_raise(rb_eRangeError, "%u out of char range", code); + } + if (encidx == ENCINDEX_US_ASCII && code > 127) { + return ENCINDEX_ASCII_8BIT; + } + return encidx; + } + else { + return -1; + } +} + /* * call-seq: * prepend(*other_strings) -> string @@ -5439,7 +5457,7 @@ rb_str_aset(VALUE str, VALUE indx, VALUE val) * string[index] = new_string * string[start, length] = new_string * string[range] = new_string - * string[regexp, capture = 0) = new_string + * string[regexp, capture = 0] = new_string * string[substring] = new_string * * Replaces all, some, or none of the contents of +self+; returns +new_string+. @@ -6781,7 +6799,15 @@ rb_str_inspect(VALUE str) prev = p; continue; } - if ((enc == resenc && rb_enc_isprint(c, enc)) || + /* The special casing of 0x85 (NEXT_LINE) here is because + * Oniguruma historically treats it as printable, but it + * doesn't match the print POSIX bracket class or character + * property in regexps. 
+ * + * See Ruby Bug #16842 for details: + * https://bugs.ruby-lang.org/issues/16842 + */ + if ((enc == resenc && rb_enc_isprint(c, enc) && c != 0x85) || (asciicompat && rb_enc_isascii(c, enc) && ISPRINT(c))) { continue; } diff --git a/template/Makefile.in b/template/Makefile.in index 63c46cec20f2b6..0437ac0005998e 100644 --- a/template/Makefile.in +++ b/template/Makefile.in @@ -89,7 +89,9 @@ optflags = @optflags@ debugflags = @debugflags@ warnflags = @warnflags@ @strict_warnflags@ cppflags = @cppflags@ -XCFLAGS = @XCFLAGS@ $(INCFLAGS) +RUBY_DEVEL = @RUBY_DEVEL@ # "yes" or empty +_RUBY_DEVEL_enabled = $(RUBY_DEVEL:no=) +XCFLAGS = @XCFLAGS@ $(INCFLAGS) $(_RUBY_DEVEL_enabled:yes=-DRUBY_DEVEL=1) USE_RUBYGEMS = @USE_RUBYGEMS@ USE_RUBYGEMS_ = $(USE_RUBYGEMS:yes=) CPPFLAGS = @CPPFLAGS@ $(USE_RUBYGEMS_:no=-DDISABLE_RUBYGEMS=1) @@ -395,12 +397,13 @@ uncommon.mk: $(srcdir)/common.mk reconfig-args = $(srcdir)/$(CONFIGURE) $(yes_silence:yes=--silent) $(configure_args) config.status-args = ./config.status $(yes_silence:yes=--silent) --recheck reconfig-exec-0 = test -t 1 && { : $${CONFIGURE_TTY=yes}; export CONFIGURE_TTY; }; exec 3>&1; exit `exec 4>&1; { "$$@" 3>&- 4>&-; echo $$? 
1>&4; } | fgrep -v '(cached)' 1>&3 3>&- 4>&-` -reconfig-exec-1 = set -x; "$$@" +reconfig-exec-1 = set -x; exec "$$@" +reconfig-exec-yes = $(reconfig-exec-1) reconfig config.status: $(srcdir)/$(CONFIGURE) $(srcdir)/enc/Makefile.in \ $(hdrdir)/ruby/version.h $(ABI_VERSION_HDR) @PWD= MINIRUBY="$(MINIRUBY)"; export MINIRUBY; \ - set $(SHELL) $($@-args); $(reconfig-exec-$(V)) + set $(SHELL) $($@-args); $(reconfig-exec-$(silence:no=$(V))) $(srcdir)/$(CONFIGURE): $(srcdir)/configure.ac $(CHDIR) $(srcdir) && exec $(AUTOCONF) -o $(@F) @@ -530,12 +533,16 @@ ext/clean.mk ext/distclean.mk ext/realclean.mk:: ext/clean:: ext/clean.sub ext/distclean:: ext/distclean.sub ext/realclean:: ext/realclean.sub -gems/clean:: gems/clean.sub -gems/distclean:: gems/distclean.sub -gems/realclean:: gems/realclean.sub +.bundle/clean:: .bundle/clean.sub +.bundle/distclean:: .bundle/distclean.sub +.bundle/realclean:: .bundle/realclean.sub + +ext/clean.sub .bundle/clean.sub:: ext/clean.mk +ext/distclean.sub .bundle/distclean.sub:: ext/distclean.mk +ext/realclean.sub .bundle/realclean.sub:: ext/realclean.mk ext/clean.sub ext/distclean.sub ext/realclean.sub \ -gems/clean.sub gems/distclean.sub gems/realclean.sub:: +.bundle/clean.sub .bundle/distclean.sub .bundle/realclean.sub:: $(Q) set dummy `echo "${EXTS}" | tr , ' '`; shift; \ test "$$#" = 0 && set .; \ set dummy `\ @@ -551,7 +558,7 @@ gems/clean.sub gems/distclean.sub gems/realclean.sub:: fi; \ done || true -ext/distclean ext/realclean gems/distclean gems/realclean:: +ext/distclean ext/realclean .bundle/distclean .bundle/realclean:: $(Q) set dummy `echo "${EXTS}" | tr , ' '`; shift; \ test "$$#" = 0 && set .; \ cd $(@D) 2>/dev/null && \ diff --git a/template/exts.mk.tmpl b/template/exts.mk.tmpl index c5f8478d76d79c..5595a08da1d1dd 100644 --- a/template/exts.mk.tmpl +++ b/template/exts.mk.tmpl @@ -154,7 +154,7 @@ ext/extinit.<%=objext%>: % end $(Q)<%= submake %><%=mflags%> V=$(V) $(@F) % if /^(dist|real)clean$/ =~ tgt - $(Q)$(RM) $(@D)/exts.mk + 
$(Q)$(RM) <%=t[%r[\A(?:\.[^/]+/)?(?:[^/]+/){2}]]%>exts.mk $(Q)$(RMDIRS) $(@D) % end % end diff --git a/template/fake.rb.in b/template/fake.rb.in index 9417f3ad77e3b9..df19ffd989a131 100644 --- a/template/fake.rb.in +++ b/template/fake.rb.in @@ -10,7 +10,9 @@ end if inc = arg['i'] src = inc == '-' ? STDIN.read : File.read(inc) def src.value(name) - eval(self[/\bruby_#{name}(?:\[\])?\s*=\s*((?:"(?:\\.|[^\"\\])*"\s*)*(?=;)|[^{};]+)/m, 1].gsub(/#/, '\\#')) + eval(self[/\bruby_#{name}(?:\[\])?\s*=\s*((?:"(?:\\.|[^\"\\])*"\s*)*(?=;)|[^{};]+)/m, 1]. + gsub(/#/, '\\#'). + gsub(/((?:\G|[^\\])(?:\\\\)*)\n/, '\1')) end arg['versions'] = version = {} File.read(File.join(arg['srcdir'], 'version.c')). diff --git a/test/-ext-/arith_seq/test_arith_seq_beg_len_step.rb b/test/-ext-/arith_seq/test_arith_seq_beg_len_step.rb new file mode 100644 index 00000000000000..4320c1f20dc233 --- /dev/null +++ b/test/-ext-/arith_seq/test_arith_seq_beg_len_step.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: false +require 'test/unit' + +class Test_ArithSeq < Test::Unit::TestCase + def test_beg_len_step + assert_separately([], <<-"end;") #do + require '-test-/arith_seq/beg_len_step' + + r, = Enumerator::ArithmeticSequence.__beg_len_step__([1, 2, 3], 0, 0) + assert_equal(false, r) + + r, = Enumerator::ArithmeticSequence.__beg_len_step__([1, 2, 3], 1, 0) + assert_equal(false, r) + + r, = Enumerator::ArithmeticSequence.__beg_len_step__([1, 2, 3], 3, 0) + assert_equal(false, r) + + r, = Enumerator::ArithmeticSequence.__beg_len_step__(1..3, 0, 0) + assert_equal(nil, r) + + r = Enumerator::ArithmeticSequence.__beg_len_step__(1..3, 1, 0) + assert_equal([true, 1, 0, 1], r) + + r = Enumerator::ArithmeticSequence.__beg_len_step__(1..3, 2, 0) + assert_equal([true, 1, 1, 1], r) + + r = Enumerator::ArithmeticSequence.__beg_len_step__(1..3, 3, 0) + assert_equal([true, 1, 2, 1], r) + + r = Enumerator::ArithmeticSequence.__beg_len_step__(1..3, 4, 0) + assert_equal([true, 1, 3, 1], r) + + r = 
Enumerator::ArithmeticSequence.__beg_len_step__(1..3, 5, 0) + assert_equal([true, 1, 3, 1], r) + + r = Enumerator::ArithmeticSequence.__beg_len_step__((-10..10).step(2), 24, 0) + assert_equal([true, 14, 0, 2], r) + + r = Enumerator::ArithmeticSequence.__beg_len_step__((-10..10).step(3), 24, 0) + assert_equal([true, 14, 0, 3], r) + + r = Enumerator::ArithmeticSequence.__beg_len_step__((-10..10).step(3), 22, 0) + assert_equal([true, 12, 0, 3], r) + + r = Enumerator::ArithmeticSequence.__beg_len_step__((-10..10).step(-3), 22, 0) + assert_equal([true, 10, 3, -3], r) + + r = Enumerator::ArithmeticSequence.__beg_len_step__(1..3, 0, 1) + assert_equal([true, 1, 3, 1], r) + end; + end +end diff --git a/test/-ext-/econv/test_append.rb b/test/-ext-/econv/test_append.rb new file mode 100644 index 00000000000000..f8c1d2add690a2 --- /dev/null +++ b/test/-ext-/econv/test_append.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: false +require 'test/unit' +require "-test-/econv" + +class Test_EConvAppend < Test::Unit::TestCase + def test_econv_str_append_valid + ec = Bug::EConv.new("utf-8", "cp932") + dst = "\u3044".encode("cp932") + ret = ec.append("\u3042"*30, dst) + assert_same(dst, ret) + assert_not_predicate(dst, :ascii_only?) + assert_predicate(dst, :valid_encoding?) + end + + def test_econv_str_append_broken + ec = Bug::EConv.new("utf-8", "cp932") + dst = "" + ret = ec.append("\u3042"*30, dst) + assert_same(dst, ret) + assert_not_predicate(dst, :ascii_only?) + assert_not_predicate(dst, :valid_encoding?) 
+ end +end diff --git a/test/-ext-/eval/test_eval.rb b/test/-ext-/eval/test_eval.rb index 27952996e29c90..e37d301b2eaa1e 100644 --- a/test/-ext-/eval/test_eval.rb +++ b/test/-ext-/eval/test_eval.rb @@ -4,9 +4,9 @@ class EvalTest < Test::Unit::TestCase def test_rb_eval_string - a = 1 + _a = 1 assert_equal [self, 1, __method__], rb_eval_string(%q{ - [self, a, __method__] + [self, _a, __method__] }) end end diff --git a/test/-ext-/test_abi.rb b/test/-ext-/test_abi.rb index 59e70107a5dd57..d3ea6bb9b105fe 100644 --- a/test/-ext-/test_abi.rb +++ b/test/-ext-/test_abi.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +return unless RUBY_PATCHLEVEL < 0 + class TestABI < Test::Unit::TestCase def test_require_lib_with_incorrect_abi_on_dev_ruby omit "ABI is not checked" unless abi_checking_supported? diff --git a/test/cgi/test_cgi_util.rb b/test/cgi/test_cgi_util.rb index 5baf87db75dea3..a3be193a134cba 100644 --- a/test/cgi/test_cgi_util.rb +++ b/test/cgi/test_cgi_util.rb @@ -23,7 +23,6 @@ def teardown ENV.update(@environ) end - def test_cgi_escape assert_equal('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93', CGI.escape(@str1)) assert_equal('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93'.ascii_only?, CGI.escape(@str1).ascii_only?) if defined?(::Encoding) @@ -70,6 +69,54 @@ def test_cgi_unescape_accept_charset end; end + def test_cgi_escapeURIComponent + assert_equal('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93', CGI.escapeURIComponent(@str1)) + assert_equal('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93'.ascii_only?, CGI.escapeURIComponent(@str1).ascii_only?) 
if defined?(::Encoding) + end + + def test_cgi_escapeURIComponent_with_unreserved_characters + assert_equal("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~", + CGI.escapeURIComponent("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"), + "should not encode any unreserved characters, as per RFC3986 Section 2.3") + end + + def test_cgi_escapeURIComponent_with_invalid_byte_sequence + assert_equal('%C0%3C%3C', CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("UTF-8"))) + end + + def test_cgi_escapeURIComponent_preserve_encoding + assert_equal(Encoding::US_ASCII, CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("US-ASCII")).encoding) + assert_equal(Encoding::ASCII_8BIT, CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("ASCII-8BIT")).encoding) + assert_equal(Encoding::UTF_8, CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("UTF-8")).encoding) + end + + def test_cgi_unescapeURIComponent + str = CGI.unescapeURIComponent('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93') + assert_equal(@str1, str) + return unless defined?(::Encoding) + + assert_equal("foo+bar", CGI.unescapeURIComponent("foo+bar")) + + assert_equal(@str1.encoding, str.encoding) + assert_equal("\u{30E1 30E2 30EA 691C 7D22}", CGI.unescapeURIComponent("\u{30E1 30E2 30EA}%E6%A4%9C%E7%B4%A2")) + end + + def test_cgi_unescapeURIComponent_preserve_encoding + assert_equal(Encoding::US_ASCII, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("US-ASCII")).encoding) + assert_equal(Encoding::ASCII_8BIT, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("ASCII-8BIT")).encoding) + assert_equal(Encoding::UTF_8, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("UTF-8")).encoding) + end + + def test_cgi_unescapeURIComponent_accept_charset + return unless defined?(::Encoding) + + assert_raise(TypeError) {CGI.unescapeURIComponent('', nil)} + assert_separately(%w[-rcgi/util], "#{<<-"begin;"}\n#{<<-"end;"}") + begin; + assert_equal("", 
CGI.unescapeURIComponent('')) + end; + end + def test_cgi_pretty assert_equal("\n \n \n\n",CGI.pretty("")) assert_equal("\n\t\n\t\n\n",CGI.pretty("","\t")) diff --git a/test/csv/parse/test_general.rb b/test/csv/parse/test_general.rb index d2b74008ebb46e..c740462c01dd1b 100644 --- a/test/csv/parse/test_general.rb +++ b/test/csv/parse/test_general.rb @@ -247,6 +247,9 @@ def test_seeked_string_io def assert_parse_errors_out(data, **options) assert_raise(CSV::MalformedCSVError) do timeout = 0.2 + if defined?(RubyVM::YJIT.enabled?) and RubyVM::YJIT.enabled? + timeout = 1 # for --yjit-call-threshold=1 + end if defined?(RubyVM::MJIT.enabled?) and RubyVM::MJIT.enabled? timeout = 5 # for --jit-wait end diff --git a/test/date/test_date_conv.rb b/test/date/test_date_conv.rb index d41ff45d859a6b..ed478b41bb3c8d 100644 --- a/test/date/test_date_conv.rb +++ b/test/date/test_date_conv.rb @@ -77,6 +77,11 @@ def test_to_time__from_datetime assert_equal([2004, 9, 19, 1, 2, 3, 456789], [t.year, t.mon, t.mday, t.hour, t.min, t.sec, t.usec]) + d = DateTime.new(1582, 10, 3, 1, 2, 3, 0) + 456789.to_r/86400000000 + t = d.to_time.utc + assert_equal([1582, 10, 13, 1, 2, 3, 456789], + [t.year, t.mon, t.mday, t.hour, t.min, t.sec, t.usec]) + if Time.allocate.respond_to?(:nsec) d = DateTime.new(2004, 9, 19, 1, 2, 3, 0) + 456789123.to_r/86400000000000 t = d.to_time.utc @@ -100,6 +105,10 @@ def test_to_date__from_time t = Time.utc(2004, 9, 19, 1, 2, 3, 456789) d = t.to_date assert_equal([2004, 9, 19, 0], [d.year, d.mon, d.mday, d.day_fraction]) + + t = Time.utc(1582, 10, 13, 1, 2, 3, 456789) + d = t.to_date # using ITALY + assert_equal([1582, 10, 3, 0], [d.year, d.mon, d.mday, d.day_fraction]) end def test_to_date__from_date @@ -136,6 +145,14 @@ def test_to_datetime__from_time [d.year, d.mon, d.mday, d.hour, d.min, d.sec, d.sec_fraction, d.offset]) + t = Time.utc(1582, 10, 13, 1, 2, 3, 456789) + d = t.to_datetime # using ITALY + assert_equal([1582, 10, 3, 1, 2, 3, + 456789.to_r/1000000, + 0], + 
[d.year, d.mon, d.mday, d.hour, d.min, d.sec, + d.sec_fraction, d.offset]) + t = Time.now d = t.to_datetime require 'time' diff --git a/test/drb/drbtest.rb b/test/drb/drbtest.rb index 3c33aedb6f1763..56d73eb5097f6b 100644 --- a/test/drb/drbtest.rb +++ b/test/drb/drbtest.rb @@ -90,6 +90,7 @@ def setup_service(service_name) end def teardown + return if @omitted @ext.stop_service if defined?(@ext) && @ext if defined?(@service_name) && @service_name @drb_service.manager.unregist(@service_name) diff --git a/test/drb/test_drbssl.rb b/test/drb/test_drbssl.rb index 0254c7ab50d8bc..4369c6614b9796 100644 --- a/test/drb/test_drbssl.rb +++ b/test/drb/test_drbssl.rb @@ -41,6 +41,10 @@ def start class TestDRbSSLCore < Test::Unit::TestCase include DRbCore def setup + if RUBY_PLATFORM.match?(/mingw/) + @omitted = true + omit 'This test seems to randomly hang on GitHub Actions MinGW' + end @drb_service = DRbSSLService.new super setup_service 'ut_drb_drbssl.rb' diff --git a/test/error_highlight/test_error_highlight.rb b/test/error_highlight/test_error_highlight.rb index 5b7c05e5f49607..c4a998092b5686 100644 --- a/test/error_highlight/test_error_highlight.rb +++ b/test/error_highlight/test_error_highlight.rb @@ -1150,7 +1150,7 @@ def v.foo; 1; end def test_custom_formatter custom_formatter = Object.new def custom_formatter.message_for(spot) - "\n\n" + spot.inspect + "\n\n" + spot.except(:script_lines).inspect end original_formatter, ErrorHighlight.formatter = ErrorHighlight.formatter, custom_formatter @@ -1231,4 +1231,30 @@ def test_spoofed_filename end end end + + def raise_name_error + 1.time + end + + def test_spot_with_backtrace_location + lineno = __LINE__ + begin + raise_name_error + rescue NameError => exc + end + + spot = ErrorHighlight.spot(exc).except(:script_lines) + assert_equal(lineno - 4, spot[:first_lineno]) + assert_equal(lineno - 4, spot[:last_lineno]) + assert_equal(5, spot[:first_column]) + assert_equal(10, spot[:last_column]) + assert_equal(" 1.time\n", 
spot[:snippet]) + + spot = ErrorHighlight.spot(exc, backtrace_location: exc.backtrace_locations[1]).except(:script_lines) + assert_equal(lineno + 2, spot[:first_lineno]) + assert_equal(lineno + 2, spot[:last_lineno]) + assert_equal(6, spot[:first_column]) + assert_equal(22, spot[:last_column]) + assert_equal(" raise_name_error\n", spot[:snippet]) + end end diff --git a/test/excludes/Psych/TestDateTime.rb b/test/excludes/Psych/TestDateTime.rb new file mode 100644 index 00000000000000..63d99be809279a --- /dev/null +++ b/test/excludes/Psych/TestDateTime.rb @@ -0,0 +1,4 @@ +exclude(:test_new_datetime, < expected but was + # <"-e:12:in `p': \e[1mexecution expired (\e[1;4mTimeout::Error\e[m\e[1m)\e[m">. + omit if /freebsd/ =~ RUBY_PLATFORM + run_pty("#{<<~"begin;"}\n#{<<~'end;'}") do |r, w, _| begin; require 'timeout' @@ -383,19 +392,12 @@ def test_intr assert_ctrl("#{cc.ord}", cc, r, w) assert_ctrl("Interrupt", cc, r, w) unless /linux|solaris/ =~ RUBY_PLATFORM end - # This test fails randomly on FreeBSD 13 - # http://rubyci.s3.amazonaws.com/freebsd13/ruby-master/log/20220304T163001Z.fail.html.gz - # - # 1) Failure: - # TestIO_Console#test_intr [/usr/home/chkbuild/chkbuild/tmp/build/20220304T163001Z/ruby/test/io/console/test_io_console.rb:387]: - # <"25"> expected but was - # <"-e:12:in `p': \e[1mexecution expired (\e[1;4mTimeout::Error\e[m\e[1m)\e[m">. - if (cc = ctrl["dsusp"]) && /freebsd/ !~ RUBY_PLATFORM + if cc = ctrl["dsusp"] assert_ctrl("#{cc.ord}", cc, r, w) assert_ctrl("#{cc.ord}", cc, r, w) assert_ctrl("#{cc.ord}", cc, r, w) end - if (cc = ctrl["lnext"]) && /freebsd/ !~ RUBY_PLATFORM + if cc = ctrl["lnext"] assert_ctrl("#{cc.ord}", cc, r, w) assert_ctrl("#{cc.ord}", cc, r, w) assert_ctrl("#{cc.ord}", cc, r, w) diff --git a/test/lib/jit_support.rb b/test/lib/jit_support.rb index c2822a80e4d4a8..b8ba06e570f249 100644 --- a/test/lib/jit_support.rb +++ b/test/lib/jit_support.rb @@ -65,7 +65,7 @@ def supported? def yjit_supported? # e.g. 
x86_64-linux, x64-mswin64_140, x64-mingw32, x64-mingw-ucrt - RUBY_PLATFORM.match?(/^(x86_64|x64)-/) + RUBY_PLATFORM.match?(/^(x86_64|x64|arm64|aarch64)-/) end # AppVeyor's Visual Studio 2013 / 2015 are known to spuriously generate broken pch / pdb, like: diff --git a/test/mkmf/base.rb b/test/mkmf/base.rb index e097c396d6e018..ec42bca100b935 100644 --- a/test/mkmf/base.rb +++ b/test/mkmf/base.rb @@ -106,6 +106,7 @@ def setup end def teardown + return if @omitted rbconfig0 = @rbconfig mkconfig0 = @mkconfig RbConfig.module_eval { diff --git a/test/mkmf/test_constant.rb b/test/mkmf/test_constant.rb index f6834c7f284f98..f22b82ff950b4f 100644 --- a/test/mkmf/test_constant.rb +++ b/test/mkmf/test_constant.rb @@ -2,6 +2,14 @@ require_relative 'base' class TestMkmfTryConstant < TestMkmf + def setup + if ENV.key?('APPVEYOR') + @omitted = true + omit 'This test fails too often on AppVeyor' + end + super + end + def test_simple assert_equal( 0, mkmf {try_constant("0")}, MKMFLOG) assert_equal( 1, mkmf {try_constant("1")}, MKMFLOG) diff --git a/test/psych/helper.rb b/test/psych/helper.rb index 0643139d8c0cff..4e82887c6d1c2c 100644 --- a/test/psych/helper.rb +++ b/test/psych/helper.rb @@ -51,7 +51,7 @@ def assert_to_yaml( obj, yaml, loader = :load ) :UseVersion => true, :UseHeader => true, :SortKeys => true ) )) - rescue Psych::DisallowedClass, Psych::BadAlias + rescue Psych::DisallowedClass, Psych::BadAlias, Psych::AliasesNotEnabled assert_to_yaml obj, yaml, :unsafe_load end @@ -61,7 +61,7 @@ def assert_to_yaml( obj, yaml, loader = :load ) def assert_parse_only( obj, yaml ) begin assert_equal obj, Psych::load( yaml ) - rescue Psych::DisallowedClass, Psych::BadAlias + rescue Psych::DisallowedClass, Psych::BadAlias, Psych::AliasesNotEnabled assert_equal obj, Psych::unsafe_load( yaml ) end assert_equal obj, Psych::parse( yaml ).transform @@ -79,7 +79,7 @@ def assert_cycle( obj ) assert_equal(obj, Psych.load(v.tree.yaml)) assert_equal(obj, Psych::load(Psych.dump(obj))) 
assert_equal(obj, Psych::load(obj.to_yaml)) - rescue Psych::DisallowedClass, Psych::BadAlias + rescue Psych::DisallowedClass, Psych::BadAlias, Psych::AliasesNotEnabled assert_equal(obj, Psych.unsafe_load(v.tree.yaml)) assert_equal(obj, Psych::unsafe_load(Psych.dump(obj))) assert_equal(obj, Psych::unsafe_load(obj.to_yaml)) diff --git a/test/psych/test_array.rb b/test/psych/test_array.rb index 28b76da785e8ef..0dc82439d44c90 100644 --- a/test/psych/test_array.rb +++ b/test/psych/test_array.rb @@ -57,6 +57,22 @@ def test_self_referential assert_cycle(@list) end + def test_recursive_array + @list << @list + + loaded = Psych.load(Psych.dump(@list), aliases: true) + + assert_same loaded, loaded.last + end + + def test_recursive_array_uses_alias + @list << @list + + assert_raise(AliasesNotEnabled) do + Psych.load(Psych.dump(@list), aliases: false) + end + end + def test_cycle assert_cycle(@list) end diff --git a/test/psych/test_hash.rb b/test/psych/test_hash.rb index 5374781339209c..31eba8580bb96f 100644 --- a/test/psych/test_hash.rb +++ b/test/psych/test_hash.rb @@ -102,26 +102,66 @@ def test_cycles end def test_ref_append - hash = Psych.unsafe_load(<<-eoyml) ---- -foo: &foo - hello: world -bar: - <<: *foo -eoyml + hash = Psych.unsafe_load(<<~eoyml) + --- + foo: &foo + hello: world + bar: + <<: *foo + eoyml assert_equal({"foo"=>{"hello"=>"world"}, "bar"=>{"hello"=>"world"}}, hash) end + def test_anchor_reuse + hash = Psych.unsafe_load(<<~eoyml) + --- + foo: &foo + hello: world + bar: *foo + eoyml + assert_equal({"foo"=>{"hello"=>"world"}, "bar"=>{"hello"=>"world"}}, hash) + assert_same(hash.fetch("foo"), hash.fetch("bar")) + end + + def test_raises_if_anchor_not_defined + assert_raise(Psych::AnchorNotDefined) do + Psych.unsafe_load(<<~eoyml) + --- + foo: &foo + hello: world + bar: *not_foo + eoyml + end + end + + def test_recursive_hash + h = { } + h["recursive_reference"] = h + + loaded = Psych.load(Psych.dump(h), aliases: true) + + assert_same loaded, 
loaded.fetch("recursive_reference") + end + + def test_recursive_hash_uses_alias + h = { } + h["recursive_reference"] = h + + assert_raise(AliasesNotEnabled) do + Psych.load(Psych.dump(h), aliases: false) + end + end + def test_key_deduplication unless String.method_defined?(:-@) && (-("a" * 20)).equal?((-("a" * 20))) pend "This Ruby implementation doesn't support string deduplication" end - hashes = Psych.load(<<-eoyml) ---- -- unique_identifier: 1 -- unique_identifier: 2 -eoyml + hashes = Psych.load(<<~eoyml) + --- + - unique_identifier: 1 + - unique_identifier: 2 + eoyml assert_same hashes[0].keys.first, hashes[1].keys.first end diff --git a/test/psych/test_merge_keys.rb b/test/psych/test_merge_keys.rb index dcf4f1fce3ffae..2f55a1ed8ecbf1 100644 --- a/test/psych/test_merge_keys.rb +++ b/test/psych/test_merge_keys.rb @@ -117,7 +117,7 @@ def test_missing_merge_key bar: << : *foo eoyml - exp = assert_raise(Psych::BadAlias) { Psych.load yaml } + exp = assert_raise(Psych::AnchorNotDefined) { Psych.load(yaml, aliases: true) } assert_match 'foo', exp.message end diff --git a/test/psych/test_object.rb b/test/psych/test_object.rb index 0faf6b244db2e5..21c27794ea5d4e 100644 --- a/test/psych/test_object.rb +++ b/test/psych/test_object.rb @@ -36,10 +36,19 @@ def test_tag_round_trip def test_cyclic_references foo = Foo.new(nil) foo.parent = foo - loaded = Psych.unsafe_load Psych.dump foo + loaded = Psych.load(Psych.dump(foo), permitted_classes: [Foo], aliases: true) assert_instance_of(Foo, loaded) - assert_equal loaded, loaded.parent + assert_same loaded, loaded.parent + end + + def test_cyclic_reference_uses_alias + foo = Foo.new(nil) + foo.parent = foo + + assert_raise(AliasesNotEnabled) do + Psych.load(Psych.dump(foo), permitted_classes: [Foo], aliases: false) + end end end end diff --git a/test/psych/test_safe_load.rb b/test/psych/test_safe_load.rb index b52d6048b30b9a..a9ed73752816d2 100644 --- a/test/psych/test_safe_load.rb +++ b/test/psych/test_safe_load.rb @@ -19,18 
+19,31 @@ class Foo; end end end - def test_no_recursion - x = [] - x << x - assert_raise(Psych::BadAlias) do - Psych.safe_load Psych.dump(x) + def test_raises_when_alias_found_if_alias_parsing_not_enabled + yaml_with_aliases = <<~YAML + --- + a: &ABC + k1: v1 + k2: v2 + b: *ABC + YAML + + assert_raise(Psych::AliasesNotEnabled) do + Psych.safe_load(yaml_with_aliases) end end - def test_explicit_recursion - x = [] - x << x - assert_equal(x, Psych.safe_load(Psych.dump(x), permitted_classes: [], permitted_symbols: [], aliases: true)) + def test_aliases_are_parsed_when_alias_parsing_is_enabled + yaml_with_aliases = <<~YAML + --- + a: &ABC + k1: v1 + k2: v2 + b: *ABC + YAML + + result = Psych.safe_load(yaml_with_aliases, aliases: true) + assert_same result.fetch("a"), result.fetch("b") end def test_permitted_symbol diff --git a/test/rdoc/test_rdoc_markdown.rb b/test/rdoc/test_rdoc_markdown.rb index c223c44c1225f3..ca76c34f439650 100644 --- a/test/rdoc/test_rdoc_markdown.rb +++ b/test/rdoc/test_rdoc_markdown.rb @@ -761,7 +761,6 @@ def test_parse_note_multiple and an extra note.[^2] [^1]: With a footnote - [^2]: Which should be numbered correctly MD diff --git a/test/reline/test_reline.rb b/test/reline/test_reline.rb index 8828e419852577..82447fd16cb4df 100644 --- a/test/reline/test_reline.rb +++ b/test/reline/test_reline.rb @@ -397,6 +397,12 @@ def test_read_io # TODO in Reline::Core end + def test_dumb_terminal + lib = File.expand_path("../../lib", __dir__) + out = IO.popen([{"TERM"=>"dumb"}, "ruby", "-I#{lib}", "-rreline", "-e", "p Reline::IOGate"], &:read) + assert_equal("Reline::GeneralIO", out.chomp) + end + def get_reline_encoding if encoding = Reline::IOGate.encoding encoding diff --git a/test/rinda/test_rinda.rb b/test/rinda/test_rinda.rb index d8340e0fc48dff..74d8d363b4b705 100644 --- a/test/rinda/test_rinda.rb +++ b/test/rinda/test_rinda.rb @@ -498,7 +498,7 @@ class TupleSpaceProxyTest < Test::Unit::TestCase def setup if RUBY_PLATFORM.match?(/mingw/) @omitted = 
true - omit 'This test seems to randomly hang on GitHub Actions MinGW UCRT64' + omit 'This test seems to randomly hang on GitHub Actions MinGW' end super ThreadGroup.new.add(Thread.current) @@ -583,6 +583,22 @@ def test_take_bug_8215 end end +module RingIPv4 + def ipv4_mc(rf) + begin + v4mc = rf.make_socket('239.0.0.1') + rescue Errno::ENETUNREACH, Errno::ENOBUFS, Errno::ENODEV + omit 'IPv4 multicast not available' + end + + begin + yield v4mc + ensure + v4mc.close + end + end +end + module RingIPv6 def prepare_ipv6(r) begin @@ -625,6 +641,7 @@ def ipv6_mc(rf, hops = nil) end class TestRingServer < Test::Unit::TestCase + include RingIPv4 def setup @port = Rinda::Ring_PORT @@ -697,27 +714,23 @@ def test_make_socket_unicast end def test_make_socket_ipv4_multicast - begin - v4mc = @rs.make_socket('239.0.0.1') - rescue Errno::ENOBUFS => e - omit "Missing multicast support in OS: #{e.message}" - end - - begin - if Socket.const_defined?(:SO_REUSEPORT) then - assert(v4mc.getsockopt(:SOCKET, :SO_REUSEPORT).bool) - else - assert(v4mc.getsockopt(:SOCKET, :SO_REUSEADDR).bool) - end - rescue TypeError - if /aix/ =~ RUBY_PLATFORM - omit "Known bug in getsockopt(2) on AIX" + ipv4_mc(@rs) do |v4mc| + begin + if Socket.const_defined?(:SO_REUSEPORT) then + assert(v4mc.getsockopt(:SOCKET, :SO_REUSEPORT).bool) + else + assert(v4mc.getsockopt(:SOCKET, :SO_REUSEADDR).bool) + end + rescue TypeError + if /aix/ =~ RUBY_PLATFORM + omit "Known bug in getsockopt(2) on AIX" + end + raise $! end - raise $! 
- end - assert_equal('0.0.0.0', v4mc.local_address.ip_address) - assert_equal(@port, v4mc.local_address.ip_port) + assert_equal('0.0.0.0', v4mc.local_address.ip_address) + assert_equal(@port, v4mc.local_address.ip_port) + end end def test_make_socket_ipv6_multicast @@ -746,7 +759,7 @@ def test_ring_server_ipv4_multicast @rs.shutdown begin @rs = Rinda::RingServer.new(@ts, [['239.0.0.1', '0.0.0.0']], @port) - rescue Errno::ENOBUFS => e + rescue Errno::ENOBUFS, Errno::ENODEV => e omit "Missing multicast support in OS: #{e.message}" end @@ -848,6 +861,7 @@ def wait_for(n) class TestRingFinger < Test::Unit::TestCase include RingIPv6 + include RingIPv4 def setup @rf = Rinda::RingFinger.new @@ -867,12 +881,10 @@ def test_make_socket_unicast end def test_make_socket_ipv4_multicast - v4mc = @rf.make_socket('239.0.0.1') - - assert_equal(1, v4mc.getsockopt(:IPPROTO_IP, :IP_MULTICAST_LOOP).ipv4_multicast_loop) - assert_equal(1, v4mc.getsockopt(:IPPROTO_IP, :IP_MULTICAST_TTL).ipv4_multicast_ttl) - ensure - v4mc.close if v4mc + ipv4_mc(@rf) do |v4mc| + assert_equal(1, v4mc.getsockopt(:IPPROTO_IP, :IP_MULTICAST_LOOP).ipv4_multicast_loop) + assert_equal(1, v4mc.getsockopt(:IPPROTO_IP, :IP_MULTICAST_TTL).ipv4_multicast_ttl) + end end def test_make_socket_ipv6_multicast @@ -884,10 +896,9 @@ def test_make_socket_ipv6_multicast def test_make_socket_ipv4_multicast_hops @rf.multicast_hops = 2 - v4mc = @rf.make_socket('239.0.0.1') - assert_equal(2, v4mc.getsockopt(:IPPROTO_IP, :IP_MULTICAST_TTL).ipv4_multicast_ttl) - ensure - v4mc.close if v4mc + ipv4_mc(@rf) do |v4mc| + assert_equal(2, v4mc.getsockopt(:IPPROTO_IP, :IP_MULTICAST_TTL).ipv4_multicast_ttl) + end end def test_make_socket_ipv6_multicast_hops diff --git a/test/ripper/test_lexer.rb b/test/ripper/test_lexer.rb index 83130668be79b0..27e00070234a68 100644 --- a/test/ripper/test_lexer.rb +++ b/test/ripper/test_lexer.rb @@ -100,6 +100,20 @@ def test_stack_at_on_heredoc_beg assert_equal expect, Ripper.lex(src).map {|e| e[1]} end + 
def test_end_of_script_char + all_assertions do |all| + ["a", %w"[a ]", %w"{, }", "if"].each do |src, append| + expected = Ripper.lex(src).map {|e| e[1]} + ["\0b", "\4b", "\32b"].each do |eof| + c = "#{src}#{eof}#{append}" + all.for(c) do + assert_equal expected, Ripper.lex(c).map {|e| e[1]} + end + end + end + end + end + def test_slice assert_equal "string\#{nil}\n", Ripper.slice(%(<).each do |op| assert_kind_of(Numeric, Complex(1).__send__(op, x)) end end diff --git a/test/ruby/test_file.rb b/test/ruby/test_file.rb index 905416911a96e8..669b004b83317e 100644 --- a/test/ruby/test_file.rb +++ b/test/ruby/test_file.rb @@ -460,6 +460,48 @@ def test_long_unc end end + def test_file_open_newline_option + Dir.mktmpdir(__method__.to_s) do |tmpdir| + path = File.join(tmpdir, "foo") + test = lambda do |newline| + File.open(path, "wt", newline: newline) do |f| + f.write "a\n" + f.puts "b" + end + File.binread(path) + end + assert_equal("a\nb\n", test.(:lf)) + assert_equal("a\nb\n", test.(:universal)) + assert_equal("a\r\nb\r\n", test.(:crlf)) + assert_equal("a\rb\r", test.(:cr)) + + test = lambda do |newline| + File.open(path, "rt", newline: newline) do |f| + f.read + end + end + + File.binwrite(path, "a\nb\n") + assert_equal("a\nb\n", test.(:lf)) + assert_equal("a\nb\n", test.(:universal)) + assert_equal("a\nb\n", test.(:crlf)) + assert_equal("a\nb\n", test.(:cr)) + + File.binwrite(path, "a\r\nb\r\n") + assert_equal("a\r\nb\r\n", test.(:lf)) + assert_equal("a\nb\n", test.(:universal)) + # Work on both Windows and non-Windows + assert_include(["a\r\nb\r\n", "a\nb\n"], test.(:crlf)) + assert_equal("a\r\nb\r\n", test.(:cr)) + + File.binwrite(path, "a\rb\r") + assert_equal("a\rb\r", test.(:lf)) + assert_equal("a\nb\n", test.(:universal)) + assert_equal("a\rb\r", test.(:crlf)) + assert_equal("a\rb\r", test.(:cr)) + end + end + def test_open_nul Dir.mktmpdir(__method__.to_s) do |tmpdir| path = File.join(tmpdir, "foo") diff --git a/test/ruby/test_float.rb 
b/test/ruby/test_float.rb index 57a46fce921b99..fdc5d28ed7b95f 100644 --- a/test/ruby/test_float.rb +++ b/test/ruby/test_float.rb @@ -141,6 +141,9 @@ def test_strtod assert_raise(ArgumentError){Float("1__1")} assert_raise(ArgumentError){Float("1.")} assert_raise(ArgumentError){Float("1.e+00")} + assert_raise(ArgumentError){Float("0x.1")} + assert_raise(ArgumentError){Float("0x1.")} + assert_raise(ArgumentError){Float("0x1.0")} assert_raise(ArgumentError){Float("0x1.p+0")} # add expected behaviour here. assert_equal(10, Float("1_0")) diff --git a/test/ruby/test_gc.rb b/test/ruby/test_gc.rb index a5d7f4dbaa83a9..d2f1e21e33e4b1 100644 --- a/test/ruby/test_gc.rb +++ b/test/ruby/test_gc.rb @@ -402,6 +402,32 @@ def test_expand_heap eom end + def test_thrashing_for_young_objects + # This test prevents bugs like [Bug #18929] + + assert_separately %w[--disable-gem], __FILE__, __LINE__, <<-RUBY + # Warmup to make sure heap stabilizes + 1_000_000.times { Object.new } + + before_stats = GC.stat + + 1_000_000.times { Object.new } + + # Previous loop may have caused GC to be in an intermediate state, + # running a minor GC here will guarantee that GC will be complete + GC.start(full_mark: false) + + after_stats = GC.stat + + # Should not be thrashing in page creation + assert_equal before_stats[:heap_allocated_pages], after_stats[:heap_allocated_pages] + assert_equal 0, after_stats[:heap_tomb_pages] + assert_equal 0, after_stats[:total_freed_pages] + # Only young objects, so should not trigger major GC + assert_equal before_stats[:major_gc_count], after_stats[:major_gc_count] + RUBY + end + def test_gc_internals assert_not_nil GC::INTERNAL_CONSTANTS[:HEAP_PAGE_OBJ_LIMIT] assert_not_nil GC::INTERNAL_CONSTANTS[:RVALUE_SIZE] diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb index 91423f81ea2067..83d16d462effe1 100644 --- a/test/ruby/test_hash.rb +++ b/test/ruby/test_hash.rb @@ -304,6 +304,20 @@ def test_AREF_fstring_key assert_equal before, 
ObjectSpace.count_objects[:T_STRING] end + def test_AREF_fstring_key_default_proc + assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}") + begin; + h = Hash.new do |h, k| + k.frozen? + end + + str = "foo" + refute str.frozen? # assumes this file is frozen_string_literal: false + refute h[str] + refute h["foo"] + end; + end + def test_ASET_fstring_key a, b = {}, {} assert_equal 1, a["abc"] = 1 diff --git a/test/ruby/test_integer.rb b/test/ruby/test_integer.rb index a2b181c6422c84..c3e11498bec528 100644 --- a/test/ruby/test_integer.rb +++ b/test/ruby/test_integer.rb @@ -704,4 +704,21 @@ def o.to_int; 1; end def o.to_int; Object.new; end assert_raise_with_message(TypeError, /can't convert Object to Integer/) {Integer.try_convert(o)} end + + def test_ceildiv + assert_equal(0, 0.ceildiv(3)) + assert_equal(1, 1.ceildiv(3)) + assert_equal(1, 3.ceildiv(3)) + assert_equal(2, 4.ceildiv(3)) + + assert_equal(-1, 4.ceildiv(-3)) + assert_equal(-1, -4.ceildiv(3)) + assert_equal(2, -4.ceildiv(-3)) + + assert_equal(3, 3.ceildiv(1.2)) + assert_equal(3, 3.ceildiv(6/5r)) + + assert_equal(10, (10**100-11).ceildiv(10**99-1)) + assert_equal(11, (10**100-9).ceildiv(10**99-1)) + end end diff --git a/test/ruby/test_io.rb b/test/ruby/test_io.rb index 6a3d7594cf04e9..d6fcf16ddd1ee2 100644 --- a/test/ruby/test_io.rb +++ b/test/ruby/test_io.rb @@ -2602,6 +2602,8 @@ def test_foreach bug = '[ruby-dev:31525]' assert_raise(ArgumentError, bug) {IO.foreach} + assert_raise(ArgumentError, "[Bug #18767] [ruby-core:108499]") {IO.foreach(__FILE__, 0){}} + a = nil assert_nothing_raised(ArgumentError, bug) {a = IO.foreach(t.path).to_a} assert_equal(["foo\n", "bar\n", "baz\n"], a, bug) diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index c00bf59e1875c3..da04ae7fa72029 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -892,10 +892,22 @@ def test_sprintf_c assert_raise(Encoding::CompatibilityError) { "%s%s" % [s("\xc2\xa1"), e("\xc2\xa1")] } + + 
assert_equal("\u3042".encode('Windows-31J'), "%c" % "\u3042\u3044".encode('Windows-31J')) end def test_sprintf_p Encoding.list.each do |e| + unless e.ascii_compatible? + format = e.dummy? ? "%p".force_encoding(e) : "%p".encode(e) + assert_raise(Encoding::CompatibilityError) do + sprintf(format, nil) + end + assert_raise(Encoding::CompatibilityError) do + format % nil + end + next + end format = "%p".force_encoding(e) ['', 'a', "\xC2\xA1", "\x00"].each do |s| s.force_encoding(e) diff --git a/test/ruby/test_marshal.rb b/test/ruby/test_marshal.rb index 361d18dd4b4363..fc5cd9e93e7fb4 100644 --- a/test/ruby/test_marshal.rb +++ b/test/ruby/test_marshal.rb @@ -33,7 +33,7 @@ def fact(n) end def test_marshal - a = [1, 2, 3, [4,5,"foo"], {1=>"bar"}, 2.5, fact(30)] + a = [1, 2, 3, 2**32, 2**64, [4,5,"foo"], {1=>"bar"}, 2.5, fact(30)] assert_equal a, Marshal.load(Marshal.dump(a)) [[1,2,3,4], [81, 2, 118, 3146]].each { |w,x,y,z| @@ -47,6 +47,26 @@ def test_marshal } end + def test_marshal_integers + a = [] + [-2, -1, 0, 1, 2].each do |i| + 0.upto(65).map do |exp| + a << 2**exp + i + end + end + assert_equal a, Marshal.load(Marshal.dump(a)) + + a = [2**32, []]*2 + assert_equal a, Marshal.load(Marshal.dump(a)) + + a = [2**32, 2**32, []]*2 + assert_equal a, Marshal.load(Marshal.dump(a)) + end + + def test_marshal_small_bignum_backref + assert_equal [2**32, 2**32], Marshal.load("\x04\b[\al+\b\x00\x00\x00\x00\x01\x00@\x06") + end + StrClone = String.clone def test_marshal_cloned_class assert_instance_of(StrClone, Marshal.load(Marshal.dump(StrClone.new("abc")))) diff --git a/test/ruby/test_method.rb b/test/ruby/test_method.rb index 83e499913a4bb4..7e440095c8d879 100644 --- a/test/ruby/test_method.rb +++ b/test/ruby/test_method.rb @@ -199,11 +199,6 @@ def m.foo; end assert_equal(o.method(:foo), o.method(:foo)) assert_equal(o.method(:foo), o.method(:bar)) assert_not_equal(o.method(:foo), o.method(:baz)) - - class << o - private :bar - end - assert_not_equal(o.method(:foo), 
o.method(:bar)) end def test_hash @@ -330,8 +325,8 @@ class << PUBLIC_SINGLETON_TEST def PUBLIC_SINGLETON_TEST.def; end end def test_define_singleton_method_public - assert_equal(true, PUBLIC_SINGLETON_TEST.method(:dsm).public?) - assert_equal(true, PUBLIC_SINGLETON_TEST.method(:def).public?) + assert_nil(PUBLIC_SINGLETON_TEST.dsm) + assert_nil(PUBLIC_SINGLETON_TEST.def) end def test_define_singleton_method_no_proc @@ -1061,20 +1056,28 @@ def foo; end assert_equal(sm, im.clone.bind(o).super_method) end - def test_super_method_removed + def test_super_method_removed_public c1 = Class.new {private def foo; end} c2 = Class.new(c1) {public :foo} c3 = Class.new(c2) {def foo; end} c1.class_eval {undef foo} m = c3.instance_method(:foo) m = assert_nothing_raised(NameError, Feature9781) {break m.super_method} - assert_nil(m, Feature9781) + assert_equal c2, m.owner + end + + def test_super_method_removed_regular + c1 = Class.new { def foo; end } + c2 = Class.new(c1) { def foo; end } + assert_equal c1.instance_method(:foo), c2.instance_method(:foo).super_method + c1.remove_method :foo + assert_equal nil, c2.instance_method(:foo).super_method end def test_prepended_public_zsuper mod = EnvUtil.labeled_module("Mod") {private def foo; :ok end} - mods = [mod] obj = Object.new.extend(mod) + mods = [obj.singleton_class] class << obj public :foo end @@ -1084,7 +1087,7 @@ class << obj end m = obj.method(:foo) assert_equal(mods, mods.map {m.owner.tap {m = m.super_method}}) - assert_nil(m) + assert_nil(m.super_method) end def test_super_method_with_prepended_module @@ -1197,48 +1200,63 @@ def foo assert_nil(super_method) end - def test_method_visibility_predicates - v = Visibility.new - assert_equal(true, v.method(:mv1).public?) - assert_equal(true, v.method(:mv2).private?) - assert_equal(true, v.method(:mv3).protected?) - assert_equal(false, v.method(:mv2).public?) - assert_equal(false, v.method(:mv3).private?) - assert_equal(false, v.method(:mv1).protected?) 
- end + # Bug 18435 + def test_instance_methods_owner_consistency + a = Module.new { def method1; end } - def test_unbound_method_visibility_predicates - assert_equal(true, Visibility.instance_method(:mv1).public?) - assert_equal(true, Visibility.instance_method(:mv2).private?) - assert_equal(true, Visibility.instance_method(:mv3).protected?) - assert_equal(false, Visibility.instance_method(:mv2).public?) - assert_equal(false, Visibility.instance_method(:mv3).private?) - assert_equal(false, Visibility.instance_method(:mv1).protected?) - end + b = Class.new do + include a + protected :method1 + end - class VisibilitySub < Visibility - protected :mv1 - public :mv2 - private :mv3 + assert_equal [:method1], b.instance_methods(false) + assert_equal b, b.instance_method(:method1).owner end - def test_method_visibility_predicates_with_subclass_visbility_change - v = VisibilitySub.new - assert_equal(false, v.method(:mv1).public?) - assert_equal(false, v.method(:mv2).private?) - assert_equal(false, v.method(:mv3).protected?) - assert_equal(true, v.method(:mv2).public?) - assert_equal(true, v.method(:mv3).private?) - assert_equal(true, v.method(:mv1).protected?) + def test_zsuper_method_removed + a = EnvUtil.labeled_class('A') do + private + def foo(arg = nil) + 1 + end + end + line = __LINE__ - 4 + + b = EnvUtil.labeled_class('B', a) do + public :foo + end + + unbound = b.instance_method(:foo) + + assert_equal unbound, b.public_instance_method(:foo) + assert_equal "#", unbound.inspect + assert_equal [[:opt, :arg]], unbound.parameters + + a.remove_method(:foo) + + assert_equal [[:rest]], unbound.parameters + assert_equal "#", unbound.inspect + + obj = b.new + assert_raise_with_message(NoMethodError, /super: no superclass method `foo'/) { unbound.bind_call(obj) } end - def test_unbound_method_visibility_predicates_with_subclass_visbility_change - assert_equal(false, VisibilitySub.instance_method(:mv1).public?) 
- assert_equal(false, VisibilitySub.instance_method(:mv2).private?) - assert_equal(false, VisibilitySub.instance_method(:mv3).protected?) - assert_equal(true, VisibilitySub.instance_method(:mv2).public?) - assert_equal(true, VisibilitySub.instance_method(:mv3).private?) - assert_equal(true, VisibilitySub.instance_method(:mv1).protected?) + # Bug #18751 + def method_equality_visbility_alias + c = Class.new do + class << self + alias_method :n, :new + private :new + end + end + + assert_equal c.method(:n), c.method(:new) + + assert_not_equal c.method(:n), Class.method(:new) + assert_equal c.method(:n) == Class.instance_method(:new).bind(c) + + assert_not_equal c.method(:new), Class.method(:new) + assert_equal c.method(:new), Class.instance_method(:new).bind(c) end def rest_parameter(*rest) @@ -1359,7 +1377,7 @@ module M2 ::Object.prepend(M2) m = Object.instance_method(:x) - assert_equal M, m.owner + assert_equal M2, m.owner end; end diff --git a/test/ruby/test_mjit.rb b/test/ruby/test_mjit.rb index 02be88aa322687..9cd93855bd9f3b 100644 --- a/test/ruby/test_mjit.rb +++ b/test/ruby/test_mjit.rb @@ -749,7 +749,7 @@ def b end def a - # Calling #b should be vm_exec, not direct mjit_exec. + # Calling #b should be vm_exec, not direct jit_exec. # Otherwise `1` on local variable would be purged. 
1 + b end @@ -782,9 +782,9 @@ def a def test_catching_deep_exception assert_eval_with_jit("#{<<~"begin;"}\n#{<<~"end;"}", stdout: '1', success_count: 4) begin; - def catch_true(paths, prefixes) # catch_except_p: TRUE - prefixes.each do |prefix| # catch_except_p: TRUE - paths.each do |path| # catch_except_p: FALSE + def catch_true(paths, prefixes) # catch_except_p: true + prefixes.each do |prefix| # catch_except_p: true + paths.each do |path| # catch_except_p: false return path end end diff --git a/test/ruby/test_rubyoptions.rb b/test/ruby/test_rubyoptions.rb index ed2bc3538c63ba..cf6829cf8800b4 100644 --- a/test/ruby/test_rubyoptions.rb +++ b/test/ruby/test_rubyoptions.rb @@ -1128,8 +1128,7 @@ def test_null_script end def test_mjit_debug - # mswin uses prebuilt precompiled header. Thus it does not show a pch compilation log to check "-O0 -O1". - if JITSupport.supported? && !RUBY_PLATFORM.match?(/mswin/) + if JITSupport.supported? env = { 'MJIT_SEARCH_BUILD_DIR' => 'true' } assert_in_out_err([env, "--disable-yjit", "--mjit-debug=-O0 -O1", "--mjit-verbose=2", "" ], "", [], /-O0 -O1/) end diff --git a/test/ruby/test_sprintf.rb b/test/ruby/test_sprintf.rb index f2e73eb58dce43..c453ecd350b9b4 100644 --- a/test/ruby/test_sprintf.rb +++ b/test/ruby/test_sprintf.rb @@ -362,11 +362,16 @@ def test_skip def test_char assert_equal("a", sprintf("%c", 97)) assert_equal("a", sprintf("%c", ?a)) - assert_raise(ArgumentError) { sprintf("%c", sprintf("%c%c", ?a, ?a)) } + assert_equal("a", sprintf("%c", "a")) + assert_equal("a", sprintf("%c", sprintf("%c%c", ?a, ?a))) assert_equal(" " * (BSIZ - 1) + "a", sprintf(" " * (BSIZ - 1) + "%c", ?a)) assert_equal(" " * (BSIZ - 1) + "a", sprintf(" " * (BSIZ - 1) + "%-1c", ?a)) assert_equal(" " * BSIZ + "a", sprintf("%#{ BSIZ + 1 }c", ?a)) assert_equal("a" + " " * BSIZ, sprintf("%-#{ BSIZ + 1 }c", ?a)) + assert_raise(ArgumentError) { sprintf("%c", -1) } + s = sprintf("%c".encode(Encoding::US_ASCII), 0x80) + assert_equal("\x80".b, s) + 
assert_predicate(s, :valid_encoding?) end def test_string @@ -507,6 +512,16 @@ def test_named_typed_enc end end + def test_coderange + format_str = "wrong constant name %s" + interpolated_str = "\u3042" + assert_predicate format_str, :ascii_only? + refute_predicate interpolated_str, :ascii_only? + + str = format_str % interpolated_str + refute_predicate str, :ascii_only? + end + def test_named_default h = Hash.new('world') assert_equal("hello world", "hello %{location}" % h) diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index d37924dec1904e..ab14a3c17bb7f2 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -2614,6 +2614,11 @@ def test_inspect_nul assert_equal '"\x0012"', s.inspect, bug8290 end + def test_inspect_next_line + bug16842 = '[ruby-core:98231]' + assert_equal '"\\u0085"', 0x85.chr(Encoding::UTF_8).inspect, bug16842 + end + def test_partition assert_equal(%w(he l lo), S("hello").partition(/l/)) assert_equal(%w(he l lo), S("hello").partition("l")) diff --git a/test/ruby/test_thread_queue.rb b/test/ruby/test_thread_queue.rb index 1c852474b4277f..723450ad23d971 100644 --- a/test/ruby/test_thread_queue.rb +++ b/test/ruby/test_thread_queue.rb @@ -168,6 +168,24 @@ def test_sized_queue_pop_non_block end end + def test_sized_queue_push_timeout + q = Thread::SizedQueue.new(1) + + q << 1 + assert_equal 1, q.size + + t1 = Thread.new { q.push(2, timeout: 1) } + assert_equal t1, t1.join(2) + assert_nil t1.value + + t2 = Thread.new { q.push(2, timeout: 0.1) } + assert_equal t2, t2.join(1) + assert_nil t2.value + ensure + t1&.kill&.join + t2&.kill&.join + end + def test_sized_queue_push_interrupt q = Thread::SizedQueue.new(1) q.push(1) diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb index c8b0034e060075..73737be0ad7457 100644 --- a/test/ruby/test_transcode.rb +++ b/test/ruby/test_transcode.rb @@ -2305,5 +2305,7 @@ def test_newline_options assert_equal("A\rB\r\rC", s.encode(usascii, newline: :cr)) 
assert_equal("A\r\nB\r\r\nC", s.encode(usascii, crlf_newline: true)) assert_equal("A\r\nB\r\r\nC", s.encode(usascii, newline: :crlf)) + assert_equal("A\nB\nC", s.encode(usascii, lf_newline: true)) + assert_equal("A\nB\nC", s.encode(usascii, newline: :lf)) end end diff --git a/test/rubygems/helper.rb b/test/rubygems/helper.rb index 46eefbb48e4ce0..e569986ef21c36 100644 --- a/test/rubygems/helper.rb +++ b/test/rubygems/helper.rb @@ -334,8 +334,6 @@ def setup # capture output Gem::DefaultUserInteraction.ui = Gem::MockGemUi.new - ENV["TMPDIR"] = @tempdir - @orig_SYSTEM_WIDE_CONFIG_FILE = Gem::ConfigFile::SYSTEM_WIDE_CONFIG_FILE Gem::ConfigFile.send :remove_const, :SYSTEM_WIDE_CONFIG_FILE Gem::ConfigFile.send :const_set, :SYSTEM_WIDE_CONFIG_FILE, @@ -465,7 +463,14 @@ def teardown Dir.chdir @current_dir + # Prevent a race condition on removing TMPDIR being written by MJIT + if defined?(RubyVM::MJIT.enabled?) && RubyVM::MJIT.enabled? + RubyVM::MJIT.pause(wait: false) + end FileUtils.rm_rf @tempdir + if defined?(RubyVM::MJIT.enabled?) && RubyVM::MJIT.enabled? + RubyVM::MJIT.resume + end ENV.replace(@orig_env) @@ -599,7 +604,7 @@ def have_git? end def in_path?(executable) # :nodoc: - return true if %r{\A([A-Z]:|/)} =~ executable and File.exist? executable + return true if %r{\A([A-Z]:|/)} =~ executable && File.exist?(executable) ENV["PATH"].split(File::PATH_SEPARATOR).any? do |directory| File.exist? File.join directory, executable @@ -849,7 +854,7 @@ def new_default_spec(name, version, deps = nil, *files) # or a +block+ can be given for full customization of the specification. def util_spec(name, version = 2, deps = nil, *files) # :yields: specification - raise "deps or block, not both" if deps and block_given? + raise "deps or block, not both" if deps && block_given? spec = Gem::Specification.new do |s| s.platform = Gem::Platform::RUBY @@ -1279,10 +1284,10 @@ def self.rubybin rubyexe = "#{ruby}.exe" 3.times do - if File.exist? ruby and File.executable? 
ruby and !File.directory? ruby + if File.exist?(ruby) && File.executable?(ruby) && !File.directory?(ruby) return File.expand_path(ruby) end - if File.exist? rubyexe and File.executable? rubyexe + if File.exist?(rubyexe) && File.executable?(rubyexe) return File.expand_path(rubyexe) end ruby = File.join("..", ruby) @@ -1592,7 +1597,7 @@ def stub(name, val_or_callable, *block_args) metaclass = class << self; self; end - if respond_to? name and not methods.map(&:to_s).include? name.to_s + if respond_to?(name) && !methods.map(&:to_s).include?(name.to_s) metaclass.send :define_method, name do |*args| super(*args) end diff --git a/test/rubygems/test_gem_config_file.rb b/test/rubygems/test_gem_config_file.rb index fbc7c85757e98a..e23773a133c9d1 100644 --- a/test/rubygems/test_gem_config_file.rb +++ b/test/rubygems/test_gem_config_file.rb @@ -35,7 +35,7 @@ def teardown def test_initialize assert_equal @temp_conf, @cfg.config_file_name - assert_equal false, @cfg.backtrace + assert_equal true, @cfg.backtrace assert_equal true, @cfg.update_sources assert_equal Gem::ConfigFile::DEFAULT_BULK_THRESHOLD, @cfg.bulk_threshold assert_equal true, @cfg.verbose @@ -239,6 +239,12 @@ def test_handle_arguments end def test_handle_arguments_backtrace + File.open @temp_conf, "w" do |fp| + fp.puts ":backtrace: false" + end + + util_config_file %W[--config-file=#{@temp_conf}] + assert_equal false, @cfg.backtrace args = %w[--backtrace] @@ -275,6 +281,12 @@ def test_handle_arguments_override end def test_handle_arguments_traceback + File.open @temp_conf, "w" do |fp| + fp.puts ":backtrace: false" + end + + util_config_file %W[--config-file=#{@temp_conf}] + assert_equal false, @cfg.backtrace args = %w[--traceback] @@ -288,7 +300,7 @@ def test_handle_arguments_norc assert_equal @temp_conf, @cfg.config_file_name File.open @temp_conf, "w" do |fp| - fp.puts ":backtrace: true" + fp.puts ":backtrace: false" fp.puts ":update_sources: false" fp.puts ":bulk_threshold: 10" fp.puts ":verbose: false" @@ 
-300,7 +312,7 @@ def test_handle_arguments_norc util_config_file args - assert_equal false, @cfg.backtrace + assert_equal true, @cfg.backtrace assert_equal true, @cfg.update_sources assert_equal Gem::ConfigFile::DEFAULT_BULK_THRESHOLD, @cfg.bulk_threshold assert_equal true, @cfg.verbose @@ -386,7 +398,7 @@ def test_rubygems_api_key_equals_bad_permission end def test_write - @cfg.backtrace = true + @cfg.backtrace = false @cfg.update_sources = false @cfg.bulk_threshold = 10 @cfg.verbose = false @@ -398,7 +410,7 @@ def test_write util_config_file # These should not be written out to the config file. - assert_equal false, @cfg.backtrace, "backtrace" + assert_equal true, @cfg.backtrace, "backtrace" assert_equal Gem::ConfigFile::DEFAULT_BULK_THRESHOLD, @cfg.bulk_threshold, "bulk_threshold" assert_equal true, @cfg.update_sources, "update_sources" diff --git a/test/rubygems/test_gem_dependency_installer.rb b/test/rubygems/test_gem_dependency_installer.rb index 9db904ba537482..2b0b874b2d6727 100644 --- a/test/rubygems/test_gem_dependency_installer.rb +++ b/test/rubygems/test_gem_dependency_installer.rb @@ -1051,8 +1051,8 @@ def test_find_gems_with_sources_prerelease releases = set.all_specs - assert releases.any? {|s| s.name == "a" and s.version.to_s == "1" } - refute releases.any? {|s| s.name == "a" and s.version.to_s == "1.a" } + assert releases.any? {|s| s.name == "a" && s.version.to_s == "1" } + refute releases.any? 
{|s| s.name == "a" && s.version.to_s == "1.a" } dependency.prerelease = true diff --git a/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.lock b/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.lock index 86221bf249e295..0f1fa7c430e5a8 100644 --- a/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.lock +++ b/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.lock @@ -160,9 +160,9 @@ dependencies = [ [[package]] name = "rb-sys" -version = "0.9.28" +version = "0.9.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d7df1d7911fef801edda0b789cca202f3486dff5073eb13dbb85b4715e6f94a" +checksum = "24b22a374fc2e92eb6f49d7efe4eb7663655c6e9455d9259ed3342cc1599da85" dependencies = [ "bindgen", "linkify", @@ -171,9 +171,9 @@ dependencies = [ [[package]] name = "rb-sys-build" -version = "0.9.28" +version = "0.9.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6fa9b908035cb531820f8f3977c538c318308cfaa77da1c6b436577c06db230" +checksum = "3cd23b6dd929b7d50ccb35a6d3aa77dec364328ab9cb304dd32c629332491671" dependencies = [ "regex", "shell-words", diff --git a/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.toml b/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.toml index ae32c194b2959f..c9ba5c27bd696e 100644 --- a/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.toml +++ b/test/rubygems/test_gem_ext_cargo_builder/custom_name/Cargo.toml @@ -7,4 +7,4 @@ edition = "2021" crate-type = ["cdylib"] [dependencies] -rb-sys = { version = "0.9.28", features = ["gem"] } +rb-sys = { version = "0.9.30", features = ["gem"] } diff --git a/test/rubygems/test_gem_ext_cargo_builder/custom_name/build.rb b/test/rubygems/test_gem_ext_cargo_builder/custom_name/build.rb index 21c4fd1c8dec29..0e04f0de5e1444 100644 --- a/test/rubygems/test_gem_ext_cargo_builder/custom_name/build.rb +++ b/test/rubygems/test_gem_ext_cargo_builder/custom_name/build.rb @@ -1,4 +1,4 @@ -if 
ENV["RUBYOPT"] or defined? Gem +if ENV["RUBYOPT"] || defined? Gem ENV.delete "RUBYOPT" require "rbconfig" diff --git a/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.lock b/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.lock index 0a95dceca232fc..5e602fcf92db1f 100644 --- a/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.lock +++ b/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.lock @@ -153,9 +153,9 @@ dependencies = [ [[package]] name = "rb-sys" -version = "0.9.28" +version = "0.9.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d7df1d7911fef801edda0b789cca202f3486dff5073eb13dbb85b4715e6f94a" +checksum = "24b22a374fc2e92eb6f49d7efe4eb7663655c6e9455d9259ed3342cc1599da85" dependencies = [ "bindgen", "linkify", @@ -164,9 +164,9 @@ dependencies = [ [[package]] name = "rb-sys-build" -version = "0.9.28" +version = "0.9.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6fa9b908035cb531820f8f3977c538c318308cfaa77da1c6b436577c06db230" +checksum = "3cd23b6dd929b7d50ccb35a6d3aa77dec364328ab9cb304dd32c629332491671" dependencies = [ "regex", "shell-words", diff --git a/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.toml b/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.toml index 7cced882e98132..8e3f623728b3ce 100644 --- a/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.toml +++ b/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/Cargo.toml @@ -7,4 +7,4 @@ edition = "2021" crate-type = ["cdylib"] [dependencies] -rb-sys = { version = "0.9.28", features = ["gem"] } +rb-sys = { version = "0.9.30", features = ["gem"] } diff --git a/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/build.rb b/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/build.rb index 81b12f99ec0255..f404aa34688c41 100644 --- a/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/build.rb 
+++ b/test/rubygems/test_gem_ext_cargo_builder/rust_ruby_example/build.rb @@ -1,4 +1,4 @@ -if ENV["RUBYOPT"] or defined? Gem +if ENV["RUBYOPT"] || defined? Gem ENV.delete "RUBYOPT" require "rbconfig" diff --git a/test/rubygems/test_gem_installer.rb b/test/rubygems/test_gem_installer.rb index 55f0a074b8776b..0d0746ec84332b 100644 --- a/test/rubygems/test_gem_installer.rb +++ b/test/rubygems/test_gem_installer.rb @@ -473,7 +473,7 @@ def test_generate_bin_script_no_perms end end ensure - FileUtils.chmod 0755, util_inst_bindir unless ($DEBUG or win_platform?) + FileUtils.chmod 0755, util_inst_bindir unless ($DEBUG || win_platform?) end def test_generate_bin_script_no_shebang @@ -577,7 +577,7 @@ def test_generate_bin_symlink_no_perms end end ensure - FileUtils.chmod 0755, util_inst_bindir unless ($DEBUG or win_platform?) + FileUtils.chmod 0755, util_inst_bindir unless ($DEBUG || win_platform?) end def test_generate_bin_symlink_update_newer diff --git a/test/rubygems/test_gem_package_tar_reader.rb b/test/rubygems/test_gem_package_tar_reader.rb index 86ffff4fe52e2e..19860eb7e82090 100644 --- a/test/rubygems/test_gem_package_tar_reader.rb +++ b/test/rubygems/test_gem_package_tar_reader.rb @@ -29,7 +29,7 @@ def test_rewind str = tar_file_header("lib/foo", "", 010644, content.size, Time.now) + - content + "\0" * (512 - content.size) + content + "\0" * (512 - content.size) str << "\0" * 1024 io = TempIO.new(str) diff --git a/test/rubygems/test_gem_platform.rb b/test/rubygems/test_gem_platform.rb index 0fb5bf59a521f8..576f150219cd3a 100644 --- a/test/rubygems/test_gem_platform.rb +++ b/test/rubygems/test_gem_platform.rb @@ -119,8 +119,8 @@ def test_initialize "i586-linux" => ["x86", "linux", nil], "i486-linux" => ["x86", "linux", nil], "i386-linux" => ["x86", "linux", nil], - "i586-linux-gnu" => ["x86", "linux", nil], - "i386-linux-gnu" => ["x86", "linux", nil], + "i586-linux-gnu" => ["x86", "linux", "gnu"], + "i386-linux-gnu" => ["x86", "linux", "gnu"], "i386-mingw32" => 
["x86", "mingw32", nil], "x64-mingw-ucrt" => ["x64", "mingw", "ucrt"], "i386-mswin32" => ["x86", "mswin32", nil], @@ -135,7 +135,13 @@ def test_initialize "i386-solaris2.8" => ["x86", "solaris", "2.8"], "mswin32" => ["x86", "mswin32", nil], "x86_64-linux" => ["x86_64", "linux", nil], + "x86_64-linux-gnu" => ["x86_64", "linux", "gnu"], "x86_64-linux-musl" => ["x86_64", "linux", "musl"], + "x86_64-linux-uclibc" => ["x86_64", "linux", "uclibc"], + "arm-linux-eabi" => ["arm", "linux", "eabi"], + "arm-linux-gnueabi" => ["arm", "linux", "gnueabi"], + "arm-linux-musleabi" => ["arm", "linux", "musleabi"], + "arm-linux-uclibceabi" => ["arm", "linux", "uclibceabi"], "x86_64-openbsd3.9" => ["x86_64", "openbsd", "3.9"], "x86_64-openbsd4.0" => ["x86_64", "openbsd", "4.0"], "x86_64-openbsd" => ["x86_64", "openbsd", nil], @@ -144,6 +150,7 @@ def test_initialize test_cases.each do |arch, expected| platform = Gem::Platform.new arch assert_equal expected, platform.to_a, arch.inspect + assert_equal expected, Gem::Platform.new(platform.to_s).to_a, arch.inspect end end @@ -262,6 +269,70 @@ def test_nil_cpu_arch_is_treated_as_universal assert((with_x86_arch === with_nil_arch), "x86 =~ nil") end + def test_nil_version_is_treated_as_any_version + x86_darwin_8 = Gem::Platform.new "i686-darwin8.0" + x86_darwin_nil = Gem::Platform.new "i686-darwin" + + assert((x86_darwin_8 === x86_darwin_nil), "8.0 =~ nil") + assert((x86_darwin_nil === x86_darwin_8), "nil =~ 8.0") + end + + def test_nil_version_is_stricter_for_linux_os + x86_linux = Gem::Platform.new "i686-linux" + x86_linux_gnu = Gem::Platform.new "i686-linux-gnu" + x86_linux_musl = Gem::Platform.new "i686-linux-musl" + x86_linux_uclibc = Gem::Platform.new "i686-linux-uclibc" + + # a naked linux runtime is implicit gnu, as it represents the common glibc-linked runtime + assert(x86_linux === x86_linux_gnu, "linux =~ linux-gnu") + assert(x86_linux_gnu === x86_linux, "linux-gnu =~ linux") + + # musl and explicit gnu should differ + 
refute(x86_linux_gnu === x86_linux_musl, "linux-gnu =~ linux-musl") + refute(x86_linux_musl === x86_linux_gnu, "linux-musl =~ linux-gnu") + + # explicit libc differ + refute(x86_linux_uclibc === x86_linux_musl, "linux-uclibc =~ linux-musl") + refute(x86_linux_musl === x86_linux_uclibc, "linux-musl =~ linux-uclibc") + + # musl host runtime accepts libc-generic or statically linked gems... + assert(x86_linux === x86_linux_musl, "linux =~ linux-musl") + # ...but implicit gnu runtime generally does not accept musl-specific gems + refute(x86_linux_musl === x86_linux, "linux-musl =~ linux") + + # other libc are not glibc compatible + refute(x86_linux === x86_linux_uclibc, "linux =~ linux-uclibc") + refute(x86_linux_uclibc === x86_linux, "linux-uclibc =~ linux") + end + + def test_eabi_version_is_stricter_for_linux_os + arm_linux_eabi = Gem::Platform.new "arm-linux-eabi" + arm_linux_gnueabi = Gem::Platform.new "arm-linux-gnueabi" + arm_linux_musleabi = Gem::Platform.new "arm-linux-musleabi" + arm_linux_uclibceabi = Gem::Platform.new "arm-linux-uclibceabi" + + # a naked linux runtime is implicit gnu, as it represents the common glibc-linked runtime + assert(arm_linux_eabi === arm_linux_gnueabi, "linux-eabi =~ linux-gnueabi") + assert(arm_linux_gnueabi === arm_linux_eabi, "linux-gnueabi =~ linux-eabi") + + # musl and explicit gnu should differ + refute(arm_linux_gnueabi === arm_linux_musleabi, "linux-gnueabi =~ linux-musleabi") + refute(arm_linux_musleabi === arm_linux_gnueabi, "linux-musleabi =~ linux-gnueabi") + + # explicit libc differ + refute(arm_linux_uclibceabi === arm_linux_musleabi, "linux-uclibceabi =~ linux-musleabi") + refute(arm_linux_musleabi === arm_linux_uclibceabi, "linux-musleabi =~ linux-uclibceabi") + + # musl host runtime accepts libc-generic or statically linked gems... 
+ assert(arm_linux_eabi === arm_linux_musleabi, "linux-eabi =~ linux-musleabi") + # ...but implicit gnu runtime generally does not accept musl-specific gems + refute(arm_linux_musleabi === arm_linux_eabi, "linux-musleabi =~ linux-eabi") + + # other libc are not glibc compatible + refute(arm_linux_eabi === arm_linux_uclibceabi, "linux-eabi =~ linux-uclibceabi") + refute(arm_linux_uclibceabi === arm_linux_eabi, "linux-uclibceabi =~ linux-eabi") + end + def test_equals3_cpu_arm arm = Gem::Platform.new "arm-linux" armv5 = Gem::Platform.new "armv5-linux" diff --git a/test/rubygems/test_gem_resolver.rb b/test/rubygems/test_gem_resolver.rb index 22712447292fc7..c816d5484ba7ba 100644 --- a/test/rubygems/test_gem_resolver.rb +++ b/test/rubygems/test_gem_resolver.rb @@ -322,16 +322,15 @@ def test_picks_highest_version def test_picks_best_platform is = Gem::Resolver::IndexSpecification unknown = Gem::Platform.new "unknown" - a2_p1 = a3_p2 = nil spec_fetcher do |fetcher| fetcher.spec "a", 2 - a2_p1 = fetcher.spec "a", 2 do |s| + fetcher.spec "a", 2 do |s| s.platform = Gem::Platform.local end - a3_p2 = fetcher.spec "a", 3 do |s| + fetcher.spec "a", 3 do |s| s.platform = unknown end end @@ -357,6 +356,41 @@ def test_picks_best_platform assert_resolves_to [a2_p1.spec], res end + def test_does_not_pick_musl_variants_on_non_musl_linux + util_set_arch "aarch64-linux" do + is = Gem::Resolver::IndexSpecification + + linux_musl = Gem::Platform.new("aarch64-linux-musl") + + spec_fetcher do |fetcher| + fetcher.spec "libv8-node", "15.14.0.1" do |s| + s.platform = Gem::Platform.local + end + + fetcher.spec "libv8-node", "15.14.0.1" do |s| + s.platform = linux_musl + end + end + + v15 = v("15.14.0.1") + source = Gem::Source.new @gem_repo + + s = set + + v15_linux = is.new s, "libv8-node", v15, source, Gem::Platform.local.to_s + v15_linux_musl = is.new s, "libv8-node", v15, source, linux_musl.to_s + + s.add v15_linux + s.add v15_linux_musl + + ad = make_dep "libv8-node", "= 15.14.0.1" + + 
res = Gem::Resolver.new([ad], s) + + assert_resolves_to [v15_linux.spec], res + end + end + def test_only_returns_spec_once a1 = util_spec "a", "1", "c" => "= 1" b1 = util_spec "b", "1", "c" => "= 1" diff --git a/test/rubygems/test_gem_resolver_installer_set.rb b/test/rubygems/test_gem_resolver_installer_set.rb index 32e1faa28d4368..7617919e2ced8c 100644 --- a/test/rubygems/test_gem_resolver_installer_set.rb +++ b/test/rubygems/test_gem_resolver_installer_set.rb @@ -51,8 +51,27 @@ def test_add_always_install_platform assert_equal %w[a-1], set.always_install.map {|s| s.full_name } end + def test_add_always_install_platform_if_gem_platforms_modified_by_platform_flag + freebsd = Gem::Platform.new "x86-freebsd-9" + + spec_fetcher do |fetcher| + fetcher.download "a", 1 + fetcher.download "a", 1 do |s| + s.platform = freebsd + end + end + + # equivalent to --platform=x86-freebsd-9 + Gem.platforms << freebsd + set = Gem::Resolver::InstallerSet.new :both + + set.add_always_install dep("a") + + assert_equal %w[a-1-x86-freebsd-9], set.always_install.map {|s| s.full_name } + end + def test_add_always_install_index_spec_platform - a_1_local, a_1_local_gem = util_gem "a", 1 do |s| + _, a_1_local_gem = util_gem "a", 1 do |s| s.platform = Gem::Platform.local end @@ -200,6 +219,18 @@ def test_find_all_prerelease set.find_all(req).map {|spec| spec.full_name }.sort end + def test_find_all_prerelease_dependencies_with_add_local + activesupport_7_1_0_alpha = util_spec "activesupport", "7.1.0.alpha" + + install_gem activesupport_7_1_0_alpha + + set = Gem::Resolver::InstallerSet.new :both + + req = Gem::Resolver::DependencyRequest.new dep("activesupport", ">= 4.2.0"), nil + + assert_equal %w[activesupport-7.1.0.alpha], set.find_all(req).map {|spec| spec.full_name } + end + def test_load_spec specs = spec_fetcher do |fetcher| fetcher.spec "a", 2 diff --git a/test/rubygems/test_gem_specification.rb b/test/rubygems/test_gem_specification.rb index cf0dba4331fc3c..8ce8293f33a742 100644 --- 
a/test/rubygems/test_gem_specification.rb +++ b/test/rubygems/test_gem_specification.rb @@ -796,7 +796,7 @@ def test_self_load_relative assert_equal File.join(@tempdir, "a-2.gemspec"), spec.loaded_from end - if RUBY_ENGINE == "ruby" and RUBY_VERSION < "2.7" + if RUBY_ENGINE == "ruby" && RUBY_VERSION < "2.7" def test_self_load_tainted full_path = @a2.spec_file write_file full_path do |io| @@ -1450,7 +1450,7 @@ def test_build_extensions_extensions_dir_unwritable @ext.build_extensions assert_path_not_exist @ext.extension_dir ensure - unless ($DEBUG or win_platform? or Process.uid.zero? or Gem.java_platform?) + unless ($DEBUG || win_platform? || Process.uid.zero? || Gem.java_platform?) FileUtils.chmod 0755, File.join(@ext.base_dir, "extensions") FileUtils.chmod 0755, @ext.base_dir end diff --git a/test/rubygems/test_require.rb b/test/rubygems/test_require.rb index f933bbb5d5c036..6135acea924a03 100644 --- a/test/rubygems/test_require.rb +++ b/test/rubygems/test_require.rb @@ -269,7 +269,7 @@ def test_activate_via_require_respects_loaded_files assert_includes $LOAD_PATH, rubylibdir message = proc { "this test relies on the b-2 gem lib/ to be before stdlib to make sense\n" + - $LOAD_PATH.pretty_inspect + $LOAD_PATH.pretty_inspect } assert_operator $LOAD_PATH.index(b2.load_paths[0]), :<, $LOAD_PATH.index(rubylibdir), message diff --git a/test/rubygems/utilities.rb b/test/rubygems/utilities.rb index 5f8f763cb56bd0..c01f7acd481b45 100644 --- a/test/rubygems/utilities.rb +++ b/test/rubygems/utilities.rb @@ -39,9 +39,9 @@ def initialize end def find_data(path) - return Gem.read_binary path.path if URI === path and "file" == path.scheme + return Gem.read_binary path.path if URI === path && "file" == path.scheme - if URI === path and "URI::#{path.scheme.upcase}" != path.class.name + if URI === path && "URI::#{path.scheme.upcase}" != path.class.name raise ArgumentError, "mismatch for scheme #{path.scheme} and class #{path.class}" end @@ -67,7 +67,7 @@ def fetch_path(path, mtime 
= nil, head = false) if data.respond_to?(:call) data.call else - if path.to_s.end_with?(".gz") and not data.nil? and not data.empty? + if path.to_s.end_with?(".gz") && !data.nil? && !data.empty? data = Gem::Util.gunzip data end data @@ -76,7 +76,7 @@ def fetch_path(path, mtime = nil, head = false) def cache_update_path(uri, path = nil, update = true) if data = fetch_path(uri) - File.open(path, "wb") {|io| io.write data } if path and update + File.open(path, "wb") {|io| io.write data } if path && update data else Gem.read_binary(path) if path diff --git a/thread.c b/thread.c index dd8937488932cd..907d4f8fdcf8a9 100644 --- a/thread.c +++ b/thread.c @@ -1364,14 +1364,14 @@ sleep_hrtime_until(rb_thread_t *th, rb_hrtime_t end, unsigned int fl) void rb_thread_sleep_forever(void) { - RUBY_DEBUG_LOG("%s", ""); + RUBY_DEBUG_LOG(""); sleep_forever(GET_THREAD(), SLEEP_SPURIOUS_CHECK); } void rb_thread_sleep_deadly(void) { - RUBY_DEBUG_LOG("%s", ""); + RUBY_DEBUG_LOG(""); sleep_forever(GET_THREAD(), SLEEP_DEADLOCKABLE|SLEEP_SPURIOUS_CHECK); } @@ -1395,7 +1395,7 @@ rb_thread_sleep_deadly_allow_spurious_wakeup(VALUE blocker, VALUE timeout, rb_hr rb_fiber_scheduler_block(scheduler, blocker, timeout); } else { - RUBY_DEBUG_LOG("%s", ""); + RUBY_DEBUG_LOG(""); if (end) { sleep_hrtime_until(GET_THREAD(), end, SLEEP_SPURIOUS_CHECK); } @@ -1492,7 +1492,7 @@ blocking_region_begin(rb_thread_t *th, struct rb_blocking_region_buffer *region, th->status = THREAD_STOPPED; rb_ractor_blocking_threads_inc(th->ractor, __FILE__, __LINE__); - RUBY_DEBUG_LOG("%s", ""); + RUBY_DEBUG_LOG(""); RB_GC_SAVE_MACHINE_CONTEXT(th); thread_sched_to_waiting(TH_SCHED(th)); @@ -1520,7 +1520,7 @@ blocking_region_end(rb_thread_t *th, struct rb_blocking_region_buffer *region) th->status = region->prev_status; } - RUBY_DEBUG_LOG("%s", ""); + RUBY_DEBUG_LOG(""); VM_ASSERT(th == GET_THREAD()); } diff --git a/thread_sync.c b/thread_sync.c index 0359ac2214d808..4ae404ec055bb7 100644 --- a/thread_sync.c +++ 
b/thread_sync.c @@ -624,7 +624,8 @@ rb_mutex_synchronize_m(VALUE self) return rb_mutex_synchronize(self, rb_yield, Qundef); } -void rb_mutex_allow_trap(VALUE self, int val) +void +rb_mutex_allow_trap(VALUE self, int val) { Check_TypedStruct(self, &mutex_data_type); @@ -714,7 +715,8 @@ queue_ptr(VALUE obj) #define QUEUE_CLOSED FL_USER5 static rb_hrtime_t -queue_timeout2hrtime(VALUE timeout) { +queue_timeout2hrtime(VALUE timeout) +{ if (NIL_P(timeout)) { return (rb_hrtime_t)0; } @@ -1227,39 +1229,15 @@ rb_szqueue_max_set(VALUE self, VALUE vmax) return vmax; } -static int -szqueue_push_should_block(int argc, const VALUE *argv) -{ - int should_block = 1; - rb_check_arity(argc, 1, 2); - if (argc > 1) { - should_block = !RTEST(argv[1]); - } - return should_block; -} - -/* - * Document-method: Thread::SizedQueue#push - * call-seq: - * push(object, non_block=false) - * enq(object, non_block=false) - * <<(object) - * - * Pushes +object+ to the queue. - * - * If there is no space left in the queue, waits until space becomes - * available, unless +non_block+ is true. If +non_block+ is true, the - * thread isn't suspended, and +ThreadError+ is raised. 
- */ - static VALUE -rb_szqueue_push(int argc, VALUE *argv, VALUE self) +rb_szqueue_push(rb_execution_context_t *ec, VALUE self, VALUE object, VALUE non_block, VALUE timeout) { + rb_hrtime_t end = queue_timeout2hrtime(timeout); + bool timed_out = false; struct rb_szqueue *sq = szqueue_ptr(self); - int should_block = szqueue_push_should_block(argc, argv); while (queue_length(self, &sq->q) >= sq->max) { - if (!should_block) { + if (RTEST(non_block)) { rb_raise(rb_eThreadError, "queue full"); } else if (queue_closed_p(self)) { @@ -1279,11 +1257,14 @@ rb_szqueue_push(int argc, VALUE *argv, VALUE self) struct queue_sleep_arg queue_sleep_arg = { .self = self, - .timeout = Qnil, - .end = 0 + .timeout = timeout, + .end = end }; - rb_ensure(queue_sleep, (VALUE)&queue_sleep_arg, szqueue_sleep_done, (VALUE)&queue_waiter); + if (!NIL_P(timeout) && rb_hrtime_now() >= end) { + timed_out = true; + break; + } } } @@ -1291,7 +1272,9 @@ rb_szqueue_push(int argc, VALUE *argv, VALUE self) raise_closed_queue_error(self); } - return queue_do_push(self, &sq->q, argv[0]); + if (timed_out) return Qnil; + + return queue_do_push(self, &sq->q, object); } static VALUE @@ -1609,13 +1592,10 @@ Init_thread_sync(void) rb_define_method(rb_cSizedQueue, "close", rb_szqueue_close, 0); rb_define_method(rb_cSizedQueue, "max", rb_szqueue_max_get, 0); rb_define_method(rb_cSizedQueue, "max=", rb_szqueue_max_set, 1); - rb_define_method(rb_cSizedQueue, "push", rb_szqueue_push, -1); rb_define_method(rb_cSizedQueue, "empty?", rb_szqueue_empty_p, 0); rb_define_method(rb_cSizedQueue, "clear", rb_szqueue_clear, 0); rb_define_method(rb_cSizedQueue, "length", rb_szqueue_length, 0); rb_define_method(rb_cSizedQueue, "num_waiting", rb_szqueue_num_waiting, 0); - rb_define_alias(rb_cSizedQueue, "enq", "push"); - rb_define_alias(rb_cSizedQueue, "<<", "push"); rb_define_alias(rb_cSizedQueue, "size", "length"); /* CVar */ diff --git a/thread_sync.rb b/thread_sync.rb index d567ca51af1047..7e4c341ad0c65e 100644 --- 
a/thread_sync.rb +++ b/thread_sync.rb @@ -41,5 +41,28 @@ def pop(non_block = false, timeout: nil) end alias_method :deq, :pop alias_method :shift, :pop + + # call-seq: + # push(object, non_block=false, timeout: nil) + # enq(object, non_block=false, timeout: nil) + # <<(object) + # + # Pushes +object+ to the queue. + # + # If there is no space left in the queue, waits until space becomes + # available, unless +non_block+ is true. If +non_block+ is true, the + # thread isn't suspended, and +ThreadError+ is raised. + # + # If +timeout+ seconds have passed and no space is available +nil+ is + # returned. + # Otherwise it returns +self+. + def push(object, non_block = false, timeout: nil) + if non_block && timeout + raise ArgumentError, "can't set a timeout if non_block is enabled" + end + Primitive.rb_szqueue_push(object, non_block, timeout) + end + alias_method :enq, :push + alias_method :<<, :push end end diff --git a/thread_win32.c b/thread_win32.c index cbb01d5d84f513..e9deff23cc1a3e 100644 --- a/thread_win32.c +++ b/thread_win32.c @@ -746,7 +746,7 @@ static unsigned long __stdcall timer_thread_func(void *dummy) { rb_vm_t *vm = GET_VM(); - RUBY_DEBUG_LOG("%s", "start"); + RUBY_DEBUG_LOG("start"); rb_w32_set_thread_description(GetCurrentThread(), L"ruby-timer-thread"); while (WaitForSingleObject(timer_thread.lock, TIME_QUANTUM_USEC/1000) == WAIT_TIMEOUT) { @@ -754,7 +754,7 @@ timer_thread_func(void *dummy) ruby_sigchld_handler(vm); /* probably no-op */ rb_threadptr_check_signal(vm->ractor.main_thread); } - RUBY_DEBUG_LOG("%s", "end"); + RUBY_DEBUG_LOG("end"); return 0; } diff --git a/time.c b/time.c index a7a4c5dc1a393e..2b4323a5363c79 100644 --- a/time.c +++ b/time.c @@ -3348,32 +3348,100 @@ tmcmp(struct tm *a, struct tm *b) /* * call-seq: - * Time.utc(year, month = 1, day = 1, hour = 0, min = 0, sec_i = 0, usec = 0) -> new_time - * Time.utc(sec_i, min, hour, day, month, year, dummy, dummy, dummy, dummy) -> new_time + * Time.utc(year, month = 1, mday = 1, hour = 0, 
min = 0, sec = 0, usec = 0) -> new_time + * Time.utc(sec, min, hour, mday, month, year, dummy, dummy, dummy, dummy) -> new_time * - * Returns a new \Time object based the on given arguments; - * its timezone is UTC. + * Returns a new \Time object based on the given arguments, + * in the UTC timezone. * - * In the first form (up to seven arguments), argument +year+ is required. + * With one to seven arguments given, + * the arguments are interpreted as in the first calling sequence above: * - * Time.utc(2000) # => 2000-01-01 00:00:00 UTC - * Time.utc(0, 1, 2, 3, 4, 5, 6.5) # => 0000-01-02 03:04:05.0000065 UTC + * Time.utc(year, month = 1, mday = 1, hour = 0, min = 0, sec = 0, usec = 0) * - * In the second form, all ten arguments are required, - * though the last four are ignored. - * This form is useful for creating a time from a 10-element array - * such as is returned by #to_a. + * Examples: * - * array = Time.now.to_a - * # => [55, 14, 10, 7, 7, 2022, 4, 188, true, "Central Daylight Time"] - * array[5] = 2000 - * Time.utc(*array) # => 2000-07-07 10:14:55 UTC + * Time.utc(2000) # => 2000-01-01 00:00:00 UTC + * Time.utc(-2000) # => -2000-01-01 00:00:00 UTC * - * Parameters: - * :include: doc/time/year.rdoc - * :include: doc/time/mon-min.rdoc - * :include: doc/time/sec_i.rdoc - * :include: doc/time/usec.rdoc + * There are no minimum and maximum values for the required argument +year+.
+ * + * For the optional arguments: + * + * - +month+: Month in range (1..12), or case-insensitive + * 3-letter month name: + * + * Time.utc(2000, 1) # => 2000-01-01 00:00:00 UTC + * Time.utc(2000, 12) # => 2000-12-01 00:00:00 UTC + * Time.utc(2000, 'jan') # => 2000-01-01 00:00:00 UTC + * Time.utc(2000, 'JAN') # => 2000-01-01 00:00:00 UTC + * + * - +mday+: Month day in range (1..31): + * + * Time.utc(2000, 1, 1) # => 2000-01-01 00:00:00 UTC + * Time.utc(2000, 1, 31) # => 2000-01-31 00:00:00 UTC + * + * - +hour+: Hour in range (0..23), or 24 if +min+, +sec+, and +usec+ + * are zero: + * + * Time.utc(2000, 1, 1, 0) # => 2000-01-01 00:00:00 UTC + * Time.utc(2000, 1, 1, 23) # => 2000-01-01 23:00:00 UTC + * Time.utc(2000, 1, 1, 24) # => 2000-01-02 00:00:00 UTC + * + * - +min+: Minute in range (0..59): + * + * Time.utc(2000, 1, 1, 0, 0) # => 2000-01-01 00:00:00 UTC + * Time.utc(2000, 1, 1, 0, 59) # => 2000-01-01 00:59:00 UTC + * + * - +sec+: Second in range (0..59), or 60 if +usec+ is zero: + * + * Time.utc(2000, 1, 1, 0, 0, 0) # => 2000-01-01 00:00:00 UTC + * Time.utc(2000, 1, 1, 0, 0, 59) # => 2000-01-01 00:00:59 UTC + * Time.utc(2000, 1, 1, 0, 0, 60) # => 2000-01-01 00:01:00 UTC + * + * - +usec+: Microsecond in range (0..999999): + * + * Time.utc(2000, 1, 1, 0, 0, 0, 0) # => 2000-01-01 00:00:00 UTC + * Time.utc(2000, 1, 1, 0, 0, 0, 999999) # => 2000-01-01 00:00:00.999999 UTC + * + * The values may be: + * + * - Integers, as above.
+ * - Numerics convertible to integers: + * + * Time.utc(Float(0.0), Rational(1, 1), 1.0, 0.0, 0.0, 0.0, 0.0) + * # => 0000-01-01 00:00:00 UTC + * + * - \String integers: + * + * a = %w[0 1 1 0 0 0 0] + * # => ["0", "1", "1", "0", "0", "0", "0"] + * Time.utc(*a) # => 0000-01-01 00:00:00 UTC + * + * When exactly ten arguments are given, + * the arguments are interpreted as in the second calling sequence above: + * + * Time.utc(sec, min, hour, mday, month, year, dummy, dummy, dummy, dummy) + * + * where the +dummy+ arguments are ignored: + * + * a = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + * # => [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + * Time.utc(*a) # => 0005-04-03 02:01:00 UTC + * + * This form is useful for creating a \Time object from a 10-element + * array returned by Time#to_a: + * + * t = Time.new(2000, 1, 2, 3, 4, 5, 6) # => 2000-01-02 03:04:05 +000006 + * a = t.to_a # => [5, 4, 3, 2, 1, 2000, 0, 2, false, nil] + * Time.utc(*a) # => 2000-01-02 03:04:05 UTC + * + * The two forms have their first six arguments in common, + * though in different orders; + * the ranges of these common arguments are the same for both forms; see above. + * + * Raises an exception if the number of arguments is eight, nine, + * or greater than ten. * * Time.gm is an alias for Time.utc. * @@ -3391,36 +3459,19 @@ time_s_mkutc(int argc, VALUE *argv, VALUE klass) /* * call-seq: - * Time.local(year, month = 1, day = 1, hour = 0, min = 0, sec_i = 0, usec = 0) -> new_time - * Time.local(sec, min, hour, day, month, year, dummy, dummy, dummy, dummy) -> new_time - * - * Returns a new \Time object based the on given arguments; - * its timezone is the local timezone. - * - * In the first form (up to seven arguments), argument +year+ is required.
- * - * Time.local(2000) # => 2000-01-01 00:00:00 -0600 - * Time.local(0, 1, 2, 3, 4, 5, 6.5) # => 0000-01-02 03:04:05.0000065 -0600 + * Time.local(year, month = 1, mday = 1, hour = 0, min = 0, sec = 0, usec = 0) -> new_time + * Time.local(sec, min, hour, mday, month, year, dummy, dummy, dummy, dummy) -> new_time * - * In the second form, all ten arguments are required, - * though the last four are ignored. - * This form is useful for creating a time from a 10-element array - * such as those returned by #to_a. + * Like Time.utc, except that the returned \Time object + * has the local timezone, not the UTC timezone: * - * array = Time.now.to_a - * # => [57, 18, 10, 7, 7, 2022, 4, 188, true, "Central Daylight Time"] - * array[5] = 2000 - * Time.local(*array) # => 2000-07-07 10:18:57 -0500 + * # With seven arguments. + * Time.local(0, 1, 2, 3, 4, 5, 6) + * # => 0000-01-02 03:04:05.000006 -0600 + * # With exactly ten arguments. + * Time.local(0, 1, 2, 3, 4, 5, 6, 7, 8, 9) + * # => 0005-04-03 02:01:00 -0600 * - * Parameters: - * :include: doc/time/year.rdoc - * :include: doc/time/mon-min.rdoc - * :include: doc/time/sec_i.rdoc - * :include: doc/time/usec.rdoc - * - * Time.mktime is an alias for Time.local. - * - * Related: Time.utc. 
*/ static VALUE @@ -3436,18 +3487,18 @@ time_s_mktime(int argc, VALUE *argv, VALUE klass) * call-seq: * to_i -> integer * - * Returns the number of seconds since the Epoch - * for the time represented in +self+: - * - * Time.utc(1970, 1, 1).to_i # => 0 - * t = Time.now.to_i # => 1595263289 + * Returns the value of +self+ as integer + * {Epoch seconds}[rdoc-ref:Time@Epoch+Seconds]; + * subseconds are truncated (not rounded): * - * Subseconds are omitted: - * - * t = Time.now # => 2022-07-12 09:13:48.5075976 -0500 - * t.to_i # => 1657635228 + * Time.utc(1970, 1, 1, 0, 0, 0).to_i # => 0 + * Time.utc(1970, 1, 1, 0, 0, 0, 999999).to_i # => 0 + * Time.utc(1950, 1, 1, 0, 0, 0).to_i # => -631152000 + * Time.utc(1990, 1, 1, 0, 0, 0).to_i # => 631152000 * * Time#tv_sec is an alias for Time#to_i. + * + * Related: Time#to_f, Time#to_r. */ static VALUE @@ -3463,16 +3514,20 @@ time_to_i(VALUE time) * call-seq: * to_f -> float * - * Returns the value of +self+ as a Float number of seconds - * since the Epoch. + * Returns the value of +self+ as a Float number of + * {Epoch seconds}[rdoc-ref:Time@Epoch+Seconds]; + * subseconds are included. + * * The stored value of +self+ is a * {Rational}[rdoc-ref:Rational@#method-i-to_f], * which means that the returned value may be approximate: * - * t = Time.now # => 2022-07-07 11:23:18.0784889 -0500 - * t.to_f # => 1657210998.0784888 - * t.to_i # => 1657210998 + * Time.utc(1970, 1, 1, 0, 0, 0).to_f # => 0.0 + * Time.utc(1970, 1, 1, 0, 0, 0, 999999).to_f # => 0.999999 + * Time.utc(1950, 1, 1, 0, 0, 0).to_f # => -631152000.0 + * Time.utc(1990, 1, 1, 0, 0, 0).to_f # => 631152000.0 * + * Related: Time#to_i, Time#to_r.
*/ static VALUE @@ -3486,13 +3541,14 @@ time_to_f(VALUE time) /* * call-seq: - * time.to_r -> rational + * to_r -> rational * - * Returns the value of +self+ as a Rational number of seconds - * since the Epoch, which is exact: + * Returns the value of +self+ as an exact Rational number of + * {Epoch seconds}[rdoc-ref:Time@Epoch+Seconds]: * * Time.now.to_r # => (16571402750320203/10000000) * + * Related: Time#to_f, Time#to_i. */ static VALUE @@ -3540,7 +3596,7 @@ time_usec(VALUE time) /* * call-seq: - * time.nsec -> integer + * nsec -> integer * * Returns the number of nanoseconds in the subseconds part of +self+ * in the range (0..999_999_999); @@ -3659,20 +3715,20 @@ time_eql(VALUE time1, VALUE time2) /* * call-seq: - * time.utc? -> true or false - * time.gmt? -> true or false + * utc? -> true or false * - * Returns +true+ if _time_ represents a time in UTC (GMT). + * Returns +true+ if +self+ represents a time in UTC (GMT): * - * t = Time.now #=> 2007-11-19 08:15:23 -0600 - * t.utc? #=> false - * t = Time.gm(2000,"jan",1,20,15,1) #=> 2000-01-01 20:15:01 UTC - * t.utc? #=> true + * now = Time.now + * # => 2022-08-18 10:24:13.5398485 -0500 + * now.utc? # => false + * utc = Time.utc(2000, 1, 1, 20, 15, 1) + * # => 2000-01-01 20:15:01 UTC + * utc.utc? # => true * - * t = Time.now #=> 2007-11-19 08:16:03 -0600 - * t.gmt? #=> false - * t = Time.gm(2000,1,1,20,15,1) #=> 2000-01-01 20:15:01 UTC - * t.gmt? #=> true + * Time#gmt? is an alias for Time#utc?. + * + * Related: Time.utc. */ static VALUE @@ -3686,11 +3742,11 @@ time_utc_p(VALUE time) /* * call-seq: - * time.hash -> integer + * hash -> integer * - * Returns a hash code for this Time object. + * Returns the integer hash code for +self+. * - * See also Object#hash. + * Related: Object#hash.
*/ static VALUE @@ -3777,25 +3833,27 @@ time_zonelocal(VALUE time, VALUE off) /* * call-seq: - * time.localtime -> time - * time.localtime(utc_offset) -> time + * localtime -> self or new_time + * localtime(zone) -> new_time + * + * With no argument given: * - * Converts _time_ to local time (using the local time zone in - * effect at the creation time of _time_) modifying the receiver. + * - Returns +self+ if +self+ is a local time. + * - Otherwise returns a new \Time in the user's local timezone: * - * If +utc_offset+ is given, it is used instead of the local time. + * t = Time.utc(2000, 1, 1, 20, 15, 1) # => 2000-01-01 20:15:01 UTC + * t.localtime # => 2000-01-01 14:15:01 -0600 * - * t = Time.utc(2000, "jan", 1, 20, 15, 1) #=> 2000-01-01 20:15:01 UTC - * t.utc? #=> true + * With argument +zone+ given, + * returns the new \Time object created by converting + * +self+ to the given time zone: * - * t.localtime #=> 2000-01-01 14:15:01 -0600 - * t.utc? #=> false + * t = Time.utc(2000, 1, 1, 20, 15, 1) # => 2000-01-01 20:15:01 UTC + * t.localtime("-09:00") # => 2000-01-01 11:15:01 -0900 * - * t.localtime("+09:00") #=> 2000-01-02 05:15:01 +0900 - * t.utc? #=> false + * For forms of argument +zone+, see + * {Timezone Specifiers}[rdoc-ref:timezones.rdoc]. * - * If +utc_offset+ is not given and _time_ is local time, just returns - * the receiver. */ static VALUE @@ -3812,20 +3870,18 @@ time_localtime_m(int argc, VALUE *argv, VALUE time) /* * call-seq: - * time.gmtime -> time - * time.utc -> time + * utc -> self + * + * Returns +self+, converted to the UTC timezone: * - * Converts _time_ to UTC (GMT), modifying the receiver. + * t = Time.new(2000) # => 2000-01-01 00:00:00 -0600 + * t.utc? # => false + * t.utc # => 2000-01-01 06:00:00 UTC + * t.utc? # => true * - * t = Time.now #=> 2007-11-19 08:18:31 -0600 - * t.gmt? #=> false - * t.gmtime #=> 2007-11-19 14:18:31 UTC - * t.gmt? #=> true + * Time#gmtime is an alias for Time#utc. 
* - * t = Time.now #=> 2007-11-19 08:18:51 -0600 - * t.utc? #=> false - * t.utc #=> 2007-11-19 14:18:51 UTC - * t.utc? #=> true + * Related: Time#getutc (returns a new converted \Time object). */ static VALUE @@ -3887,31 +3943,19 @@ time_fixoff(VALUE time) /* * call-seq: - * time.getlocal -> new_time - * time.getlocal(utc_offset) -> new_time - * time.getlocal(timezone) -> new_time + * getlocal(zone = nil) -> new_time * - * Returns a new Time object representing _time_ in - * local time (using the local time zone in effect for this process). + * Returns a new \Time object representing the value of +self+ + * converted to a given timezone; + * if +zone+ is +nil+, the local timezone is used: * - * If +utc_offset+ is given, it is used instead of the local time. - * +utc_offset+ can be given as a human-readable string (eg. "+09:00") - * or as a number of seconds (eg. 32400). + * t = Time.utc(2000) # => 2000-01-01 00:00:00 UTC + * t.getlocal # => 1999-12-31 18:00:00 -0600 + * t.getlocal('+12:00') # => 2000-01-01 12:00:00 +1200 * - * t = Time.utc(2000,1,1,20,15,1) #=> 2000-01-01 20:15:01 UTC - * t.utc? #=> true + * For forms of argument +zone+, see + * {Timezone Specifiers}[rdoc-ref:timezones.rdoc]. * - * l = t.getlocal #=> 2000-01-01 14:15:01 -0600 - * l.utc? #=> false - * t == l #=> true - * - * j = t.getlocal("+09:00") #=> 2000-01-02 05:15:01 +0900 - * j.utc? #=> false - * t == j #=> true - * - * k = t.getlocal(9*60*60) #=> 2000-01-02 05:15:01 +0900 - * k.utc? #=> false - * t == k #=> true */ static VALUE @@ -3948,16 +3992,18 @@ time_getlocaltime(int argc, VALUE *argv, VALUE time) /* * call-seq: - * time.getgm -> new_time - * time.getutc -> new_time + * getutc -> new_time * - * Returns a new Time object representing _time_ in UTC. + * Returns a new \Time object representing the value of +self+ + * converted to the UTC timezone: * - * t = Time.local(2000,1,1,20,15,1) #=> 2000-01-01 20:15:01 -0600 - * t.gmt? 
#=> false - * y = t.getgm #=> 2000-01-02 02:15:01 UTC - * y.gmt? #=> true - * t == y #=> true + * local = Time.local(2000) # => 2000-01-01 00:00:00 -0600 + * local.utc? # => false + * utc = local.getutc # => 2000-01-01 06:00:00 UTC + * utc.utc? # => true + * utc == local # => true + * + * Time#getgm is an alias for Time#getutc. */ static VALUE @@ -3979,13 +4025,25 @@ static VALUE strftime_cstr(const char *fmt, size_t len, VALUE time, rb_encoding /* * call-seq: - * time.asctime -> string - * time.ctime -> string + * ctime -> string + * + * Returns a string representation of +self+, + * formatted by strftime('%a %b %e %T %Y') + * or its shorthand version strftime('%c'); + * see {Formats for Dates and Times}[https://docs.ruby-lang.org/en/master/strftime_formatting_rdoc.html]: + * + * t = Time.new(2000, 12, 31, 23, 59, 59, 0.5) + * t.ctime # => "Sun Dec 31 23:59:59 2000" + * t.strftime('%a %b %e %T %Y') # => "Sun Dec 31 23:59:59 2000" + * t.strftime('%c') # => "Sun Dec 31 23:59:59 2000" * - * Returns a canonical string representation of _time_. + * Time#asctime is an alias for Time#ctime. + * + * Related: Time#to_s, Time#inspect: + * + * t.inspect # => "2000-12-31 23:59:59.5 +000001" + * t.to_s # => "2000-12-31 23:59:59 +0000" * - * Time.now.asctime #=> "Wed Apr 9 08:56:03 2003" - * Time.now.ctime #=> "Wed Apr 9 08:56:03 2003" */ static VALUE @@ -3996,17 +4054,18 @@ time_asctime(VALUE time) /* * call-seq: - * time.to_s -> string + * to_s -> string + * + * Returns a string representation of +self+, without subseconds: * - * Returns a string representing _time_. Equivalent to calling - * #strftime with the appropriate format string. 
+ * t = Time.new(2000, 12, 31, 23, 59, 59, 0.5) + * t.to_s # => "2000-12-31 23:59:59 +0000" * - * t = Time.now - * t.to_s #=> "2012-11-10 18:16:12 +0100" - * t.strftime "%Y-%m-%d %H:%M:%S %z" #=> "2012-11-10 18:16:12 +0100" + * Related: Time#ctime, Time#inspect: + * + * t.ctime # => "Sun Dec 31 23:59:59 2000" + * t.inspect # => "2000-12-31 23:59:59.5 +000001" * - * t.utc.to_s #=> "2012-11-10 17:16:12 UTC" - * t.strftime "%Y-%m-%d %H:%M:%S UTC" #=> "2012-11-10 17:16:12 UTC" */ static VALUE @@ -4023,17 +4082,18 @@ time_to_s(VALUE time) /* * call-seq: - * time.inspect -> string + * inspect -> string + * + * Returns a string representation of +self+ with subseconds: + * + * t = Time.new(2000, 12, 31, 23, 59, 59, 0.5) + * t.inspect # => "2000-12-31 23:59:59.5 +000001" * - * Returns a detailed string representing _time_. Unlike to_s, - * preserves subsecond in the representation for easier debugging. + * Related: Time#ctime, Time#to_s: * - * t = Time.now - * t.inspect #=> "2012-11-10 18:16:12.261257655 +0100" - * t.strftime "%Y-%m-%d %H:%M:%S.%N %z" #=> "2012-11-10 18:16:12.261257655 +0100" + * t.ctime # => "Sun Dec 31 23:59:59 2000" + * t.to_s # => "2000-12-31 23:59:59 +0000" * - * t.utc.inspect #=> "2012-11-10 17:16:12.261257655 UTC" - * t.strftime "%Y-%m-%d %H:%M:%S.%N UTC" #=> "2012-11-10 17:16:12.261257655 UTC" */ static VALUE @@ -5628,13 +5688,14 @@ Init_Time(void) rb_gc_register_mark_object(str_empty); rb_cTime = rb_define_class("Time", rb_cObject); + VALUE scTime = rb_singleton_class(rb_cTime); rb_include_module(rb_cTime, rb_mComparable); rb_define_alloc_func(rb_cTime, time_s_alloc); rb_define_singleton_method(rb_cTime, "utc", time_s_mkutc, -1); rb_define_singleton_method(rb_cTime, "local", time_s_mktime, -1); - rb_define_alias(rb_singleton_class(rb_cTime), "gm", "utc"); - rb_define_alias(rb_singleton_class(rb_cTime), "mktime", "local"); + rb_define_alias(scTime, "gm", "utc"); + rb_define_alias(scTime, "mktime", "local"); rb_define_method(rb_cTime, "to_i", 
time_to_i, 0); rb_define_method(rb_cTime, "to_f", time_to_f, 0); @@ -5703,7 +5764,7 @@ Init_Time(void) /* methods for marshaling */ rb_define_private_method(rb_cTime, "_dump", time_dump, -1); - rb_define_private_method(rb_singleton_class(rb_cTime), "_load", time_load, 1); + rb_define_private_method(scTime, "_load", time_load, 1); #if 0 /* Time will support marshal_dump and marshal_load in the future (1.9 maybe) */ rb_define_private_method(rb_cTime, "marshal_dump", time_mdump, 0); diff --git a/timev.rb b/timev.rb index a7e70b290f4afb..a2a8bb3cb1577d 100644 --- a/timev.rb +++ b/timev.rb @@ -1,19 +1,49 @@ -# Time is an abstraction of dates and times. Time is stored internally as -# the number of seconds with subsecond since the _Epoch_, -# 1970-01-01 00:00:00 UTC. +# A \Time object represents a date and time: # -# The Time class treats GMT -# (Greenwich Mean Time) and UTC (Coordinated Universal Time) as equivalent. -# GMT is the older way of referring to these baseline times but persists in -# the names of calls on POSIX systems. +# Time.new(2000, 1, 1, 0, 0, 0) # => 2000-01-01 00:00:00 -0600 # -# Note: A \Time object uses the resolution available on your system clock. +# Although its value can be expressed as a single numeric +# (see {Epoch Seconds}[rdoc-ref:Time@Epoch+Seconds] below), +# it can be convenient to deal with the value by parts: # -# All times may have subsecond. Be aware of this fact when comparing times -# with each other -- times that are apparently equal when displayed may be -# different when compared. -# (Since Ruby 2.7.0, Time#inspect shows subsecond but -# Time#to_s still doesn't show subsecond.) 
+# t = Time.new(-2000, 1, 1, 0, 0, 0.0) +# # => -2000-01-01 00:00:00 -0600 +# t.year # => -2000 +# t.month # => 1 +# t.mday # => 1 +# t.hour # => 0 +# t.min # => 0 +# t.sec # => 0 +# t.subsec # => 0 +# +# t = Time.new(2000, 12, 31, 23, 59, 59.5) +# # => 2000-12-31 23:59:59.5 -0600 +# t.year # => 2000 +# t.month # => 12 +# t.mday # => 31 +# t.hour # => 23 +# t.min # => 59 +# t.sec # => 59 +# t.subsec # => (1/2) +# +# == Epoch Seconds +# +# Epoch seconds is the exact number of seconds +# (including fractional subseconds) since the Unix Epoch, January 1, 1970. +# +# You can retrieve that value exactly using method Time#to_r: +# +# Time.at(0).to_r # => (0/1) +# Time.at(0.999999).to_r # => (9007190247541737/9007199254740992) +# +# Other retrieval methods such as Time#to_i and Time#to_f +# may return a value that rounds or truncates subseconds. +# +# == \Time Resolution +# +# A \Time object derived from the system clock +# (for example, by method Time.now) +# has the resolution supported by the system. # # == Examples # @@ -178,38 +208,8 @@ # - #ceil: Returns a new time with subseconds raised to a ceiling. # - #floor: Returns a new time with subseconds lowered to a floor. # -# == Timezone Argument -# -# A timezone argument must have +local_to_utc+ and +utc_to_local+ -# methods, and may have +name+, +abbr+, and +dst?+ methods. -# -# The +local_to_utc+ method should convert a Time-like object from -# the timezone to UTC, and +utc_to_local+ is the opposite. The -# result also should be a Time or Time-like object (not necessary to -# be the same class). The #zone of the result is just ignored. -# Time-like argument to these methods is similar to a Time object in -# UTC without subsecond; it has attribute readers for the parts, -# e.g. #year, #month, and so on, and epoch time readers, #to_i. The -# subsecond attributes are fixed as 0, and #utc_offset, #zone, -# #isdst, and their aliases are same as a Time object in UTC. -# Also #to_time, #+, and #- methods are defined.
-# -# The +name+ method is used for marshaling. If this method is not -# defined on a timezone object, Time objects using that timezone -# object can not be dumped by Marshal. -# -# The +abbr+ method is used by '%Z' in #strftime. -# -# The +dst?+ method is called with a +Time+ value and should return whether -# the +Time+ value is in daylight savings time in the zone. -# -# === Auto Conversion to Timezone -# -# At loading marshaled data, a timezone name will be converted to a timezone -# object by +find_timezone+ class method, if the method is defined. -# -# Similarly, that class method will be called when a timezone argument does -# not have the necessary methods mentioned above. +# For the forms of argument +zone+, see +# {Timezone Specifiers}[rdoc-ref:timezones.rdoc]. class Time # Creates a new \Time object from the current system time. # This is the same as Time.new without arguments. @@ -217,54 +217,66 @@ class Time # Time.now # => 2009-06-24 12:39:54 +0900 # Time.now(in: '+04:00') # => 2009-06-24 07:39:54 +0400 # - # Parameter: - # :include: doc/time/in.rdoc + # For forms of argument +zone+, see + # {Timezone Specifiers}[rdoc-ref:timezones.rdoc]. def self.now(in: nil) Primitive.time_s_now(Primitive.arg!(:in)) end - # _Time_ + # Returns a new \Time object based on the given arguments. + # + # Required argument +time+ may be either of: + # + # - A \Time object, whose value is the basis for the returned time; + # also influenced by optional keyword argument +in:+ (see below). + # - A numeric number of + # {Epoch seconds}[rdoc-ref:Time@Epoch+Seconds] + # for the returned time. 
+ # + # Examples: + # + # t = Time.new(2000, 12, 31, 23, 59, 59) # => 2000-12-31 23:59:59 -0600 + # secs = t.to_i # => 978328799 + # Time.at(secs) # => 2000-12-31 23:59:59 -0600 + # Time.at(secs + 0.5) # => 2000-12-31 23:59:59.5 -0600 + # Time.at(1000000000) # => 2001-09-08 20:46:40 -0500 + # Time.at(0) # => 1969-12-31 18:00:00 -0600 + # Time.at(-1000000000) # => 1938-04-24 17:13:20 -0500 # - # This form accepts a \Time object +time+ - # and optional keyword argument +in+: + # Optional numeric argument +subsec+ and optional symbol argument +units+ + # work together to specify subseconds for the returned time; + # argument +units+ specifies the units for +subsec+: # - # Time.at(Time.new) # => 2021-04-26 08:52:31.6023486 -0500 - # Time.at(Time.new, in: '+09:00') # => 2021-04-26 22:52:31.6023486 +0900 + # - +:millisecond+: +subsec+ in milliseconds: # - # _Seconds_ + # Time.at(secs, 0, :millisecond) # => 2000-12-31 23:59:59 -0600 + # Time.at(secs, 500, :millisecond) # => 2000-12-31 23:59:59.5 -0600 + # Time.at(secs, 1000, :millisecond) # => 2001-01-01 00:00:00 -0600 + # Time.at(secs, -1000, :millisecond) # => 2000-12-31 23:59:58 -0600 # - # This form accepts a numeric number of seconds +sec+ - # and optional keyword argument +in+: + # - +:microsecond+ or +:usec+: +subsec+ in microseconds: # - # Time.at(946702800) # => 1999-12-31 23:00:00 -0600 - # Time.at(946702800, in: '+09:00') # => 2000-01-01 14:00:00 +0900 + # Time.at(secs, 0, :microsecond) # => 2000-12-31 23:59:59 -0600 + # Time.at(secs, 500000, :microsecond) # => 2000-12-31 23:59:59.5 -0600 + # Time.at(secs, 1000000, :microsecond) # => 2001-01-01 00:00:00 -0600 + # Time.at(secs, -1000000, :microsecond) # => 2000-12-31 23:59:58 -0600 # - # Seconds with Subseconds and Units + # - +:nanosecond+ or +:nsec+: +subsec+ in nanoseconds: # - # This form accepts an integer number of seconds +sec_i+, - # a numeric number of milliseconds +msec+, - # a symbol argument for the subsecond unit type (defaulting to :usec), - # and 
an optional keyword argument +in+: + # Time.at(secs, 0, :nanosecond) # => 2000-12-31 23:59:59 -0600 + # Time.at(secs, 500000000, :nanosecond) # => 2000-12-31 23:59:59.5 -0600 + # Time.at(secs, 1000000000, :nanosecond) # => 2001-01-01 00:00:00 -0600 + # Time.at(secs, -1000000000, :nanosecond) # => 2000-12-31 23:59:58 -0600 # - # Time.at(946702800, 500, :millisecond) # => 1999-12-31 23:00:00.5 -0600 - # Time.at(946702800, 500, :millisecond, in: '+09:00') # => 2000-01-01 14:00:00.5 +0900 - # Time.at(946702800, 500000) # => 1999-12-31 23:00:00.5 -0600 - # Time.at(946702800, 500000, :usec) # => 1999-12-31 23:00:00.5 -0600 - # Time.at(946702800, 500000, :microsecond) # => 1999-12-31 23:00:00.5 -0600 - # Time.at(946702800, 500000, in: '+09:00') # => 2000-01-01 14:00:00.5 +0900 - # Time.at(946702800, 500000, :usec, in: '+09:00') # => 2000-01-01 14:00:00.5 +0900 - # Time.at(946702800, 500000, :microsecond, in: '+09:00') # => 2000-01-01 14:00:00.5 +0900 - # Time.at(946702800, 500000000, :nsec) # => 1999-12-31 23:00:00.5 -0600 - # Time.at(946702800, 500000000, :nanosecond) # => 1999-12-31 23:00:00.5 -0600 - # Time.at(946702800, 500000000, :nsec, in: '+09:00') # => 2000-01-01 14:00:00.5 +0900 - # Time.at(946702800, 500000000, :nanosecond, in: '+09:00') # => 2000-01-01 14:00:00.5 +0900 # - # Parameters: - # :include: doc/time/sec_i.rdoc - # :include: doc/time/msec.rdoc - # :include: doc/time/usec.rdoc - # :include: doc/time/nsec.rdoc - # :include: doc/time/in.rdoc + # Optional keyword argument <tt>in: zone</tt> specifies the timezone + # for the returned time: + # + # Time.at(secs, in: '+12:00') # => 2001-01-01 17:59:59 +1200 + # Time.at(secs, in: '-12:00') # => 2000-12-31 17:59:59 -1200 + # + # For the forms of argument +zone+, see + # {Timezone Specifiers}[rdoc-ref:timezones.rdoc]. # def self.at(time, subsec = false, unit = :microsecond, in: nil) if Primitive.mandatory_only?
@@ -274,24 +286,81 @@ def self.at(time, subsec = false, unit = :microsecond, in: nil) end end - # Returns a new \Time object based on the given arguments. + # Returns a new \Time object based on the given arguments, + # by default in the local timezone. # # With no positional arguments, returns the value of Time.now: # - # Time.new # => 2021-04-24 17:27:46.0512465 -0500 + # Time.new # => 2021-04-24 17:27:46.0512465 -0500 + # + # With one to six arguments, returns a new \Time object + # based on the given arguments, in the local timezone. + # + # Time.new(2000, 1, 2, 3, 4, 5) # => 2000-01-02 03:04:05 -0600 + # + # For the positional arguments (other than +zone+): + # + # - +year+: Year, with no range limits: + # + # Time.new(999999999) # => 999999999-01-01 00:00:00 -0600 + # Time.new(-999999999) # => -999999999-01-01 00:00:00 -0600 + # + # - +month+: Month in range (1..12), or case-insensitive + # 3-letter month name: + # + # Time.new(2000, 1) # => 2000-01-01 00:00:00 -0600 + # Time.new(2000, 12) # => 2000-12-01 00:00:00 -0600 + # Time.new(2000, 'jan') # => 2000-01-01 00:00:00 -0600 + # Time.new(2000, 'JAN') # => 2000-01-01 00:00:00 -0600 + # + # - +mday+: Month day in range(1..31): + # + # Time.new(2000, 1, 1) # => 2000-01-01 00:00:00 -0600 + # Time.new(2000, 1, 31) # => 2000-01-31 00:00:00 -0600 + # + # - +hour+: Hour in range (0..23), or 24 if +min+, +sec+, and +usec+ + # are zero: + # + # Time.new(2000, 1, 1, 0) # => 2000-01-01 00:00:00 -0600 + # Time.new(2000, 1, 1, 23) # => 2000-01-01 23:00:00 -0600 + # Time.new(2000, 1, 1, 24) # => 2000-01-02 00:00:00 -0600 + # + # - +min+: Minute in range (0..59): + # + # Time.new(2000, 1, 1, 0, 0) # => 2000-01-01 00:00:00 -0600 + # Time.new(2000, 1, 1, 0, 59) # => 2000-01-01 00:59:00 -0600 + # + # - +sec+: Second in range (0..59), or 60 if +usec+ is zero: + # + # Time.new(2000, 1, 1, 0, 0, 0) # => 2000-01-01 00:00:00 -0600 + # Time.new(2000, 1, 1, 0, 0, 59) # => 2000-01-01 00:00:59 -0600 + # Time.new(2000, 1, 1, 0, 0, 60) # 
=> 2000-01-01 00:01:00 -0600 + # + # These values may be: + # + # - Integers, as above. + # - Numerics convertible to integers: + # + # Time.new(Float(0.0), Rational(1, 1), 1.0, 0.0, 0.0, 0.0) + # # => 0000-01-01 00:00:00 -0600 # - # Otherwise, returns a new \Time object based on the given parameters: + # - \String integers: # - # Time.new(2000) # => 2000-01-01 00:00:00 -0600 - # Time.new(2000, 12, 31, 23, 59, 59.5) # => 2000-12-31 23:59:59.5 -0600 - # Time.new(2000, 12, 31, 23, 59, 59.5, '+09:00') # => 2000-12-31 23:59:59.5 +0900 + # a = %w[0 1 1 0 0 0] + # # => ["0", "1", "1", "0", "0", "0"] + # Time.new(*a) # => 0000-01-01 00:00:00 -0600 # - # Parameters: + # When positional argument +zone+ or keyword argument +in:+ is given, + # the new \Time object is in the specified timezone. + # For the forms of argument +zone+, see + # {Timezone Specifiers}[rdoc-ref:timezones.rdoc]: # - # :include: doc/time/year.rdoc - # :include: doc/time/mon-min.rdoc - # :include: doc/time/sec.rdoc - # :include: doc/time/zone_and_in.rdoc + # Time.new(2000, 1, 1, 0, 0, 0, '+12:00') + # # => 2000-01-01 00:00:00 +1200 + # Time.new(2000, 1, 1, 0, 0, 0, in: '-12:00') + # # => 2000-01-01 00:00:00 -1200 + # Time.new(in: '-12:00') + # # => 2022-08-23 08:49:26.1941467 -1200 # def initialize(year = (now = true), mon = nil, mday = nil, hour = nil, min = nil, sec = nil, zone = nil, in: nil) if zone diff --git a/tool/gem-unpack.rb b/tool/gem-unpack.rb deleted file mode 100644 index c50d47f7978213..00000000000000 --- a/tool/gem-unpack.rb +++ /dev/null @@ -1,34 +0,0 @@ -require 'fileutils' -require 'rubygems' -require 'rubygems/package' - -# This library is used by "make extract-gems" to -# unpack bundled gem files. - -def Gem.unpack(file, dir = ".") - pkg = Gem::Package.new(file) - spec = pkg.spec - target = spec.full_name - Gem.ensure_gem_subdirectories(dir) - gem_dir = File.join(dir, "gems", target) - pkg.extract_files gem_dir - spec_dir = spec.extensions.empty? ? 
"specifications" : File.join("gems", target) - File.binwrite(File.join(dir, spec_dir, "#{target}.gemspec"), spec.to_ruby) - unless spec.extensions.empty? - spec.dependencies.clear - File.binwrite(File.join(dir, spec_dir, ".bundled.#{target}.gemspec"), spec.to_ruby) - end - if spec.bindir and spec.executables - bindir = File.join(dir, "bin") - Dir.mkdir(bindir) rescue nil - spec.executables.each do |exe| - File.open(File.join(bindir, exe), "wb", 0o777) {|f| - f.print "#!ruby\n", - %[load File.realpath("../gems/#{target}/#{spec.bindir}/#{exe}", __dir__)\n] - } - end - end - FileUtils.rm_rf(Dir.glob("#{gem_dir}/.git*")) - - puts "Unpacked #{file}" -end diff --git a/tool/lib/bundled_gem.rb b/tool/lib/bundled_gem.rb new file mode 100644 index 00000000000000..38c331183d8c69 --- /dev/null +++ b/tool/lib/bundled_gem.rb @@ -0,0 +1,55 @@ +require 'fileutils' +require 'rubygems' +require 'rubygems/package' + +# This library is used by "make extract-gems" to +# unpack bundled gem files. + +module BundledGem + module_function + + def unpack(file, *rest) + pkg = Gem::Package.new(file) + prepare_test(pkg.spec, *rest) {|dir| pkg.extract_files(dir)} + puts "Unpacked #{file}" + end + + def copy(path, *rest) + path, n = File.split(path) + spec = Dir.chdir(path) {Gem::Specification.load(n)} or raise "Cannot load #{path}" + prepare_test(spec, *rest) do |dir| + FileUtils.rm_rf(dir) + files = spec.files.reject {|f| f.start_with?(".git")} + dirs = files.map {|f| File.dirname(f) if f.include?("/")}.uniq + FileUtils.mkdir_p(dirs.map {|d| d ? "#{dir}/#{d}" : dir}.sort_by {|d| d.count("/")}) + files.each do |f| + File.copy_stream(File.join(path, f), File.join(dir, f)) + end + end + puts "Copied #{path}" + end + + def prepare_test(spec, dir = ".") + target = spec.full_name + Gem.ensure_gem_subdirectories(dir) + gem_dir = File.join(dir, "gems", target) + yield gem_dir + spec_dir = spec.extensions.empty? ? 
"specifications" : File.join("gems", target) + File.binwrite(File.join(dir, spec_dir, "#{target}.gemspec"), spec.to_ruby) + unless spec.extensions.empty? + spec.dependencies.clear + File.binwrite(File.join(dir, spec_dir, ".bundled.#{target}.gemspec"), spec.to_ruby) + end + if spec.bindir and spec.executables + bindir = File.join(dir, "bin") + Dir.mkdir(bindir) rescue nil + spec.executables.each do |exe| + File.open(File.join(bindir, exe), "wb", 0o777) {|f| + f.print "#!ruby\n", + %[load File.realpath("../gems/#{target}/#{spec.bindir}/#{exe}", __dir__)\n] + } + end + end + FileUtils.rm_rf(Dir.glob("#{gem_dir}/.git*")) + end +end diff --git a/tool/lib/core_assertions.rb b/tool/lib/core_assertions.rb index 67373139caaaa9..7cd598b1abd6f2 100644 --- a/tool/lib/core_assertions.rb +++ b/tool/lib/core_assertions.rb @@ -268,7 +268,7 @@ def assert_separately(args, file = nil, line = nil, src, ignore_stderr: nil, **o src = <cfp->ep, VM_FRAME_FLAG_FINISH);\n"); // This is vm_call0_body's code after vm_call_iseq_setup fprintf(f, " val = vm_exec(ec, false);\n"); fprintf(f, " }\n"); diff --git a/tool/ruby_vm/views/mjit_compile.inc.erb b/tool/ruby_vm/views/mjit_compile.inc.erb index 5820f81770265f..0e66f78007db8f 100644 --- a/tool/ruby_vm/views/mjit_compile.inc.erb +++ b/tool/ruby_vm/views/mjit_compile.inc.erb @@ -11,7 +11,7 @@ % # This is an ERB template that generates Ruby code that generates C code that % # generates JIT-ed C code. 
<%= render 'notice', locals: { - this_file: 'is the main part of compile_insn() in mjit_compile.c', + this_file: 'is the main part of compile_insn() in mjit_compiler.c', edit: __FILE__, } -%> % diff --git a/tool/sync_default_gems.rb b/tool/sync_default_gems.rb index e3b940e2d5a20c..5415e0f2b4346d 100755 --- a/tool/sync_default_gems.rb +++ b/tool/sync_default_gems.rb @@ -73,6 +73,7 @@ pathname: "ruby/pathname", digest: "ruby/digest", error_highlight: "ruby/error_highlight", + syntax_suggest: "ruby/syntax_suggest", un: "ruby/un", win32ole: "ruby/win32ole", } @@ -228,6 +229,7 @@ def sync_default_gems(gem) `git checkout ext/etc/depend` when "date" rm_rf(%w[ext/date test/date]) + cp_r("#{upstream}/doc/date", "doc") cp_r("#{upstream}/ext/date", "ext") cp_r("#{upstream}/lib", "ext/date") cp_r("#{upstream}/test/date", "test") @@ -300,6 +302,7 @@ def sync_default_gems(gem) cp_r(Dir.glob("#{upstream}/lib/did_you_mean*"), "lib") cp_r("#{upstream}/did_you_mean.gemspec", "lib/did_you_mean") cp_r("#{upstream}/test", "test/did_you_mean") + rm_rf("test/did_you_mean/lib") rm_rf(%w[test/did_you_mean/tree_spell/test_explore.rb]) when "erb" rm_rf(%w[lib/erb* test/erb libexec/erb]) @@ -371,6 +374,11 @@ def sync_default_gems(gem) when "open3" sync_lib gem, upstream rm_rf("lib/open3/jruby_windows.rb") + when "syntax_suggest" + sync_lib gem, upstream + rm_rf(%w[spec/syntax_suggest libexec/syntax_suggest]) + cp_r("#{upstream}/spec", "spec/syntax_suggest") + cp_r("#{upstream}/exe/syntax_suggest", "libexec/syntax_suggest") else sync_lib gem, upstream end @@ -540,6 +548,12 @@ def sync_lib(repo, upstream = nil) def update_default_gems(gem, release: false) author, repository = REPOSITORIES[gem.to_sym].split('/') + default_branch = case gem + when 'syntax_suggest' + "main" + else + "master" + end puts "Update #{author}/#{repository}" @@ -565,8 +579,8 @@ def update_default_gems(gem, release: false) last_release = `git tag`.chomp.split.delete_if{|v| v =~ /pre|beta/ }.last `git checkout 
#{last_release}` else - `git checkout master` - `git rebase origin/master` + `git checkout #{default_branch}` + `git rebase origin/#{default_branch}` end end end diff --git a/tool/test/testunit/test_assertion.rb b/tool/test/testunit/test_assertion.rb index 8c83b447a7ad7d..709b4955729643 100644 --- a/tool/test/testunit/test_assertion.rb +++ b/tool/test/testunit/test_assertion.rb @@ -26,4 +26,28 @@ def test_assert_raise return_in_assert_raise end end + + def test_assert_pattern_list + assert_pattern_list([/foo?/], "foo") + assert_not_pattern_list([/foo?/], "afoo") + assert_not_pattern_list([/foo?/], "foo?") + assert_pattern_list([:*, /foo?/, :*], "foo") + assert_pattern_list([:*, /foo?/], "afoo") + assert_not_pattern_list([:*, /foo?/], "afoo?") + assert_pattern_list([/foo?/, :*], "foo?") + + assert_not_pattern_list(["foo?"], "foo") + assert_not_pattern_list(["foo?"], "afoo") + assert_pattern_list(["foo?"], "foo?") + assert_not_pattern_list([:*, "foo?", :*], "foo") + assert_not_pattern_list([:*, "foo?"], "afoo") + assert_pattern_list([:*, "foo?"], "afoo?") + assert_pattern_list(["foo?", :*], "foo?") + end + + def assert_not_pattern_list(pattern_list, actual, message=nil) + assert_raise(Test::Unit::AssertionFailedError) do + assert_pattern_list(pattern_list, actual, message) + end + end end diff --git a/tool/transform_mjit_header.rb b/tool/transform_mjit_header.rb index 8867c556f0dbaf..21802368247039 100644 --- a/tool/transform_mjit_header.rb +++ b/tool/transform_mjit_header.rb @@ -169,10 +169,6 @@ def self.windows? RUBY_PLATFORM =~ /mswin|mingw|msys/ end - def self.cl_exe?(cc) - cc =~ /\Acl(\z| |\.exe)/ - end - # If code has macro which only supported compilers predefine, return true. def self.supported_header?(code) SUPPORTED_CC_MACROS.any? { |macro| code =~ /^#\s*define\s+#{Regexp.escape(macro)}\b/ } @@ -220,13 +216,9 @@ def self.with_code(code) cc = ARGV[0] code = File.binread(ARGV[1]) # Current version of the header file. 
outfile = ARGV[2] -if MJITHeader.cl_exe?(cc) - cflags = '-DMJIT_HEADER -Zs' -else - cflags = '-S -DMJIT_HEADER -fsyntax-only -Werror=implicit-function-declaration -Werror=implicit-int -Wfatal-errors' -end +cflags = '-S -DMJIT_HEADER -fsyntax-only -Werror=implicit-function-declaration -Werror=implicit-int -Wfatal-errors' -if !MJITHeader.cl_exe?(cc) && !MJITHeader.supported_header?(code) +if !MJITHeader.supported_header?(code) puts "This compiler (#{cc}) looks not supported for MJIT. Giving up to generate MJIT header." MJITHeader.write("#error MJIT does not support '#{cc}' yet", outfile) exit @@ -234,7 +226,7 @@ def self.with_code(code) MJITHeader.remove_predefined_macros!(code) -if MJITHeader.windows? # transformation is broken with Windows headers for now +if MJITHeader.windows? # transformation is broken on Windows and the platform is not supported MJITHeader.remove_harmful_macros!(code) MJITHeader.check_code!(code, cc, cflags, 'initial') puts "\nSkipped transforming external functions to static on Windows." diff --git a/tool/update-bundled_gems.rb b/tool/update-bundled_gems.rb index 5b9c6b6974edaa..bed1cfc52bc3f1 100755 --- a/tool/update-bundled_gems.rb +++ b/tool/update-bundled_gems.rb @@ -16,5 +16,7 @@ $F[3..-1] = [] end end - $_ = [gem.name, gem.version, uri, *$F[3..-1]].join(" ") + f = [gem.name, gem.version.to_s, uri, *$F[3..-1]] + $_.gsub!(/\S+\s*/) {|s| f.shift.ljust(s.size)} + $_ = [$_, *f].join(" ") unless f.empty? 
end diff --git a/transcode.c b/transcode.c index 939e9567f9f6c6..535e436b039a87 100644 --- a/transcode.c +++ b/transcode.c @@ -47,6 +47,7 @@ static VALUE sym_xml, sym_text, sym_attr; static VALUE sym_universal_newline; static VALUE sym_crlf_newline; static VALUE sym_cr_newline; +static VALUE sym_lf_newline; #ifdef ENABLE_ECONV_NEWLINE_OPTION static VALUE sym_newline, sym_universal, sym_crlf, sym_cr, sym_lf; #endif @@ -1039,6 +1040,7 @@ decorator_names(int ecflags, const char **decorators_ret) case ECONV_UNIVERSAL_NEWLINE_DECORATOR: case ECONV_CRLF_NEWLINE_DECORATOR: case ECONV_CR_NEWLINE_DECORATOR: + case ECONV_LF_NEWLINE_DECORATOR: case 0: break; default: @@ -1062,6 +1064,8 @@ decorator_names(int ecflags, const char **decorators_ret) decorators_ret[num_decorators++] = "crlf_newline"; if (ecflags & ECONV_CR_NEWLINE_DECORATOR) decorators_ret[num_decorators++] = "cr_newline"; + if (ecflags & ECONV_LF_NEWLINE_DECORATOR) + decorators_ret[num_decorators++] = "lf_newline"; if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR) decorators_ret[num_decorators++] = "universal_newline"; @@ -1812,6 +1816,12 @@ rb_econv_asciicompat_encoding(const char *ascii_incompat_name) return data.ascii_compat_name; } +/* + * Append `len` bytes pointed by `ss` to `dst` with converting with `ec`. + * + * If the result of the conversion is not compatible with the encoding of + * `dst`, `dst` may not be valid encoding. 
+ */ VALUE rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags) { @@ -1819,11 +1829,19 @@ rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags) unsigned char *ds, *dp, *de; rb_econv_result_t res; int max_output; + enum ruby_coderange_type coderange; + rb_encoding *dst_enc = ec->destination_encoding; if (NIL_P(dst)) { dst = rb_str_buf_new(len); - if (ec->destination_encoding) - rb_enc_associate(dst, ec->destination_encoding); + if (dst_enc) { + rb_enc_associate(dst, dst_enc); + } + coderange = ENC_CODERANGE_7BIT; // scan from the start + } + else { + dst_enc = rb_enc_get(dst); + coderange = rb_enc_str_coderange(dst); } if (ec->last_tc) @@ -1832,13 +1850,13 @@ rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags) max_output = 1; do { + int cr; long dlen = RSTRING_LEN(dst); if (rb_str_capacity(dst) - dlen < (size_t)len + max_output) { unsigned long new_capa = (unsigned long)dlen + len + max_output; if (LONG_MAX < new_capa) rb_raise(rb_eArgError, "too long string"); - rb_str_resize(dst, new_capa); - rb_str_set_len(dst, dlen); + rb_str_modify_expand(dst, new_capa - dlen); } sp = (const unsigned char *)ss; se = sp + len; @@ -1846,6 +1864,18 @@ rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags) de = ds + rb_str_capacity(dst); dp = ds += dlen; res = rb_econv_convert(ec, &sp, se, &dp, de, flags); + switch (coderange) { + case ENC_CODERANGE_7BIT: + case ENC_CODERANGE_VALID: + cr = (int)coderange; + rb_str_coderange_scan_restartable((char *)ds, (char *)dp, dst_enc, &cr); + coderange = cr; + ENC_CODERANGE_SET(dst, coderange); + break; + case ENC_CODERANGE_UNKNOWN: + case ENC_CODERANGE_BROKEN: + break; + } len -= (const char *)sp - ss; ss = (const char *)sp; rb_str_set_len(dst, dlen + (dp - ds)); @@ -1956,6 +1986,9 @@ rb_econv_binmode(rb_econv_t *ec) case ECONV_CR_NEWLINE_DECORATOR: dname = "cr_newline"; break; + case ECONV_LF_NEWLINE_DECORATOR: + dname = "lf_newline"; + 
break; } if (dname) { @@ -2014,6 +2047,10 @@ econv_description(const char *sname, const char *dname, int ecflags, VALUE mesg) rb_str_cat2(mesg, pre); pre = ","; rb_str_cat2(mesg, "cr_newline"); } + if (ecflags & ECONV_LF_NEWLINE_DECORATOR) { + rb_str_cat2(mesg, pre); pre = ","; + rb_str_cat2(mesg, "lf_newline"); + } if (ecflags & ECONV_XML_TEXT_DECORATOR) { rb_str_cat2(mesg, pre); pre = ","; rb_str_cat2(mesg, "xml_text"); @@ -2489,7 +2526,7 @@ econv_opts(VALUE opt, int ecflags) ecflags |= ECONV_CR_NEWLINE_DECORATOR; } else if (v == sym_lf) { - /* ecflags |= ECONV_LF_NEWLINE_DECORATOR; */ + ecflags |= ECONV_LF_NEWLINE_DECORATOR; } else if (SYMBOL_P(v)) { rb_raise(rb_eArgError, "unexpected value for newline option: %"PRIsVALUE, @@ -2518,6 +2555,11 @@ econv_opts(VALUE opt, int ecflags) setflags |= ECONV_CR_NEWLINE_DECORATOR; newlineflag |= !NIL_P(v); + v = rb_hash_aref(opt, sym_lf_newline); + if (RTEST(v)) + setflags |= ECONV_LF_NEWLINE_DECORATOR; + newlineflag |= !NIL_P(v); + switch (newlineflag) { case 1: ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK; @@ -3255,11 +3297,13 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, * :undef => :replace # replace undefined conversion * :replace => string # replacement string ("?" or "\uFFFD" if not specified) * :newline => :universal # decorator for converting CRLF and CR to LF + * :newline => :lf # decorator for converting CRLF and CR to LF when writing * :newline => :crlf # decorator for converting LF to CRLF * :newline => :cr # decorator for converting LF to CR * :universal_newline => true # decorator for converting CRLF and CR to LF * :crlf_newline => true # decorator for converting LF to CRLF * :cr_newline => true # decorator for converting LF to CR + * :lf_newline => true # decorator for converting CRLF and CR to LF when writing * :xml => :text # escape as XML CharData. 
* :xml => :attr # escape as XML AttValue * integer form: @@ -3267,6 +3311,7 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, * Encoding::Converter::UNDEF_REPLACE * Encoding::Converter::UNDEF_HEX_CHARREF * Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR + * Encoding::Converter::LF_NEWLINE_DECORATOR * Encoding::Converter::CRLF_NEWLINE_DECORATOR * Encoding::Converter::CR_NEWLINE_DECORATOR * Encoding::Converter::XML_TEXT_DECORATOR @@ -3309,6 +3354,8 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, * Convert LF to CRLF. * [:cr_newline => true] * Convert LF to CR. + * [:lf_newline => true] + * Convert CRLF and CR to LF (when writing). * [:xml => :text] * Escape as XML CharData. * This form can be used as an HTML 4.0 #PCDATA. @@ -4411,6 +4458,7 @@ Init_transcode(void) sym_universal_newline = ID2SYM(rb_intern_const("universal_newline")); sym_crlf_newline = ID2SYM(rb_intern_const("crlf_newline")); sym_cr_newline = ID2SYM(rb_intern_const("cr_newline")); + sym_lf_newline = ID2SYM(rb_intern("lf_newline")); sym_partial_input = ID2SYM(rb_intern_const("partial_input")); #ifdef ENABLE_ECONV_NEWLINE_OPTION @@ -4507,6 +4555,12 @@ InitVM_transcode(void) */ rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECORATOR", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECORATOR)); + /* Document-const: LF_NEWLINE_DECORATOR + * + * Decorator for converting CRLF and CR to LF when writing + */ + rb_define_const(rb_cEncodingConverter, "LF_NEWLINE_DECORATOR", INT2FIX(ECONV_LF_NEWLINE_DECORATOR)); + /* Document-const: CRLF_NEWLINE_DECORATOR * * Decorator for converting LF to CRLF diff --git a/variable.c b/variable.c index a3512adc99b976..056a1000b8edee 100644 --- a/variable.c +++ b/variable.c @@ -2605,7 +2605,7 @@ autoload_load_needed(VALUE _arguments) return Qfalse; } - if (autoload_data->mutex == Qnil) { + if (NIL_P(autoload_data->mutex)) { autoload_data->mutex = rb_mutex_new(); autoload_data->fork_gen = GET_VM()->fork_gen; } diff --git a/version.c b/version.c index 
8e6d5fb0242e0e..993e4f9bbfe1cb 100644 --- a/version.c +++ b/version.c @@ -25,6 +25,32 @@ #define EXIT_SUCCESS 0 #endif +#ifdef RUBY_REVISION +# if RUBY_PATCHLEVEL == -1 +# ifndef RUBY_BRANCH_NAME +# define RUBY_BRANCH_NAME "master" +# endif +# define RUBY_REVISION_STR " "RUBY_BRANCH_NAME" "RUBY_REVISION +# else +# define RUBY_REVISION_STR " revision "RUBY_REVISION +# endif +#else +# define RUBY_REVISION "HEAD" +# define RUBY_REVISION_STR "" +#endif +#if !defined RUBY_RELEASE_DATETIME || RUBY_PATCHLEVEL != -1 +# undef RUBY_RELEASE_DATETIME +# define RUBY_RELEASE_DATETIME RUBY_RELEASE_DATE +#endif + +# define RUBY_DESCRIPTION_PRE \ + "ruby "RUBY_VERSION \ + RUBY_PATCHLEVEL_STR \ + " ("RUBY_RELEASE_DATETIME \ + RUBY_REVISION_STR")" +# define RUBY_DESCRIPTION_POST \ + " ["RUBY_PLATFORM"]" + #define PRINT(type) puts(ruby_##type) #define MKSTR(type) rb_obj_freeze(rb_usascii_str_new_static(ruby_##type, sizeof(ruby_##type)-1)) #define MKINT(name) INT2FIX(ruby_##name) @@ -49,7 +75,9 @@ const int ruby_patchlevel = RUBY_PATCHLEVEL; const char ruby_description[] = RUBY_DESCRIPTION_PRE RUBY_DESCRIPTION_POST; const char ruby_description_pre[] = RUBY_DESCRIPTION_PRE; const char ruby_description_post[] = RUBY_DESCRIPTION_POST; -const char ruby_copyright[] = RUBY_COPYRIGHT; +const char ruby_copyright[] = "ruby - Copyright (C) " + RUBY_BIRTH_YEAR_STR "-" RUBY_RELEASE_YEAR_STR " " + RUBY_AUTHOR; const char ruby_engine[] = "ruby"; // Enough space for any combination of option flags diff --git a/version.h b/version.h index 34210e6cf545f1..687deae2cf030d 100644 --- a/version.h +++ b/version.h @@ -14,8 +14,8 @@ #define RUBY_PATCHLEVEL -1 #define RUBY_RELEASE_YEAR 2022 -#define RUBY_RELEASE_MONTH 8 -#define RUBY_RELEASE_DAY 4 +#define RUBY_RELEASE_MONTH 9 +#define RUBY_RELEASE_DAY 1 #include "ruby/version.h" #include "ruby/internal/abi.h" @@ -38,18 +38,25 @@ #define RUBY_RELEASE_DAY_STR STRINGIZE(RUBY_RELEASE_DAY) #endif +#ifdef RUBY_ABI_VERSION +# define RUBY_ABI_VERSION_SUFFIX 
"+"STRINGIZE(RUBY_ABI_VERSION) +#else +# define RUBY_ABI_VERSION_SUFFIX "" +#endif #if !defined RUBY_LIB_VERSION && defined RUBY_LIB_VERSION_STYLE # if RUBY_LIB_VERSION_STYLE == 3 # define RUBY_LIB_VERSION STRINGIZE(RUBY_API_VERSION_MAJOR)"."STRINGIZE(RUBY_API_VERSION_MINOR) \ - "."STRINGIZE(RUBY_API_VERSION_TEENY)"+"STRINGIZE(RUBY_ABI_VERSION) + "."STRINGIZE(RUBY_API_VERSION_TEENY) RUBY_ABI_VERSION_SUFFIX # elif RUBY_LIB_VERSION_STYLE == 2 # define RUBY_LIB_VERSION STRINGIZE(RUBY_API_VERSION_MAJOR)"."STRINGIZE(RUBY_API_VERSION_MINOR) \ - "+"STRINGIZE(RUBY_ABI_VERSION) + RUBY_ABI_VERSION_SUFFIX # endif #endif #if RUBY_PATCHLEVEL == -1 #define RUBY_PATCHLEVEL_STR "dev" +#elif defined RUBY_ABI_VERSION +#error RUBY_ABI_VERSION is defined in non-development branch #else #define RUBY_PATCHLEVEL_STR "" #endif @@ -58,35 +65,4 @@ # include "revision.h" #endif -#ifdef RUBY_REVISION -# if RUBY_PATCHLEVEL == -1 -# ifndef RUBY_BRANCH_NAME -# define RUBY_BRANCH_NAME "master" -# endif -# define RUBY_REVISION_STR " "RUBY_BRANCH_NAME" "RUBY_REVISION -# else -# define RUBY_REVISION_STR " revision "RUBY_REVISION -# endif -#else -# define RUBY_REVISION "HEAD" -# define RUBY_REVISION_STR "" -#endif -#if !defined RUBY_RELEASE_DATETIME || RUBY_PATCHLEVEL != -1 -# undef RUBY_RELEASE_DATETIME -# define RUBY_RELEASE_DATETIME RUBY_RELEASE_DATE -#endif - -# define RUBY_DESCRIPTION_PRE \ - "ruby "RUBY_VERSION \ - RUBY_PATCHLEVEL_STR \ - " ("RUBY_RELEASE_DATETIME \ - RUBY_REVISION_STR")" -# define RUBY_DESCRIPTION_POST \ - " ["RUBY_PLATFORM"]" -# define RUBY_COPYRIGHT \ - "ruby - Copyright (C) " \ - RUBY_BIRTH_YEAR_STR"-" \ - RUBY_RELEASE_YEAR_STR" " \ - RUBY_AUTHOR - #endif /* RUBY_TOPLEVEL_VERSION_H */ diff --git a/vm.c b/vm.c index bdf86cc3df4d79..5a319bffd04c9e 100644 --- a/vm.c +++ b/vm.c @@ -377,6 +377,103 @@ extern VALUE rb_vm_invoke_bmethod(rb_execution_context_t *ec, rb_proc_t *proc, V const rb_callable_method_entry_t *me); static VALUE vm_invoke_proc(rb_execution_context_t *ec, 
rb_proc_t *proc, VALUE self, int argc, const VALUE *argv, int kw_splat, VALUE block_handler); +#if USE_MJIT +# ifdef MJIT_HEADER +NOINLINE(static COLDFUNC VALUE mjit_check_iseq(rb_execution_context_t *ec, const rb_iseq_t *iseq, struct rb_iseq_constant_body *body)); +# else +static inline VALUE mjit_check_iseq(rb_execution_context_t *ec, const rb_iseq_t *iseq, struct rb_iseq_constant_body *body); +# endif +static VALUE +mjit_check_iseq(rb_execution_context_t *ec, const rb_iseq_t *iseq, struct rb_iseq_constant_body *body) +{ + uintptr_t func_i = (uintptr_t)(body->jit_func); + ASSUME(func_i <= LAST_JIT_ISEQ_FUNC); + switch ((enum rb_mjit_iseq_func)func_i) { + case NOT_ADDED_JIT_ISEQ_FUNC: + RB_DEBUG_COUNTER_INC(mjit_exec_not_added); + if (body->total_calls == mjit_opts.min_calls) { + rb_mjit_add_iseq_to_process(iseq); + if (UNLIKELY(mjit_opts.wait)) { + return rb_mjit_wait_call(ec, body); + } + } + break; + case NOT_READY_JIT_ISEQ_FUNC: + RB_DEBUG_COUNTER_INC(mjit_exec_not_ready); + break; + case NOT_COMPILED_JIT_ISEQ_FUNC: + RB_DEBUG_COUNTER_INC(mjit_exec_not_compiled); + break; + default: // to avoid warning with LAST_JIT_ISEQ_FUNC + break; + } + return Qundef; +} + +// Try to execute the current iseq in ec. Use JIT code if it is ready. +// If it is not, add ISEQ to the compilation queue and return Qundef for MJIT. +// YJIT compiles on the thread running the iseq. +static inline VALUE +jit_exec(rb_execution_context_t *ec) +{ + const rb_iseq_t *iseq = ec->cfp->iseq; + struct rb_iseq_constant_body *body = ISEQ_BODY(iseq); + bool yjit_enabled = false; +# ifndef MJIT_HEADER + // Don't want to compile with YJIT or use code generated by YJIT + // when running inside code generated by MJIT. 
+ yjit_enabled = rb_yjit_enabled_p(); +# endif + + if (mjit_call_p || yjit_enabled) { + body->total_calls++; + } + +# ifndef MJIT_HEADER + if (yjit_enabled && !mjit_call_p && body->total_calls == rb_yjit_call_threshold()) { + // If we couldn't generate any code for this iseq, then return + // Qundef so the interpreter will handle the call. + if (!rb_yjit_compile_iseq(iseq, ec)) { + return Qundef; + } + } +# endif + + if (!(mjit_call_p || yjit_enabled)) + return Qundef; + + RB_DEBUG_COUNTER_INC(jit_exec); + + mjit_func_t func = body->jit_func; + + // YJIT tried compiling this function once before and couldn't do + // it, so return Qundef so the interpreter handles it. + if (yjit_enabled && func == 0) { + return Qundef; + } + + if (UNLIKELY((uintptr_t)func <= LAST_JIT_ISEQ_FUNC)) { +# ifdef MJIT_HEADER + RB_DEBUG_COUNTER_INC(mjit_frame_JT2VM); +# else + RB_DEBUG_COUNTER_INC(mjit_frame_VM2VM); +# endif + return mjit_check_iseq(ec, iseq, body); + } + +# ifdef MJIT_HEADER + RB_DEBUG_COUNTER_INC(mjit_frame_JT2JT); +# else + RB_DEBUG_COUNTER_INC(mjit_frame_VM2JT); +# endif + RB_DEBUG_COUNTER_INC(mjit_exec_call_func); + // Under SystemV x64 calling convention + // ec -> RDI + // cfp -> RSI + return func(ec, ec->cfp); +} +#endif + #include "vm_insnhelper.c" #ifndef MJIT_HEADER @@ -2189,8 +2286,8 @@ hook_before_rewind(rb_execution_context_t *ec, const rb_control_frame_t *cfp, void *code; // }; - If mjit_exec is already called before calling vm_exec, `mjit_enable_p` should - be FALSE to avoid calling `mjit_exec` twice. + If jit_exec is already called before calling vm_exec, `jit_enable_p` should + be FALSE to avoid calling `jit_exec` twice. 
*/ static inline VALUE @@ -2206,7 +2303,7 @@ struct rb_vm_exec_context { VALUE initial; VALUE result; enum ruby_tag_type state; - bool mjit_enable_p; + bool jit_enable_p; }; static void @@ -2235,7 +2332,7 @@ vm_exec_bottom_main(void *context) struct rb_vm_exec_context *ctx = (struct rb_vm_exec_context *)context; ctx->state = TAG_NONE; - if (!ctx->mjit_enable_p || (ctx->result = mjit_exec(ctx->ec)) == Qundef) { + if (!ctx->jit_enable_p || (ctx->result = jit_exec(ctx->ec)) == Qundef) { ctx->result = vm_exec_core(ctx->ec, ctx->initial); } vm_exec_enter_vm_loop(ctx->ec, ctx, ctx->tag, true); @@ -2250,12 +2347,12 @@ vm_exec_bottom_rescue(void *context) } VALUE -vm_exec(rb_execution_context_t *ec, bool mjit_enable_p) +vm_exec(rb_execution_context_t *ec, bool jit_enable_p) { struct rb_vm_exec_context ctx = { .ec = ec, .initial = 0, .result = Qundef, - .mjit_enable_p = mjit_enable_p, + .jit_enable_p = jit_enable_p, }; struct rb_wasm_try_catch try_catch; @@ -2277,7 +2374,7 @@ vm_exec(rb_execution_context_t *ec, bool mjit_enable_p) #else VALUE -vm_exec(rb_execution_context_t *ec, bool mjit_enable_p) +vm_exec(rb_execution_context_t *ec, bool jit_enable_p) { enum ruby_tag_type state; VALUE result = Qundef; @@ -2287,7 +2384,7 @@ vm_exec(rb_execution_context_t *ec, bool mjit_enable_p) _tag.retval = Qnil; if ((state = EC_EXEC_TAG()) == TAG_NONE) { - if (!mjit_enable_p || (result = mjit_exec(ec)) == Qundef) { + if (!jit_enable_p || (result = jit_exec(ec)) == Qundef) { result = vm_exec_core(ec, initial); } goto vm_loop_start; /* fallback to the VM */ @@ -3947,7 +4044,7 @@ Init_vm_objects(void) } /* Stub for builtin function when not building YJIT units*/ -#if !YJIT_BUILD +#if !USE_YJIT void Init_builtin_yjit(void) {} #endif @@ -4208,51 +4305,48 @@ usage_analysis_register_running(VALUE self) return RBOOL(ruby_vm_collect_usage_func_register != 0); } +static VALUE +usage_analysis_clear(VALUE self, ID usage_hash) +{ + VALUE uh; + uh = rb_const_get(self, usage_hash); + 
rb_hash_clear(uh); + + return Qtrue; +} + + /* :nodoc: */ static VALUE usage_analysis_insn_clear(VALUE self) { - ID usage_hash; - ID bigram_hash; - VALUE uh; - VALUE bh; - - CONST_ID(usage_hash, "USAGE_ANALYSIS_INSN"); - CONST_ID(bigram_hash, "USAGE_ANALYSIS_INSN_BIGRAM"); - uh = rb_const_get(rb_cRubyVM, usage_hash); - bh = rb_const_get(rb_cRubyVM, bigram_hash); - rb_hash_clear(uh); - rb_hash_clear(bh); + ID usage_hash; + ID bigram_hash; - return Qtrue; + CONST_ID(usage_hash, "USAGE_ANALYSIS_INSN"); + CONST_ID(bigram_hash, "USAGE_ANALYSIS_INSN_BIGRAM"); + usage_analysis_clear(rb_cRubyVM, usage_hash); + return usage_analysis_clear(rb_cRubyVM, bigram_hash); } /* :nodoc: */ static VALUE usage_analysis_operand_clear(VALUE self) { - ID usage_hash; - VALUE uh; - - CONST_ID(usage_hash, "USAGE_ANALYSIS_INSN"); - uh = rb_const_get(rb_cRubyVM, usage_hash); - rb_hash_clear(uh); + ID usage_hash; - return Qtrue; + CONST_ID(usage_hash, "USAGE_ANALYSIS_INSN"); + return usage_analysis_clear(self, usage_hash); } /* :nodoc: */ static VALUE usage_analysis_register_clear(VALUE self) { - ID usage_hash; - VALUE uh; + ID usage_hash; - CONST_ID(usage_hash, "USAGE_ANALYSIS_REGS"); - uh = rb_const_get(rb_cRubyVM, usage_hash); - rb_hash_clear(uh); - - return Qtrue; + CONST_ID(usage_hash, "USAGE_ANALYSIS_REGS"); + return usage_analysis_clear(self, usage_hash); } #else diff --git a/vm_backtrace.c b/vm_backtrace.c index 5bd588df127e85..2e898507dfb995 100644 --- a/vm_backtrace.c +++ b/vm_backtrace.c @@ -1176,12 +1176,14 @@ rb_vm_thread_backtrace_locations(int argc, const VALUE *argv, VALUE thval) return thread_backtrace_to_ary(argc, argv, thval, 0); } -VALUE rb_vm_backtrace(int argc, const VALUE * argv, struct rb_execution_context_struct * ec) +VALUE +rb_vm_backtrace(int argc, const VALUE * argv, struct rb_execution_context_struct * ec) { return ec_backtrace_to_ary(ec, argc, argv, 0, 0, 1); } -VALUE rb_vm_backtrace_locations(int argc, const VALUE * argv, struct rb_execution_context_struct * ec) 
+VALUE +rb_vm_backtrace_locations(int argc, const VALUE * argv, struct rb_execution_context_struct * ec) { return ec_backtrace_to_ary(ec, argc, argv, 0, 0, 0); } diff --git a/vm_core.h b/vm_core.h index 18db4e004a9c2c..4465c018512e16 100644 --- a/vm_core.h +++ b/vm_core.h @@ -478,7 +478,7 @@ struct rb_iseq_constant_body { iseq_bits_t single; } mark_bits; - char catch_except_p; /* If a frame of this ISeq may catch exception, set TRUE */ + bool catch_except_p; // If a frame of this ISeq may catch exception, set true. // If true, this ISeq is leaf *and* backtraces are not used, for example, // by rb_profile_frames. We verify only leafness on VM_CHECK_MODE though. // Note that GC allocations might use backtraces due to @@ -493,13 +493,12 @@ struct rb_iseq_constant_body { /* The following fields are MJIT related info. */ VALUE (*jit_func)(struct rb_execution_context_struct *, struct rb_control_frame_struct *); /* function pointer for loaded native code */ - long unsigned total_calls; /* number of total calls with `mjit_exec()` */ + long unsigned total_calls; /* number of total calls with `jit_exec()` */ struct rb_mjit_unit *jit_unit; #endif #if USE_YJIT // YJIT stores some data on each iseq. - // Note: Cannot use YJIT_BUILD here since yjit.h includes this header. void *yjit_payload; #endif }; diff --git a/vm_debug.h b/vm_debug.h index 59561056488001..9c7fc65f7c1f92 100644 --- a/vm_debug.h +++ b/vm_debug.h @@ -86,6 +86,15 @@ void ruby_debug_log(const char *file, int line, const char *func_name, const cha void ruby_debug_log_print(unsigned int n); bool ruby_debug_log_filter(const char *func_name, const char *file_name); +#if RBIMPL_COMPILER_IS(GCC) && defined(__OPTIMIZE__) +# define ruby_debug_log(...) \ + RB_GNUC_EXTENSION_BLOCK( \ + RBIMPL_WARNING_PUSH(); \ + RBIMPL_WARNING_IGNORED(-Wformat-zero-length); \ + ruby_debug_log(__VA_ARGS__); \ + RBIMPL_WARNING_POP()) +#endif + // convenient macro to log even if the USE_RUBY_DEBUG_LOG macro is not specified. 
// You can use this macro for temporary usage (you should not commit it). #define _RUBY_DEBUG_LOG(...) ruby_debug_log(__FILE__, __LINE__, RUBY_FUNCTION_NAME_STRING, "" __VA_ARGS__) diff --git a/vm_eval.c b/vm_eval.c index c7669cbb858e2a..db8ca455d94ec6 100644 --- a/vm_eval.c +++ b/vm_eval.c @@ -20,7 +20,7 @@ static inline VALUE vm_yield_with_cref(rb_execution_context_t *ec, int argc, con static inline VALUE vm_yield(rb_execution_context_t *ec, int argc, const VALUE *argv, int kw_splat); static inline VALUE vm_yield_with_block(rb_execution_context_t *ec, int argc, const VALUE *argv, VALUE block_handler, int kw_splat); static inline VALUE vm_yield_force_blockarg(rb_execution_context_t *ec, VALUE args); -VALUE vm_exec(rb_execution_context_t *ec, bool mjit_enable_p); +VALUE vm_exec(rb_execution_context_t *ec, bool jit_enable_p); static void vm_set_eval_stack(rb_execution_context_t * th, const rb_iseq_t *iseq, const rb_cref_t *cref, const struct rb_block *base_block); static int vm_collect_local_variables_in_heap(const VALUE *dfp, const struct local_var_list *vars); diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 2ff48d26626eeb..ab1394c7ca8c3f 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -3182,9 +3182,11 @@ ci_missing_reason(const struct rb_callinfo *ci) return stat; } +static VALUE vm_call_method_missing(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling); + static VALUE vm_call_symbol(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, - struct rb_calling_info *calling, const struct rb_callinfo *ci, VALUE symbol) + struct rb_calling_info *calling, const struct rb_callinfo *ci, VALUE symbol, int flags) { ASSUME(calling->argc >= 0); /* Also assumes CALLER_SETUP_ARG is already done. 
*/ @@ -3194,9 +3196,7 @@ vm_call_symbol(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, VALUE recv = calling->recv; VALUE klass = CLASS_OF(recv); ID mid = rb_check_id(&symbol); - int flags = VM_CALL_FCALL | - VM_CALL_OPT_SEND | - (calling->kw_splat ? VM_CALL_KW_SPLAT : 0); + flags |= VM_CALL_OPT_SEND | (calling->kw_splat ? VM_CALL_KW_SPLAT : 0); if (UNLIKELY(! mid)) { mid = idMethodMissing; @@ -3243,7 +3243,30 @@ vm_call_symbol(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, { .method_missing_reason = missing_reason }, rb_callable_method_entry_with_refinements(klass, mid, NULL)); - return vm_call_method(ec, reg_cfp, calling); + if (flags & VM_CALL_FCALL) { + return vm_call_method(ec, reg_cfp, calling); + } + + const struct rb_callcache *cc = calling->cc; + VM_ASSERT(callable_method_entry_p(vm_cc_cme(cc))); + + if (vm_cc_cme(cc) != NULL) { + switch (METHOD_ENTRY_VISI(vm_cc_cme(cc))) { + case METHOD_VISI_PUBLIC: /* likely */ + return vm_call_method_each_type(ec, reg_cfp, calling); + case METHOD_VISI_PRIVATE: + vm_cc_method_missing_reason_set(cc, MISSING_PRIVATE); + break; + case METHOD_VISI_PROTECTED: + vm_cc_method_missing_reason_set(cc, MISSING_PROTECTED); + break; + default: + VM_UNREACHABLE(vm_call_method); + } + return vm_call_method_missing(ec, reg_cfp, calling); + } + + return vm_call_method_nome(ec, reg_cfp, calling); } static VALUE @@ -3283,7 +3306,7 @@ vm_call_opt_send(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct calling->argc -= 1; DEC_SP(1); - return vm_call_symbol(ec, reg_cfp, calling, calling->ci, sym); + return vm_call_symbol(ec, reg_cfp, calling, calling->ci, sym, VM_CALL_FCALL); } } @@ -4097,7 +4120,7 @@ vm_invoke_symbol_block(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, VALUE symbol = VM_BH_TO_SYMBOL(block_handler); CALLER_SETUP_ARG(reg_cfp, calling, ci); calling->recv = TOPN(--calling->argc); - return vm_call_symbol(ec, reg_cfp, calling, ci, symbol); + return vm_call_symbol(ec, reg_cfp, calling, ci, 
symbol, 0); } } @@ -4357,6 +4380,14 @@ vm_concat_array(VALUE ary1, VALUE ary2st) return rb_ary_concat(tmp1, tmp2); } +// YJIT implementation is using the C function +// and needs to call a non-static function +VALUE +rb_vm_concat_array(VALUE ary1, VALUE ary2st) +{ + return vm_concat_array(ary1, ary2st); +} + static VALUE vm_splat_array(VALUE flag, VALUE ary) { @@ -4372,6 +4403,8 @@ vm_splat_array(VALUE flag, VALUE ary) } } +// YJIT implementation is using the C function +// and needs to call a non-static function VALUE rb_vm_splat_array(VALUE flag, VALUE ary) { @@ -4761,16 +4794,16 @@ vm_sendish( #ifdef MJIT_HEADER /* When calling ISeq which may catch an exception from JIT-ed - code, we should not call mjit_exec directly to prevent the + code, we should not call jit_exec directly to prevent the caller frame from being canceled. That's because the caller frame may have stack values in the local variables and the cancelling the caller frame will purge them. But directly - calling mjit_exec is faster... */ + calling jit_exec is faster... */ if (ISEQ_BODY(GET_ISEQ())->catch_except_p) { VM_ENV_FLAGS_SET(GET_EP(), VM_FRAME_FLAG_FINISH); return vm_exec(ec, true); } - else if ((val = mjit_exec(ec)) == Qundef) { + else if ((val = jit_exec(ec)) == Qundef) { VM_ENV_FLAGS_SET(GET_EP(), VM_FRAME_FLAG_FINISH); return vm_exec(ec, false); } @@ -4779,9 +4812,8 @@ vm_sendish( } #else /* When calling from VM, longjmp in the callee won't purge any - JIT-ed caller frames. So it's safe to directly call - mjit_exec. */ - return mjit_exec(ec); + JIT-ed caller frames. So it's safe to directly call jit_exec. */ + return jit_exec(ec); #endif } @@ -4819,7 +4851,7 @@ vm_objtostring(const rb_iseq_t *iseq, VALUE recv, CALL_DATA cd) // going to use this string for interpolation, it's fine to use the // frozen string. 
VALUE val = rb_mod_name(recv); - if (val == Qnil) { + if (NIL_P(val)) { val = rb_mod_to_s(recv); } return val; @@ -5458,7 +5490,8 @@ vm_opt_aref_with(VALUE recv, VALUE key) { if (!SPECIAL_CONST_P(recv) && RBASIC_CLASS(recv) == rb_cHash && BASIC_OP_UNREDEFINED_P(BOP_AREF, HASH_REDEFINED_OP_FLAG) && - rb_hash_compare_by_id_p(recv) == Qfalse) { + rb_hash_compare_by_id_p(recv) == Qfalse && + !FL_TEST(recv, RHASH_PROC_DEFAULT)) { return rb_hash_aref(recv, key); } else { diff --git a/win32/Makefile.sub b/win32/Makefile.sub index 9dc5ee6f515f2e..3de53d8372e0ec 100644 --- a/win32/Makefile.sub +++ b/win32/Makefile.sub @@ -4,7 +4,7 @@ SHELL = $(COMSPEC) ECHO1 = $(V:1=@:) RUNCMD = $(COMSPEC) /c MKFILES = Makefile verconf.mk -NULLCMD = type nul +NULLCMD = exit /b0. # exit ignores the rest NULL = nul CHDIR = cd PATH_SEPARATOR = ; @@ -148,7 +148,10 @@ PLATFORM_DIR = win32 arch = $(ARCH)-$(PLATFORM) sitearch = $(ARCH)-$(RT) !if !defined(ruby_version) -ruby_version = $(MAJOR).$(MINOR).0+$(ABI_VERSION) +ruby_version = $(MAJOR).$(MINOR).0 +!endif +!if defined(ABI_VERSION) +ruby_version = $(ruby_version)+$(ABI_VERSION) !endif !if !defined(RUBY_VERSION_NAME) RUBY_VERSION_NAME = $(RUBY_BASE_NAME)-$(ruby_version) @@ -316,33 +319,7 @@ CPPFLAGS = $(DEFS) $(ARCHDEFS) $(CPPFLAGS) CPPFLAGS = -DDISABLE_RUBYGEMS $(CPPFLAGS) !endif !ifndef MJIT_SUPPORT -MJIT_SUPPORT = yes -!endif -!if "$(CPPOUTFLAG)" == ">" -MJIT_HEADER_FLAGS = -!else -MJIT_HEADER_FLAGS = -P -!endif -MJIT_HEADER_SUFFIX = -MJIT_HEADER_ARCH = -MJIT_HEADER_INSTALL_DIR = include/$(RUBY_VERSION_NAME)/$(arch) -MJIT_PRECOMPILED_HEADER_NAME = rb_mjit_header-$(RUBY_PROGRAM_VERSION).pch -MJIT_PRECOMPILED_HEADER = $(MJIT_HEADER_INSTALL_DIR)/$(MJIT_PRECOMPILED_HEADER_NAME) -!ifndef MJIT_CC -MJIT_CC = $(CC) -!endif -!ifndef MJIT_OPTFLAGS -# TODO: Use only $(OPTFLAGS) for performance. It requires to modify flags for precompiled header too. -# For now, using flags used for building precompiled header to make JIT succeed. 
-MJIT_OPTFLAGS = -DMJIT_HEADER $(CFLAGS) $(XCFLAGS) $(CPPFLAGS) -!endif -!ifndef MJIT_DEBUGFLAGS -# TODO: Make this work... Another header for debug build needs to be installed first. -MJIT_DEBUGFLAGS = $(empty) $(DEBUGFLAGS) $(empty) -MJIT_DEBUGFLAGS = $(MJIT_DEBUGFLAGS: -Zi = -Z7 ) -!endif -!ifndef MJIT_LDSHARED -MJIT_LDSHARED = $(MJIT_CC) -LD +MJIT_SUPPORT = no !endif POSTLINK = @@ -624,6 +601,9 @@ $(CONFIG_H): $(MKFILES) $(srcdir)/win32/Makefile.sub $(win_srcdir)/Makefile.sub #endif !endif #define RUBY_MSVCRT_VERSION $(RT_VER) +!if defined(ABI_VERSION) +#define RUBY_ABI_VERSION $(ABI_VERSION) +!endif #define STDC_HEADERS 1 #define HAVE_SYS_TYPES_H 1 #define HAVE_SYS_STAT_H 1 @@ -912,11 +892,7 @@ $(CONFIG_H): $(MKFILES) $(srcdir)/win32/Makefile.sub $(win_srcdir)/Makefile.sub #define RUBY_COREDLL "$(RT)" #define RUBY_PLATFORM "$(arch)" #define RUBY_SITEARCH "$(sitearch)" -!if "$(MJIT_SUPPORT)" == "yes" -#define USE_MJIT 1 -!else #define USE_MJIT 0 -!endif #endif /* $(guard) */ << @@ -1332,42 +1308,6 @@ probes.h: {$(VPATH)}probes.dmyh #include "$(*F).dmyh" < $(NULL) || exit 0 - $(Q)$(RM) $(arch_hdrdir)/rb_mjit_header-*.pch - $(Q)$(RM) $(arch_hdrdir)/rb_mjit_header-*.$(OBJEXT) - -# Non-mswin environment is not using prebuilt precompiled header because upgrading compiler -# or changing compiler options may break MJIT so build (currently only by --mjit-debug though). -# -# But mswin is building precompiled header because cl.exe cannot leave macro after preprocess. -# As a workaround to use macro without installing many source files, it uses precompiled header -# without sufficient guard for a broken build. -# -# TODO: Fix the above issue by including VC version in header name, and create another header -# for --mjit-debug as well. 
-$(TIMESTAMPDIR)/$(MJIT_PRECOMPILED_HEADER_NAME:.pch=).time: probes.h vm.$(OBJEXT) - $(ECHO) building $(@F:.time=.pch) - $(Q) $(CC) -DMJIT_HEADER $(CFLAGS: -Zi = -Z7 ) $(XCFLAGS:-DRUBY_EXPORT =) -URUBY_EXPORT $(CPPFLAGS) $(srcdir)/vm.c -c -Yc \ - $(COUTFLAG)$(@F:.time=.)$(OBJEXT) -Fd$(@F:.time=.pdb) -Fp$(@F:.time=.pch).new -Z7 - $(Q) $(IFCHANGE) "--timestamp=$@" $(@F:.time=.pch) $(@F:.time=.pch).new - -$(MJIT_PRECOMPILED_HEADER_NAME): $(TIMESTAMPDIR)/$(MJIT_PRECOMPILED_HEADER_NAME:.pch=).time - -$(MJIT_PRECOMPILED_HEADER): $(MJIT_PRECOMPILED_HEADER_NAME) - $(Q) $(MAKEDIRS) $(MJIT_HEADER_INSTALL_DIR) - $(Q) $(MAKE_LINK) $(MJIT_PRECOMPILED_HEADER_NAME) $@ - $(Q) $(MAKE_LINK) $(MJIT_PRECOMPILED_HEADER_NAME:.pch=.)$(OBJEXT) $(MJIT_HEADER_INSTALL_DIR)/$(MJIT_PRECOMPILED_HEADER_NAME:.pch=.)$(OBJEXT) - $(Q) $(MAKEDIRS) $(arch_hdrdir) - $(Q) $(MAKE_LINK) $(MJIT_PRECOMPILED_HEADER_NAME) $(arch_hdrdir)/$(MJIT_PRECOMPILED_HEADER_NAME) - $(Q) $(MAKE_LINK) $(MJIT_PRECOMPILED_HEADER_NAME:.pch=.)$(OBJEXT) $(arch_hdrdir)/$(MJIT_PRECOMPILED_HEADER_NAME:.pch=.)$(OBJEXT) - INSNS = opt_sc.inc optinsn.inc optunifs.inc insns.inc insns_info.inc \ vmtc.inc vm.inc mjit_compile.inc @@ -1393,46 +1333,3 @@ loadpath: verconf.h @$(CPP) $(XCFLAGS) $(CPPFLAGS) $(srcdir)/loadpath.c | \ sed -e '1,/^const char ruby_initial_load_paths/d;/;/,$$d' \ -e '/^^ /!d;s/ *"\\\\0"$$//;s/" *"//g' - -mjit_config.h: $(MKFILES) $(srcdir)/win32/Makefile.sub $(win_srcdir)/Makefile.sub - @echo making <<$@ -#ifndef RUBY_MJIT_CONFIG_H -#define RUBY_MJIT_CONFIG_H 1 - -#define MJIT_CONFIG_ESCAPED_EQ "=" -#define MJIT_HEADER_INSTALL_DIR "/$(MJIT_HEADER_INSTALL_DIR)" -#define MJIT_MIN_HEADER_NAME "$(MJIT_MIN_HEADER_NAME)" -#define MJIT_PRECOMPILED_HEADER_NAME "$(MJIT_PRECOMPILED_HEADER_NAME)" -<> $@ - @echo /* MJIT_CC_COMMON */>> $@ - @ - @(set sep=#define MJIT_CFLAGS ) & \ - for %I in ($(RUNTIMEFLAG) $(ARCH_FLAG)) do @(call echo.%%sep%%"%%~I", \& set sep= ) >> $@ - @echo /* MJIT_CFLAGS */>> $@ - @ - @(set sep=#define 
MJIT_OPTFLAGS ) & \ - for %I in ($(MJIT_OPTFLAGS:^==" MJIT_CONFIG_ESCAPED_EQ ")) do @(call echo.%%sep%%"%%~I", \& set sep= ) >> $@ - @echo /* MJIT_OPTFLAGS */>> $@ - @ - @(set sep=#define MJIT_DEBUGFLAGS ) & \ - for %I in ($(MJIT_DEBUGFLAGS:^==" MJIT_CONFIG_ESCAPED_EQ ")) do @(call echo.%%sep%%"%%~I", \& set sep= ) >> $@ - @echo /* MJIT_DEBUGFLAGS */>> $@ - @ - @(set sep=#define MJIT_LDSHARED ) & \ - for %I in ($(MJIT_LDSHARED)) do @(call echo.%%sep%%"%%~I", \& set sep= ) >> $@ - @echo /* MJIT_LDSHARED */>> $@ - @ - @(set sep=#define MJIT_DLDFLAGS ) & \ - for %I in ($(DLDFLAGS)) do @(call echo.%%sep%%"%%~I", \& set sep= ) >> $@ - @echo /* MJIT_DLDFLAGS */>> $@ - @ - @(set sep=#define MJIT_LIBS ) & \ - for %I in ($(LIBRUBYARG_SHARED)) do @(call echo.%%sep%%"%%~I", \& set sep= ) >> $@ - @echo /* MJIT_LIBS */>> $@ - @ - @echo.>> $@ - @echo #endif /* RUBY_MJIT_CONFIG_H */>> $@ - @$(Q:@=: :) type $@ diff --git a/win32/configure.bat b/win32/configure.bat index 573f8bf0e581c3..a3df0bb4eb8092 100755 --- a/win32/configure.bat +++ b/win32/configure.bat @@ -34,10 +34,10 @@ if "%1" == "--enable-install-static-library" goto :enable-lib if "%1" == "--disable-install-static-library" goto :disable-lib if "%1" == "--enable-debug-env" goto :enable-debug-env if "%1" == "--disable-debug-env" goto :disable-debug-env +if "%1" == "--enable-devel" goto :enable-devel +if "%1" == "--disable-devel" goto :disable-devel if "%1" == "--enable-rubygems" goto :enable-rubygems if "%1" == "--disable-rubygems" goto :disable-rubygems -if "%1" == "--enable-mjit-support" goto :enable-mjit-support -if "%1" == "--disable-mjit-support" goto :disable-mjit-support if "%1" == "--extout" goto :extout if "%1" == "--path" goto :path if "%1" == "--with-baseruby" goto :baseruby @@ -143,23 +143,23 @@ goto :loop ; echo>>confargs.tmp %1 \ shift goto :loop ; -:enable-rubygems - echo>> ~tmp~.mak "USE_RUBYGEMS=yes" \ +:enable-devel + echo>> ~tmp~.mak "RUBY_DEVEL=yes" \ echo>>confargs.tmp %1 \ shift goto :loop ; 
-:disable-rubygems - echo>> ~tmp~.mak "USE_RUBYGEMS=no" \ +:disable-devel + echo>> ~tmp~.mak "RUBY_DEVEL=no" \ echo>>confargs.tmp %1 \ shift goto :loop ; -:enable-mjit-support - echo>> ~tmp~.mak "MJIT_SUPPORT=yes" \ +:enable-rubygems + echo>> ~tmp~.mak "USE_RUBYGEMS=yes" \ echo>>confargs.tmp %1 \ shift goto :loop ; -:disable-mjit-support - echo>> ~tmp~.mak "MJIT_SUPPORT=no" \ +:disable-rubygems + echo>> ~tmp~.mak "USE_RUBYGEMS=no" \ echo>>confargs.tmp %1 \ shift goto :loop ; diff --git a/win32/setup.mak b/win32/setup.mak index c84d4066eabbdc..17e321b98427a1 100644 --- a/win32/setup.mak +++ b/win32/setup.mak @@ -80,6 +80,9 @@ $(BANG)else HAVE_BASERUBY = no $(BANG)endif << +!if "$(RUBY_DEVEL)" == "yes" + RUBY_DEVEL = yes +!endif !if "$(GIT)" != "" @echo GIT = $(GIT)>> $(MAKEFILE) !endif @@ -175,6 +178,7 @@ main(void) -version-: nul verconf.mk verconf.mk: nul + @findstr /R /C:"^#define RUBY_ABI_VERSION " $(srcdir:/=\)\include\ruby\internal\abi.h > $(@) @$(CPP) -I$(srcdir) -I$(srcdir)/include <<"Creating $(@)" > $(*F).bat && cmd /c $(*F).bat > $(@) @echo off #define RUBY_REVISION 0 @@ -195,9 +199,9 @@ echo RUBY_RELEASE_DAY = %ruby_release_day:~-2% echo MAJOR = RUBY_VERSION_MAJOR echo MINOR = RUBY_VERSION_MINOR echo TEENY = RUBY_VERSION_TEENY -echo ABI_VERSION = RUBY_ABI_VERSION #if defined RUBY_PATCHLEVEL && RUBY_PATCHLEVEL < 0 -echo RUBY_DEVEL = yes +#include "$(@F)" +echo ABI_VERSION = RUBY_ABI_VERSION #endif set /a MSC_VER = _MSC_VER #if _MSC_VER >= 1920 @@ -291,7 +295,6 @@ AS = $(AS) -nologo (echo AS = $(AS:64=) -nologo) || \ (echo AS = $(AS) -nologo) ) >>$(MAKEFILE) !endif - @(for %I in (cl.exe) do @set MJIT_CC=%~$$PATH:I) && (call echo MJIT_CC = "%MJIT_CC:\=/%" -nologo>>$(MAKEFILE)) @type << >>$(MAKEFILE) $(BANG)include $$(srcdir)/win32/Makefile.sub diff --git a/win32/win32.c b/win32/win32.c index 6d3e368565c4a5..edf89be4b18305 100644 --- a/win32/win32.c +++ b/win32/win32.c @@ -5406,7 +5406,8 @@ wrename(const WCHAR *oldpath, const WCHAR *newpath) } /* License: 
Ruby's */ -int rb_w32_urename(const char *from, const char *to) +int +rb_w32_urename(const char *from, const char *to) { WCHAR *wfrom; WCHAR *wto; @@ -5425,7 +5426,8 @@ int rb_w32_urename(const char *from, const char *to) } /* License: Ruby's */ -int rb_w32_rename(const char *from, const char *to) +int +rb_w32_rename(const char *from, const char *to) { WCHAR *wfrom; WCHAR *wto; diff --git a/yjit.c b/yjit.c index 1a2f71a9599f32..a8341910706e9d 100644 --- a/yjit.c +++ b/yjit.c @@ -80,6 +80,18 @@ rb_yjit_mark_executable(void *mem_block, uint32_t mem_size) } } +// `start` is inclusive and `end` is exclusive. +void +rb_yjit_icache_invalidate(void *start, void *end) +{ + // Clear/invalidate the instruction cache. Compiles to nothing on x86_64 + // but required on ARM before running freshly written code. + // On Darwin it's the same as calling sys_icache_invalidate(). +#ifdef __GNUC__ + __builtin___clear_cache(start, end); +#endif +} + # define PTR2NUM(x) (rb_int2inum((intptr_t)(void *)(x))) // For a given raw_sample (frame), set the hash with the caller's @@ -399,6 +411,18 @@ rb_str_bytesize(VALUE str) return LONG2NUM(RSTRING_LEN(str)); } +unsigned long +rb_RSTRING_LEN(VALUE str) +{ + return RSTRING_LEN(str); +} + +char * +rb_RSTRING_PTR(VALUE str) +{ + return RSTRING_PTR(str); +} + // This is defined only as a named struct inside rb_iseq_constant_body. // By giving it a separate typedef, we make it nameable by rust-bindgen. // Bindgen's temp/anon name isn't guaranteed stable. 
@@ -454,61 +478,61 @@ rb_get_cikw_keywords_idx(const struct rb_callinfo_kwarg *cikw, int idx) } rb_method_visibility_t -rb_METHOD_ENTRY_VISI(rb_callable_method_entry_t *me) +rb_METHOD_ENTRY_VISI(const rb_callable_method_entry_t *me) { return METHOD_ENTRY_VISI(me); } rb_method_type_t -rb_get_cme_def_type(rb_callable_method_entry_t *cme) +rb_get_cme_def_type(const rb_callable_method_entry_t *cme) { return cme->def->type; } ID -rb_get_cme_def_body_attr_id(rb_callable_method_entry_t *cme) +rb_get_cme_def_body_attr_id(const rb_callable_method_entry_t *cme) { return cme->def->body.attr.id; } enum method_optimized_type -rb_get_cme_def_body_optimized_type(rb_callable_method_entry_t *cme) +rb_get_cme_def_body_optimized_type(const rb_callable_method_entry_t *cme) { return cme->def->body.optimized.type; } unsigned int -rb_get_cme_def_body_optimized_index(rb_callable_method_entry_t *cme) +rb_get_cme_def_body_optimized_index(const rb_callable_method_entry_t *cme) { return cme->def->body.optimized.index; } rb_method_cfunc_t * -rb_get_cme_def_body_cfunc(rb_callable_method_entry_t *cme) +rb_get_cme_def_body_cfunc(const rb_callable_method_entry_t *cme) { return UNALIGNED_MEMBER_PTR(cme->def, body.cfunc); } uintptr_t -rb_get_def_method_serial(rb_method_definition_t *def) +rb_get_def_method_serial(const rb_method_definition_t *def) { return def->method_serial; } ID -rb_get_def_original_id(rb_method_definition_t *def) +rb_get_def_original_id(const rb_method_definition_t *def) { return def->original_id; } int -rb_get_mct_argc(rb_method_cfunc_t *mct) +rb_get_mct_argc(const rb_method_cfunc_t *mct) { return mct->argc; } void * -rb_get_mct_func(rb_method_cfunc_t *mct) +rb_get_mct_func(const rb_method_cfunc_t *mct) { return (void*)mct->func; // this field is defined as type VALUE (*func)(ANYARGS) } @@ -519,104 +543,104 @@ rb_get_def_iseq_ptr(rb_method_definition_t *def) return def_iseq_ptr(def); } -rb_iseq_t * -rb_get_iseq_body_local_iseq(rb_iseq_t *iseq) +const rb_iseq_t * 
+rb_get_iseq_body_local_iseq(const rb_iseq_t *iseq) { return iseq->body->local_iseq; } unsigned int -rb_get_iseq_body_local_table_size(rb_iseq_t *iseq) +rb_get_iseq_body_local_table_size(const rb_iseq_t *iseq) { return iseq->body->local_table_size; } VALUE * -rb_get_iseq_body_iseq_encoded(rb_iseq_t *iseq) +rb_get_iseq_body_iseq_encoded(const rb_iseq_t *iseq) { return iseq->body->iseq_encoded; } bool -rb_get_iseq_body_builtin_inline_p(rb_iseq_t *iseq) +rb_get_iseq_body_builtin_inline_p(const rb_iseq_t *iseq) { return iseq->body->builtin_inline_p; } unsigned -rb_get_iseq_body_stack_max(rb_iseq_t *iseq) +rb_get_iseq_body_stack_max(const rb_iseq_t *iseq) { return iseq->body->stack_max; } bool -rb_get_iseq_flags_has_opt(rb_iseq_t *iseq) +rb_get_iseq_flags_has_opt(const rb_iseq_t *iseq) { return iseq->body->param.flags.has_opt; } bool -rb_get_iseq_flags_has_kw(rb_iseq_t *iseq) +rb_get_iseq_flags_has_kw(const rb_iseq_t *iseq) { return iseq->body->param.flags.has_kw; } bool -rb_get_iseq_flags_has_post(rb_iseq_t *iseq) +rb_get_iseq_flags_has_post(const rb_iseq_t *iseq) { return iseq->body->param.flags.has_post; } bool -rb_get_iseq_flags_has_kwrest(rb_iseq_t *iseq) +rb_get_iseq_flags_has_kwrest(const rb_iseq_t *iseq) { return iseq->body->param.flags.has_kwrest; } bool -rb_get_iseq_flags_has_rest(rb_iseq_t *iseq) +rb_get_iseq_flags_has_rest(const rb_iseq_t *iseq) { return iseq->body->param.flags.has_rest; } bool -rb_get_iseq_flags_has_block(rb_iseq_t *iseq) +rb_get_iseq_flags_has_block(const rb_iseq_t *iseq) { return iseq->body->param.flags.has_block; } bool -rb_get_iseq_flags_has_accepts_no_kwarg(rb_iseq_t *iseq) +rb_get_iseq_flags_has_accepts_no_kwarg(const rb_iseq_t *iseq) { return iseq->body->param.flags.accepts_no_kwarg; } const rb_seq_param_keyword_struct * -rb_get_iseq_body_param_keyword(rb_iseq_t *iseq) +rb_get_iseq_body_param_keyword(const rb_iseq_t *iseq) { return iseq->body->param.keyword; } unsigned -rb_get_iseq_body_param_size(rb_iseq_t *iseq) 
+rb_get_iseq_body_param_size(const rb_iseq_t *iseq) { return iseq->body->param.size; } int -rb_get_iseq_body_param_lead_num(rb_iseq_t *iseq) +rb_get_iseq_body_param_lead_num(const rb_iseq_t *iseq) { return iseq->body->param.lead_num; } int -rb_get_iseq_body_param_opt_num(rb_iseq_t *iseq) +rb_get_iseq_body_param_opt_num(const rb_iseq_t *iseq) { return iseq->body->param.opt_num; } const VALUE * -rb_get_iseq_body_param_opt_table(rb_iseq_t *iseq) +rb_get_iseq_body_param_opt_table(const rb_iseq_t *iseq) { return iseq->body->param.opt_table; } @@ -651,7 +675,7 @@ rb_yjit_str_simple_append(VALUE str1, VALUE str2) } struct rb_control_frame_struct * -rb_get_ec_cfp(rb_execution_context_t *ec) +rb_get_ec_cfp(const rb_execution_context_t *ec) { return ec->cfp; } @@ -785,7 +809,7 @@ rb_RSTRUCT_SET(VALUE st, int k, VALUE v) } const struct rb_callinfo * -rb_get_call_data_ci(struct rb_call_data *cd) +rb_get_call_data_ci(const struct rb_call_data *cd) { return cd->ci; } diff --git a/yjit.h b/yjit.h index cf420df251791d..c9dc52c9b670e3 100644 --- a/yjit.h +++ b/yjit.h @@ -15,24 +15,15 @@ # define YJIT_STATS RUBY_DEBUG #endif -// We generate x86 assembly -#if (defined(__x86_64__) && !defined(_WIN32)) || (defined(_WIN32) && defined(_M_AMD64)) // x64 platforms without mingw/msys -# define YJIT_SUPPORTED_P 1 -#else -# define YJIT_SUPPORTED_P 0 -#endif +#if USE_YJIT -// Is the output binary going to include YJIT? -#if USE_MJIT && USE_YJIT && YJIT_SUPPORTED_P -# define YJIT_BUILD 1 +// We generate x86 or arm64 assembly +#if defined(_WIN32) ? defined(_M_AMD64) : (defined(__x86_64__) || defined(__aarch64__)) +// x86_64 platforms without mingw/msys or x64-mswin #else -# define YJIT_BUILD 0 +# error YJIT unsupported platform #endif -#undef YJIT_SUPPORTED_P - -#if YJIT_BUILD - // Expose these as declarations since we are building YJIT. 
bool rb_yjit_enabled_p(void); unsigned rb_yjit_call_threshold(void); @@ -54,7 +45,7 @@ void rb_yjit_constant_ic_update(const rb_iseq_t *const iseq, IC ic); void rb_yjit_tracing_invalidate_all(void); #else -// !YJIT_BUILD +// !USE_YJIT // In these builds, YJIT could never be turned on. Provide dummy implementations. static inline bool rb_yjit_enabled_p(void) { return false; } @@ -76,6 +67,6 @@ static inline void rb_yjit_before_ractor_spawn(void) {} static inline void rb_yjit_constant_ic_update(const rb_iseq_t *const iseq, IC ic) {} static inline void rb_yjit_tracing_invalidate_all(void) {} -#endif // #if YJIT_BUILD +#endif // #if USE_YJIT #endif // #ifndef YJIT_H diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index f54addc7957ba3..df4083638d40bd 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -70,6 +70,9 @@ fn main() { .allowlist_function("rb_str_buf_append") .allowlist_function("rb_str_dup") + // From encindex.h + .allowlist_type("ruby_preserved_encindex") + // This struct is public to Ruby C extensions // From include/ruby/internal/core/rbasic.h .allowlist_type("RBasic") @@ -240,6 +243,7 @@ fn main() { .allowlist_var("VM_ENV_DATA_INDEX_SPECVAL") .allowlist_var("VM_ENV_DATA_INDEX_FLAGS") .allowlist_var("VM_ENV_DATA_SIZE") + .allowlist_function("rb_iseq_path") // From yjit.c .allowlist_function("rb_iseq_(get|set)_yjit_payload") @@ -265,8 +269,11 @@ fn main() { .allowlist_function("rb_yjit_for_each_iseq") .allowlist_function("rb_yjit_obj_written") .allowlist_function("rb_yjit_str_simple_append") + .allowlist_function("rb_RSTRING_PTR") + .allowlist_function("rb_RSTRING_LEN") .allowlist_function("rb_ENCODING_GET") .allowlist_function("rb_yjit_exit_locations_dict") + .allowlist_function("rb_yjit_icache_invalidate") // from vm_sync.h .allowlist_function("rb_vm_barrier") @@ -282,6 +289,7 @@ fn main() { .allowlist_function("rb_vm_insn_addr2opcode") .allowlist_function("rb_iseqw_to_iseq") .allowlist_function("rb_iseq_each") + 
.allowlist_function("rb_iseq_method_name") // From builtin.h .allowlist_type("rb_builtin_function.*") @@ -304,6 +312,62 @@ fn main() { // From include/ruby/debug.h .allowlist_function("rb_profile_frames") + // Functions used for code generation + .allowlist_function("rb_insn_name") + .allowlist_function("rb_insn_len") + .allowlist_function("rb_yarv_class_of") + .allowlist_function("rb_get_ec_cfp") + .allowlist_function("rb_get_cfp_pc") + .allowlist_function("rb_get_cfp_sp") + .allowlist_function("rb_get_cfp_self") + .allowlist_function("rb_get_cfp_ep") + .allowlist_function("rb_get_cfp_ep_level") + .allowlist_function("rb_get_cme_def_type") + .allowlist_function("rb_get_cme_def_body_attr_id") + .allowlist_function("rb_get_cme_def_body_optimized_type") + .allowlist_function("rb_get_cme_def_body_optimized_index") + .allowlist_function("rb_get_cme_def_body_cfunc") + .allowlist_function("rb_get_def_method_serial") + .allowlist_function("rb_get_def_original_id") + .allowlist_function("rb_get_mct_argc") + .allowlist_function("rb_get_mct_func") + .allowlist_function("rb_get_def_iseq_ptr") + .allowlist_function("rb_iseq_encoded_size") + .allowlist_function("rb_get_iseq_body_local_iseq") + .allowlist_function("rb_get_iseq_body_iseq_encoded") + .allowlist_function("rb_get_iseq_body_stack_max") + .allowlist_function("rb_get_iseq_flags_has_opt") + .allowlist_function("rb_get_iseq_flags_has_kw") + .allowlist_function("rb_get_iseq_flags_has_rest") + .allowlist_function("rb_get_iseq_flags_has_post") + .allowlist_function("rb_get_iseq_flags_has_kwrest") + .allowlist_function("rb_get_iseq_flags_has_block") + .allowlist_function("rb_get_iseq_flags_has_accepts_no_kwarg") + .allowlist_function("rb_get_iseq_body_local_table_size") + .allowlist_function("rb_get_iseq_body_param_keyword") + .allowlist_function("rb_get_iseq_body_param_size") + .allowlist_function("rb_get_iseq_body_param_lead_num") + .allowlist_function("rb_get_iseq_body_param_opt_num") + 
.allowlist_function("rb_get_iseq_body_param_opt_table") + .allowlist_function("rb_get_cikw_keyword_len") + .allowlist_function("rb_get_cikw_keywords_idx") + .allowlist_function("rb_get_call_data_ci") + .allowlist_function("rb_yarv_str_eql_internal") + .allowlist_function("rb_yarv_ary_entry_internal") + .allowlist_function("rb_yarv_fix_mod_fix") + .allowlist_function("rb_FL_TEST") + .allowlist_function("rb_FL_TEST_RAW") + .allowlist_function("rb_RB_TYPE_P") + .allowlist_function("rb_BASIC_OP_UNREDEFINED_P") + .allowlist_function("rb_RSTRUCT_LEN") + .allowlist_function("rb_RSTRUCT_SET") + .allowlist_function("rb_vm_ci_argc") + .allowlist_function("rb_vm_ci_mid") + .allowlist_function("rb_vm_ci_flag") + .allowlist_function("rb_vm_ci_kwarg") + .allowlist_function("rb_METHOD_ENTRY_VISI") + .allowlist_function("rb_RCLASS_ORIGIN") + // We define VALUE manually, don't import it .blocklist_type("VALUE") diff --git a/yjit/src/asm/arm64/README.md b/yjit/src/asm/arm64/README.md new file mode 100644 index 00000000000000..edae5773e8abb3 --- /dev/null +++ b/yjit/src/asm/arm64/README.md @@ -0,0 +1,16 @@ +# Arm64 + +This module is responsible for encoding YJIT operands into an appropriate Arm64 encoding. + +## Architecture + +Every instruction in the Arm64 instruction set is 32 bits wide and is represented in little-endian order. Because they're all going to be the same size, we represent each instruction by a struct that implements `From<T> for u32`, which contains the mechanism for encoding each instruction. The encoding for each instruction is shown in the documentation for the struct that ends up being created. + +In general each set of bytes inside of the struct has either a direct value (usually a `u8`/`u16`) or some kind of `enum` that can be converted directly into a `u32`. For more complicated pieces of encoding (e.g., bitmask immediates) a corresponding module under the `arg` namespace is available.
+ +## Helpful links + +* [Arm A64 Instruction Set Architecture](https://developer.arm.com/documentation/ddi0596/2021-12?lang=en) Official documentation +* [armconverter.com](https://armconverter.com/) A website that encodes Arm assembly syntax +* [hatstone](https://github.com/tenderlove/hatstone) A wrapper around the Capstone disassembler written in Ruby +* [onlinedisassembler.com](https://onlinedisassembler.com/odaweb/) A web-based disassembler diff --git a/yjit/src/asm/arm64/arg/bitmask_imm.rs b/yjit/src/asm/arm64/arg/bitmask_imm.rs new file mode 100644 index 00000000000000..54a6e6c34416c3 --- /dev/null +++ b/yjit/src/asm/arm64/arg/bitmask_imm.rs @@ -0,0 +1,210 @@ +/// Immediates used by the logical immediate instructions are not actually the +/// immediate value, but instead are encoded into a 13-bit wide mask of 3 +/// elements. This allows many more values to be represented than 13 bits would +/// normally allow, at the expense of not being able to represent every possible +/// value. +/// +/// In order for a number to be encodeable in this form, the binary +/// representation must consist of a single set of contiguous 1s. That pattern +/// must then be replicatable across all of the bits either 1, 2, 4, 8, 16, or +/// 32 times (rotated or not). +/// +/// For example, 1 (0b1), 2 (0b10), 3 (0b11), and 4 (0b100) are all valid. +/// However, 5 (0b101) is invalid, because it contains 2 sets of 1s and cannot +/// be replicated across 64 bits. +/// +/// Some more examples to illustrate the idea of replication: +/// * 0x5555555555555555 is a valid value (0b0101...) because it consists of a +/// single set of 1s which can be replicated across all of the bits 32 times. +/// * 0xf0f0f0f0f0f0f0f0 is a valid value (0b1111000011110000...) because it +/// consists of a single set of 1s which can be replicated across all of the +/// bits 8 times (rotated by 4 bits). +/// * 0x0ff00ff00ff00ff0 is a valid value (0000111111110000...) 
because it +/// consists of a single set of 1s which can be replicated across all of the +/// bits 4 times (rotated by 12 bits). +/// +/// To encode the values, there are 3 elements: +/// * n = 1 if the pattern is 64-bits wide, 0 otherwise +/// * imms = the size of the pattern, a 0, and then one less than the number of +/// sequential 1s +/// * immr = the number of right rotations to apply to the pattern to get the +/// target value +/// +pub struct BitmaskImmediate { + n: u8, + imms: u8, + immr: u8 +} + +impl TryFrom for BitmaskImmediate { + type Error = (); + + /// Attempt to convert a u64 into a BitmaskImmediate. + /// + /// The implementation here is largely based on this blog post: + /// https://dougallj.wordpress.com/2021/10/30/bit-twiddling-optimising-aarch64-logical-immediate-encoding-and-decoding/ + fn try_from(value: u64) -> Result { + if value == 0 || value == u64::MAX { + return Err(()); + } + + fn rotate_right(value: u64, rotations: u32) -> u64 { + (value >> (rotations & 0x3F)) | + (value << (rotations.wrapping_neg() & 0x3F)) + } + + let rotations = (value & (value + 1)).trailing_zeros(); + let normalized = rotate_right(value, rotations & 0x3F); + + let zeroes = normalized.leading_zeros(); + let ones = (!normalized).trailing_zeros(); + let size = zeroes + ones; + + if rotate_right(value, size & 0x3F) != value { + return Err(()); + } + + Ok(BitmaskImmediate { + n: ((size >> 6) & 1) as u8, + imms: (((size << 1).wrapping_neg() | (ones - 1)) & 0x3F) as u8, + immr: ((rotations.wrapping_neg() & (size - 1)) & 0x3F) as u8 + }) + } +} + +impl From for u32 { + /// Encode a bitmask immediate into a 32-bit value. 
+ fn from(bitmask: BitmaskImmediate) -> Self { + 0 + | ((bitmask.n as u32) << 12) + | ((bitmask.immr as u32) << 6) + | (bitmask.imms as u32) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_failures() { + vec![5, 9, 10, 11, 13, 17, 18, 19].iter().for_each(|&imm| { + assert!(BitmaskImmediate::try_from(imm).is_err()); + }); + } + + #[test] + fn test_negative() { + let bitmask: BitmaskImmediate = (-9_i64 as u64).try_into().unwrap(); + let encoded: u32 = bitmask.into(); + assert_eq!(7998, encoded); + } + + #[test] + fn test_size_2_minimum() { + let bitmask = BitmaskImmediate::try_from(0x5555555555555555); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b111100 }))); + } + + #[test] + fn test_size_2_maximum() { + let bitmask = BitmaskImmediate::try_from(0xaaaaaaaaaaaaaaaa); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000001, imms: 0b111100 }))); + } + + #[test] + fn test_size_4_minimum() { + let bitmask = BitmaskImmediate::try_from(0x1111111111111111); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b111000 }))); + } + + #[test] + fn test_size_4_rotated() { + let bitmask = BitmaskImmediate::try_from(0x6666666666666666); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000011, imms: 0b111001 }))); + } + + #[test] + fn test_size_4_maximum() { + let bitmask = BitmaskImmediate::try_from(0xeeeeeeeeeeeeeeee); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000011, imms: 0b111010 }))); + } + + #[test] + fn test_size_8_minimum() { + let bitmask = BitmaskImmediate::try_from(0x0101010101010101); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b110000 }))); + } + + #[test] + fn test_size_8_rotated() { + let bitmask = BitmaskImmediate::try_from(0x1818181818181818); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000101, imms: 0b110001 }))); + } + + #[test] + fn test_size_8_maximum() { + let bitmask = 
BitmaskImmediate::try_from(0xfefefefefefefefe); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000111, imms: 0b110110 }))); + } + + #[test] + fn test_size_16_minimum() { + let bitmask = BitmaskImmediate::try_from(0x0001000100010001); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b100000 }))); + } + + #[test] + fn test_size_16_rotated() { + let bitmask = BitmaskImmediate::try_from(0xff8fff8fff8fff8f); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b001001, imms: 0b101100 }))); + } + + #[test] + fn test_size_16_maximum() { + let bitmask = BitmaskImmediate::try_from(0xfffefffefffefffe); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b001111, imms: 0b101110 }))); + } + + #[test] + fn test_size_32_minimum() { + let bitmask = BitmaskImmediate::try_from(0x0000000100000001); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b000000 }))); + } + + #[test] + fn test_size_32_rotated() { + let bitmask = BitmaskImmediate::try_from(0x3fffff003fffff00); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b011000, imms: 0b010101 }))); + } + + #[test] + fn test_size_32_maximum() { + let bitmask = BitmaskImmediate::try_from(0xfffffffefffffffe); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b011111, imms: 0b011110 }))); + } + + #[test] + fn test_size_64_minimum() { + let bitmask = BitmaskImmediate::try_from(0x0000000000000001); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b000000, imms: 0b000000 }))); + } + + #[test] + fn test_size_64_rotated() { + let bitmask = BitmaskImmediate::try_from(0x0000001fffff0000); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b110000, imms: 0b010100 }))); + } + + #[test] + fn test_size_64_maximum() { + let bitmask = BitmaskImmediate::try_from(0xfffffffffffffffe); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b111111, imms: 0b111110 }))); + } + + #[test] + fn 
test_size_64_invalid() { + let bitmask = BitmaskImmediate::try_from(u64::MAX); + assert!(matches!(bitmask, Err(()))); + } +} diff --git a/yjit/src/asm/arm64/arg/condition.rs b/yjit/src/asm/arm64/arg/condition.rs new file mode 100644 index 00000000000000..bb9ce570c30695 --- /dev/null +++ b/yjit/src/asm/arm64/arg/condition.rs @@ -0,0 +1,52 @@ +/// Various instructions in A64 can have condition codes attached. This enum +/// includes all of the various kinds of conditions along with their respective +/// encodings. +pub struct Condition; + +impl Condition { + pub const EQ: u8 = 0b0000; // equal to + pub const NE: u8 = 0b0001; // not equal to + pub const CS: u8 = 0b0010; // carry set (alias for HS) + pub const CC: u8 = 0b0011; // carry clear (alias for LO) + pub const MI: u8 = 0b0100; // minus, negative + pub const PL: u8 = 0b0101; // positive or zero + pub const VS: u8 = 0b0110; // signed overflow + pub const VC: u8 = 0b0111; // no signed overflow + pub const HI: u8 = 0b1000; // greater than (unsigned) + pub const LS: u8 = 0b1001; // less than or equal to (unsigned) + pub const GE: u8 = 0b1010; // greater than or equal to (signed) + pub const LT: u8 = 0b1011; // less than (signed) + pub const GT: u8 = 0b1100; // greater than (signed) + pub const LE: u8 = 0b1101; // less than or equal to (signed) + pub const AL: u8 = 0b1110; // always + + pub const fn inverse(condition: u8) -> u8 { + match condition { + Condition::EQ => Condition::NE, + Condition::NE => Condition::EQ, + + Condition::CS => Condition::CC, + Condition::CC => Condition::CS, + + Condition::MI => Condition::PL, + Condition::PL => Condition::MI, + + Condition::VS => Condition::VC, + Condition::VC => Condition::VS, + + Condition::HI => Condition::LS, + Condition::LS => Condition::HI, + + Condition::LT => Condition::GE, + Condition::GE => Condition::LT, + + Condition::GT => Condition::LE, + Condition::LE => Condition::GT, + + Condition::AL => Condition::AL, + + _ => panic!("Unknown condition") + + } + } +} \ No 
newline at end of file diff --git a/yjit/src/asm/arm64/arg/mod.rs b/yjit/src/asm/arm64/arg/mod.rs new file mode 100644 index 00000000000000..9bf4a8ea1322b3 --- /dev/null +++ b/yjit/src/asm/arm64/arg/mod.rs @@ -0,0 +1,16 @@ +// This module contains various A64 instruction arguments and the logic +// necessary to encode them. + +mod bitmask_imm; +mod condition; +mod sf; +mod shifted_imm; +mod sys_reg; +mod truncate; + +pub use bitmask_imm::BitmaskImmediate; +pub use condition::Condition; +pub use sf::Sf; +pub use shifted_imm::ShiftedImmediate; +pub use sys_reg::SystemRegister; +pub use truncate::{truncate_imm, truncate_uimm}; diff --git a/yjit/src/asm/arm64/arg/sf.rs b/yjit/src/asm/arm64/arg/sf.rs new file mode 100644 index 00000000000000..c2fd33302c1ef8 --- /dev/null +++ b/yjit/src/asm/arm64/arg/sf.rs @@ -0,0 +1,19 @@ +/// This is commonly the top-most bit in the encoding of the instruction, and +/// represents whether register operands should be treated as 64-bit registers +/// or 32-bit registers. +pub enum Sf { + Sf32 = 0b0, + Sf64 = 0b1 +} + +/// A convenience function so that we can convert the number of bits of a +/// register operand directly into an Sf enum variant. +impl From for Sf { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Sf::Sf64, + 32 => Sf::Sf32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} diff --git a/yjit/src/asm/arm64/arg/shifted_imm.rs b/yjit/src/asm/arm64/arg/shifted_imm.rs new file mode 100644 index 00000000000000..5d1eeaf26dab80 --- /dev/null +++ b/yjit/src/asm/arm64/arg/shifted_imm.rs @@ -0,0 +1,75 @@ +/// How much to shift the immediate by. +pub enum Shift { + LSL0 = 0b0, // no shift + LSL12 = 0b1 // logical shift left by 12 bits +} + +/// Some instructions accept a 12-bit immediate that has an optional shift +/// attached to it. This allows encoding larger values than fit into just 12 +/// bits. We attempt to encode those here. If the values are too large we have +/// to bail out.
+pub struct ShiftedImmediate { + shift: Shift, + value: u16 +} + +impl TryFrom for ShiftedImmediate { + type Error = (); + + /// Attempt to convert a u64 into a ShiftedImmediate. + fn try_from(value: u64) -> Result { + let mut current = value; + if current < 2_u64.pow(12) { + return Ok(ShiftedImmediate { shift: Shift::LSL0, value: current as u16 }); + } + + if (current & (2_u64.pow(12) - 1) == 0) && ((current >> 12) < 2_u64.pow(12)) { + return Ok(ShiftedImmediate { shift: Shift::LSL12, value: (current >> 12) as u16 }); + } + + Err(()) + } +} + +impl From for u32 { + /// Encode a shifted immediate into a 32-bit value. + fn from(imm: ShiftedImmediate) -> Self { + 0 + | (((imm.shift as u32) & 1) << 12) + | (imm.value as u32) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_no_shift() { + let value = 256; + let result = ShiftedImmediate::try_from(value); + + assert!(matches!(result, Ok(ShiftedImmediate { shift: Shift::LSL0, value }))); + } + + #[test] + fn test_maximum_no_shift() { + let value = (1 << 12) - 1; + let result = ShiftedImmediate::try_from(value); + + assert!(matches!(result, Ok(ShiftedImmediate { shift: Shift::LSL0, value }))); + } + + #[test] + fn test_with_shift() { + let result = ShiftedImmediate::try_from(256 << 12); + + assert!(matches!(result, Ok(ShiftedImmediate { shift: Shift::LSL12, value: 256 }))); + } + + #[test] + fn test_unencodable() { + let result = ShiftedImmediate::try_from((256 << 12) + 1); + assert!(matches!(result, Err(()))); + } +} diff --git a/yjit/src/asm/arm64/arg/sys_reg.rs b/yjit/src/asm/arm64/arg/sys_reg.rs new file mode 100644 index 00000000000000..41d71920cb74cc --- /dev/null +++ b/yjit/src/asm/arm64/arg/sys_reg.rs @@ -0,0 +1,6 @@ +/// The encoded representation of an A64 system register.
+/// https://developer.arm.com/documentation/ddi0601/2022-06/AArch64-Registers/ +pub enum SystemRegister { + /// https://developer.arm.com/documentation/ddi0601/2022-06/AArch64-Registers/NZCV--Condition-Flags?lang=en + NZCV = 0b1_011_0100_0010_000 +} diff --git a/yjit/src/asm/arm64/arg/truncate.rs b/yjit/src/asm/arm64/arg/truncate.rs new file mode 100644 index 00000000000000..52f2c012cb5b72 --- /dev/null +++ b/yjit/src/asm/arm64/arg/truncate.rs @@ -0,0 +1,66 @@ +// There are many instances in AArch64 instruction encoding where you represent +// an integer value with a particular bit width that isn't a power of 2. These +// functions represent truncating those integer values down to the appropriate +// number of bits. + +/// Truncate a signed immediate to fit into a compile-time known width. It is +/// assumed before calling this function that the value fits into the correct +/// size. If it doesn't, then this function will panic. +/// +/// When the value is positive, this should effectively be a no-op since we're +/// just dropping leading zeroes. When the value is negative we should only be +/// dropping leading ones. +pub fn truncate_imm, const WIDTH: usize>(imm: T) -> u32 { + let value: i32 = imm.into(); + let masked = (value as u32) & ((1 << WIDTH) - 1); + + // Assert that we didn't drop any bits by truncating. + if value >= 0 { + assert_eq!(value as u32, masked); + } else { + assert_eq!(value as u32, masked | (u32::MAX << WIDTH)); + } + + masked +} + +/// Truncate an unsigned immediate to fit into a compile-time known width. It is +/// assumed before calling this function that the value fits into the correct +/// size. If it doesn't, then this function will panic. +/// +/// This should effectively be a no-op since we're just dropping leading zeroes. +pub fn truncate_uimm, const WIDTH: usize>(uimm: T) -> u32 { + let value: u32 = uimm.into(); + let masked = (value & ((1 << WIDTH) - 1)); + + // Assert that we didn't drop any bits by truncating. 
+ assert_eq!(value, masked); + + masked +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_truncate_imm_positive() { + let inst = truncate_imm::(5); + let result: u32 = inst.into(); + assert_eq!(0b0101, result); + } + + #[test] + fn test_truncate_imm_negative() { + let inst = truncate_imm::(-5); + let result: u32 = inst.into(); + assert_eq!(0b1011, result); + } + + #[test] + fn test_truncate_uimm() { + let inst = truncate_uimm::(5); + let result: u32 = inst.into(); + assert_eq!(0b0101, result); + } +} diff --git a/yjit/src/asm/arm64/inst/atomic.rs b/yjit/src/asm/arm64/inst/atomic.rs new file mode 100644 index 00000000000000..5ce497209ceb12 --- /dev/null +++ b/yjit/src/asm/arm64/inst/atomic.rs @@ -0,0 +1,86 @@ +/// The size of the register operands to this instruction. +enum Size { + /// Using 32-bit registers. + Size32 = 0b10, + + /// Using 64-bit registers. + Size64 = 0b11 +} + +/// A convenience function so that we can convert the number of bits of a +/// register operand directly into a Size enum variant. +impl From for Size { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Size::Size64, + 32 => Size::Size32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} + +/// The struct that represents an A64 atomic instruction that can be encoded. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 1 0 0 0 1 1 1 0 0 0 0 0 0 | +/// | size rs.............. rn.............. rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Atomic { + /// The register holding the value to be loaded. + rt: u8, + + /// The base register. + rn: u8, + + /// The register holding the data value to be operated on.
+ rs: u8, + + /// The size of the registers used in this instruction. + size: Size +} + +impl Atomic { + /// LDADDAL + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDADD--LDADDA--LDADDAL--LDADDL--Atomic-add-on-word-or-doubleword-in-memory-?lang=en + pub fn ldaddal(rs: u8, rt: u8, rn: u8, num_bits: u8) -> Self { + Self { rt, rn, rs, size: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b0100; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Atomic) -> Self { + 0 + | ((inst.size as u32) << 30) + | (0b11 << 28) + | (FAMILY << 25) + | (0b111 << 21) + | ((inst.rs as u32) << 16) + | ((inst.rn as u32) << 5) + | (inst.rt as u32) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: Atomic) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ldaddal() { + let result: u32 = Atomic::ldaddal(20, 21, 22, 64).into(); + assert_eq!(0xf8f402d5, result); + } +} diff --git a/yjit/src/asm/arm64/inst/branch.rs b/yjit/src/asm/arm64/inst/branch.rs new file mode 100644 index 00000000000000..f15ef2a9b0fc23 --- /dev/null +++ b/yjit/src/asm/arm64/inst/branch.rs @@ -0,0 +1,100 @@ +/// Which operation to perform. +enum Op { + /// Perform a BR instruction. + BR = 0b00, + + /// Perform a BLR instruction. + BLR = 0b01, + + /// Perform a RET instruction. + RET = 0b10 +} + +/// The struct that represents an A64 branch instruction that can be encoded. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 0 1 0 1 1 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 | +/// | op... rn.............. 
rm.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Branch { + /// The register holding the address to be branched to. + rn: u8, + + /// The operation to perform. + op: Op +} + +impl Branch { + /// BR + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/BR--Branch-to-Register-?lang=en + pub fn br(rn: u8) -> Self { + Self { rn, op: Op::BR } + } + + /// BLR + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/BLR--Branch-with-Link-to-Register-?lang=en + pub fn blr(rn: u8) -> Self { + Self { rn, op: Op::BLR } + } + + /// RET + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/RET--Return-from-subroutine-?lang=en + pub fn ret(rn: u8) -> Self { + Self { rn, op: Op::RET } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en +const FAMILY: u32 = 0b101; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Branch) -> Self { + 0 + | (0b11 << 30) + | (FAMILY << 26) + | (1 << 25) + | ((inst.op as u32) << 21) + | (0b11111 << 16) + | ((inst.rn as u32) << 5) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: Branch) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_br() { + let result: u32 = Branch::br(0).into(); + assert_eq!(0xd61f0000, result); + } + + #[test] + fn test_blr() { + let result: u32 = Branch::blr(0).into(); + assert_eq!(0xd63f0000, result); + } + + #[test] + fn test_ret() { + let result: u32 = Branch::ret(30).into(); + assert_eq!(0xd65f03C0, result); + } + + #[test] + fn test_ret_rn() { + let result: u32 = Branch::ret(20).into(); + assert_eq!(0xd65f0280, result); + } +} diff --git a/yjit/src/asm/arm64/inst/branch_cond.rs b/yjit/src/asm/arm64/inst/branch_cond.rs new file mode 100644 index 00000000000000..c489bacef05ac0 --- /dev/null +++ b/yjit/src/asm/arm64/inst/branch_cond.rs @@ -0,0 +1,77 @@ +use super::super::arg::{Condition, truncate_imm}; + +/// The struct that represents an A64 conditional branch instruction that can be +/// encoded. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 0 1 0 1 0 0 0 | +/// | imm19........................................................... cond....... | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct BranchCond { + /// The kind of condition to check before branching. + cond: u8, + + /// The instruction offset from this instruction to branch to. 
+ imm19: i32 +} + +impl BranchCond { + /// B.cond + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/B-cond--Branch-conditionally- + pub fn bcond(cond: u8, imm19: i32) -> Self { + Self { cond, imm19 } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en +const FAMILY: u32 = 0b101; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: BranchCond) -> Self { + 0 + | (1 << 30) + | (FAMILY << 26) + | (truncate_imm::<_, 19>(inst.imm19) << 5) + | (inst.cond as u32) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: BranchCond) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_b_eq() { + let result: u32 = BranchCond::bcond(Condition::EQ, 32).into(); + assert_eq!(0x54000400, result); + } + + #[test] + fn test_b_vs() { + let result: u32 = BranchCond::bcond(Condition::VS, 32).into(); + assert_eq!(0x54000406, result); + } + + #[test] + fn test_b_eq_max() { + let result: u32 = BranchCond::bcond(Condition::EQ, (1 << 18) - 1).into(); + assert_eq!(0x547fffe0, result); + } + + #[test] + fn test_b_eq_min() { + let result: u32 = BranchCond::bcond(Condition::EQ, -(1 << 18)).into(); + assert_eq!(0x54800000, result); + } +} diff --git a/yjit/src/asm/arm64/inst/breakpoint.rs b/yjit/src/asm/arm64/inst/breakpoint.rs new file mode 100644 index 00000000000000..be4920ac7656ed --- /dev/null +++ b/yjit/src/asm/arm64/inst/breakpoint.rs @@ -0,0 +1,55 @@ +/// The struct that represents an A64 breakpoint instruction that can be encoded. 
+/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 | +/// | imm16.................................................. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Breakpoint { + /// The value to be captured by ESR_ELx.ISS + imm16: u16 +} + +impl Breakpoint { + /// BRK + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/BRK--Breakpoint-instruction- + pub fn brk(imm16: u16) -> Self { + Self { imm16 } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en#control +const FAMILY: u32 = 0b101; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Breakpoint) -> Self { + let imm16 = inst.imm16 as u32; + + 0 + | (0b11 << 30) + | (FAMILY << 26) + | (1 << 21) + | (imm16 << 5) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: Breakpoint) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_brk() { + let result: u32 = Breakpoint::brk(7).into(); + assert_eq!(0xd42000e0, result); + } +} diff --git a/yjit/src/asm/arm64/inst/call.rs b/yjit/src/asm/arm64/inst/call.rs new file mode 100644 index 00000000000000..32d924f799186f --- /dev/null +++ b/yjit/src/asm/arm64/inst/call.rs @@ -0,0 +1,104 @@ +use super::super::arg::truncate_imm; + +/// The operation to perform for this instruction. +enum Op { + /// Branch directly, with a hint that this is not a subroutine call or + /// return. + Branch = 0, + + /// Branch directly, with a hint that this is a subroutine call or return. 
+ BranchWithLink = 1 +} + +/// The struct that represents an A64 branch with or without link instruction +/// that can be encoded. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 0 1 0 1 | +/// | op imm26.................................................................................... | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Call { + /// The PC-relative offset to jump to (which will be multiplied by 4). + imm26: i32, + + /// The operation to perform for this instruction. + op: Op +} + +impl Call { + /// B + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B--Branch- + pub fn b(imm26: i32) -> Self { + Self { imm26, op: Op::Branch } + } + + /// BL + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-?lang=en + pub fn bl(imm26: i32) -> Self { + Self { imm26, op: Op::BranchWithLink } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en +const FAMILY: u32 = 0b101; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Call) -> Self { + 0 + | ((inst.op as u32) << 31) + | (FAMILY << 26) + | truncate_imm::<_, 26>(inst.imm26) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: Call) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_bl() { + let result: u32 = Call::bl(0).into(); + assert_eq!(0x94000000, result); + } + + #[test] + fn test_bl_positive() { + let result: u32 = Call::bl(256).into(); + assert_eq!(0x94000100, result); + } + + #[test] + fn test_bl_negative() { + let result: u32 = Call::bl(-256).into(); + assert_eq!(0x97ffff00, result); + } + + #[test] + fn test_b() { + let result: u32 = Call::b(0).into(); + assert_eq!(0x14000000, result); + } + + #[test] + fn test_b_positive() { + let result: u32 = Call::b((1 << 25) - 1).into(); + assert_eq!(0x15ffffff, result); + } + + #[test] + fn test_b_negative() { + let result: u32 = Call::b(-(1 << 25)).into(); + assert_eq!(0x16000000, result); + } +} diff --git a/yjit/src/asm/arm64/inst/conditional.rs b/yjit/src/asm/arm64/inst/conditional.rs new file mode 100644 index 00000000000000..e1950e95b428d3 --- /dev/null +++ b/yjit/src/asm/arm64/inst/conditional.rs @@ -0,0 +1,73 @@ +use super::super::arg::Sf; + +/// The struct that represents an A64 conditional instruction that can be +/// encoded. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 0 1 1 0 1 0 1 0 0 0 0 | +/// | sf rm.............. cond....... rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Conditional { + /// The number of the general-purpose destination register. + rd: u8, + + /// The number of the first general-purpose source register. + rn: u8, + + /// The condition to use for the conditional instruction. + cond: u8, + + /// The number of the second general-purpose source register. 
+ rm: u8, + + /// The size of the registers of this instruction. + sf: Sf +} + +impl Conditional { + /// CSEL + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CSEL--Conditional-Select-?lang=en + pub fn csel(rd: u8, rn: u8, rm: u8, cond: u8, num_bits: u8) -> Self { + Self { rd, rn, cond, rm, sf: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en#condsel +const FAMILY: u32 = 0b101; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Conditional) -> Self { + 0 + | ((inst.sf as u32) << 31) + | (1 << 28) + | (FAMILY << 25) + | (1 << 23) + | ((inst.rm as u32) << 16) + | ((inst.cond as u32) << 12) + | ((inst.rn as u32) << 5) + | (inst.rd as u32) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: Conditional) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use super::super::super::arg::Condition; + + #[test] + fn test_csel() { + let result: u32 = Conditional::csel(0, 1, 2, Condition::NE, 64).into(); + assert_eq!(0x9a821020, result); + } +} diff --git a/yjit/src/asm/arm64/inst/data_imm.rs b/yjit/src/asm/arm64/inst/data_imm.rs new file mode 100644 index 00000000000000..b474b00a527d10 --- /dev/null +++ b/yjit/src/asm/arm64/inst/data_imm.rs @@ -0,0 +1,143 @@ +use super::super::arg::{Sf, ShiftedImmediate}; + +/// The operation being performed by this instruction. +enum Op { + Add = 0b0, + Sub = 0b1 +} + +// Whether or not to update the flags when this instruction is performed. +enum S { + LeaveFlags = 0b0, + UpdateFlags = 0b1 +} + +/// The struct that represents an A64 data processing -- immediate instruction +/// that can be encoded. 
+/// +/// Add/subtract (immediate) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 0 0 0 1 0 | +/// | sf op S sh imm12.................................... rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct DataImm { + /// The register number of the destination register. + rd: u8, + + /// The register number of the first operand register. + rn: u8, + + /// How much to shift the immediate by. + imm: ShiftedImmediate, + + /// Whether or not to update the flags when this instruction is performed. + s: S, + + /// The opcode for this instruction. + op: Op, + + /// Whether or not this instruction is operating on 64-bit operands. + sf: Sf +} + +impl DataImm { + /// ADD (immediate) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--immediate---Add--immediate--?lang=en + pub fn add(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, s: S::LeaveFlags, op: Op::Add, sf: num_bits.into() } + } + + /// ADDS (immediate, set flags) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADDS--immediate---Add--immediate---setting-flags-?lang=en + pub fn adds(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, s: S::UpdateFlags, op: Op::Add, sf: num_bits.into() } + } + + /// CMP (immediate) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CMP--immediate---Compare--immediate---an-alias-of-SUBS--immediate--?lang=en + pub fn cmp(rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self { + Self::subs(31, rn, imm, num_bits) + } + + /// SUB (immediate) + /// 
https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUB--immediate---Subtract--immediate--?lang=en + pub fn sub(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, s: S::LeaveFlags, op: Op::Sub, sf: num_bits.into() } + } + + /// SUBS (immediate, set flags) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUBS--immediate---Subtract--immediate---setting-flags-?lang=en + pub fn subs(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, s: S::UpdateFlags, op: Op::Sub, sf: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en +const FAMILY: u32 = 0b1000; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: DataImm) -> Self { + let imm: u32 = inst.imm.into(); + + 0 + | ((inst.sf as u32) << 31) + | ((inst.op as u32) << 30) + | ((inst.s as u32) << 29) + | (FAMILY << 25) + | (1 << 24) + | (imm << 10) + | ((inst.rn as u32) << 5) + | inst.rd as u32 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: DataImm) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_add() { + let inst = DataImm::add(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0x91001c20, result); + } + + #[test] + fn test_adds() { + let inst = DataImm::adds(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xb1001c20, result); + } + + #[test] + fn test_cmp() { + let inst = DataImm::cmp(0, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xf1001c1f, result); + } + + #[test] + fn test_sub() { + let inst = DataImm::sub(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xd1001c20, result); + } + + #[test] + fn test_subs() { + let inst = DataImm::subs(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xf1001c20, result); + } +} diff --git a/yjit/src/asm/arm64/inst/data_reg.rs b/yjit/src/asm/arm64/inst/data_reg.rs new file mode 100644 index 00000000000000..a742121f1fd172 --- /dev/null +++ b/yjit/src/asm/arm64/inst/data_reg.rs @@ -0,0 +1,192 @@ +use super::super::arg::{Sf, truncate_uimm}; + +/// The operation being performed by this instruction. +enum Op { + Add = 0b0, + Sub = 0b1 +} + +// Whether or not to update the flags when this instruction is performed. +enum S { + LeaveFlags = 0b0, + UpdateFlags = 0b1 +} + +/// The type of shift to perform on the second operand register. +enum Shift { + LSL = 0b00, // logical shift left (unsigned) + LSR = 0b01, // logical shift right (unsigned) + ASR = 0b10 // arithmetic shift right (signed) +} + +/// The struct that represents an A64 data processing -- register instruction +/// that can be encoded. 
+/// +/// Add/subtract (shifted register) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 0 1 1 0 | +/// | sf op S shift rm.............. imm6............... rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct DataReg { + /// The register number of the destination register. + rd: u8, + + /// The register number of the first operand register. + rn: u8, + + /// The amount to shift the second operand register by. + imm6: u8, + + /// The register number of the second operand register. + rm: u8, + + /// The type of shift to perform on the second operand register. + shift: Shift, + + /// Whether or not to update the flags when this instruction is performed. + s: S, + + /// The opcode for this instruction. + op: Op, + + /// Whether or not this instruction is operating on 64-bit operands. 
+ sf: Sf +} + +impl DataReg { + /// ADD (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--shifted-register---Add--shifted-register--?lang=en + pub fn add(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { + rd, + rn, + imm6: 0, + rm, + shift: Shift::LSL, + s: S::LeaveFlags, + op: Op::Add, + sf: num_bits.into() + } + } + + /// ADDS (shifted register, set flags) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADDS--shifted-register---Add--shifted-register---setting-flags-?lang=en + pub fn adds(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { + rd, + rn, + imm6: 0, + rm, + shift: Shift::LSL, + s: S::UpdateFlags, + op: Op::Add, + sf: num_bits.into() + } + } + + /// CMP (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CMP--shifted-register---Compare--shifted-register---an-alias-of-SUBS--shifted-register--?lang=en + pub fn cmp(rn: u8, rm: u8, num_bits: u8) -> Self { + Self::subs(31, rn, rm, num_bits) + } + + /// SUB (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUB--shifted-register---Subtract--shifted-register--?lang=en + pub fn sub(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { + rd, + rn, + imm6: 0, + rm, + shift: Shift::LSL, + s: S::LeaveFlags, + op: Op::Sub, + sf: num_bits.into() + } + } + + /// SUBS (shifted register, set flags) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUBS--shifted-register---Subtract--shifted-register---setting-flags-?lang=en + pub fn subs(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { + rd, + rn, + imm6: 0, + rm, + shift: Shift::LSL, + s: S::UpdateFlags, + op: Op::Sub, + sf: num_bits.into() + } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en +const FAMILY: u32 = 0b0101; + +impl From for u32 { + /// Convert an 
instruction into a 32-bit value. + fn from(inst: DataReg) -> Self { + 0 + | ((inst.sf as u32) << 31) + | ((inst.op as u32) << 30) + | ((inst.s as u32) << 29) + | (FAMILY << 25) + | (1 << 24) + | ((inst.shift as u32) << 22) + | ((inst.rm as u32) << 16) + | (truncate_uimm::<_, 6>(inst.imm6) << 10) + | ((inst.rn as u32) << 5) + | inst.rd as u32 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: DataReg) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_add() { + let inst = DataReg::add(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0x8b020020, result); + } + + #[test] + fn test_adds() { + let inst = DataReg::adds(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xab020020, result); + } + + #[test] + fn test_cmp() { + let inst = DataReg::cmp(0, 1, 64); + let result: u32 = inst.into(); + assert_eq!(0xeb01001f, result); + } + + #[test] + fn test_sub() { + let inst = DataReg::sub(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xcb020020, result); + } + + #[test] + fn test_subs() { + let inst = DataReg::subs(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xeb020020, result); + } +} diff --git a/yjit/src/asm/arm64/inst/halfword_imm.rs b/yjit/src/asm/arm64/inst/halfword_imm.rs new file mode 100644 index 00000000000000..c31d1f8945c120 --- /dev/null +++ b/yjit/src/asm/arm64/inst/halfword_imm.rs @@ -0,0 +1,179 @@ +use super::super::arg::truncate_imm; + +/// Whether this is a load or a store. +enum Op { + Load = 1, + Store = 0 +} + +/// The type of indexing to perform for this instruction. +enum Index { + /// No indexing. + None = 0b00, + + /// Mutate the register after the read. + PostIndex = 0b01, + + /// Mutate the register before the read. + PreIndex = 0b11 +} + +/// The struct that represents an A64 halfword instruction that can be encoded. 
+/// +/// LDRH/STRH +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 1 1 1 0 0 1 0 | +/// | op imm12.................................... rn.............. rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +/// LDRH (pre-index/post-index) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 1 1 1 0 0 0 0 0 | +/// | op imm9.......................... index rn.............. rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct HalfwordImm { + /// The number of the 32-bit register to be loaded. + rt: u8, + + /// The number of the 64-bit base register to calculate the memory address. + rn: u8, + + /// The type of indexing to perform for this instruction. + index: Index, + + /// The immediate offset from the base register. + imm: i16, + + /// The operation to perform. 
+ op: Op +} + +impl HalfwordImm { + /// LDRH + /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate-- + pub fn ldrh(rt: u8, rn: u8, imm12: i16) -> Self { + Self { rt, rn, index: Index::None, imm: imm12, op: Op::Load } + } + + /// LDRH (pre-index) + /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate-- + pub fn ldrh_pre(rt: u8, rn: u8, imm9: i16) -> Self { + Self { rt, rn, index: Index::PreIndex, imm: imm9, op: Op::Load } + } + + /// LDRH (post-index) + /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate-- + pub fn ldrh_post(rt: u8, rn: u8, imm9: i16) -> Self { + Self { rt, rn, index: Index::PostIndex, imm: imm9, op: Op::Load } + } + + /// STRH + /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate-- + pub fn strh(rt: u8, rn: u8, imm12: i16) -> Self { + Self { rt, rn, index: Index::None, imm: imm12, op: Op::Store } + } + + /// STRH (pre-index) + /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate-- + pub fn strh_pre(rt: u8, rn: u8, imm9: i16) -> Self { + Self { rt, rn, index: Index::PreIndex, imm: imm9, op: Op::Store } + } + + /// STRH (post-index) + /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate-- + pub fn strh_post(rt: u8, rn: u8, imm9: i16) -> Self { + Self { rt, rn, index: Index::PostIndex, imm: imm9, op: Op::Store } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b111100; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. 
+ fn from(inst: HalfwordImm) -> Self { + let (mut opc, imm) = match inst.index { + Index::None => { + assert_eq!(inst.imm & 1, 0, "immediate offset must be even"); + let imm12 = truncate_imm::<_, 12>(inst.imm / 2); + (0b100, imm12) + }, + Index::PreIndex | Index::PostIndex => { + let imm9 = truncate_imm::<_, 9>(inst.imm); + (0b000, (imm9 << 2) | (inst.index as u32)) + } + }; + + 0 + | (FAMILY << 25) + | ((opc | (inst.op as u32)) << 22) + | (imm << 10) + | ((inst.rn as u32) << 5) + | (inst.rt as u32) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: HalfwordImm) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ldrh() { + let inst = HalfwordImm::ldrh(0, 1, 8); + let result: u32 = inst.into(); + assert_eq!(0x79401020, result); + } + + #[test] + fn test_ldrh_pre() { + let inst = HalfwordImm::ldrh_pre(0, 1, 16); + let result: u32 = inst.into(); + assert_eq!(0x78410c20, result); + } + + #[test] + fn test_ldrh_post() { + let inst = HalfwordImm::ldrh_post(0, 1, 24); + let result: u32 = inst.into(); + assert_eq!(0x78418420, result); + } + + #[test] + fn test_ldrh_post_negative() { + let inst = HalfwordImm::ldrh_post(0, 1, -24); + let result: u32 = inst.into(); + assert_eq!(0x785e8420, result); + } + + #[test] + fn test_strh() { + let inst = HalfwordImm::strh(0, 1, 0); + let result: u32 = inst.into(); + assert_eq!(0x79000020, result); + } + + #[test] + fn test_strh_pre() { + let inst = HalfwordImm::strh_pre(0, 1, 0); + let result: u32 = inst.into(); + assert_eq!(0x78000c20, result); + } + + #[test] + fn test_strh_post() { + let inst = HalfwordImm::strh_post(0, 1, 0); + let result: u32 = inst.into(); + assert_eq!(0x78000420, result); + } +} diff --git a/yjit/src/asm/arm64/inst/load_literal.rs b/yjit/src/asm/arm64/inst/load_literal.rs new file mode 100644 index 00000000000000..c5ab09713c0ea1 --- /dev/null +++ 
b/yjit/src/asm/arm64/inst/load_literal.rs @@ -0,0 +1,89 @@ +use super::super::arg::truncate_imm; + +/// The size of the operands being operated on. +enum Opc { + Size32 = 0b00, + Size64 = 0b01, +} + +/// A convenience function so that we can convert the number of bits of a +/// register operand directly into an Opc enum variant. +impl From for Opc { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Opc::Size64, + 32 => Opc::Size32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} + +/// The struct that represents an A64 load literal instruction that can be encoded. +/// +/// LDR +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 1 0 0 0 | +/// | opc.. imm19........................................................... rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct LoadLiteral { + /// The number of the register to load the value into. + rt: u8, + + /// The PC-relative number of instructions to load the value from. + imm19: i32, + + /// The size of the operands being operated on. + opc: Opc +} + +impl LoadLiteral { + /// LDR (load literal) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--literal---Load-Register--literal--?lang=en + pub fn ldr_literal(rt: u8, imm19: i32, num_bits: u8) -> Self { + Self { rt, imm19, opc: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b0100; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. 
+ fn from(inst: LoadLiteral) -> Self { + 0 + | ((inst.opc as u32) << 30) + | (1 << 28) + | (FAMILY << 25) + | (truncate_imm::<_, 19>(inst.imm19) << 5) + | (inst.rt as u32) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: LoadLiteral) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ldr_positive() { + let inst = LoadLiteral::ldr_literal(0, 5, 64); + let result: u32 = inst.into(); + assert_eq!(0x580000a0, result); + } + + #[test] + fn test_ldr_negative() { + let inst = LoadLiteral::ldr_literal(0, -5, 64); + let result: u32 = inst.into(); + assert_eq!(0x58ffff60, result); + } +} diff --git a/yjit/src/asm/arm64/inst/load_register.rs b/yjit/src/asm/arm64/inst/load_register.rs new file mode 100644 index 00000000000000..3426b9ba5f9be0 --- /dev/null +++ b/yjit/src/asm/arm64/inst/load_register.rs @@ -0,0 +1,108 @@ +/// Whether or not to shift the register. +enum S { + Shift = 1, + NoShift = 0 +} + +/// The option for this instruction. +enum Option { + UXTW = 0b010, + LSL = 0b011, + SXTW = 0b110, + SXTX = 0b111 +} + +/// The size of the operands of this instruction. +enum Size { + Size32 = 0b10, + Size64 = 0b11 +} + +/// A convenience function so that we can convert the number of bits of an +/// register operand directly into a Size enum variant. +impl From for Size { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Size::Size64, + 32 => Size::Size32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} + +/// The struct that represents an A64 load instruction that can be encoded. +/// +/// LDR +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 1 0 0 0 0 1 1 1 0 | +/// | size. rm.............. option.. 
S rn.............. rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct LoadRegister { + /// The number of the register to load the value into. + rt: u8, + + /// The base register with which to form the address. + rn: u8, + + /// Whether or not to shift the value of the register. + s: S, + + /// The option associated with this instruction that controls the shift. + option: Option, + + /// The number of the offset register. + rm: u8, + + /// The size of the operands. + size: Size +} + +impl LoadRegister { + /// LDR + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--register---Load-Register--register--?lang=en + pub fn ldr(rt: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rt, rn, s: S::NoShift, option: Option::LSL, rm, size: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b0100; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: LoadRegister) -> Self { + 0 + | ((inst.size as u32) << 30) + | (0b11 << 28) + | (FAMILY << 25) + | (0b11 << 21) + | ((inst.rm as u32) << 16) + | ((inst.option as u32) << 13) + | ((inst.s as u32) << 12) + | (0b10 << 10) + | ((inst.rn as u32) << 5) + | (inst.rt as u32) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: LoadRegister) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ldr() { + let inst = LoadRegister::ldr(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xf8626820, result); + } +} diff --git a/yjit/src/asm/arm64/inst/load_store.rs b/yjit/src/asm/arm64/inst/load_store.rs new file mode 100644 index 00000000000000..ea42f2d17f2d87 --- /dev/null +++ b/yjit/src/asm/arm64/inst/load_store.rs @@ -0,0 +1,215 @@ +use super::super::arg::truncate_imm; + +/// The size of the operands being operated on. +enum Size { + Size32 = 0b10, + Size64 = 0b11, +} + +/// A convenience function so that we can convert the number of bits of a +/// register operand directly into a Size enum variant. +impl From for Size { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Size::Size64, + 32 => Size::Size32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} + +/// The operation to perform for this instruction. +enum Opc { + STR = 0b00, + LDR = 0b01, + LDURSW = 0b10 +} + +/// What kind of indexing to perform for this instruction. +enum Index { + None = 0b00, + PostIndex = 0b01, + PreIndex = 0b11 +} + +/// The struct that represents an A64 load or store instruction that can be +/// encoded. +/// +/// LDR/LDUR/LDURSW/STR/STUR +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 1 0 0 0 0 | +/// | size. opc.. imm9.......................... idx.. rn.............. rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct LoadStore { + /// The number of the register to load the value into or store it from. + rt: u8, + + /// The base register with which to form the address.
+ rn: u8, + + /// What kind of indexing to perform for this instruction. + idx: Index, + + /// The optional signed immediate byte offset from the base register. + imm9: i16, + + /// The operation to perform for this instruction. + opc: Opc, + + /// The size of the operands being operated on. + size: Size +} + +impl LoadStore { + /// LDR (immediate, post-index) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--immediate---Load-Register--immediate-- + pub fn ldr_post(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { + Self { rt, rn, idx: Index::PostIndex, imm9, opc: Opc::LDR, size: num_bits.into() } + } + + /// LDR (immediate, pre-index) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--immediate---Load-Register--immediate-- + pub fn ldr_pre(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { + Self { rt, rn, idx: Index::PreIndex, imm9, opc: Opc::LDR, size: num_bits.into() } + } + + /// LDUR (load register, unscaled) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled--?lang=en + pub fn ldur(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { + Self { rt, rn, idx: Index::None, imm9, opc: Opc::LDR, size: num_bits.into() } + } + + /// LDURSW (load register, unscaled, signed) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURSW--Load-Register-Signed-Word--unscaled--?lang=en + pub fn ldursw(rt: u8, rn: u8, imm9: i16) -> Self { + Self { rt, rn, idx: Index::None, imm9, opc: Opc::LDURSW, size: Size::Size32 } + } + + /// STR (immediate, post-index) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STR--immediate---Store-Register--immediate-- + pub fn str_post(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { + Self { rt, rn, idx: Index::PostIndex, imm9, opc: Opc::STR, size: num_bits.into() } + } + + /// STR (immediate, pre-index) + /// 
https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STR--immediate---Store-Register--immediate-- + pub fn str_pre(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { + Self { rt, rn, idx: Index::PreIndex, imm9, opc: Opc::STR, size: num_bits.into() } + } + + /// STUR (store register, unscaled) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STUR--Store-Register--unscaled--?lang=en + pub fn stur(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { + Self { rt, rn, idx: Index::None, imm9, opc: Opc::STR, size: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b0100; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: LoadStore) -> Self { + 0 + | ((inst.size as u32) << 30) + | (0b11 << 28) + | (FAMILY << 25) + | ((inst.opc as u32) << 22) + | (truncate_imm::<_, 9>(inst.imm9) << 12) + | ((inst.idx as u32) << 10) + | ((inst.rn as u32) << 5) + | (inst.rt as u32) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: LoadStore) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ldr_post() { + let inst = LoadStore::ldr_post(0, 1, 16, 64); + let result: u32 = inst.into(); + assert_eq!(0xf8410420, result); + } + + #[test] + fn test_ldr_pre() { + let inst = LoadStore::ldr_pre(0, 1, 16, 64); + let result: u32 = inst.into(); + assert_eq!(0xf8410c20, result); + } + + #[test] + fn test_ldur() { + let inst = LoadStore::ldur(0, 1, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xf8400020, result); + } + + #[test] + fn test_ldur_with_imm() { + let inst = LoadStore::ldur(0, 1, 123, 64); + let result: u32 = inst.into(); + assert_eq!(0xf847b020, result); + } + + #[test] + fn test_ldursw() { + let inst = LoadStore::ldursw(0, 1, 0); + let result: u32 = inst.into(); + assert_eq!(0xb8800020, result); + } + + #[test] + fn test_ldursw_with_imm() { + let inst = LoadStore::ldursw(0, 1, 123); + let result: u32 = inst.into(); + assert_eq!(0xb887b020, result); + } + + #[test] + fn test_str_post() { + let inst = LoadStore::str_post(0, 1, -16, 64); + let result: u32 = inst.into(); + assert_eq!(0xf81f0420, result); + } + + #[test] + fn test_str_pre() { + let inst = LoadStore::str_pre(0, 1, -16, 64); + let result: u32 = inst.into(); + assert_eq!(0xf81f0c20, result); + } + + #[test] + fn test_stur() { + let inst = LoadStore::stur(0, 1, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xf8000020, result); + } + + #[test] + fn test_stur_negative_offset() { + let inst = LoadStore::stur(0, 1, -1, 64); + let result: u32 = inst.into(); + assert_eq!(0xf81ff020, result); + } + + #[test] + fn test_stur_positive_offset() { + let inst = LoadStore::stur(0, 1, 255, 64); + let result: u32 = inst.into(); + assert_eq!(0xf80ff020, result); + } +} diff --git a/yjit/src/asm/arm64/inst/logical_imm.rs b/yjit/src/asm/arm64/inst/logical_imm.rs new file mode 100644 index 00000000000000..73eec8b37c305f --- 
/dev/null +++ b/yjit/src/asm/arm64/inst/logical_imm.rs @@ -0,0 +1,154 @@ +use super::super::arg::{BitmaskImmediate, Sf}; + +// Which operation to perform. +enum Opc { + /// The AND operation. + And = 0b00, + + /// The ORR operation. + Orr = 0b01, + + /// The EOR operation. + Eor = 0b10, + + /// The ANDS operation. + Ands = 0b11 +} + +/// The struct that represents an A64 bitwise immediate instruction that can be +/// encoded. +/// +/// AND/ORR/ANDS (immediate) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 0 0 1 0 0 | +/// | sf opc.. N immr............... imms............... rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct LogicalImm { + /// The register number of the destination register. + rd: u8, + + /// The register number of the first operand register. + rn: u8, + + /// The immediate value to test. + imm: BitmaskImmediate, + + /// The opcode for this instruction. + opc: Opc, + + /// Whether or not this instruction is operating on 64-bit operands. 
+ sf: Sf +} + +impl LogicalImm { + /// AND (bitmask immediate) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--immediate---Bitwise-AND--immediate--?lang=en + pub fn and(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, opc: Opc::And, sf: num_bits.into() } + } + + /// ANDS (bitmask immediate) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--immediate---Bitwise-AND--immediate---setting-flags-?lang=en + pub fn ands(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, opc: Opc::Ands, sf: num_bits.into() } + } + + /// EOR (bitmask immediate) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/EOR--immediate---Bitwise-Exclusive-OR--immediate-- + pub fn eor(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, opc: Opc::Eor, sf: num_bits.into() } + } + + /// MOV (bitmask immediate) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--bitmask-immediate---Move--bitmask-immediate---an-alias-of-ORR--immediate--?lang=en + pub fn mov(rd: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self { rd, rn: 0b11111, imm, opc: Opc::Orr, sf: num_bits.into() } + } + + /// ORR (bitmask immediate) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORR--immediate---Bitwise-OR--immediate-- + pub fn orr(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, opc: Opc::Orr, sf: num_bits.into() } + } + + /// TST (bitmask immediate) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--immediate---Test-bits--immediate---an-alias-of-ANDS--immediate--?lang=en + pub fn tst(rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self::ands(31, rn, imm, num_bits) + } +} + +/// 
https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#log_imm +const FAMILY: u32 = 0b1001; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: LogicalImm) -> Self { + let imm: u32 = inst.imm.into(); + + 0 + | ((inst.sf as u32) << 31) + | ((inst.opc as u32) << 29) + | (FAMILY << 25) + | (imm << 10) + | ((inst.rn as u32) << 5) + | inst.rd as u32 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: LogicalImm) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_and() { + let inst = LogicalImm::and(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0x92400820, result); + } + + #[test] + fn test_ands() { + let inst = LogicalImm::ands(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xf2400820, result); + } + + #[test] + fn test_eor() { + let inst = LogicalImm::eor(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xd2400820, result); + } + + #[test] + fn test_mov() { + let inst = LogicalImm::mov(0, 0x5555555555555555.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xb200f3e0, result); + } + + #[test] + fn test_orr() { + let inst = LogicalImm::orr(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xb2400820, result); + } + + #[test] + fn test_tst() { + let inst = LogicalImm::tst(1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xf240083f, result); + } +} diff --git a/yjit/src/asm/arm64/inst/logical_reg.rs b/yjit/src/asm/arm64/inst/logical_reg.rs new file mode 100644 index 00000000000000..a96805c9f961c0 --- /dev/null +++ b/yjit/src/asm/arm64/inst/logical_reg.rs @@ -0,0 +1,207 @@ +use super::super::arg::{Sf, truncate_uimm}; + +/// Whether or not this is a NOT instruction. 
+enum N { + /// This is not a NOT instruction. + No = 0, + + /// This is a NOT instruction. + Yes = 1 +} + +/// The type of shift to perform on the second operand register. +enum Shift { + LSL = 0b00, // logical shift left (unsigned) + LSR = 0b01, // logical shift right (unsigned) + ASR = 0b10, // arithmetic shift right (signed) + ROR = 0b11 // rotate right (unsigned) +} + +// Which operation to perform. +enum Opc { + /// The AND operation. + And = 0b00, + + /// The ORR operation. + Orr = 0b01, + + /// The EOR operation. + Eor = 0b10, + + /// The ANDS operation. + Ands = 0b11 +} + +/// The struct that represents an A64 logical register instruction that can be +/// encoded. +/// +/// AND/ORR/ANDS (shifted register) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 0 1 0 | +/// | sf opc.. shift N rm.............. imm6............... rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct LogicalReg { + /// The register number of the destination register. + rd: u8, + + /// The register number of the first operand register. + rn: u8, + + /// The amount to shift the second operand register. + imm6: u8, + + /// The register number of the second operand register. + rm: u8, + + /// Whether or not this is a NOT instruction. + n: N, + + /// The type of shift to perform on the second operand register. + shift: Shift, + + /// The opcode for this instruction. + opc: Opc, + + /// Whether or not this instruction is operating on 64-bit operands. 
+ sf: Sf +} + +impl LogicalReg { + /// AND (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--shifted-register---Bitwise-AND--shifted-register--?lang=en + pub fn and(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::And, sf: num_bits.into() } + } + + /// ANDS (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--shifted-register---Bitwise-AND--shifted-register---setting-flags-?lang=en + pub fn ands(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() } + } + + /// EOR (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/EOR--shifted-register---Bitwise-Exclusive-OR--shifted-register-- + pub fn eor(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Eor, sf: num_bits.into() } + } + + /// MOV (register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register--?lang=en + pub fn mov(rd: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn: 0b11111, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } + } + + /// MVN (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MVN--Bitwise-NOT--an-alias-of-ORN--shifted-register--?lang=en + pub fn mvn(rd: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn: 0b11111, imm6: 0, rm, n: N::Yes, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } + } + + /// ORN (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORN--shifted-register---Bitwise-OR-NOT--shifted-register-- + pub fn orn(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn, imm6: 0, rm, 
n: N::Yes, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } + } + + /// ORR (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORR--shifted-register---Bitwise-OR--shifted-register-- + pub fn orr(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } + } + + /// TST (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--shifted-register---Test--shifted-register---an-alias-of-ANDS--shifted-register--?lang=en + pub fn tst(rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd: 31, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en +const FAMILY: u32 = 0b0101; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: LogicalReg) -> Self { + 0 + | ((inst.sf as u32) << 31) + | ((inst.opc as u32) << 29) + | (FAMILY << 25) + | ((inst.shift as u32) << 22) + | ((inst.n as u32) << 21) + | ((inst.rm as u32) << 16) + | (truncate_uimm::<_, 6>(inst.imm6) << 10) + | ((inst.rn as u32) << 5) + | inst.rd as u32 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: LogicalReg) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_and() { + let inst = LogicalReg::and(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0x8a020020, result); + } + + #[test] + fn test_ands() { + let inst = LogicalReg::ands(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xea020020, result); + } + + #[test] + fn test_eor() { + let inst = LogicalReg::eor(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xca020020, result); + } + + #[test] + fn test_mov() { + let inst = LogicalReg::mov(0, 1, 64); + let result: u32 = inst.into(); + assert_eq!(0xaa0103e0, result); + } + + #[test] + fn test_mvn() { + let inst = LogicalReg::mvn(0, 1, 64); + let result: u32 = inst.into(); + assert_eq!(0xaa2103e0, result); + } + + #[test] + fn test_orn() { + let inst = LogicalReg::orn(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xaa220020, result); + } + + #[test] + fn test_orr() { + let inst = LogicalReg::orr(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xaa020020, result); + } + + #[test] + fn test_tst() { + let inst = LogicalReg::tst(0, 1, 64); + let result: u32 = inst.into(); + assert_eq!(0xea01001f, result); + } +} diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs new file mode 100644 index 00000000000000..b3a77e73c98eb7 --- /dev/null +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -0,0 +1,48 @@ +// This module contains various A64 instructions and the logic necessary to +// encode them into u32s. 
+ +mod atomic; +mod branch; +mod branch_cond; +mod breakpoint; +mod call; +mod conditional; +mod data_imm; +mod data_reg; +mod halfword_imm; +mod load_literal; +mod load_register; +mod load_store; +mod logical_imm; +mod logical_reg; +mod mov; +mod nop; +mod pc_rel; +mod reg_pair; +mod sbfm; +mod shift_imm; +mod sys_reg; +mod test_bit; + +pub use atomic::Atomic; +pub use branch::Branch; +pub use branch_cond::BranchCond; +pub use breakpoint::Breakpoint; +pub use call::Call; +pub use conditional::Conditional; +pub use data_imm::DataImm; +pub use data_reg::DataReg; +pub use halfword_imm::HalfwordImm; +pub use load_literal::LoadLiteral; +pub use load_register::LoadRegister; +pub use load_store::LoadStore; +pub use logical_imm::LogicalImm; +pub use logical_reg::LogicalReg; +pub use mov::Mov; +pub use nop::Nop; +pub use pc_rel::PCRelative; +pub use reg_pair::RegisterPair; +pub use sbfm::SBFM; +pub use shift_imm::ShiftImm; +pub use sys_reg::SysReg; +pub use test_bit::TestBit; diff --git a/yjit/src/asm/arm64/inst/mov.rs b/yjit/src/asm/arm64/inst/mov.rs new file mode 100644 index 00000000000000..e7cb9215b0774f --- /dev/null +++ b/yjit/src/asm/arm64/inst/mov.rs @@ -0,0 +1,155 @@ +use super::super::arg::Sf; + +/// Which operation is being performed. +enum Op { + /// A movz operation which zeroes out the other bits. + MOVZ = 0b10, + + /// A movk operation which keeps the other bits in place. + MOVK = 0b11 +} + +/// How much to shift the immediate by. +enum Hw { + LSL0 = 0b00, + LSL16 = 0b01, + LSL32 = 0b10, + LSL48 = 0b11 +} + +impl From for Hw { + fn from(shift: u8) -> Self { + match shift { + 0 => Hw::LSL0, + 16 => Hw::LSL16, + 32 => Hw::LSL32, + 48 => Hw::LSL48, + _ => panic!("Invalid value for shift: {}", shift) + } + } +} + +/// The struct that represents a MOVK or MOVZ instruction. 
+/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 0 0 1 0 1 | +/// | sf op... hw... imm16.................................................. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Mov { + /// The register number of the destination register. + rd: u8, + + /// The value to move into the register. + imm16: u16, + + /// The shift of the value to move. + hw: Hw, + + /// Which operation is being performed. + op: Op, + + /// Whether or not this instruction is operating on 64-bit operands. + sf: Sf +} + +impl Mov { + /// MOVK + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MOVK--Move-wide-with-keep-?lang=en + pub fn movk(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self { + Self { rd, imm16, hw: hw.into(), op: Op::MOVK, sf: num_bits.into() } + } + + /// MOVZ + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MOVZ--Move-wide-with-zero-?lang=en + pub fn movz(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self { + Self { rd, imm16, hw: hw.into(), op: Op::MOVZ, sf: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en +const FAMILY: u32 = 0b1000; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Mov) -> Self { + 0 + | ((inst.sf as u32) << 31) + | ((inst.op as u32) << 29) + | (FAMILY << 25) + | (0b101 << 23) + | ((inst.hw as u32) << 21) + | ((inst.imm16 as u32) << 5) + | inst.rd as u32 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: Mov) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_movk_unshifted() { + let inst = Mov::movk(0, 123, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xf2800f60, result); + } + + #[test] + fn test_movk_shifted_16() { + let inst = Mov::movk(0, 123, 16, 64); + let result: u32 = inst.into(); + assert_eq!(0xf2A00f60, result); + } + + #[test] + fn test_movk_shifted_32() { + let inst = Mov::movk(0, 123, 32, 64); + let result: u32 = inst.into(); + assert_eq!(0xf2C00f60, result); + } + + #[test] + fn test_movk_shifted_48() { + let inst = Mov::movk(0, 123, 48, 64); + let result: u32 = inst.into(); + assert_eq!(0xf2e00f60, result); + } + + #[test] + fn test_movz_unshifted() { + let inst = Mov::movz(0, 123, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xd2800f60, result); + } + + #[test] + fn test_movz_shifted_16() { + let inst = Mov::movz(0, 123, 16, 64); + let result: u32 = inst.into(); + assert_eq!(0xd2a00f60, result); + } + + #[test] + fn test_movz_shifted_32() { + let inst = Mov::movz(0, 123, 32, 64); + let result: u32 = inst.into(); + assert_eq!(0xd2c00f60, result); + } + + #[test] + fn test_movz_shifted_48() { + let inst = Mov::movz(0, 123, 48, 64); + let result: u32 = inst.into(); + assert_eq!(0xd2e00f60, result); + } +} diff --git a/yjit/src/asm/arm64/inst/nop.rs b/yjit/src/asm/arm64/inst/nop.rs new file mode 100644 index 00000000000000..a99f8d34b7a9e0 --- /dev/null +++ b/yjit/src/asm/arm64/inst/nop.rs @@ -0,0 +1,44 @@ +/// The struct that represents an A64 nop instruction that can be encoded. 
+/// +/// NOP +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 0 1 0 1 0 1 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 1 1 1 1 1 | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Nop; + +impl Nop { + /// NOP + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/NOP--No-Operation- + pub fn nop() -> Self { + Self {} + } +} + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Nop) -> Self { + 0b11010101000000110010000000011111 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: Nop) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_nop() { + let inst = Nop::nop(); + let result: u32 = inst.into(); + assert_eq!(0xd503201f, result); + } +} diff --git a/yjit/src/asm/arm64/inst/pc_rel.rs b/yjit/src/asm/arm64/inst/pc_rel.rs new file mode 100644 index 00000000000000..fa330cb9d6b934 --- /dev/null +++ b/yjit/src/asm/arm64/inst/pc_rel.rs @@ -0,0 +1,107 @@ +/// Which operation to perform for the PC-relative instruction. +enum Op { + /// Form a PC-relative address. + ADR = 0, + + /// Form a PC-relative address to a 4KB page. + ADRP = 1 +} + +/// The struct that represents an A64 PC-relative address instruction that can +/// be encoded. +/// +/// ADR +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 0 0 0 0 | +/// | op immlo immhi........................................................... rd.............. 
| +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct PCRelative { + /// The number for the general-purpose register to load the address into. + rd: u8, + + /// The number of bytes to add to the PC to form the address. + imm: i32, + + /// Which operation to perform for this instruction. + op: Op +} + +impl PCRelative { + /// ADR + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/ADR--Form-PC-relative-address- + pub fn adr(rd: u8, imm: i32) -> Self { + Self { rd, imm, op: Op::ADR } + } + + /// ADRP + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/ADRP--Form-PC-relative-address-to-4KB-page- + pub fn adrp(rd: u8, imm: i32) -> Self { + Self { rd, imm: imm >> 12, op: Op::ADRP } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en +const FAMILY: u32 = 0b1000; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: PCRelative) -> Self { + let immlo = (inst.imm & 0b11) as u32; + let mut immhi = ((inst.imm >> 2) & ((1 << 18) - 1)) as u32; + + // Toggle the sign bit if necessary. + if inst.imm < 0 { + immhi |= (1 << 18); + } + + 0 + | ((inst.op as u32) << 31) + | (immlo << 29) + | (FAMILY << 25) + | (immhi << 5) + | inst.rd as u32 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: PCRelative) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_adr_positive() { + let inst = PCRelative::adr(0, 5); + let result: u32 = inst.into(); + assert_eq!(0x30000020, result); + } + + #[test] + fn test_adr_negative() { + let inst = PCRelative::adr(0, -5); + let result: u32 = inst.into(); + assert_eq!(0x70ffffc0, result); + } + + #[test] + fn test_adrp_positive() { + let inst = PCRelative::adrp(0, 0x4000); + let result: u32 = inst.into(); + assert_eq!(0x90000020, result); + } + + #[test] + fn test_adrp_negative() { + let inst = PCRelative::adrp(0, -0x4000); + let result: u32 = inst.into(); + assert_eq!(0x90ffffe0, result); + } +} diff --git a/yjit/src/asm/arm64/inst/reg_pair.rs b/yjit/src/asm/arm64/inst/reg_pair.rs new file mode 100644 index 00000000000000..87690e3b4ab0e1 --- /dev/null +++ b/yjit/src/asm/arm64/inst/reg_pair.rs @@ -0,0 +1,212 @@ +use super::super::arg::truncate_imm; + +/// The operation to perform for this instruction. +enum Opc { + /// When the registers are 32-bits wide. + Opc32 = 0b00, + + /// When the registers are 64-bits wide. + Opc64 = 0b10 +} + +/// The kind of indexing to perform for this instruction. +enum Index { + StorePostIndex = 0b010, + LoadPostIndex = 0b011, + StoreSignedOffset = 0b100, + LoadSignedOffset = 0b101, + StorePreIndex = 0b110, + LoadPreIndex = 0b111 +} + +/// A convenience function so that we can convert the number of bits of a +/// register operand directly into an Opc variant. +impl From for Opc { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Opc::Opc64, + 32 => Opc::Opc32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} + +/// The struct that represents an A64 register pair instruction that can be +/// encoded. 
+/// +/// STP/LDP +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 0 1 0 0 | +/// | opc index..... imm7.................... rt2............. rn.............. rt1............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct RegisterPair { + /// The number of the first register to be transferred. + rt1: u8, + + /// The number of the base register. + rn: u8, + + /// The number of the second register to be transferred. + rt2: u8, + + /// The signed immediate offset, stored as the byte displacement divided by 8. + imm7: i16, + + /// The kind of indexing to use for this instruction. + index: Index, + + /// The operation to be performed (in terms of size). + opc: Opc +} + +impl RegisterPair { + /// Create a register pair instruction with a given indexing mode. + fn new(rt1: u8, rt2: u8, rn: u8, disp: i16, index: Index, num_bits: u8) -> Self { + Self { rt1, rn, rt2, imm7: disp / 8, index, opc: num_bits.into() } + } + + /// LDP (signed offset) + /// LDP <Xt1>, <Xt2>, [<Xn|SP>{, #<imm>}] + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en + pub fn ldp(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { + Self::new(rt1, rt2, rn, disp, Index::LoadSignedOffset, num_bits) + } + + /// LDP (pre-index) + /// LDP <Xt1>, <Xt2>, [<Xn|SP>, #<imm>]!
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en + pub fn ldp_pre(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { + Self::new(rt1, rt2, rn, disp, Index::LoadPreIndex, num_bits) + } + + /// LDP (post-index) + /// LDP , , [], # + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en + pub fn ldp_post(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { + Self::new(rt1, rt2, rn, disp, Index::LoadPostIndex, num_bits) + } + + /// STP (signed offset) + /// STP , , [{, #}] + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en + pub fn stp(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { + Self::new(rt1, rt2, rn, disp, Index::StoreSignedOffset, num_bits) + } + + /// STP (pre-index) + /// STP , , [, #]! + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en + pub fn stp_pre(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { + Self::new(rt1, rt2, rn, disp, Index::StorePreIndex, num_bits) + } + + /// STP (post-index) + /// STP , , [], # + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en + pub fn stp_post(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { + Self::new(rt1, rt2, rn, disp, Index::StorePostIndex, num_bits) + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b0100; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. 
+ fn from(inst: RegisterPair) -> Self { + 0 + | ((inst.opc as u32) << 30) + | (1 << 29) + | (FAMILY << 25) + | ((inst.index as u32) << 22) + | (truncate_imm::<_, 7>(inst.imm7) << 15) + | ((inst.rt2 as u32) << 10) + | ((inst.rn as u32) << 5) + | (inst.rt1 as u32) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: RegisterPair) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ldp() { + let inst = RegisterPair::ldp(0, 1, 2, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xa9400440, result); + } + + #[test] + fn test_ldp_maximum_displacement() { + let inst = RegisterPair::ldp(0, 1, 2, 504, 64); + let result: u32 = inst.into(); + assert_eq!(0xa95f8440, result); + } + + #[test] + fn test_ldp_minimum_displacement() { + let inst = RegisterPair::ldp(0, 1, 2, -512, 64); + let result: u32 = inst.into(); + assert_eq!(0xa9600440, result); + } + + #[test] + fn test_ldp_pre() { + let inst = RegisterPair::ldp_pre(0, 1, 2, 256, 64); + let result: u32 = inst.into(); + assert_eq!(0xa9d00440, result); + } + + #[test] + fn test_ldp_post() { + let inst = RegisterPair::ldp_post(0, 1, 2, 256, 64); + let result: u32 = inst.into(); + assert_eq!(0xa8d00440, result); + } + + #[test] + fn test_stp() { + let inst = RegisterPair::stp(0, 1, 2, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xa9000440, result); + } + + #[test] + fn test_stp_maximum_displacement() { + let inst = RegisterPair::stp(0, 1, 2, 504, 64); + let result: u32 = inst.into(); + assert_eq!(0xa91f8440, result); + } + + #[test] + fn test_stp_minimum_displacement() { + let inst = RegisterPair::stp(0, 1, 2, -512, 64); + let result: u32 = inst.into(); + assert_eq!(0xa9200440, result); + } + + #[test] + fn test_stp_pre() { + let inst = RegisterPair::stp_pre(0, 1, 2, 256, 64); + let result: u32 = inst.into(); + assert_eq!(0xa9900440, result); + } + + #[test] + fn test_stp_post() { 
+ let inst = RegisterPair::stp_post(0, 1, 2, 256, 64); + let result: u32 = inst.into(); + assert_eq!(0xa8900440, result); + } +} diff --git a/yjit/src/asm/arm64/inst/sbfm.rs b/yjit/src/asm/arm64/inst/sbfm.rs new file mode 100644 index 00000000000000..860299898040fc --- /dev/null +++ b/yjit/src/asm/arm64/inst/sbfm.rs @@ -0,0 +1,103 @@ +use super::super::arg::{Sf, truncate_uimm}; + +/// The struct that represents an A64 signed bitfield move instruction that can +/// be encoded. +/// +/// SBFM +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 0 1 0 0 1 1 0 | +/// | sf N immr............... imms............... rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct SBFM { + /// The number for the general-purpose register to load the value into. + rd: u8, + + /// The number for the general-purpose register to copy from. + rn: u8, + + /// The leftmost bit number to be moved from the source. + imms: u8, + + // The right rotate amount. + immr: u8, + + /// Whether or not this is a 64-bit operation. + n: bool, + + /// The size of this operation. 
+ sf: Sf +} + +impl SBFM { + /// ASR + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ASR--immediate---Arithmetic-Shift-Right--immediate---an-alias-of-SBFM-?lang=en + pub fn asr(rd: u8, rn: u8, shift: u8, num_bits: u8) -> Self { + let (imms, n) = if num_bits == 64 { + (0b111111, true) + } else { + (0b011111, false) + }; + + Self { rd, rn, immr: shift, imms, n, sf: num_bits.into() } + } + + /// SXTW + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SXTW--Sign-Extend-Word--an-alias-of-SBFM-?lang=en + pub fn sxtw(rd: u8, rn: u8) -> Self { + Self { rd, rn, immr: 0, imms: 31, n: true, sf: Sf::Sf64 } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#bitfield +const FAMILY: u32 = 0b1001; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: SBFM) -> Self { + 0 + | ((inst.sf as u32) << 31) + | (FAMILY << 25) + | (1 << 24) + | ((inst.n as u32) << 22) + | (truncate_uimm::<_, 6>(inst.immr) << 16) + | (truncate_uimm::<_, 6>(inst.imms) << 10) + | ((inst.rn as u32) << 5) + | inst.rd as u32 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: SBFM) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_asr_32_bits() { + let inst = SBFM::asr(0, 1, 2, 32); + let result: u32 = inst.into(); + assert_eq!(0x13027c20, result); + } + + #[test] + fn test_asr_64_bits() { + let inst = SBFM::asr(10, 11, 5, 64); + let result: u32 = inst.into(); + assert_eq!(0x9345fd6a, result); + } + + #[test] + fn test_sxtw() { + let inst = SBFM::sxtw(0, 1); + let result: u32 = inst.into(); + assert_eq!(0x93407c20, result); + } +} diff --git a/yjit/src/asm/arm64/inst/shift_imm.rs b/yjit/src/asm/arm64/inst/shift_imm.rs new file mode 100644 index 00000000000000..3d2685a997b1a2 --- /dev/null +++ b/yjit/src/asm/arm64/inst/shift_imm.rs @@ -0,0 +1,147 @@ +use super::super::arg::Sf; + +/// The operation to perform for this instruction. +enum Opc { + /// Logical left shift + LSL, + + /// Logical shift right + LSR +} + +/// The struct that represents an A64 unsigned bitfield move instruction that +/// can be encoded. +/// +/// LSL (immediate) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 0 1 0 0 1 1 0 | +/// | sf N immr............... imms............... rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct ShiftImm { + /// The register number of the destination register. + rd: u8, + + /// The register number of the first operand register. + rn: u8, + + /// The immediate value to shift by. + shift: u8, + + /// The opcode for this instruction. + opc: Opc, + + /// Whether or not this instruction is operating on 64-bit operands. 
+ sf: Sf +} + +impl ShiftImm { + /// LSL (immediate) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LSL--immediate---Logical-Shift-Left--immediate---an-alias-of-UBFM-?lang=en + pub fn lsl(rd: u8, rn: u8, shift: u8, num_bits: u8) -> Self { + ShiftImm { rd, rn, shift, opc: Opc::LSL, sf: num_bits.into() } + } + + /// LSR (immediate) + /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/LSR--immediate---Logical-Shift-Right--immediate---an-alias-of-UBFM-?lang=en + pub fn lsr(rd: u8, rn: u8, shift: u8, num_bits: u8) -> Self { + ShiftImm { rd, rn, shift, opc: Opc::LSR, sf: num_bits.into() } + } + + /// Returns a triplet of (n, immr, imms) encoded in u32s for this + /// instruction. This mirrors how they will be encoded in the actual bits. + fn bitmask(&self) -> (u32, u32, u32) { + match self.opc { + // The key insight is a little buried in the docs, but effectively: + // LSL , , # == UBFM , , #(- MOD 32), #(31-) + // LSL , , # == UBFM , , #(- MOD 64), #(63-) + Opc::LSL => { + let shift = -(self.shift as i16); + + match self.sf { + Sf::Sf32 => ( + 0, + (shift.rem_euclid(32) & 0x3f) as u32, + ((31 - self.shift) & 0x3f) as u32 + ), + Sf::Sf64 => ( + 1, + (shift.rem_euclid(64) & 0x3f) as u32, + ((63 - self.shift) & 0x3f) as u32 + ) + } + }, + // Similar to LSL: + // LSR , , # == UBFM , , #, #31 + // LSR , , # == UBFM , , #, #63 + Opc::LSR => { + match self.sf { + Sf::Sf32 => (0, (self.shift & 0x3f) as u32, 31), + Sf::Sf64 => (1, (self.shift & 0x3f) as u32, 63) + } + } + } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#bitfield +const FAMILY: u32 = 0b10011; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. 
+ fn from(inst: ShiftImm) -> Self { + let (n, immr, imms) = inst.bitmask(); + + 0 + | ((inst.sf as u32) << 31) + | (1 << 30) + | (FAMILY << 24) + | (n << 22) + | (immr << 16) + | (imms << 10) + | ((inst.rn as u32) << 5) + | inst.rd as u32 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: ShiftImm) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_lsl_32() { + let inst = ShiftImm::lsl(0, 1, 7, 32); + let result: u32 = inst.into(); + assert_eq!(0x53196020, result); + } + + #[test] + fn test_lsl_64() { + let inst = ShiftImm::lsl(0, 1, 7, 64); + let result: u32 = inst.into(); + assert_eq!(0xd379e020, result); + } + + #[test] + fn test_lsr_32() { + let inst = ShiftImm::lsr(0, 1, 7, 32); + let result: u32 = inst.into(); + assert_eq!(0x53077c20, result); + } + + #[test] + fn test_lsr_64() { + let inst = ShiftImm::lsr(0, 1, 7, 64); + let result: u32 = inst.into(); + assert_eq!(0xd347fc20, result); + } +} diff --git a/yjit/src/asm/arm64/inst/sys_reg.rs b/yjit/src/asm/arm64/inst/sys_reg.rs new file mode 100644 index 00000000000000..108737a870a92f --- /dev/null +++ b/yjit/src/asm/arm64/inst/sys_reg.rs @@ -0,0 +1,86 @@ +use super::super::arg::SystemRegister; + +/// Which operation to perform (loading or storing the system register value). +enum L { + /// Store the value of a general-purpose register in a system register. + MSR = 0, + + /// Store the value of a system register in a general-purpose register. + MRS = 1 +} + +/// The struct that represents an A64 system register instruction that can be +/// encoded. +/// +/// MSR/MRS (register) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 0 1 0 1 0 1 0 0 1 | +/// | L o0 op1..... CRn........ 
CRm........ op2..... rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct SysReg { + /// The register to load the system register value into. + rt: u8, + + /// Which system register to load or store. + systemreg: SystemRegister, + + /// Which operation to perform (loading or storing the system register value). + l: L +} + +impl SysReg { + /// MRS (register) + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MRS--Move-System-Register-?lang=en + pub fn mrs(rt: u8, systemreg: SystemRegister) -> Self { + SysReg { rt, systemreg, l: L::MRS } + } + + /// MSR (register) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MSR--register---Move-general-purpose-register-to-System-Register-?lang=en + pub fn msr(systemreg: SystemRegister, rt: u8) -> Self { + SysReg { rt, systemreg, l: L::MSR } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en#systemmove +const FAMILY: u32 = 0b110101010001; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: SysReg) -> Self { + 0 + | (FAMILY << 20) + | ((inst.l as u32) << 21) + | ((inst.systemreg as u32) << 5) + | inst.rt as u32 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: SysReg) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_mrs() { + let inst = SysReg::mrs(0, SystemRegister::NZCV); + let result: u32 = inst.into(); + assert_eq!(0xd53b4200, result); + } + + #[test] + fn test_msr() { + let inst = SysReg::msr(SystemRegister::NZCV, 0); + let result: u32 = inst.into(); + assert_eq!(0xd51b4200, result); + } +} diff --git a/yjit/src/asm/arm64/inst/test_bit.rs b/yjit/src/asm/arm64/inst/test_bit.rs new file mode 100644 index 00000000000000..c57a05ad2b83c6 --- /dev/null +++ b/yjit/src/asm/arm64/inst/test_bit.rs @@ -0,0 +1,133 @@ +use super::super::arg::truncate_imm; + +/// The upper bit of the bit number to test. +#[derive(Debug)] +enum B5 { + /// When the bit number is below 32. + B532 = 0, + + /// When the bit number is equal to or above 32. + B564 = 1 +} + +/// A convenience function so that we can convert the bit number directly into a +/// B5 variant. +impl From for B5 { + fn from(bit_num: u8) -> Self { + match bit_num { + 0..=31 => B5::B532, + 32..=63 => B5::B564, + _ => panic!("Invalid bit number: {}", bit_num) + } + } +} + +/// The operation to perform for this instruction. +enum Op { + /// The test bit zero operation. + TBZ = 0, + + /// The test bit not zero operation. + TBNZ = 1 +} + +/// The struct that represents an A64 test bit instruction that can be encoded. +/// +/// TBNZ/TBZ +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 1 0 1 1 | +/// | b5 op b40............. imm14.......................................... rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct TestBit { + /// The number of the register to test. 
+ rt: u8, + + /// The PC-relative offset to the target instruction in term of number of + /// instructions. + imm14: i16, + + /// The lower 5 bits of the bit number to be tested. + b40: u8, + + /// The operation to perform for this instruction. + op: Op, + + /// The upper bit of the bit number to test. + b5: B5 +} + +impl TestBit { + /// TBNZ + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TBNZ--Test-bit-and-Branch-if-Nonzero-?lang=en + pub fn tbnz(rt: u8, bit_num: u8, offset: i16) -> Self { + Self { rt, imm14: offset, b40: bit_num & 0b11111, op: Op::TBNZ, b5: bit_num.into() } + } + + /// TBZ + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TBZ--Test-bit-and-Branch-if-Zero-?lang=en + pub fn tbz(rt: u8, bit_num: u8, offset: i16) -> Self { + Self { rt, imm14: offset, b40: bit_num & 0b11111, op: Op::TBZ, b5: bit_num.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en +const FAMILY: u32 = 0b11011; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: TestBit) -> Self { + let b40 = (inst.b40 & 0b11111) as u32; + let imm14 = truncate_imm::<_, 14>(inst.imm14); + + 0 + | ((inst.b5 as u32) << 31) + | (FAMILY << 25) + | ((inst.op as u32) << 24) + | (b40 << 19) + | (imm14 << 5) + | inst.rt as u32 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: TestBit) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_tbnz() { + let inst = TestBit::tbnz(0, 0, 0); + let result: u32 = inst.into(); + assert_eq!(0x37000000, result); + } + + #[test] + fn test_tbnz_negative() { + let inst = TestBit::tbnz(0, 0, -1); + let result: u32 = inst.into(); + assert_eq!(0x3707ffe0, result); + } + + #[test] + fn test_tbz() { + let inst = TestBit::tbz(0, 0, 0); + let result: u32 = inst.into(); + assert_eq!(0x36000000, result); + } + + #[test] + fn test_tbz_negative() { + let inst = TestBit::tbz(0, 0, -1); + let result: u32 = inst.into(); + assert_eq!(0x3607ffe0, result); + } +} diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs new file mode 100644 index 00000000000000..b73b3125e27646 --- /dev/null +++ b/yjit/src/asm/arm64/mod.rs @@ -0,0 +1,1440 @@ +#![allow(dead_code)] // For instructions and operands we're not currently using. + +use crate::asm::CodeBlock; + +mod arg; +mod inst; +mod opnd; + +use inst::*; + +// We're going to make these public to make using these things easier in the +// backend (so they don't have to have knowledge about the submodule). +pub use arg::*; +pub use opnd::*; + +/// Checks that a signed value fits within the specified number of bits. +pub const fn imm_fits_bits(imm: i64, num_bits: u8) -> bool { + let minimum = if num_bits == 64 { i64::MIN } else { -2_i64.pow((num_bits as u32) - 1) }; + let maximum = if num_bits == 64 { i64::MAX } else { 2_i64.pow((num_bits as u32) - 1) - 1 }; + + imm >= minimum && imm <= maximum +} + +/// Checks that an unsigned value fits within the specified number of bits. 
+pub const fn uimm_fits_bits(uimm: u64, num_bits: u8) -> bool { + let maximum = if num_bits == 64 { u64::MAX } else { 2_u64.pow(num_bits as u32) - 1 }; + + uimm <= maximum +} + +/// ADD - add rn and rm, put the result in rd, don't update flags +pub fn add(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." + ); + + DataReg::add(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + DataImm::add(rd.reg_no, rn.reg_no, uimm12.try_into().unwrap(), rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + if imm12 < 0 { + DataImm::sub(rd.reg_no, rn.reg_no, (-imm12 as u64).try_into().unwrap(), rd.num_bits).into() + } else { + DataImm::add(rd.reg_no, rn.reg_no, (imm12 as u64).try_into().unwrap(), rd.num_bits).into() + } + }, + _ => panic!("Invalid operand combination to add instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// ADDS - add rn and rm, put the result in rd, update flags +pub fn adds(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." 
+ ); + + DataReg::adds(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + DataImm::adds(rd.reg_no, rn.reg_no, imm12.try_into().unwrap(), rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + if imm12 < 0 { + DataImm::subs(rd.reg_no, rn.reg_no, (-imm12 as u64).try_into().unwrap(), rd.num_bits).into() + } else { + DataImm::adds(rd.reg_no, rn.reg_no, (imm12 as u64).try_into().unwrap(), rd.num_bits).into() + } + }, + _ => panic!("Invalid operand combination to adds instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// ADR - form a PC-relative address and load it into a register +pub fn adr(cb: &mut CodeBlock, rd: A64Opnd, imm: A64Opnd) { + let bytes: [u8; 4] = match (rd, imm) { + (A64Opnd::Reg(rd), A64Opnd::Imm(imm)) => { + assert!(rd.num_bits == 64, "The destination register must be 64 bits."); + assert!(imm_fits_bits(imm, 21), "The immediate operand must be 21 bits or less."); + + PCRelative::adr(rd.reg_no, imm as i32).into() + }, + _ => panic!("Invalid operand combination to adr instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// ADRP - form a PC-relative address to a 4KB page and load it into a register. +/// This is effectively the same as ADR except that the immediate must be a +/// multiple of 4KB. 
+pub fn adrp(cb: &mut CodeBlock, rd: A64Opnd, imm: A64Opnd) { + let bytes: [u8; 4] = match (rd, imm) { + (A64Opnd::Reg(rd), A64Opnd::Imm(imm)) => { + assert!(rd.num_bits == 64, "The destination register must be 64 bits."); + assert!(imm_fits_bits(imm, 32), "The immediate operand must be 32 bits or less."); + + PCRelative::adrp(rd.reg_no, imm as i32).into() + }, + _ => panic!("Invalid operand combination to adr instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// AND - and rn and rm, put the result in rd, don't update flags +pub fn and(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." + ); + + LogicalReg::and(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + LogicalImm::and(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to and instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// ANDS - and rn and rm, put the result in rd, update flags +pub fn ands(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." 
+ ); + + LogicalReg::ands(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + LogicalImm::ands(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to ands instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// ASR - arithmetic shift right rn by shift, put the result in rd, don't update +/// flags +pub fn asr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, shift: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, shift) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(shift)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + assert!(uimm_fits_bits(shift, 6), "The shift operand must be 6 bits or less."); + + SBFM::asr(rd.reg_no, rn.reg_no, shift.try_into().unwrap(), rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to asr instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// Whether or not the offset between two instructions fits into the branch with +/// or without link instruction. If it doesn't, then we have to load the value +/// into a register first. +pub const fn b_offset_fits_bits(offset: i64) -> bool { + imm_fits_bits(offset, 26) +} + +/// B - branch without link (offset is number of instructions to jump) +pub fn b(cb: &mut CodeBlock, imm26: A64Opnd) { + let bytes: [u8; 4] = match imm26 { + A64Opnd::Imm(imm26) => { + assert!(b_offset_fits_bits(imm26), "The immediate operand must be 26 bits or less."); + + Call::b(imm26 as i32).into() + }, + _ => panic!("Invalid operand combination to b instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// Whether or not the offset in number of instructions between two instructions +/// fits into the b.cond instruction. 
If it doesn't, then we have to load the +/// value into a register first, then use the b.cond instruction to skip past a +/// direct jump. +pub const fn bcond_offset_fits_bits(offset: i64) -> bool { + imm_fits_bits(offset, 21) && (offset & 0b11 == 0) +} + +/// B.cond - branch to target if condition is true +pub fn bcond(cb: &mut CodeBlock, cond: u8, byte_offset: A64Opnd) { + let bytes: [u8; 4] = match byte_offset { + A64Opnd::Imm(imm) => { + assert!(bcond_offset_fits_bits(imm), "The immediate operand must be 21 bits or less and be aligned to a 2-bit boundary."); + + BranchCond::bcond(cond, (imm / 4) as i32).into() + }, + _ => panic!("Invalid operand combination to bcond instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// BL - branch with link (offset is number of instructions to jump) +pub fn bl(cb: &mut CodeBlock, imm26: A64Opnd) { + let bytes: [u8; 4] = match imm26 { + A64Opnd::Imm(imm26) => { + assert!(b_offset_fits_bits(imm26), "The immediate operand must be 26 bits or less."); + + Call::bl(imm26 as i32).into() + }, + _ => panic!("Invalid operand combination to bl instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// BLR - branch with link to a register +pub fn blr(cb: &mut CodeBlock, rn: A64Opnd) { + let bytes: [u8; 4] = match rn { + A64Opnd::Reg(rn) => Branch::blr(rn.reg_no).into(), + _ => panic!("Invalid operand to blr instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// BR - branch to a register +pub fn br(cb: &mut CodeBlock, rn: A64Opnd) { + let bytes: [u8; 4] = match rn { + A64Opnd::Reg(rn) => Branch::br(rn.reg_no).into(), + _ => panic!("Invalid operand to br instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// BRK - create a breakpoint +pub fn brk(cb: &mut CodeBlock, imm16: A64Opnd) { + let bytes: [u8; 4] = match imm16 { + A64Opnd::None => Breakpoint::brk(0).into(), + A64Opnd::UImm(imm16) => { + assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less."); + Breakpoint::brk(imm16 as u16).into() + }, + _ => 
panic!("Invalid operand combination to brk instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// CMP - compare rn and rm, update flags +pub fn cmp(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rn, rm) { + (A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rn.num_bits == rm.num_bits, + "All operands must be of the same size." + ); + + DataReg::cmp(rn.reg_no, rm.reg_no, rn.num_bits).into() + }, + (A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { + DataImm::cmp(rn.reg_no, imm12.try_into().unwrap(), rn.num_bits).into() + }, + _ => panic!("Invalid operand combination to cmp instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// CSEL - conditionally select between two registers +pub fn csel(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd, cond: u8) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." + ); + + Conditional::csel(rd.reg_no, rn.reg_no, rm.reg_no, cond, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to csel instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// EOR - perform a bitwise XOR of rn and rm, put the result in rd, don't update flags +pub fn eor(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." 
+ ); + + LogicalReg::eor(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + LogicalImm::eor(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to eor instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// LDADDAL - atomic add with acquire and release semantics +pub fn ldaddal(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rs, rt, rn) { + (A64Opnd::Reg(rs), A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => { + assert!( + rs.num_bits == rt.num_bits && rt.num_bits == rn.num_bits, + "All operands must be of the same size." + ); + + Atomic::ldaddal(rs.reg_no, rt.reg_no, rn.reg_no, rs.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldaddal instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// LDP (signed offset) - load a pair of registers from memory +pub fn ldp(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt1, rt2, rn) { + (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { + assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register"); + + RegisterPair::ldp(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldp instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// LDP (pre-index) - load a pair of registers from memory, update the base pointer before loading it +pub fn ldp_pre(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt1, rt2, rn) { + (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { + 
assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register"); + + RegisterPair::ldp_pre(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldp instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// LDP (post-index) - load a pair of registers from memory, update the base pointer after loading it +pub fn ldp_post(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt1, rt2, rn) { + (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { + assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register"); + + RegisterPair::ldp_post(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldp instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// LDR - load a memory address into a register with a register offset +pub fn ldr(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn, rm) { + (A64Opnd::Reg(rt), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); + assert!(rn.num_bits == rm.num_bits, "Expected registers to be the same size"); + + LoadRegister::ldr(rt.reg_no, rn.reg_no, rm.reg_no, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldr instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// LDR - load a PC-relative memory address into a register +pub fn ldr_literal(cb: &mut CodeBlock, rt: A64Opnd, rn: i32) { + 
let bytes: [u8; 4] = match rt { + A64Opnd::Reg(rt) => { + LoadLiteral::ldr_literal(rt.reg_no, rn, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldr instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// LDRH - load a halfword from memory +pub fn ldrh(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword"); + assert!(imm_fits_bits(rn.disp.into(), 12), "The displacement must be 12 bits or less."); + + HalfwordImm::ldrh(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to ldrh instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// LDRH (pre-index) - load a halfword from memory, update the base pointer before loading it +pub fn ldrh_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword"); + assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + + HalfwordImm::ldrh_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to ldrh instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// LDRH (post-index) - load a halfword from memory, update the base pointer after loading it +pub fn ldrh_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword"); + assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + + HalfwordImm::ldrh_post(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to ldrh instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// Whether or not a memory address displacement fits into the maximum 
number of +/// bits such that it can be used without loading it into a register first. +pub fn mem_disp_fits_bits(disp: i32) -> bool { + imm_fits_bits(disp.into(), 9) +} + +/// LDR (post-index) - load a register from memory, update the base pointer after loading it +pub fn ldr_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); + assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less."); + + LoadStore::ldr_post(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldr instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// LDR (pre-index) - load a register from memory, update the base pointer before loading it +pub fn ldr_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); + assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less."); + + LoadStore::ldr_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldr instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// LDUR - load a memory address into a register +pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => { + assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); + + LoadStore::ldur(rt.reg_no, rn.reg_no, 0, rt.num_bits).into() + }, + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); + assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less"); + + LoadStore::ldur(rt.reg_no, rn.base_reg_no, rn.disp as i16, 
rt.num_bits).into() + }, + _ => panic!("Invalid operands for LDUR") + }; + + cb.write_bytes(&bytes); +} + +/// LDURSW - load a 32-bit memory address into a register and sign-extend it +pub fn ldursw(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); + assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less"); + + LoadStore::ldursw(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to ldursw instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// LSL - logical shift left a register by an immediate +pub fn lsl(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, shift: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, shift) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm)) => { + assert!(rd.num_bits == rn.num_bits, "Expected registers to be the same size"); + assert!(uimm_fits_bits(uimm, 6), "Expected shift to be 6 bits or less"); + + ShiftImm::lsl(rd.reg_no, rn.reg_no, uimm as u8, rd.num_bits).into() + }, + _ => panic!("Invalid operands combination to lsl instruction") + }; + + cb.write_bytes(&bytes); +} + +/// LSR - logical shift right a register by an immediate +pub fn lsr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, shift: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, shift) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm)) => { + assert!(rd.num_bits == rn.num_bits, "Expected registers to be the same size"); + assert!(uimm_fits_bits(uimm, 6), "Expected shift to be 6 bits or less"); + + ShiftImm::lsr(rd.reg_no, rn.reg_no, uimm as u8, rd.num_bits).into() + }, + _ => panic!("Invalid operands combination to lsr instruction") + }; + + cb.write_bytes(&bytes); +} + +/// MOV - move a value in a register to another register +pub fn mov(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rm) 
{ + (A64Opnd::Reg(A64Reg { reg_no: 31, num_bits: 64 }), A64Opnd::Reg(rm)) => { + assert!(rm.num_bits == 64, "Expected rm to be 64 bits"); + + DataImm::add(31, rm.reg_no, 0.try_into().unwrap(), 64).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(A64Reg { reg_no: 31, num_bits: 64 })) => { + assert!(rd.num_bits == 64, "Expected rd to be 64 bits"); + + DataImm::add(rd.reg_no, 31, 0.try_into().unwrap(), 64).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rm)) => { + assert!(rd.num_bits == rm.num_bits, "Expected registers to be the same size"); + + LogicalReg::mov(rd.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::UImm(0)) => { + LogicalReg::mov(rd.reg_no, XZR_REG.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::UImm(imm)) => { + LogicalImm::mov(rd.reg_no, imm.try_into().unwrap(), rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to mov instruction") + }; + + cb.write_bytes(&bytes); +} + +/// MOVK - move a 16 bit immediate into a register, keep the other bits in place +pub fn movk(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { + let bytes: [u8; 4] = match (rd, imm16) { + (A64Opnd::Reg(rd), A64Opnd::UImm(imm16)) => { + assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less."); + + Mov::movk(rd.reg_no, imm16 as u16, shift, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to movk instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// MOVZ - move a 16 bit immediate into a register, zero the other bits +pub fn movz(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { + let bytes: [u8; 4] = match (rd, imm16) { + (A64Opnd::Reg(rd), A64Opnd::UImm(imm16)) => { + assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less."); + + Mov::movz(rd.reg_no, imm16 as u16, shift, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to movz instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// MRS - move a system register 
into a general-purpose register +pub fn mrs(cb: &mut CodeBlock, rt: A64Opnd, systemregister: SystemRegister) { + let bytes: [u8; 4] = match rt { + A64Opnd::Reg(rt) => { + SysReg::mrs(rt.reg_no, systemregister).into() + }, + _ => panic!("Invalid operand combination to mrs instruction") + }; + + cb.write_bytes(&bytes); +} + +/// MSR - move a general-purpose register into a system register +pub fn msr(cb: &mut CodeBlock, systemregister: SystemRegister, rt: A64Opnd) { + let bytes: [u8; 4] = match rt { + A64Opnd::Reg(rt) => { + SysReg::msr(systemregister, rt.reg_no).into() + }, + _ => panic!("Invalid operand combination to msr instruction") + }; + + cb.write_bytes(&bytes); +} + +/// MVN - move a value in a register to another register, negating it +pub fn mvn(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rm)) => { + assert!(rd.num_bits == rm.num_bits, "Expected registers to be the same size"); + + LogicalReg::mvn(rd.reg_no, rm.reg_no, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to mvn instruction") + }; + + cb.write_bytes(&bytes); +} + +/// NOP - no-operation, used for alignment purposes +pub fn nop(cb: &mut CodeBlock) { + let bytes: [u8; 4] = Nop::nop().into(); + + cb.write_bytes(&bytes); +} + +/// ORN - perform a bitwise OR of rn and NOT rm, put the result in rd, don't update flags +pub fn orn(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!(rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, "Expected registers to be the same size"); + + LogicalReg::orn(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to orn instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// ORR - perform a bitwise OR of rn and rm, put the result in rd, don't update flags +pub fn orr(cb: &mut CodeBlock, rd: 
A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." + ); + + LogicalReg::orr(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + LogicalImm::orr(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to orr instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// STP (signed offset) - store a pair of registers to memory +pub fn stp(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt1, rt2, rn) { + (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { + assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register"); + + RegisterPair::stp(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() + }, + _ => panic!("Invalid operand combination to stp instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// STP (pre-index) - store a pair of registers to memory, update the base pointer before loading it +pub fn stp_pre(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt1, rt2, rn) { + (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { + assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register"); + + 
RegisterPair::stp_pre(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() + }, + _ => panic!("Invalid operand combination to stp instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// STP (post-index) - store a pair of registers to memory, update the base pointer after loading it +pub fn stp_post(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt1, rt2, rn) { + (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { + assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register"); + + RegisterPair::stp_post(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() + }, + _ => panic!("Invalid operand combination to stp instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// STR (post-index) - store a register to memory, update the base pointer after loading it +pub fn str_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); + assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less."); + + LoadStore::str_post(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to str instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// STR (pre-index) - store a register to memory, update the base pointer before loading it +pub fn str_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); + assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less."); 
+ + LoadStore::str_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to str instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// STRH - store a halfword into memory +pub fn strh(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword"); + assert!(imm_fits_bits(rn.disp.into(), 12), "The displacement must be 12 bits or less."); + + HalfwordImm::strh(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to strh instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// STRH (pre-index) - store a halfword into memory, update the base pointer before loading it +pub fn strh_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword"); + assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + + HalfwordImm::strh_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to strh instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// STRH (post-index) - store a halfword into memory, update the base pointer after loading it +pub fn strh_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword"); + assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + + HalfwordImm::strh_post(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to strh instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// STUR - store a value in a register at a memory address +pub fn stur(cb: &mut CodeBlock, rt: 
A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); + assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less"); + + LoadStore::stur(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to stur instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// SUB - subtract rm from rn, put the result in rd, don't update flags +pub fn sub(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." + ); + + DataReg::sub(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + DataImm::sub(rd.reg_no, rn.reg_no, uimm12.try_into().unwrap(), rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + if imm12 < 0 { + DataImm::add(rd.reg_no, rn.reg_no, (-imm12 as u64).try_into().unwrap(), rd.num_bits).into() + } else { + DataImm::sub(rd.reg_no, rn.reg_no, (imm12 as u64).try_into().unwrap(), rd.num_bits).into() + } + }, + _ => panic!("Invalid operand combination to sub instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// SUBS - subtract rm from rn, put the result in rd, update flags +pub fn subs(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." 
+ ); + + DataReg::subs(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + DataImm::subs(rd.reg_no, rn.reg_no, uimm12.try_into().unwrap(), rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + if imm12 < 0 { + DataImm::adds(rd.reg_no, rn.reg_no, (-imm12 as u64).try_into().unwrap(), rd.num_bits).into() + } else { + DataImm::subs(rd.reg_no, rn.reg_no, (imm12 as u64).try_into().unwrap(), rd.num_bits).into() + } + }, + _ => panic!("Invalid operand combination to subs instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// SXTW - sign extend a 32-bit register into a 64-bit register +pub fn sxtw(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn)) => { + assert_eq!(rd.num_bits, 64, "rd must be 64-bits wide."); + assert_eq!(rn.num_bits, 32, "rn must be 32-bits wide."); + + SBFM::sxtw(rd.reg_no, rn.reg_no).into() + }, + _ => panic!("Invalid operand combination to sxtw instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// RET - unconditionally return to a location in a register, defaults to X30 +pub fn ret(cb: &mut CodeBlock, rn: A64Opnd) { + let bytes: [u8; 4] = match rn { + A64Opnd::None => Branch::ret(30).into(), + A64Opnd::Reg(reg) => Branch::ret(reg.reg_no).into(), + _ => panic!("Invalid operand to ret instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// TBNZ - test bit and branch if not zero +pub fn tbnz(cb: &mut CodeBlock, rt: A64Opnd, bit_num: A64Opnd, offset: A64Opnd) { + let bytes: [u8; 4] = match (rt, bit_num, offset) { + (A64Opnd::Reg(rt), A64Opnd::UImm(bit_num), A64Opnd::Imm(offset)) => { + TestBit::tbnz(rt.reg_no, bit_num.try_into().unwrap(), offset.try_into().unwrap()).into() + }, + _ => panic!("Invalid 
operand combination to tbnz instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// TBZ - test bit and branch if zero +pub fn tbz(cb: &mut CodeBlock, rt: A64Opnd, bit_num: A64Opnd, offset: A64Opnd) { + let bytes: [u8; 4] = match (rt, bit_num, offset) { + (A64Opnd::Reg(rt), A64Opnd::UImm(bit_num), A64Opnd::Imm(offset)) => { + TestBit::tbz(rt.reg_no, bit_num.try_into().unwrap(), offset.try_into().unwrap()).into() + }, + _ => panic!("Invalid operand combination to tbz instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// TST - test the bits of a register against a mask, then update flags +pub fn tst(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rn, rm) { + (A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!(rn.num_bits == rm.num_bits, "All operands must be of the same size."); + + LogicalReg::tst(rn.reg_no, rm.reg_no, rn.num_bits).into() + }, + (A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { + LogicalImm::tst(rn.reg_no, imm.try_into().unwrap(), rn.num_bits).into() + }, + _ => panic!("Invalid operand combination to tst instruction."), + }; + + cb.write_bytes(&bytes); +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Check that the bytes for an instruction sequence match a hex string + fn check_bytes(bytes: &str, run: R) where R: FnOnce(&mut super::CodeBlock) { + let mut cb = super::CodeBlock::new_dummy(128); + run(&mut cb); + assert_eq!(format!("{:x}", cb), bytes); + } + + #[test] + fn test_imm_fits_bits() { + assert!(imm_fits_bits(i8::MAX.into(), 8)); + assert!(imm_fits_bits(i8::MIN.into(), 8)); + + assert!(imm_fits_bits(i16::MAX.into(), 16)); + assert!(imm_fits_bits(i16::MIN.into(), 16)); + + assert!(imm_fits_bits(i32::MAX.into(), 32)); + assert!(imm_fits_bits(i32::MIN.into(), 32)); + + assert!(imm_fits_bits(i64::MAX.into(), 64)); + assert!(imm_fits_bits(i64::MIN.into(), 64)); + } + + #[test] + fn test_uimm_fits_bits() { + assert!(uimm_fits_bits(u8::MAX.into(), 8)); + assert!(uimm_fits_bits(u16::MAX.into(), 16)); + 
assert!(uimm_fits_bits(u32::MAX.into(), 32)); + assert!(uimm_fits_bits(u64::MAX.into(), 64)); + } + + #[test] + fn test_add_reg() { + check_bytes("2000028b", |cb| add(cb, X0, X1, X2)); + } + + #[test] + fn test_add_uimm() { + check_bytes("201c0091", |cb| add(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_add_imm_positive() { + check_bytes("201c0091", |cb| add(cb, X0, X1, A64Opnd::new_imm(7))); + } + + #[test] + fn test_add_imm_negative() { + check_bytes("201c00d1", |cb| add(cb, X0, X1, A64Opnd::new_imm(-7))); + } + + #[test] + fn test_adds_reg() { + check_bytes("200002ab", |cb| adds(cb, X0, X1, X2)); + } + + #[test] + fn test_adds_uimm() { + check_bytes("201c00b1", |cb| adds(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_adds_imm_positive() { + check_bytes("201c00b1", |cb| adds(cb, X0, X1, A64Opnd::new_imm(7))); + } + + #[test] + fn test_adds_imm_negatve() { + check_bytes("201c00f1", |cb| adds(cb, X0, X1, A64Opnd::new_imm(-7))); + } + + #[test] + fn test_adr() { + check_bytes("aa000010", |cb| adr(cb, X10, A64Opnd::new_imm(20))); + } + + #[test] + fn test_adrp() { + check_bytes("4a000090", |cb| adrp(cb, X10, A64Opnd::new_imm(0x8000))); + } + + #[test] + fn test_and_register() { + check_bytes("2000028a", |cb| and(cb, X0, X1, X2)); + } + + #[test] + fn test_and_immediate() { + check_bytes("20084092", |cb| and(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_ands_register() { + check_bytes("200002ea", |cb| ands(cb, X0, X1, X2)); + } + + #[test] + fn test_ands_immediate() { + check_bytes("200840f2", |cb| ands(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_asr() { + check_bytes("b4fe4a93", |cb| asr(cb, X20, X21, A64Opnd::new_uimm(10))); + } + + #[test] + fn test_bcond() { + check_bytes("01200054", |cb| bcond(cb, Condition::NE, A64Opnd::new_imm(0x400))); + } + + #[test] + fn test_b() { + check_bytes("ffffff15", |cb| b(cb, A64Opnd::new_imm((1 << 25) - 1))); + } + + #[test] + #[should_panic] + fn test_b_too_big() { + 
// There are 26 bits available + check_bytes("", |cb| b(cb, A64Opnd::new_imm(1 << 25))); + } + + #[test] + #[should_panic] + fn test_b_too_small() { + // There are 26 bits available + check_bytes("", |cb| b(cb, A64Opnd::new_imm(-(1 << 25) - 1))); + } + + #[test] + fn test_bl() { + check_bytes("00000096", |cb| bl(cb, A64Opnd::new_imm(-(1 << 25)))); + } + + #[test] + #[should_panic] + fn test_bl_too_big() { + // There are 26 bits available + check_bytes("", |cb| bl(cb, A64Opnd::new_imm(1 << 25))); + } + + #[test] + #[should_panic] + fn test_bl_too_small() { + // There are 26 bits available + check_bytes("", |cb| bl(cb, A64Opnd::new_imm(-(1 << 25) - 1))); + } + + #[test] + fn test_blr() { + check_bytes("80023fd6", |cb| blr(cb, X20)); + } + + #[test] + fn test_br() { + check_bytes("80021fd6", |cb| br(cb, X20)); + } + + #[test] + fn test_brk_none() { + check_bytes("000020d4", |cb| brk(cb, A64Opnd::None)); + } + + #[test] + fn test_brk_uimm() { + check_bytes("c00120d4", |cb| brk(cb, A64Opnd::new_uimm(14))); + } + + #[test] + fn test_cmp_register() { + check_bytes("5f010beb", |cb| cmp(cb, X10, X11)); + } + + #[test] + fn test_cmp_immediate() { + check_bytes("5f3900f1", |cb| cmp(cb, X10, A64Opnd::new_uimm(14))); + } + + #[test] + fn test_csel() { + check_bytes("6a018c9a", |cb| csel(cb, X10, X11, X12, Condition::EQ)); + } + + #[test] + fn test_eor_register() { + check_bytes("6a010cca", |cb| eor(cb, X10, X11, X12)); + } + + #[test] + fn test_eor_immediate() { + check_bytes("6a0940d2", |cb| eor(cb, X10, X11, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_ldaddal() { + check_bytes("8b01eaf8", |cb| ldaddal(cb, X10, X11, X12)); + } + + #[test] + fn test_ldp() { + check_bytes("8a2d4da9", |cb| ldp(cb, X10, X11, A64Opnd::new_mem(64, X12, 208))); + } + + #[test] + fn test_ldp_pre() { + check_bytes("8a2dcda9", |cb| ldp_pre(cb, X10, X11, A64Opnd::new_mem(64, X12, 208))); + } + + #[test] + fn test_ldp_post() { + check_bytes("8a2dcda8", |cb| ldp_post(cb, X10, X11, 
A64Opnd::new_mem(64, X12, 208))); + } + + #[test] + fn test_ldr() { + check_bytes("6a696cf8", |cb| ldr(cb, X10, X11, X12)); + } + + #[test] + fn test_ldr_literal() { + check_bytes("40010058", |cb| ldr_literal(cb, X0, 10)); + } + + #[test] + fn test_ldr_post() { + check_bytes("6a0541f8", |cb| ldr_post(cb, X10, A64Opnd::new_mem(64, X11, 16))); + } + + #[test] + fn test_ldr_pre() { + check_bytes("6a0d41f8", |cb| ldr_pre(cb, X10, A64Opnd::new_mem(64, X11, 16))); + } + + #[test] + fn test_ldrh() { + check_bytes("6a194079", |cb| ldrh(cb, W10, A64Opnd::new_mem(64, X11, 12))); + } + + #[test] + fn test_ldrh_pre() { + check_bytes("6acd4078", |cb| ldrh_pre(cb, W10, A64Opnd::new_mem(64, X11, 12))); + } + + #[test] + fn test_ldrh_post() { + check_bytes("6ac54078", |cb| ldrh_post(cb, W10, A64Opnd::new_mem(64, X11, 12))); + } + + #[test] + fn test_ldur_memory() { + check_bytes("20b047f8", |cb| ldur(cb, X0, A64Opnd::new_mem(64, X1, 123))); + } + + #[test] + fn test_ldur_register() { + check_bytes("200040f8", |cb| ldur(cb, X0, X1)); + } + + #[test] + fn test_ldursw() { + check_bytes("6ab187b8", |cb| ldursw(cb, X10, A64Opnd::new_mem(64, X11, 123))); + } + + #[test] + fn test_lsl() { + check_bytes("6ac572d3", |cb| lsl(cb, X10, X11, A64Opnd::new_uimm(14))); + } + + #[test] + fn test_lsr() { + check_bytes("6afd4ed3", |cb| lsr(cb, X10, X11, A64Opnd::new_uimm(14))); + } + + #[test] + fn test_mov_registers() { + check_bytes("ea030baa", |cb| mov(cb, X10, X11)); + } + + #[test] + fn test_mov_immediate() { + check_bytes("eaf300b2", |cb| mov(cb, X10, A64Opnd::new_uimm(0x5555555555555555))); + } + + #[test] + fn test_mov_into_sp() { + check_bytes("1f000091", |cb| mov(cb, X31, X0)); + } + + #[test] + fn test_mov_from_sp() { + check_bytes("e0030091", |cb| mov(cb, X0, X31)); + } + + #[test] + fn test_movk() { + check_bytes("600fa0f2", |cb| movk(cb, X0, A64Opnd::new_uimm(123), 16)); + } + + #[test] + fn test_movz() { + check_bytes("600fa0d2", |cb| movz(cb, X0, A64Opnd::new_uimm(123), 16)); + } + 
+ #[test] + fn test_mrs() { + check_bytes("0a423bd5", |cb| mrs(cb, X10, SystemRegister::NZCV)); + } + + #[test] + fn test_msr() { + check_bytes("0a421bd5", |cb| msr(cb, SystemRegister::NZCV, X10)); + } + + #[test] + fn test_mvn() { + check_bytes("ea032baa", |cb| mvn(cb, X10, X11)); + } + + #[test] + fn test_nop() { + check_bytes("1f2003d5", |cb| nop(cb)); + } + + #[test] + fn test_orn() { + check_bytes("6a012caa", |cb| orn(cb, X10, X11, X12)); + } + + #[test] + fn test_orr_register() { + check_bytes("6a010caa", |cb| orr(cb, X10, X11, X12)); + } + + #[test] + fn test_orr_immediate() { + check_bytes("6a0940b2", |cb| orr(cb, X10, X11, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_ret_none() { + check_bytes("c0035fd6", |cb| ret(cb, A64Opnd::None)); + } + + #[test] + fn test_ret_register() { + check_bytes("80025fd6", |cb| ret(cb, X20)); + } + + #[test] + fn test_stp() { + check_bytes("8a2d0da9", |cb| stp(cb, X10, X11, A64Opnd::new_mem(64, X12, 208))); + } + + #[test] + fn test_stp_pre() { + check_bytes("8a2d8da9", |cb| stp_pre(cb, X10, X11, A64Opnd::new_mem(64, X12, 208))); + } + + #[test] + fn test_stp_post() { + check_bytes("8a2d8da8", |cb| stp_post(cb, X10, X11, A64Opnd::new_mem(64, X12, 208))); + } + + #[test] + fn test_str_post() { + check_bytes("6a051ff8", |cb| str_post(cb, X10, A64Opnd::new_mem(64, X11, -16))); + } + + #[test] + fn test_str_pre() { + check_bytes("6a0d1ff8", |cb| str_pre(cb, X10, A64Opnd::new_mem(64, X11, -16))); + } + + #[test] + fn test_strh() { + check_bytes("6a190079", |cb| strh(cb, W10, A64Opnd::new_mem(64, X11, 12))); + } + + #[test] + fn test_strh_pre() { + check_bytes("6acd0078", |cb| strh_pre(cb, W10, A64Opnd::new_mem(64, X11, 12))); + } + + #[test] + fn test_strh_post() { + check_bytes("6ac50078", |cb| strh_post(cb, W10, A64Opnd::new_mem(64, X11, 12))); + } + + #[test] + fn test_stur() { + check_bytes("6a0108f8", |cb| stur(cb, X10, A64Opnd::new_mem(64, X11, 128))); + } + + #[test] + fn test_sub_reg() { + check_bytes("200002cb", |cb| 
sub(cb, X0, X1, X2)); + } + + #[test] + fn test_sub_uimm() { + check_bytes("201c00d1", |cb| sub(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_sub_imm_positive() { + check_bytes("201c00d1", |cb| sub(cb, X0, X1, A64Opnd::new_imm(7))); + } + + #[test] + fn test_sub_imm_negative() { + check_bytes("201c0091", |cb| sub(cb, X0, X1, A64Opnd::new_imm(-7))); + } + + #[test] + fn test_subs_reg() { + check_bytes("200002eb", |cb| subs(cb, X0, X1, X2)); + } + + #[test] + fn test_subs_imm_positive() { + check_bytes("201c00f1", |cb| subs(cb, X0, X1, A64Opnd::new_imm(7))); + } + + #[test] + fn test_subs_imm_negative() { + check_bytes("201c00b1", |cb| subs(cb, X0, X1, A64Opnd::new_imm(-7))); + } + + #[test] + fn test_subs_uimm() { + check_bytes("201c00f1", |cb| subs(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_sxtw() { + check_bytes("6a7d4093", |cb| sxtw(cb, X10, W11)); + } + + #[test] + fn test_tbnz() { + check_bytes("4a005037", |cb| tbnz(cb, X10, A64Opnd::UImm(10), A64Opnd::Imm(2))); + } + + #[test] + fn test_tbz() { + check_bytes("4a005036", |cb| tbz(cb, X10, A64Opnd::UImm(10), A64Opnd::Imm(2))); + } + + #[test] + fn test_tst_register() { + check_bytes("1f0001ea", |cb| tst(cb, X0, X1)); + } + + #[test] + fn test_tst_immediate() { + check_bytes("3f0840f2", |cb| tst(cb, X1, A64Opnd::new_uimm(7))); + } +} diff --git a/yjit/src/asm/arm64/opnd.rs b/yjit/src/asm/arm64/opnd.rs new file mode 100644 index 00000000000000..c89481fb03db2f --- /dev/null +++ b/yjit/src/asm/arm64/opnd.rs @@ -0,0 +1,187 @@ +use crate::asm::{imm_num_bits, uimm_num_bits}; + +/// This operand represents a register. 
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct A64Reg
{
    /// Size in bits
    pub num_bits: u8,

    /// Register index number
    pub reg_no: u8,
}

impl A64Reg {
    /// Return the register with the same index viewed at `num_bits` wide.
    ///
    /// # Panics
    ///
    /// Panics if `num_bits` is neither 32 nor 64, or if it is wider than
    /// this register.
    pub fn sub_reg(&self, num_bits: u8) -> Self {
        assert!(num_bits == 32 || num_bits == 64);
        assert!(num_bits <= self.num_bits);

        Self { num_bits, reg_no: self.reg_no }
    }
}

/// This operand represents a memory location: a base register plus a
/// constant displacement.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct A64Mem
{
    /// Size in bits
    pub num_bits: u8,

    /// Base register number
    pub base_reg_no: u8,

    /// Constant displacement from the base, not scaled
    pub disp: i32,
}

impl A64Mem {
    /// Build a memory operand from a base register operand and a displacement.
    ///
    /// Only the register number of `reg` is kept; the operand width comes
    /// from `num_bits`.
    ///
    /// # Panics
    ///
    /// Panics if `reg` is not an `A64Opnd::Reg`.
    pub fn new(num_bits: u8, reg: A64Opnd, disp: i32) -> Self {
        match reg {
            A64Opnd::Reg(base) => Self { num_bits, base_reg_no: base.reg_no, disp },
            _ => panic!("Expected register operand")
        }
    }
}

/// An operand accepted by the arm64 instruction emitters.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum A64Opnd
{
    /// Dummy operand
    None,

    /// Immediate value
    Imm(i64),

    /// Unsigned immediate
    UImm(u64),

    /// Register
    Reg(A64Reg),

    /// Memory
    Mem(A64Mem)
}

impl A64Opnd {
    /// Create a new immediate value operand.
    pub fn new_imm(value: i64) -> Self {
        A64Opnd::Imm(value)
    }

    /// Create a new unsigned immediate value operand.
    pub fn new_uimm(value: u64) -> Self {
        A64Opnd::UImm(value)
    }

    /// Creates a new memory operand.
    ///
    /// # Panics
    ///
    /// Panics if `reg` is not an `A64Opnd::Reg` (see `A64Mem::new`).
    pub fn new_mem(num_bits: u8, reg: A64Opnd, disp: i32) -> Self {
        A64Opnd::Mem(A64Mem::new(num_bits, reg, disp))
    }

    /// Convenience function to check if this operand is a register.
    pub fn is_reg(&self) -> bool {
        matches!(self, A64Opnd::Reg(_))
    }
}

// argument registers
pub const X0_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 0 };
pub const X1_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 1 };
pub const X2_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 2 };
pub const X3_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 3 };
pub const X4_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 4 };
pub const X5_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 5 };

// caller-save registers
pub const X9_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 9 };
pub const X10_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 10 };
pub const X11_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 11 };
pub const X12_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 12 };
pub const X13_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 13 };
pub const X14_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 14 };
pub const X15_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 15 };

// callee-save registers
pub const X19_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 19 };
pub const X20_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 20 };
pub const X21_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 21 };
pub const X22_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 22 };

// zero register
// NOTE(review): AArch64 encodes register number 31 as either XZR or SP
// depending on the instruction; confirm each user gets the meaning it expects.
pub const XZR_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 31 };

// 64-bit registers
pub const X0: A64Opnd = A64Opnd::Reg(X0_REG);
pub const X1: A64Opnd = A64Opnd::Reg(X1_REG);
pub const X2: A64Opnd = A64Opnd::Reg(X2_REG);
pub const X3: A64Opnd = A64Opnd::Reg(X3_REG);
pub const X4: A64Opnd = A64Opnd::Reg(X4_REG);
pub const X5: A64Opnd = A64Opnd::Reg(X5_REG);
pub const X6: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 6 });
pub const X7: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 7 });
pub const X8: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 8 });
pub const X9: A64Opnd = A64Opnd::Reg(X9_REG);
pub const X10: A64Opnd = A64Opnd::Reg(X10_REG);
+pub const X11: A64Opnd = A64Opnd::Reg(X11_REG); +pub const X12: A64Opnd = A64Opnd::Reg(X12_REG); +pub const X13: A64Opnd = A64Opnd::Reg(X13_REG); +pub const X14: A64Opnd = A64Opnd::Reg(X14_REG); +pub const X15: A64Opnd = A64Opnd::Reg(X15_REG); +pub const X16: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 16 }); +pub const X17: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 17 }); +pub const X18: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 18 }); +pub const X19: A64Opnd = A64Opnd::Reg(X19_REG); +pub const X20: A64Opnd = A64Opnd::Reg(X20_REG); +pub const X21: A64Opnd = A64Opnd::Reg(X21_REG); +pub const X22: A64Opnd = A64Opnd::Reg(X22_REG); +pub const X23: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 23 }); +pub const X24: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 24 }); +pub const X25: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 25 }); +pub const X26: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 26 }); +pub const X27: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 27 }); +pub const X28: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 28 }); +pub const X29: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 29 }); +pub const X30: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 30 }); +pub const X31: A64Opnd = A64Opnd::Reg(XZR_REG); + +// 32-bit registers +pub const W0: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 0 }); +pub const W1: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 1 }); +pub const W2: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 2 }); +pub const W3: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 3 }); +pub const W4: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 4 }); +pub const W5: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 5 }); +pub const W6: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 6 }); +pub const W7: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 7 }); +pub const W8: A64Opnd = 
A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 8 }); +pub const W9: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 9 }); +pub const W10: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 10 }); +pub const W11: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 11 }); +pub const W12: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 12 }); +pub const W13: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 13 }); +pub const W14: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 14 }); +pub const W15: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 15 }); +pub const W16: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 16 }); +pub const W17: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 17 }); +pub const W18: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 18 }); +pub const W19: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 19 }); +pub const W20: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 20 }); +pub const W21: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 21 }); +pub const W22: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 22 }); +pub const W23: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 23 }); +pub const W24: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 24 }); +pub const W25: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 25 }); +pub const W26: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 26 }); +pub const W27: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 27 }); +pub const W28: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 28 }); +pub const W29: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 29 }); +pub const W30: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 30 }); +pub const W31: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 31 }); + +// C argument registers +pub const C_ARG_REGS: [A64Opnd; 4] = [X0, X1, X2, X3]; +pub const C_ARG_REGREGS: [A64Reg; 4] = [X0_REG, X1_REG, X2_REG, X3_REG]; diff --git 
a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs index e16e8569251ad9..4029e2ca6745b5 100644 --- a/yjit/src/asm/mod.rs +++ b/yjit/src/asm/mod.rs @@ -1,3 +1,4 @@ +use std::fmt; use std::mem; #[cfg(feature = "asm_comments")] @@ -9,6 +10,8 @@ use crate::virtualmem::{VirtualMem, CodePtr}; #[rustfmt::skip] pub mod x86_64; +pub mod arm64; + // // TODO: need a field_size_of macro, to compute the size of a struct field in bytes // @@ -20,6 +23,14 @@ struct LabelRef { // Label which this refers to label_idx: usize, + + /// The number of bytes that this label reference takes up in the memory. + /// It's necessary to know this ahead of time so that when we come back to + /// patch it it takes the same amount of space. + num_bytes: usize, + + /// The object that knows how to encode the branch instruction. + encode: fn(&mut CodeBlock, i64, i64) } /// Block of memory into which instructions can be assembled @@ -46,6 +57,10 @@ pub struct CodeBlock { #[cfg(feature = "asm_comments")] asm_comments: BTreeMap>, + // True for OutlinedCb + #[cfg(feature = "disasm")] + pub outlined: bool, + // Set if the CodeBlock is unable to output some instructions, // for example, when there is not enough space or when a jump // target is too far away. @@ -54,7 +69,7 @@ pub struct CodeBlock { impl CodeBlock { /// Make a new CodeBlock - pub fn new(mem_block: VirtualMem) -> Self { + pub fn new(mem_block: VirtualMem, outlined: bool) -> Self { Self { mem_size: mem_block.virtual_region_size(), mem_block, @@ -64,6 +79,8 @@ impl CodeBlock { label_refs: Vec::new(), #[cfg(feature = "asm_comments")] asm_comments: BTreeMap::new(), + #[cfg(feature = "disasm")] + outlined, dropped_bytes: false, } } @@ -110,10 +127,10 @@ impl CodeBlock { // Set the current write position pub fn set_pos(&mut self, pos: usize) { - // Assert here since while CodeBlock functions do bounds checking, there is - // nothing stopping users from taking out an out-of-bounds pointer and - // doing bad accesses with it. 
- assert!(pos < self.mem_size); + // No bounds check here since we can be out of bounds + // when the code block fills up. We want to be able to + // restore to the filled up state after patching something + // in the middle. self.write_pos = pos; } @@ -141,17 +158,17 @@ impl CodeBlock { self.set_pos(pos); } - // Get a direct pointer into the executable memory block + /// Get a (possibly dangling) direct pointer into the executable memory block pub fn get_ptr(&self, offset: usize) -> CodePtr { self.mem_block.start_ptr().add_bytes(offset) } - // Get a direct pointer to the current write position + /// Get a (possibly dangling) direct pointer to the current write position pub fn get_write_ptr(&mut self) -> CodePtr { self.get_ptr(self.write_pos) } - // Write a single byte at the current position + /// Write a single byte at the current position. pub fn write_byte(&mut self, byte: u8) { let write_ptr = self.get_write_ptr(); @@ -162,15 +179,15 @@ impl CodeBlock { } } - // Write multiple bytes starting from the current position + /// Write multiple bytes starting from the current position. pub fn write_bytes(&mut self, bytes: &[u8]) { for byte in bytes { self.write_byte(*byte); } } - // Write a signed integer over a given number of bits at the current position - pub fn write_int(&mut self, val: u64, num_bits: u32) { + /// Write an integer over the given number of bits at the current position. 
+ fn write_int(&mut self, val: u64, num_bits: u32) { assert!(num_bits > 0); assert!(num_bits % 8 == 0); @@ -212,22 +229,18 @@ impl CodeBlock { /// Write a label at the current address pub fn write_label(&mut self, label_idx: usize) { - // TODO: make sure that label_idx is valid - // TODO: add an asseer here - self.label_addrs[label_idx] = self.write_pos; } // Add a label reference at the current write position - pub fn label_ref(&mut self, label_idx: usize) { - // TODO: make sure that label_idx is valid - // TODO: add an asseer here + pub fn label_ref(&mut self, label_idx: usize, num_bytes: usize, encode: fn(&mut CodeBlock, i64, i64)) { + assert!(label_idx < self.label_addrs.len()); // Keep track of the reference - self.label_refs.push(LabelRef { - pos: self.write_pos, - label_idx, - }); + self.label_refs.push(LabelRef { pos: self.write_pos, label_idx, num_bytes, encode }); + + // Move past however many bytes the instruction takes up + self.write_pos += num_bytes; } // Link internal label references @@ -243,11 +256,12 @@ impl CodeBlock { let label_addr = self.label_addrs[label_idx]; assert!(label_addr < self.mem_size); - // Compute the offset from the reference's end to the label - let offset = (label_addr as i64) - ((ref_pos + 4) as i64); - self.set_pos(ref_pos); - self.write_int(offset as u64, 32); + (label_ref.encode)(self, (ref_pos + label_ref.num_bytes) as i64, label_addr as i64); + + // Assert that we've written the same number of bytes that we + // expected to have written. 
+ assert!(self.write_pos == ref_pos + label_ref.num_bytes); } self.write_pos = orig_pos; @@ -274,7 +288,18 @@ impl CodeBlock { let mem_start: *const u8 = alloc.mem_start(); let virt_mem = VirtualMem::new(alloc, 1, mem_start as *mut u8, mem_size); - Self::new(virt_mem) + Self::new(virt_mem, false) + } +} + +/// Produce hex string output from the bytes in a code block +impl<'a> fmt::LowerHex for CodeBlock { + fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result { + for pos in 0..self.write_pos { + let byte = unsafe { self.mem_block.start_ptr().raw_ptr().add(pos).read() }; + fmtr.write_fmt(format_args!("{:02x}", byte))?; + } + Ok(()) } } @@ -294,3 +319,74 @@ impl OutlinedCb { &mut self.cb } } + +/// Compute the number of bits needed to encode a signed value +pub fn imm_num_bits(imm: i64) -> u8 +{ + // Compute the smallest size this immediate fits in + if imm >= i8::MIN.into() && imm <= i8::MAX.into() { + return 8; + } + if imm >= i16::MIN.into() && imm <= i16::MAX.into() { + return 16; + } + if imm >= i32::MIN.into() && imm <= i32::MAX.into() { + return 32; + } + + return 64; +} + +/// Compute the number of bits needed to encode an unsigned value +pub fn uimm_num_bits(uimm: u64) -> u8 +{ + // Compute the smallest size this immediate fits in + if uimm <= u8::MAX.into() { + return 8; + } + else if uimm <= u16::MAX.into() { + return 16; + } + else if uimm <= u32::MAX.into() { + return 32; + } + + return 64; +} + +#[cfg(test)] +mod tests +{ + use super::*; + + #[test] + fn test_imm_num_bits() + { + assert_eq!(imm_num_bits(i8::MIN.into()), 8); + assert_eq!(imm_num_bits(i8::MAX.into()), 8); + + assert_eq!(imm_num_bits(i16::MIN.into()), 16); + assert_eq!(imm_num_bits(i16::MAX.into()), 16); + + assert_eq!(imm_num_bits(i32::MIN.into()), 32); + assert_eq!(imm_num_bits(i32::MAX.into()), 32); + + assert_eq!(imm_num_bits(i64::MIN.into()), 64); + assert_eq!(imm_num_bits(i64::MAX.into()), 64); + } + + #[test] + fn test_uimm_num_bits() { + assert_eq!(uimm_num_bits(u8::MIN.into()), 
8); + assert_eq!(uimm_num_bits(u8::MAX.into()), 8); + + assert_eq!(uimm_num_bits(((u8::MAX as u16) + 1).into()), 16); + assert_eq!(uimm_num_bits(u16::MAX.into()), 16); + + assert_eq!(uimm_num_bits(((u16::MAX as u32) + 1).into()), 32); + assert_eq!(uimm_num_bits(u32::MAX.into()), 32); + + assert_eq!(uimm_num_bits(((u32::MAX as u64) + 1).into()), 64); + assert_eq!(uimm_num_bits(u64::MAX.into()), 64); + } +} diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs index 6eb7efaa0a0759..3f865b82a52fe4 100644 --- a/yjit/src/asm/x86_64/mod.rs +++ b/yjit/src/asm/x86_64/mod.rs @@ -5,24 +5,24 @@ use crate::asm::*; // Import the assembler tests module mod tests; -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct X86Imm { // Size in bits - num_bits: u8, + pub num_bits: u8, // The value of the immediate - value: i64 + pub value: i64 } -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct X86UImm { // Size in bits - num_bits: u8, + pub num_bits: u8, // The value of the immediate - value: u64 + pub value: u64 } #[derive(Clone, Copy, Debug, Eq, PartialEq)] @@ -34,36 +34,36 @@ pub enum RegType IP, } -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct X86Reg { // Size in bits - num_bits: u8, + pub num_bits: u8, // Register type - reg_type: RegType, + pub reg_type: RegType, // Register index number - reg_no: u8, + pub reg_no: u8, } #[derive(Clone, Copy, Debug)] pub struct X86Mem { // Size in bits - num_bits: u8, + pub num_bits: u8, /// Base register number - base_reg_no: u8, + pub base_reg_no: u8, /// Index register number - idx_reg_no: Option, + pub idx_reg_no: Option, /// SIB scale exponent value (power of two, two bits) - scale_exp: u8, + pub scale_exp: u8, /// Constant displacement from the base, not scaled - disp: i32, + pub disp: i32, } #[derive(Clone, Copy, Debug)] @@ -88,6 +88,25 @@ pub enum X86Opnd IPRel(i32) } +impl X86Reg { + pub fn sub_reg(&self, 
num_bits: u8) -> Self { + assert!( + num_bits == 8 || + num_bits == 16 || + num_bits == 32 || + num_bits == 64 + ); + + assert!(num_bits <= self.num_bits); + + Self { + num_bits, + reg_type: self.reg_type, + reg_no: self.reg_no + } + } +} + impl X86Opnd { fn rex_needed(&self) -> bool { match self { @@ -118,7 +137,7 @@ impl X86Opnd { X86Opnd::Mem(mem) => { if mem.disp != 0 { // Compute the required displacement size - let num_bits = sig_imm_size(mem.disp.into()); + let num_bits = imm_num_bits(mem.disp.into()); if num_bits > 32 { panic!("displacement does not fit in 32 bits"); } @@ -157,22 +176,39 @@ const RBP_REG_NO: u8 = 5; const R12_REG_NO: u8 = 12; const R13_REG_NO: u8 = 13; -pub const RAX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RAX_REG_NO }); -pub const RCX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 1 }); -pub const RDX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 2 }); -pub const RBX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 3 }); -pub const RSP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RSP_REG_NO }); -pub const RBP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RBP_REG_NO }); -pub const RSI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 6 }); -pub const RDI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 7 }); -pub const R8: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 8 }); -pub const R9: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 9 }); -pub const R10: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 10 }); -pub const R11: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 11 }); -pub const R12: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 
R12_REG_NO }); -pub const R13: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: R13_REG_NO }); -pub const R14: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 14 }); -pub const R15: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 15 }); +pub const RAX_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RAX_REG_NO }; +pub const RCX_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 1 }; +pub const RDX_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 2 }; +pub const RBX_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 3 }; +pub const RSP_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RSP_REG_NO }; +pub const RBP_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RBP_REG_NO }; +pub const RSI_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 6 }; +pub const RDI_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 7 }; +pub const R8_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 8 }; +pub const R9_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 9 }; +pub const R10_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 10 }; +pub const R11_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 11 }; +pub const R12_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: R12_REG_NO }; +pub const R13_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: R13_REG_NO }; +pub const R14_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 14 }; +pub const R15_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 15 }; + +pub const RAX: X86Opnd = X86Opnd::Reg(RAX_REG); +pub const RCX: X86Opnd = X86Opnd::Reg(RCX_REG); +pub const RDX: X86Opnd = X86Opnd::Reg(RDX_REG); +pub const RBX: X86Opnd = X86Opnd::Reg(RBX_REG); +pub const RSP: X86Opnd 
= X86Opnd::Reg(RSP_REG); +pub const RBP: X86Opnd = X86Opnd::Reg(RBP_REG); +pub const RSI: X86Opnd = X86Opnd::Reg(RSI_REG); +pub const RDI: X86Opnd = X86Opnd::Reg(RDI_REG); +pub const R8: X86Opnd = X86Opnd::Reg(R8_REG); +pub const R9: X86Opnd = X86Opnd::Reg(R9_REG); +pub const R10: X86Opnd = X86Opnd::Reg(R10_REG); +pub const R11: X86Opnd = X86Opnd::Reg(R11_REG); +pub const R12: X86Opnd = X86Opnd::Reg(R12_REG); +pub const R13: X86Opnd = X86Opnd::Reg(R13_REG); +pub const R14: X86Opnd = X86Opnd::Reg(R14_REG); +pub const R15: X86Opnd = X86Opnd::Reg(R15_REG); // 32-bit GP registers pub const EAX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 0 }); @@ -197,7 +233,7 @@ pub const AX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType: pub const CX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 1 }); pub const DX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 2 }); pub const BX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 3 }); -pub const SP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 4 }); +//pub const SP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 4 }); pub const BP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 5 }); pub const SI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 6 }); pub const DI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 7 }); @@ -228,45 +264,8 @@ pub const R13B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType:: pub const R14B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 14 }); pub const R15B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 15 }); -// C argument registers -pub const C_ARG_REGS: [X86Opnd; 6] = [RDI, RSI, RDX, RCX, R8, R9]; - 
//=========================================================================== -/// Compute the number of bits needed to encode a signed value -pub fn sig_imm_size(imm: i64) -> u8 -{ - // Compute the smallest size this immediate fits in - if imm >= i8::MIN.into() && imm <= i8::MAX.into() { - return 8; - } - if imm >= i16::MIN.into() && imm <= i16::MAX.into() { - return 16; - } - if imm >= i32::MIN.into() && imm <= i32::MAX.into() { - return 32; - } - - return 64; -} - -/// Compute the number of bits needed to encode an unsigned value -pub fn unsig_imm_size(imm: u64) -> u8 -{ - // Compute the smallest size this immediate fits in - if imm <= u8::MAX.into() { - return 8; - } - else if imm <= u16::MAX.into() { - return 16; - } - else if imm <= u32::MAX.into() { - return 32; - } - - return 64; -} - /// Shorthand for memory operand with base register and displacement pub fn mem_opnd(num_bits: u8, base_reg: X86Opnd, disp: i32) -> X86Opnd { @@ -345,12 +344,12 @@ static x86opnd_t resize_opnd(x86opnd_t opnd, uint32_t num_bits) pub fn imm_opnd(value: i64) -> X86Opnd { - X86Opnd::Imm(X86Imm { num_bits: sig_imm_size(value), value }) + X86Opnd::Imm(X86Imm { num_bits: imm_num_bits(value), value }) } pub fn uimm_opnd(value: u64) -> X86Opnd { - X86Opnd::UImm(X86UImm { num_bits: unsig_imm_size(value), value }) + X86Opnd::UImm(X86UImm { num_bits: uimm_num_bits(value), value }) } pub fn const_ptr_opnd(ptr: *const u8) -> X86Opnd @@ -602,7 +601,7 @@ fn write_rm_multi(cb: &mut CodeBlock, op_mem_reg8: u8, op_mem_reg_pref: u8, op_r }, // R/M + UImm (_, X86Opnd::UImm(uimm)) => { - let num_bits = sig_imm_size(uimm.value.try_into().unwrap()); + let num_bits = imm_num_bits(uimm.value.try_into().unwrap()); if num_bits <= 8 { // 8-bit immediate @@ -701,14 +700,10 @@ pub fn call_ptr(cb: &mut CodeBlock, scratch_opnd: X86Opnd, dst_ptr: *const u8) { /// call - Call to label with 32-bit offset pub fn call_label(cb: &mut CodeBlock, label_idx: usize) { - // Write the opcode - cb.write_byte(0xE8); - - // 
Add a reference to the label - cb.label_ref(label_idx); - - // Relative 32-bit offset to be patched - cb.write_int(0, 32); + cb.label_ref(label_idx, 5, |cb, src_addr, dst_addr| { + cb.write_byte(0xE8); + cb.write_int((dst_addr - src_addr) as u64, 32); + }); } /// call - Indirect call with an R/M operand @@ -799,55 +794,54 @@ pub fn int3(cb: &mut CodeBlock) { cb.write_byte(0xcc); } -// Encode a relative jump to a label (direct or conditional) +// Encode a conditional relative jump to a label // Note: this always encodes a 32-bit offset -fn write_jcc(cb: &mut CodeBlock, op0: u8, op1: u8, label_idx: usize) { - // Write the opcode - if op0 != 0xff { - cb.write_byte(op0); - } - - cb.write_byte(op1); - - // Add a reference to the label - cb.label_ref(label_idx); - - // Relative 32-bit offset to be patched - cb.write_int( 0, 32); +fn write_jcc(cb: &mut CodeBlock, label_idx: usize) { + cb.label_ref(label_idx, 6, |cb, src_addr, dst_addr| { + cb.write_byte(0x0F); + cb.write_byte(OP); + cb.write_int((dst_addr - src_addr) as u64, 32); + }); } /// jcc - relative jumps to a label -pub fn ja_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x87, label_idx); } -pub fn jae_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x83, label_idx); } -pub fn jb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x82, label_idx); } -pub fn jbe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x86, label_idx); } -pub fn jc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x82, label_idx); } -pub fn je_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x84, label_idx); } -pub fn jg_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8F, label_idx); } -pub fn jge_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8D, label_idx); } -pub fn jl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8C, label_idx); } -pub fn jle_label (cb: &mut 
CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8E, label_idx); } -pub fn jna_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x86, label_idx); } -pub fn jnae_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x82, label_idx); } -pub fn jnb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x83, label_idx); } -pub fn jnbe_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x87, label_idx); } -pub fn jnc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x83, label_idx); } -pub fn jne_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x85, label_idx); } -pub fn jng_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8E, label_idx); } -pub fn jnge_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8C, label_idx); } -pub fn jnl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8D, label_idx); } -pub fn jnle_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8F, label_idx); } -pub fn jno_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x81, label_idx); } -pub fn jnp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8b, label_idx); } -pub fn jns_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x89, label_idx); } -pub fn jnz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x85, label_idx); } -pub fn jo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x80, label_idx); } -pub fn jp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8A, label_idx); } -pub fn jpe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8A, label_idx); } -pub fn jpo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8B, label_idx); } -pub fn js_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x88, label_idx); } -pub fn jz_label (cb: &mut CodeBlock, label_idx: 
usize) { write_jcc(cb, 0x0F, 0x84, label_idx); } -pub fn jmp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0xFF, 0xE9, label_idx); } +pub fn ja_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x87>(cb, label_idx); } +pub fn jae_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x83>(cb, label_idx); } +pub fn jb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x82>(cb, label_idx); } +pub fn jbe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x86>(cb, label_idx); } +pub fn jc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x82>(cb, label_idx); } +pub fn je_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x84>(cb, label_idx); } +pub fn jg_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8F>(cb, label_idx); } +pub fn jge_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8D>(cb, label_idx); } +pub fn jl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8C>(cb, label_idx); } +pub fn jle_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8E>(cb, label_idx); } +pub fn jna_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x86>(cb, label_idx); } +pub fn jnae_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x82>(cb, label_idx); } +pub fn jnb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x83>(cb, label_idx); } +pub fn jnbe_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x87>(cb, label_idx); } +pub fn jnc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x83>(cb, label_idx); } +pub fn jne_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x85>(cb, label_idx); } +pub fn jng_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8E>(cb, label_idx); } +pub fn jnge_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8C>(cb, label_idx); } +pub fn jnl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8D>(cb, label_idx); } +pub fn jnle_label(cb: &mut CodeBlock, label_idx: 
usize) { write_jcc::<0x8F>(cb, label_idx); } +pub fn jno_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x81>(cb, label_idx); } +pub fn jnp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8b>(cb, label_idx); } +pub fn jns_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x89>(cb, label_idx); } +pub fn jnz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x85>(cb, label_idx); } +pub fn jo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x80>(cb, label_idx); } +pub fn jp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8A>(cb, label_idx); } +pub fn jpe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8A>(cb, label_idx); } +pub fn jpo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8B>(cb, label_idx); } +pub fn js_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x88>(cb, label_idx); } +pub fn jz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x84>(cb, label_idx); } + +pub fn jmp_label(cb: &mut CodeBlock, label_idx: usize) { + cb.label_ref(label_idx, 5, |cb, src_addr, dst_addr| { + cb.write_byte(0xE9); + cb.write_int((dst_addr - src_addr) as u64, 32); + }); +} /// Encode a relative jump to a pointer at a 32-bit offset (direct or conditional) fn write_jcc_ptr(cb: &mut CodeBlock, op0: u8, op1: u8, dst_ptr: CodePtr) { @@ -996,7 +990,7 @@ pub fn mov(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { } let output_num_bits:u32 = if mem.num_bits > 32 { 32 } else { mem.num_bits.into() }; - assert!(sig_imm_size(imm.value) <= (output_num_bits as u8)); + assert!(imm_num_bits(imm.value) <= (output_num_bits as u8)); cb.write_int(imm.value as u64, output_num_bits); }, // M + UImm @@ -1011,7 +1005,7 @@ pub fn mov(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { } let output_num_bits = if mem.num_bits > 32 { 32 } else { mem.num_bits.into() }; - assert!(sig_imm_size(uimm.value as i64) <= (output_num_bits as u8)); + assert!(imm_num_bits(uimm.value as i64) <= 
(output_num_bits as u8)); cb.write_int(uimm.value, output_num_bits); }, // * + Imm/UImm diff --git a/yjit/src/asm/x86_64/tests.rs b/yjit/src/asm/x86_64/tests.rs index ffcc0634202937..92691803a30800 100644 --- a/yjit/src/asm/x86_64/tests.rs +++ b/yjit/src/asm/x86_64/tests.rs @@ -1,18 +1,6 @@ #![cfg(test)] use crate::asm::x86_64::*; -use std::fmt; - -/// Produce hex string output from the bytes in a code block -impl<'a> fmt::LowerHex for super::CodeBlock { - fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result { - for pos in 0..self.write_pos { - let byte = unsafe { self.mem_block.start_ptr().raw_ptr().add(pos).read() }; - fmtr.write_fmt(format_args!("{:02x}", byte))?; - } - Ok(()) - } -} /// Check that the bytes for an instruction sequence match a hex string fn check_bytes(bytes: &str, run: R) where R: FnOnce(&mut super::CodeBlock) { diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs new file mode 100644 index 00000000000000..4f07bf80626d18 --- /dev/null +++ b/yjit/src/backend/arm64/mod.rs @@ -0,0 +1,1261 @@ +#![allow(dead_code)] +#![allow(unused_variables)] +#![allow(unused_imports)] + +use crate::asm::{CodeBlock}; +use crate::asm::arm64::*; +use crate::codegen::{JITState}; +use crate::cruby::*; +use crate::backend::ir::*; +use crate::virtualmem::CodePtr; + +// Use the arm64 register type for this platform +pub type Reg = A64Reg; + +// Callee-saved registers +pub const _CFP: Opnd = Opnd::Reg(X19_REG); +pub const _EC: Opnd = Opnd::Reg(X20_REG); +pub const _SP: Opnd = Opnd::Reg(X21_REG); + +// C argument registers on this platform +pub const _C_ARG_OPNDS: [Opnd; 6] = [ + Opnd::Reg(X0_REG), + Opnd::Reg(X1_REG), + Opnd::Reg(X2_REG), + Opnd::Reg(X3_REG), + Opnd::Reg(X4_REG), + Opnd::Reg(X5_REG) +]; + +// C return value register on this platform +pub const C_RET_REG: Reg = X0_REG; +pub const _C_RET_OPND: Opnd = Opnd::Reg(X0_REG); + +// These constants define the way we work with Arm64's stack pointer. 
The stack +// pointer always needs to be aligned to a 16-byte boundary. +pub const C_SP_REG: A64Opnd = X31; +pub const C_SP_STEP: i32 = 16; + +/// Map Opnd to A64Opnd +impl From for A64Opnd { + fn from(opnd: Opnd) -> Self { + match opnd { + Opnd::UImm(value) => A64Opnd::new_uimm(value), + Opnd::Imm(value) => A64Opnd::new_imm(value), + Opnd::Reg(reg) => A64Opnd::Reg(reg), + Opnd::Mem(Mem { base: MemBase::Reg(reg_no), num_bits, disp }) => { + A64Opnd::new_mem(num_bits, A64Opnd::Reg(A64Reg { num_bits, reg_no }), disp) + }, + Opnd::Mem(Mem { base: MemBase::InsnOut(_), .. }) => { + panic!("attempted to lower an Opnd::Mem with a MemBase::InsnOut base") + }, + Opnd::InsnOut { .. } => panic!("attempted to lower an Opnd::InsnOut"), + Opnd::Value(_) => panic!("attempted to lower an Opnd::Value"), + Opnd::None => panic!( + "Attempted to lower an Opnd::None. This often happens when an out operand was not allocated for an instruction because the output of the instruction was not used. Please ensure you are using the output." + ), + + } + } +} + +/// Also implement going from a reference to an operand for convenience. +impl From<&Opnd> for A64Opnd { + fn from(opnd: &Opnd) -> Self { + A64Opnd::from(*opnd) + } +} + +impl Assembler +{ + // A special scratch register for intermediate processing. 
+ const SCRATCH0: A64Opnd = A64Opnd::Reg(X22_REG); + + /// Get the list of registers from which we will allocate on this platform + /// These are caller-saved registers + /// Note: we intentionally exclude C_RET_REG (X0) from this list + /// because of the way it's used in gen_leave() and gen_leave_exit() + pub fn get_alloc_regs() -> Vec { + vec![X11_REG, X12_REG, X13_REG] + } + + /// Get a list of all of the caller-saved registers + pub fn get_caller_save_regs() -> Vec { + vec![X9_REG, X10_REG, X11_REG, X12_REG, X13_REG, X14_REG, X15_REG] + } + + /// Split platform-specific instructions + /// The transformations done here are meant to make our lives simpler in later + /// stages of the compilation pipeline. + /// Here we may want to make sure that all instructions (except load and store) + /// have no memory operands. + fn arm64_split(mut self) -> Assembler + { + /// When we're attempting to load a memory address into a register, the + /// displacement must fit into the maximum number of bits for an Op::Add + /// immediate. If it doesn't, we have to load the displacement into a + /// register first. + fn split_lea_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Mem(Mem { base, disp, num_bits }) => { + if disp >= 0 && ShiftedImmediate::try_from(disp as u64).is_ok() { + asm.lea(opnd) + } else { + let disp = asm.load(Opnd::Imm(disp.into())); + let reg = match base { + MemBase::Reg(reg_no) => Opnd::Reg(Reg { reg_no, num_bits }), + MemBase::InsnOut(idx) => Opnd::InsnOut { idx, num_bits } + }; + + asm.add(reg, disp) + } + }, + _ => unreachable!("Op::Lea only accepts Opnd::Mem operands.") + } + } + + /// When you're storing a register into a memory location or loading a + /// memory location into a register, the displacement from the base + /// register of the memory location must fit into 9 bits. If it doesn't, + /// then we need to load that memory address into a register first. 
+ fn split_memory_address(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Mem(mem) => { + if mem_disp_fits_bits(mem.disp) { + opnd + } else { + let base = split_lea_operand(asm, opnd); + Opnd::mem(64, base, 0) + } + }, + _ => unreachable!("Can only split memory addresses.") + } + } + + /// Any memory operands you're sending into an Op::Load instruction need + /// to be split in case their displacement doesn't fit into 9 bits. + fn split_load_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Mem(_) => { + let split_opnd = split_memory_address(asm, opnd); + asm.load(split_opnd) + }, + _ => asm.load(opnd) + } + } + + /// Operands that take the place of bitmask immediates must follow a + /// certain encoding. In this function we ensure that those operands + /// do follow that encoding, and if they don't then we load them first. + fn split_bitmask_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, + Opnd::Mem(_) => split_load_operand(asm, opnd), + Opnd::Imm(imm) => { + if imm <= 0 { + asm.load(opnd) + } else if BitmaskImmediate::try_from(imm as u64).is_ok() { + Opnd::UImm(imm as u64) + } else { + asm.load(opnd) + } + }, + Opnd::UImm(uimm) => { + if BitmaskImmediate::try_from(uimm).is_ok() { + opnd + } else { + asm.load(opnd) + } + }, + Opnd::None | Opnd::Value(_) => unreachable!() + } + } + + /// Operands that take the place of a shifted immediate must fit within + /// a certain size. If they don't then we need to load them first. + fn split_shifted_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Reg(_) | Opnd::InsnOut { .. 
} => opnd, + Opnd::Mem(_) => split_load_operand(asm, opnd), + Opnd::Imm(_) => asm.load(opnd), + Opnd::UImm(uimm) => { + if ShiftedImmediate::try_from(uimm).is_ok() { + opnd + } else { + asm.load(opnd) + } + }, + Opnd::None | Opnd::Value(_) => unreachable!() + } + } + + /// Returns the operands that should be used for a boolean logic + /// instruction. + fn split_boolean_operands(asm: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> (Opnd, Opnd) { + match (opnd0, opnd1) { + (Opnd::Reg(_), Opnd::Reg(_)) => { + (opnd0, opnd1) + }, + (reg_opnd @ Opnd::Reg(_), other_opnd) | + (other_opnd, reg_opnd @ Opnd::Reg(_)) => { + let opnd1 = split_bitmask_immediate(asm, other_opnd); + (reg_opnd, opnd1) + }, + _ => { + let opnd0 = split_load_operand(asm, opnd0); + let opnd1 = split_bitmask_immediate(asm, opnd1); + (opnd0, opnd1) + } + } + } + + /// Returns the operands that should be used for a csel instruction. + fn split_csel_operands(asm: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> (Opnd, Opnd) { + let opnd0 = match opnd0 { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd0, + _ => split_load_operand(asm, opnd0) + }; + + let opnd1 = match opnd1 { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd1, + _ => split_load_operand(asm, opnd1) + }; + + (opnd0, opnd1) + } + + let mut asm_local = Assembler::new_with_label_names(std::mem::take(&mut self.label_names)); + let asm = &mut asm_local; + let mut iterator = self.into_draining_iter(); + + while let Some((index, mut insn)) = iterator.next_mapped() { + // Here we're going to map the operands of the instruction to load + // any Opnd::Value operands into registers if they are heap objects + // such that only the Op::Load instruction needs to handle that + // case. If the values aren't heap objects then we'll treat them as + // if they were just unsigned integer. + let is_load = matches!(insn, Insn::Load { .. 
}); + let mut opnd_iter = insn.opnd_iter_mut(); + + while let Some(opnd) = opnd_iter.next() { + match opnd { + Opnd::Value(value) => { + if value.special_const_p() { + *opnd = Opnd::UImm(value.as_u64()); + } else if !is_load { + *opnd = asm.load(*opnd); + } + }, + _ => {} + }; + } + + match insn { + Insn::Add { left, right, .. } => { + match (left, right) { + (Opnd::Reg(_) | Opnd::InsnOut { .. }, Opnd::Reg(_) | Opnd::InsnOut { .. }) => { + asm.add(left, right); + }, + (reg_opnd @ (Opnd::Reg(_) | Opnd::InsnOut { .. }), other_opnd) | + (other_opnd, reg_opnd @ (Opnd::Reg(_) | Opnd::InsnOut { .. })) => { + let opnd1 = split_shifted_immediate(asm, other_opnd); + asm.add(reg_opnd, opnd1); + }, + _ => { + let opnd0 = split_load_operand(asm, left); + let opnd1 = split_shifted_immediate(asm, right); + asm.add(opnd0, opnd1); + } + } + }, + Insn::And { left, right, .. } => { + let (opnd0, opnd1) = split_boolean_operands(asm, left, right); + asm.and(opnd0, opnd1); + }, + Insn::Or { left, right, .. } => { + let (opnd0, opnd1) = split_boolean_operands(asm, left, right); + asm.or(opnd0, opnd1); + }, + Insn::Xor { left, right, .. } => { + let (opnd0, opnd1) = split_boolean_operands(asm, left, right); + asm.xor(opnd0, opnd1); + }, + Insn::CCall { opnds, target, .. } => { + assert!(opnds.len() <= C_ARG_OPNDS.len()); + + // For each of the operands we're going to first load them + // into a register and then move them into the correct + // argument register. + // Note: the iteration order is reversed to avoid corrupting x0, + // which is both the return value and first argument register + for (idx, opnd) in opnds.into_iter().enumerate().rev() { + // If the value that we're sending is 0, then we can use + // the zero register, so in this case we'll just send + // a UImm of 0 along as the argument to the move. 
+ let value = match opnd { + Opnd::UImm(0) | Opnd::Imm(0) => Opnd::UImm(0), + _ => split_load_operand(asm, opnd) + }; + + asm.mov(C_ARG_OPNDS[idx], value); + } + + // Now we push the CCall without any arguments so that it + // just performs the call. + asm.ccall(target.unwrap_fun_ptr(), vec![]); + }, + Insn::Cmp { left, right } => { + let opnd0 = match left { + Opnd::Reg(_) | Opnd::InsnOut { .. } => left, + _ => split_load_operand(asm, left) + }; + + let opnd1 = split_shifted_immediate(asm, right); + asm.cmp(opnd0, opnd1); + }, + Insn::CRet(opnd) => { + if opnd != Opnd::Reg(C_RET_REG) { + let value = split_load_operand(asm, opnd); + asm.mov(C_RET_OPND, value); + } + asm.cret(C_RET_OPND); + }, + Insn::CSelZ { truthy, falsy, .. } => { + let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); + asm.csel_z(opnd0, opnd1); + }, + Insn::CSelNZ { truthy, falsy, .. } => { + let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); + asm.csel_nz(opnd0, opnd1); + }, + Insn::CSelE { truthy, falsy, .. } => { + let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); + asm.csel_e(opnd0, opnd1); + }, + Insn::CSelNE { truthy, falsy, .. } => { + let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); + asm.csel_ne(opnd0, opnd1); + }, + Insn::CSelL { truthy, falsy, .. } => { + let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); + asm.csel_l(opnd0, opnd1); + }, + Insn::CSelLE { truthy, falsy, .. } => { + let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); + asm.csel_le(opnd0, opnd1); + }, + Insn::CSelG { truthy, falsy, .. } => { + let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); + asm.csel_g(opnd0, opnd1); + }, + Insn::CSelGE { truthy, falsy, .. } => { + let (opnd0, opnd1) = split_csel_operands(asm, truthy, falsy); + asm.csel_ge(opnd0, opnd1); + }, + Insn::IncrCounter { mem, value } => { + // We'll use LDADD later which only works with registers + // ... 
Load pointer into register + let counter_addr = split_lea_operand(asm, mem); + + // Load immediates into a register + let addend = match value { + opnd @ Opnd::Imm(_) | opnd @ Opnd::UImm(_) => asm.load(opnd), + opnd => opnd, + }; + + asm.incr_counter(counter_addr, addend); + }, + Insn::JmpOpnd(opnd) => { + if let Opnd::Mem(_) = opnd { + let opnd0 = split_load_operand(asm, opnd); + asm.jmp_opnd(opnd0); + } else { + asm.jmp_opnd(opnd); + } + }, + Insn::Load { opnd, .. } => { + split_load_operand(asm, opnd); + }, + Insn::LoadSExt { opnd, .. } => { + match opnd { + // We only want to sign extend if the operand is a + // register, instruction output, or memory address that + // is 32 bits. Otherwise we'll just load the value + // directly since there's no need to sign extend. + Opnd::Reg(Reg { num_bits: 32, .. }) | + Opnd::InsnOut { num_bits: 32, .. } | + Opnd::Mem(Mem { num_bits: 32, .. }) => { + asm.load_sext(opnd); + }, + _ => { + asm.load(opnd); + } + }; + }, + Insn::Mov { dest, src } => { + let value: Opnd = match (dest, src) { + // If we're storing a zero immediate into memory, then we + // can just use the zero register as the source. + (Opnd::Mem(_), Opnd::UImm(0) | Opnd::Imm(0)) => Opnd::Reg(XZR_REG), + // If the first operand is a memory operand, we're going + // to transform this into a store instruction, so we'll + // need to load this anyway. + (Opnd::Mem(_), Opnd::UImm(_)) => asm.load(src), + // The value that is being moved must be either a + // register or an immediate that can be encoded as a + // bitmask immediate. Otherwise, we'll need to split the + // move into multiple instructions. + _ => split_bitmask_immediate(asm, src) + }; + + // If we're attempting to load into a memory operand, then + // we'll switch over to the store instruction. Otherwise + // we'll use the normal mov instruction.
+ match dest { + Opnd::Mem(_) => { + let opnd0 = split_memory_address(asm, dest); + asm.store(opnd0, value); + }, + Opnd::Reg(_) => { + asm.mov(dest, value); + }, + _ => unreachable!() + }; + }, + Insn::Not { opnd, .. } => { + // The value that is being negated must be in a register, so + // if we get anything else we need to load it first. + let opnd0 = match opnd { + Opnd::Mem(_) => split_load_operand(asm, opnd), + _ => opnd + }; + + asm.not(opnd0); + }, + Insn::Store { dest, src } => { + // The displacement for the STUR instruction can't be more + // than 9 bits long. If it's longer, we need to load the + // memory address into a register first. + let opnd0 = split_memory_address(asm, dest); + + // The value being stored must be in a register, so if it's + // not already one we'll load it first. + let opnd1 = match src { + Opnd::Reg(_) | Opnd::InsnOut { .. } => src, + _ => split_load_operand(asm, src) + }; + + asm.store(opnd0, opnd1); + }, + Insn::Sub { left, right, .. } => { + let opnd0 = match left { + Opnd::Reg(_) | Opnd::InsnOut { .. } => left, + _ => split_load_operand(asm, left) + }; + + let opnd1 = split_shifted_immediate(asm, right); + asm.sub(opnd0, opnd1); + }, + Insn::Test { left, right } => { + // The value being tested must be in a register, so if it's + // not already one we'll load it first. + let opnd0 = match left { + Opnd::Reg(_) | Opnd::InsnOut { .. } => left, + _ => split_load_operand(asm, left) + }; + + // The second value must be either a register or an + // unsigned immediate that can be encoded as a bitmask + // immediate. If it's not one of those, we'll need to load + // it first. + let opnd1 = split_bitmask_immediate(asm, right); + asm.test(opnd0, opnd1); + }, + _ => { + // If we have an output operand, then we need to replace it + // with a new output operand from the new assembler. 
+ if insn.out_opnd().is_some() { + let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter()); + let out = insn.out_opnd_mut().unwrap(); + *out = asm.next_opnd_out(out_num_bits); + } + + asm.push_insn(insn); + } + }; + + iterator.map_insn_index(asm); + } + + asm_local + } + + /// Emit platform-specific machine code + /// Returns a list of GC offsets + pub fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Vec + { + /// Determine how many instructions it will take to represent moving + /// this value into a register. Note that the return value of this + /// function must correspond to how many instructions are used to + /// represent this load in the emit_load_value function. + fn emit_load_size(value: u64) -> u8 { + if BitmaskImmediate::try_from(value).is_ok() { + return 1; + } + + if value < (1 << 16) { + 1 + } else if value < (1 << 32) { + 2 + } else if value < (1 << 48) { + 3 + } else { + 4 + } + } + + /// Emit the required instructions to load the given value into the + /// given register. Our goal here is to use as few instructions as + /// possible to get this value into the register. + fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> i32 { + let mut current = value; + + if current <= 0xffff { + // If the value fits into a single movz + // instruction, then we'll use that. + movz(cb, rd, A64Opnd::new_uimm(current), 0); + return 1; + } else if BitmaskImmediate::try_from(current).is_ok() { + // Otherwise, if the immediate can be encoded + // with the special bitmask immediate encoding, + // we'll use that. + mov(cb, rd, A64Opnd::new_uimm(current)); + return 1; + } else { + // Finally we'll fall back to encoding the value + // using movz for the first 16 bits and movk for + // each subsequent set of 16 bits as long as + // they are necessary. + movz(cb, rd, A64Opnd::new_uimm(current & 0xffff), 0); + let mut num_insns = 1; + + // (We're sure this is necessary since we + // checked if it only fit into movz above).
+ current >>= 16; + movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 16); + num_insns += 1; + + if current > 0xffff { + current >>= 16; + movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 32); + num_insns += 1; + } + + if current > 0xffff { + current >>= 16; + movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 48); + num_insns += 1; + } + return num_insns; + } + } + + /// Emit a conditional jump instruction to a specific target. This is + /// called when lowering any of the conditional jump instructions. + fn emit_conditional_jump(cb: &mut CodeBlock, target: Target) { + match target { + Target::CodePtr(dst_ptr) => { + let dst_addr = dst_ptr.into_i64(); + let src_addr = cb.get_write_ptr().into_i64(); + let offset = dst_addr - src_addr; + + let num_insns = if bcond_offset_fits_bits(offset) { + // If the jump offset fits into the conditional jump as + // an immediate value and it's properly aligned, then we + // can use the b.cond instruction directly. + bcond(cb, CONDITION, A64Opnd::new_imm(offset)); + + // Here we're going to return 1 because we've only + // written out 1 instruction. + 1 + } else { + // Otherwise, we need to load the address into a + // register and use the branch register instruction. + let dst_addr = dst_ptr.into_u64(); + let load_insns: i64 = emit_load_size(dst_addr).into(); + + // We're going to write out the inverse condition so + // that if it doesn't match it will skip over the + // instructions used for branching. + bcond(cb, Condition::inverse(CONDITION), A64Opnd::new_imm((load_insns + 2) * 4)); + emit_load_value(cb, Assembler::SCRATCH0, dst_addr); + br(cb, Assembler::SCRATCH0); + + // Here we'll return the number of instructions that it + // took to write out the destination address + 1 for the + // b.cond and 1 for the br. + load_insns + 2 + }; + + // We need to make sure we have at least 6 instructions for + // every kind of jump for invalidation purposes, so we're + // going to write out padding nop instructions here. 
+ for _ in num_insns..6 { nop(cb); } + }, + Target::Label(label_idx) => { + // Here we're going to save enough space for ourselves and + // then come back and write the instruction once we know the + // offset. We're going to assume we can fit into a single + // b.cond instruction. It will panic otherwise. + cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { + bcond(cb, CONDITION, A64Opnd::new_imm(dst_addr - (src_addr - 4))); + }); + }, + Target::FunPtr(_) => unreachable!() + }; + } + + /// Emit a push instruction for the given operand by moving the stack + /// pointer down by C_SP_STEP and then storing the given value. + fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) { + str_pre(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, -C_SP_STEP)); + } + + /// Emit a pop instruction into the given operand by loading the value + /// and then moving the stack pointer back up by C_SP_STEP. + fn emit_pop(cb: &mut CodeBlock, opnd: A64Opnd) { + ldr_post(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, C_SP_STEP)); + } + + // dbg!(&self.insns); + + // List of GC offsets + let mut gc_offsets: Vec = Vec::new(); + + // For each instruction + let start_write_pos = cb.get_write_pos(); + for insn in &self.insns { + match insn { + Insn::Comment(text) => { + if cfg!(feature = "asm_comments") { + cb.add_comment(text); + } + }, + Insn::Label(target) => { + cb.write_label(target.unwrap_label_idx()); + }, + // Report back the current position in the generated code + Insn::PosMarker(pos_marker) => { + pos_marker(cb.get_write_ptr()); + } + Insn::BakeString(text) => { + for byte in text.as_bytes() { + cb.write_byte(*byte); + } + + // Add a null-terminator byte for safety (in case we pass + // this to C code) + cb.write_byte(0); + + // Pad out the string to the next 4-byte boundary so that + // it's easy to jump past.
+ for _ in 0..(4 - ((text.len() + 1) % 4)) { + cb.write_byte(0); + } + }, + Insn::Add { left, right, out } => { + adds(cb, out.into(), left.into(), right.into()); + }, + Insn::FrameSetup => { + stp_pre(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, -16)); + + // X29 (frame_pointer) = SP + mov(cb, X29, C_SP_REG); + }, + Insn::FrameTeardown => { + // SP = X29 (frame pointer) + mov(cb, C_SP_REG, X29); + + ldp_post(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, 16)); + }, + Insn::Sub { left, right, out } => { + subs(cb, out.into(), left.into(), right.into()); + }, + Insn::And { left, right, out } => { + and(cb, out.into(), left.into(), right.into()); + }, + Insn::Or { left, right, out } => { + orr(cb, out.into(), left.into(), right.into()); + }, + Insn::Xor { left, right, out } => { + eor(cb, out.into(), left.into(), right.into()); + }, + Insn::Not { opnd, out } => { + mvn(cb, out.into(), opnd.into()); + }, + Insn::RShift { opnd, shift, out } => { + asr(cb, out.into(), opnd.into(), shift.into()); + }, + Insn::URShift { opnd, shift, out } => { + lsr(cb, out.into(), opnd.into(), shift.into()); + }, + Insn::LShift { opnd, shift, out } => { + lsl(cb, out.into(), opnd.into(), shift.into()); + }, + Insn::Store { dest, src } => { + // This order may be surprising but it is correct. The way + // the Arm64 assembler works, the register that is going to + // be stored is first and the address is second. However in + // our IR we have the address first and the register second. + stur(cb, src.into(), dest.into()); + }, + Insn::Load { opnd, out } => { + match *opnd { + Opnd::Reg(_) | Opnd::InsnOut { .. 
} => { + mov(cb, out.into(), opnd.into()); + }, + Opnd::UImm(uimm) => { + emit_load_value(cb, out.into(), uimm); + }, + Opnd::Imm(imm) => { + emit_load_value(cb, out.into(), imm as u64); + }, + Opnd::Mem(_) => { + ldur(cb, out.into(), opnd.into()); + }, + Opnd::Value(value) => { + // We dont need to check if it's a special const + // here because we only allow these operands to hit + // this point if they're not a special const. + assert!(!value.special_const_p()); + + // This assumes only load instructions can contain + // references to GC'd Value operands. If the value + // being loaded is a heap object, we'll report that + // back out to the gc_offsets list. + ldr_literal(cb, out.into(), 2); + b(cb, A64Opnd::new_imm(1 + (SIZEOF_VALUE as i64) / 4)); + cb.write_bytes(&value.as_u64().to_le_bytes()); + + let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); + gc_offsets.push(ptr_offset); + }, + Opnd::None => { + unreachable!("Attempted to load from None operand"); + } + }; + }, + Insn::LoadSExt { opnd, out } => { + match *opnd { + Opnd::Reg(Reg { num_bits: 32, .. }) | + Opnd::InsnOut { num_bits: 32, .. } => { + sxtw(cb, out.into(), opnd.into()); + }, + Opnd::Mem(Mem { num_bits: 32, .. }) => { + ldursw(cb, out.into(), opnd.into()); + }, + _ => unreachable!() + }; + }, + Insn::Mov { dest, src } => { + mov(cb, dest.into(), src.into()); + }, + Insn::Lea { opnd, out } => { + let opnd: A64Opnd = opnd.into(); + + match opnd { + A64Opnd::Mem(mem) => { + add( + cb, + out.into(), + A64Opnd::Reg(A64Reg { reg_no: mem.base_reg_no, num_bits: 64 }), + A64Opnd::new_imm(mem.disp.into()) + ); + }, + _ => { + panic!("Op::Lea only accepts Opnd::Mem operands."); + } + }; + }, + Insn::LeaLabel { out, target, .. 
} => { + let label_idx = target.unwrap_label_idx(); + + cb.label_ref(label_idx, 4, |cb, end_addr, dst_addr| { + adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - (end_addr - 4))); + }); + + mov(cb, out.into(), Self::SCRATCH0); + }, + Insn::CPush(opnd) => { + emit_push(cb, opnd.into()); + }, + Insn::CPop { out } => { + emit_pop(cb, out.into()); + }, + Insn::CPopInto(opnd) => { + emit_pop(cb, opnd.into()); + }, + Insn::CPushAll => { + let regs = Assembler::get_caller_save_regs(); + + for reg in regs { + emit_push(cb, A64Opnd::Reg(reg)); + } + + // Push the flags/state register + mrs(cb, Self::SCRATCH0, SystemRegister::NZCV); + emit_push(cb, Self::SCRATCH0); + }, + Insn::CPopAll => { + let regs = Assembler::get_caller_save_regs(); + + // Pop the saved state/flags into the scratch register first, then restore NZCV from it (mirror of CPushAll) + emit_pop(cb, Self::SCRATCH0); + msr(cb, SystemRegister::NZCV, Self::SCRATCH0); + + for reg in regs.into_iter().rev() { + emit_pop(cb, A64Opnd::Reg(reg)); + } + }, + Insn::CCall { target, .. } => { + // The offset to the call target in bytes + let src_addr = cb.get_write_ptr().into_i64(); + let dst_addr = target.unwrap_fun_ptr() as i64; + let offset = dst_addr - src_addr; + // The offset in instruction count for BL's immediate + let offset = offset / 4; + + // Use BL if the offset is short enough to encode as an immediate. + // Otherwise, use BLR with a register. + if b_offset_fits_bits(offset) { + bl(cb, A64Opnd::new_imm(offset)); + } else { + emit_load_value(cb, Self::SCRATCH0, dst_addr as u64); + blr(cb, Self::SCRATCH0); + } + }, + Insn::CRet { .. } => { + ret(cb, A64Opnd::None); + }, + Insn::Cmp { left, right } => { + cmp(cb, left.into(), right.into()); + }, + Insn::Test { left, right } => { + tst(cb, left.into(), right.into()); + }, + Insn::JmpOpnd(opnd) => { + br(cb, opnd.into()); + }, + Insn::Jmp(target) => { + match target { + Target::CodePtr(dst_ptr) => { + let src_addr = cb.get_write_ptr().into_i64(); + let dst_addr = dst_ptr.into_i64(); + + // The offset between the two instructions in bytes.
+ // Note that when we encode this into a b + // instruction, we'll divide by 4 because it accepts + // the number of instructions to jump over. + let offset = dst_addr - src_addr; + let offset = offset / 4; + + // The offset computed above is currently unused: we + // always move the destination into a register and use + // the branch register instruction, then pad with nops + // so the load always accounts for 4 instructions. + let num_insns = emit_load_value(cb, Self::SCRATCH0, dst_addr as u64); + br(cb, Self::SCRATCH0); + for _ in num_insns..4 { + nop(cb); + } + }, + Target::Label(label_idx) => { + // Here we're going to save enough space for + // ourselves and then come back and write the + // instruction once we know the offset. We're going + // to assume we can fit into a single b instruction. + // It will panic otherwise. + cb.label_ref(*label_idx, 4, |cb, src_addr, dst_addr| { + b(cb, A64Opnd::new_imm((dst_addr - (src_addr - 4)) / 4)); + }); + }, + _ => unreachable!() + }; + }, + Insn::Je(target) | Insn::Jz(target) => { + emit_conditional_jump::<{Condition::EQ}>(cb, *target); + }, + Insn::Jne(target) | Insn::Jnz(target) => { + emit_conditional_jump::<{Condition::NE}>(cb, *target); + }, + Insn::Jl(target) => { + emit_conditional_jump::<{Condition::LT}>(cb, *target); + }, + Insn::Jbe(target) => { + emit_conditional_jump::<{Condition::LS}>(cb, *target); + }, + Insn::Jo(target) => { + emit_conditional_jump::<{Condition::VS}>(cb, *target); + }, + Insn::IncrCounter { mem, value } => { + ldaddal(cb, value.into(), value.into(), mem.into()); + }, + Insn::Breakpoint => { + brk(cb, A64Opnd::None); + }, + Insn::CSelZ { truthy, falsy, out } | + Insn::CSelE { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::EQ); + }, + Insn::CSelNZ { truthy, falsy, out } | + Insn::CSelNE { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::NE); + }, + Insn::CSelL { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(),
Condition::LT); + }, + Insn::CSelLE { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::LE); + }, + Insn::CSelG { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GT); + }, + Insn::CSelGE { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GE); + } + Insn::LiveReg { .. } => (), // just a reg alloc signal, no code + Insn::PadEntryExit => { + let jmp_len = 5 * 4; // Op::Jmp may emit 5 instructions + while (cb.get_write_pos() - start_write_pos) < jmp_len { + nop(cb); + } + } + }; + } + + // Invalidate icache for newly written out region so we don't run + // stale code. + #[cfg(not(test))] + { + let start = cb.get_ptr(start_write_pos).raw_ptr(); + let write_ptr = cb.get_write_ptr().raw_ptr(); + let codeblock_end = cb.get_ptr(cb.get_mem_size()).raw_ptr(); + let end = std::cmp::min(write_ptr, codeblock_end); + unsafe { rb_yjit_icache_invalidate(start as _, end as _) }; + } + + gc_offsets + } + + /// Optimize and compile the stored instructions + pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec) -> Vec + { + let mut asm = self.arm64_split().alloc_regs(regs); + + // Create label instances in the code block + for (idx, name) in asm.label_names.iter().enumerate() { + let label_idx = cb.new_label(name.to_string()); + assert!(label_idx == idx); + } + + let gc_offsets = asm.arm64_emit(cb); + + if !cb.has_dropped_bytes() { + cb.link_labels(); + } + + gc_offsets + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn setup_asm() -> (Assembler, CodeBlock) { + (Assembler::new(), CodeBlock::new_dummy(1024)) + } + + #[test] + fn test_emit_add() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.add(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_regs(&mut cb, vec![X3_REG]); + + // Assert that only 2 instructions were written. 
+ assert_eq!(8, cb.get_write_pos()); + } + + #[test] + fn test_emit_bake_string() { + let (mut asm, mut cb) = setup_asm(); + + asm.bake_string("Hello, world!"); + asm.compile_with_num_regs(&mut cb, 0); + + // Testing that we pad the string to the nearest 4-byte boundary to make + // it easier to jump over. + assert_eq!(16, cb.get_write_pos()); + } + + #[test] + fn test_emit_cpush_all() { + let (mut asm, mut cb) = setup_asm(); + + asm.cpush_all(); + asm.compile_with_num_regs(&mut cb, 0); + } + + #[test] + fn test_emit_cpop_all() { + let (mut asm, mut cb) = setup_asm(); + + asm.cpop_all(); + asm.compile_with_num_regs(&mut cb, 0); + } + + #[test] + fn test_emit_frame() { + let (mut asm, mut cb) = setup_asm(); + + asm.frame_setup(); + asm.frame_teardown(); + asm.compile_with_num_regs(&mut cb, 0); + } + + #[test] + fn test_emit_je_fits_into_bcond() { + let (mut asm, mut cb) = setup_asm(); + + let offset = 80; + let target: CodePtr = ((cb.get_write_ptr().into_u64() + offset) as *mut u8).into(); + + asm.je(Target::CodePtr(target)); + asm.compile_with_num_regs(&mut cb, 0); + } + + #[test] + fn test_emit_je_does_not_fit_into_bcond() { + let (mut asm, mut cb) = setup_asm(); + + let offset = 1 << 21; + let target: CodePtr = ((cb.get_write_ptr().into_u64() + offset) as *mut u8).into(); + + asm.je(Target::CodePtr(target)); + asm.compile_with_num_regs(&mut cb, 0); + } + + #[test] + fn test_emit_lea_label() { + let (mut asm, mut cb) = setup_asm(); + + let label = asm.new_label("label"); + let opnd = asm.lea_label(label); + + asm.write_label(label); + asm.bake_string("Hello, world!"); + asm.store(Opnd::mem(64, SP, 0), opnd); + + asm.compile_with_num_regs(&mut cb, 1); + } + + #[test] + fn test_emit_load_mem_disp_fits_into_load() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::mem(64, SP, 0)); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that two instructions were written: LDUR and STUR. 
+ assert_eq!(8, cb.get_write_pos()); + } + + #[test] + fn test_emit_load_mem_disp_fits_into_add() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::mem(64, SP, 1 << 10)); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that three instructions were written: ADD, LDUR, and STUR. + assert_eq!(12, cb.get_write_pos()); + } + + #[test] + fn test_emit_load_mem_disp_does_not_fit_into_add() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::mem(64, SP, 1 << 12 | 1)); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that four instructions were written: MOVZ, ADD, LDUR, and STUR. + assert_eq!(16, cb.get_write_pos()); + } + + #[test] + fn test_emit_load_value_immediate() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::Value(Qnil)); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that only two instructions were written since the value is an + // immediate. + assert_eq!(8, cb.get_write_pos()); + } + + #[test] + fn test_emit_load_value_non_immediate() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::Value(VALUE(0xCAFECAFECAFE0000))); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that five instructions were written since the value is not an + // immediate and needs to be loaded into a register.
+ assert_eq!(20, cb.get_write_pos()); + } + + #[test] + fn test_emit_or() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.or(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + } + + #[test] + fn test_emit_lshift() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.lshift(Opnd::Reg(X0_REG), Opnd::UImm(5)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + } + + #[test] + fn test_emit_rshift() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.rshift(Opnd::Reg(X0_REG), Opnd::UImm(5)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + } + + #[test] + fn test_emit_urshift() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.urshift(Opnd::Reg(X0_REG), Opnd::UImm(5)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + } + + #[test] + fn test_emit_test() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); + asm.compile_with_num_regs(&mut cb, 0); + + // Assert that only one instruction was written. + assert_eq!(4, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_with_encodable_unsigned_immediate() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::UImm(7)); + asm.compile_with_num_regs(&mut cb, 0); + + // Assert that only one instruction was written. + assert_eq!(4, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_with_unencodable_unsigned_immediate() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::UImm(5)); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that a load and a test instruction were written. 
+ assert_eq!(8, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_with_encodable_signed_immediate() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::Imm(7)); + asm.compile_with_num_regs(&mut cb, 0); + + // Assert that only one instruction was written. + assert_eq!(4, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_with_unencodable_signed_immediate() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::Imm(5)); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that a load and a test instruction were written. + assert_eq!(8, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_with_negative_signed_immediate() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::Imm(-7)); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that a load and a test instruction were written. + assert_eq!(8, cb.get_write_pos()); + } + + #[test] + fn test_emit_xor() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.xor(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + + asm.compile_with_num_regs(&mut cb, 1); + } + + #[test] + #[cfg(feature = "disasm")] + fn test_simple_disasm() -> std::result::Result<(), capstone::Error> { + // Test drive Capstone with simple input + use capstone::prelude::*; + + let cs = Capstone::new() + .arm64() + .mode(arch::arm64::ArchMode::Arm) + .build()?; + + let insns = cs.disasm_all(&[0x60, 0x0f, 0x80, 0xF2], 0x1000)?; + + match insns.as_ref() { + [insn] => { + assert_eq!(Some("movk"), insn.mnemonic()); + Ok(()) + } + _ => Err(capstone::Error::CustomError( + "expected to disassemble to movk", + )), + } + } +} diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs new file mode 100644 index 00000000000000..33a79a417922aa --- /dev/null +++ b/yjit/src/backend/ir.rs @@ -0,0 +1,1534 @@ +#![allow(dead_code)] +#![allow(unused_variables)] +#![allow(unused_imports)] + +use std::cell::Cell; +use 
std::fmt; +use std::convert::From; +use std::mem::take; +use crate::cruby::{VALUE}; +use crate::virtualmem::{CodePtr}; +use crate::asm::{CodeBlock, uimm_num_bits, imm_num_bits}; +use crate::core::{Context, Type, TempMapping}; +use crate::options::*; + +#[cfg(target_arch = "x86_64")] +use crate::backend::x86_64::*; + +#[cfg(target_arch = "aarch64")] +use crate::backend::arm64::*; + +pub const EC: Opnd = _EC; +pub const CFP: Opnd = _CFP; +pub const SP: Opnd = _SP; + +pub const C_ARG_OPNDS: [Opnd; 6] = _C_ARG_OPNDS; +pub const C_RET_OPND: Opnd = _C_RET_OPND; + +// Memory operand base +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum MemBase +{ + Reg(u8), + InsnOut(usize), +} + +// Memory location +#[derive(Copy, Clone, PartialEq, Eq)] +pub struct Mem +{ + // Base register number or instruction index + pub(super) base: MemBase, + + // Offset relative to the base pointer + pub(super) disp: i32, + + // Size in bits + pub(super) num_bits: u8, +} + +impl fmt::Debug for Mem { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "Mem{}[{:?}", self.num_bits, self.base)?; + if self.disp != 0 { + let sign = if self.disp > 0 { '+' } else { '-' }; + write!(fmt, " {sign} {}", self.disp)?; + } + + write!(fmt, "]") + } +} + +/// Operand to an IR instruction +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum Opnd +{ + None, // For insns with no output + + // Immediate Ruby value, may be GC'd, movable + Value(VALUE), + + // Output of a preceding instruction in this block + InsnOut{ idx: usize, num_bits: u8 }, + + // Low-level operands, for lowering + Imm(i64), // Raw signed immediate + UImm(u64), // Raw unsigned immediate + Mem(Mem), // Memory location + Reg(Reg), // Machine register +} + +impl fmt::Debug for Opnd { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + use Opnd::*; + match self { + Self::None => write!(fmt, "None"), + Value(val) => write!(fmt, "Value({val:?})"), + InsnOut { idx, num_bits } => write!(fmt, "Out{num_bits}({idx})"), + 
Imm(signed) => write!(fmt, "{signed:x}_i64"), + UImm(unsigned) => write!(fmt, "{unsigned:x}_u64"), + // Say Mem and Reg only once + Mem(mem) => write!(fmt, "{mem:?}"), + Reg(reg) => write!(fmt, "{reg:?}"), + } + } +} + +impl Opnd +{ + /// Convenience constructor for memory operands + pub fn mem(num_bits: u8, base: Opnd, disp: i32) -> Self { + match base { + Opnd::Reg(base_reg) => { + assert!(base_reg.num_bits == 64); + Opnd::Mem(Mem { + base: MemBase::Reg(base_reg.reg_no), + disp: disp, + num_bits: num_bits, + }) + }, + + Opnd::InsnOut{idx, num_bits } => { + assert!(num_bits == 64); + Opnd::Mem(Mem { + base: MemBase::InsnOut(idx), + disp: disp, + num_bits: num_bits, + }) + }, + + _ => unreachable!("memory operand with non-register base") + } + } + + /// Constructor for constant pointer operand + pub fn const_ptr(ptr: *const u8) -> Self { + Opnd::UImm(ptr as u64) + } + + pub fn is_some(&self) -> bool { + match *self { + Opnd::None => false, + _ => true, + } + } + + /// Unwrap a register operand + pub fn unwrap_reg(&self) -> Reg { + match self { + Opnd::Reg(reg) => *reg, + _ => unreachable!("trying to unwrap {:?} into reg", self) + } + } + + /// Get the size in bits for this operand if there is one. + fn num_bits(&self) -> Option { + match *self { + Opnd::Reg(Reg { num_bits, .. }) => Some(num_bits), + Opnd::Mem(Mem { num_bits, .. }) => Some(num_bits), + Opnd::InsnOut { num_bits, .. } => Some(num_bits), + _ => None + } + } + + /// Get the size in bits for register/memory operands. + pub fn rm_num_bits(&self) -> u8 { + self.num_bits().unwrap() + } + + /// Maps the indices from a previous list of instructions to a new list of + /// instructions. 
+ pub fn map_index(self, indices: &Vec<usize>) -> Opnd { + match self { + Opnd::InsnOut { idx, num_bits } => { + Opnd::InsnOut { idx: indices[idx], num_bits } + } + Opnd::Mem(Mem { base: MemBase::InsnOut(idx), disp, num_bits }) => { + Opnd::Mem(Mem { base: MemBase::InsnOut(indices[idx]), disp, num_bits }) + }, + _ => self + } + } + + /// When there aren't any operands to check against, this is the number of + /// bits that should be used for any given output variable. + const DEFAULT_NUM_BITS: u8 = 64; + + /// Determine the size in bits from the iterator of operands. If any of them + /// are different sizes this will panic. + pub fn match_num_bits_iter<'a>(opnds: impl Iterator<Item = &'a Opnd>) -> u8 { + let mut value: Option<u8> = None; + + for opnd in opnds { + if let Some(num_bits) = opnd.num_bits() { + match value { + None => { + value = Some(num_bits); + }, + Some(value) => { + assert_eq!(value, num_bits, "operands of incompatible sizes"); + } + }; + } + } + + value.unwrap_or(Self::DEFAULT_NUM_BITS) + } + + /// Determine the size in bits of the slice of the given operands. If any of + /// them are different sizes this will panic.
+ pub fn match_num_bits(opnds: &[Opnd]) -> u8 { + Self::match_num_bits_iter(opnds.iter()) + } +} + +impl From for Opnd { + fn from(value: usize) -> Self { + Opnd::UImm(value.try_into().unwrap()) + } +} + +impl From for Opnd { + fn from(value: u64) -> Self { + Opnd::UImm(value.try_into().unwrap()) + } +} + +impl From for Opnd { + fn from(value: i64) -> Self { + Opnd::Imm(value) + } +} + +impl From for Opnd { + fn from(value: i32) -> Self { + Opnd::Imm(value.try_into().unwrap()) + } +} + +impl From for Opnd { + fn from(value: u32) -> Self { + Opnd::UImm(value as u64) + } +} + +impl From for Opnd { + fn from(value: VALUE) -> Self { + Opnd::Value(value) + } +} + +/// Branch target (something that we can jump to) +/// for branch instructions +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum Target +{ + CodePtr(CodePtr), // Pointer to a piece of YJIT-generated code (e.g. side-exit) + FunPtr(*const u8), // Pointer to a C function + Label(usize), // A label within the generated code +} + +impl Target +{ + pub fn unwrap_fun_ptr(&self) -> *const u8 { + match self { + Target::FunPtr(ptr) => *ptr, + _ => unreachable!("trying to unwrap {:?} into fun ptr", self) + } + } + + pub fn unwrap_label_idx(&self) -> usize { + match self { + Target::Label(idx) => *idx, + _ => unreachable!("trying to unwrap {:?} into label", self) + } + } + + pub fn unwrap_code_ptr(&self) -> CodePtr { + match self { + Target::CodePtr(ptr) => *ptr, + _ => unreachable!("trying to unwrap {:?} into code ptr", self) + } + } +} + +impl From for Target { + fn from(code_ptr: CodePtr) -> Self { + Target::CodePtr(code_ptr) + } +} + +type PosMarkerFn = Box; + +/// YJIT IR instruction +pub enum Insn { + /// Add two operands together, and return the result as a new operand. + Add { left: Opnd, right: Opnd, out: Opnd }, + + /// This is the same as the OP_ADD instruction, except that it performs the + /// binary AND operation. 
+ And { left: Opnd, right: Opnd, out: Opnd }, + + /// Bake a string directly into the instruction stream. + BakeString(String), + + // Trigger a debugger breakpoint + Breakpoint, + + /// Add a comment into the IR at the point that this instruction is added. + /// It won't have any impact on that actual compiled code. + Comment(String), + + /// Compare two operands + Cmp { left: Opnd, right: Opnd }, + + /// Pop a register from the C stack + CPop { out: Opnd }, + + /// Pop all of the caller-save registers and the flags from the C stack + CPopAll, + + /// Pop a register from the C stack and store it into another register + CPopInto(Opnd), + + /// Push a register onto the C stack + CPush(Opnd), + + /// Push all of the caller-save registers and the flags to the C stack + CPushAll, + + // C function call with N arguments (variadic) + CCall { opnds: Vec, target: Target, out: Opnd }, + + // C function return + CRet(Opnd), + + /// Conditionally select if equal + CSelE { truthy: Opnd, falsy: Opnd, out: Opnd }, + + /// Conditionally select if greater + CSelG { truthy: Opnd, falsy: Opnd, out: Opnd }, + + /// Conditionally select if greater or equal + CSelGE { truthy: Opnd, falsy: Opnd, out: Opnd }, + + /// Conditionally select if less + CSelL { truthy: Opnd, falsy: Opnd, out: Opnd }, + + /// Conditionally select if less or equal + CSelLE { truthy: Opnd, falsy: Opnd, out: Opnd }, + + /// Conditionally select if not equal + CSelNE { truthy: Opnd, falsy: Opnd, out: Opnd }, + + /// Conditionally select if not zero + CSelNZ { truthy: Opnd, falsy: Opnd, out: Opnd }, + + /// Conditionally select if zero + CSelZ { truthy: Opnd, falsy: Opnd, out: Opnd }, + + /// Set up the frame stack as necessary per the architecture. + FrameSetup, + + /// Tear down the frame stack as necessary per the architecture. 
+ FrameTeardown, + + // Atomically increment a counter + // Input: memory operand, increment value + // Produces no output + IncrCounter { mem: Opnd, value: Opnd }, + + /// Jump if below or equal + Jbe(Target), + + /// Jump if equal + Je(Target), + + /// Jump if lower + Jl(Target), + + // Unconditional jump to a branch target + Jmp(Target), + + // Unconditional jump which takes a reg/mem address operand + JmpOpnd(Opnd), + + /// Jump if not equal + Jne(Target), + + /// Jump if not zero + Jnz(Target), + + /// Jump if overflow + Jo(Target), + + /// Jump if zero + Jz(Target), + + // Add a label into the IR at the point that this instruction is added. + Label(Target), + + // Load effective address relative to the current instruction pointer. It + // accepts a single signed immediate operand. + LeaLabel { target: Target, out: Opnd }, + + // Load effective address + Lea { opnd: Opnd, out: Opnd }, + + /// Take a specific register. Signal the register allocator to not use it. + LiveReg { opnd: Opnd, out: Opnd }, + + // A low-level instruction that loads a value into a register. + Load { opnd: Opnd, out: Opnd }, + + // A low-level instruction that loads a value into a register and + // sign-extends it to a 64-bit value. + LoadSExt { opnd: Opnd, out: Opnd }, + + /// Shift a value left by a certain amount. + LShift { opnd: Opnd, shift: Opnd, out: Opnd }, + + // A low-level mov instruction. It accepts two operands. + Mov { dest: Opnd, src: Opnd }, + + // Perform the NOT operation on an individual operand, and return the result + // as a new operand. This operand can then be used as the operand on another + // instruction. + Not { opnd: Opnd, out: Opnd }, + + // This is the same as the OP_ADD instruction, except that it performs the + // binary OR operation. + Or { left: Opnd, right: Opnd, out: Opnd }, + + /// Pad nop instructions to accommodate Op::Jmp in case the block is + /// invalidated.
+ PadEntryExit, + + // Mark a position in the generated code + PosMarker(PosMarkerFn), + + /// Shift a value right by a certain amount (signed). + RShift { opnd: Opnd, shift: Opnd, out: Opnd }, + + // Low-level instruction to store a value to memory. + Store { dest: Opnd, src: Opnd }, + + // This is the same as the OP_ADD instruction, except for subtraction. + Sub { left: Opnd, right: Opnd, out: Opnd }, + + // Bitwise AND test instruction + Test { left: Opnd, right: Opnd }, + + /// Shift a value right by a certain amount (unsigned). + URShift { opnd: Opnd, shift: Opnd, out: Opnd }, + + // This is the same as the OP_ADD instruction, except that it performs the + // binary XOR operation. + Xor { left: Opnd, right: Opnd, out: Opnd } +} + +impl Insn { + /// Create an iterator that will yield a non-mutable reference to each + /// operand in turn for this instruction. + pub(super) fn opnd_iter(&self) -> InsnOpndIterator { + InsnOpndIterator::new(self) + } + + /// Create an iterator that will yield a mutable reference to each operand + /// in turn for this instruction. + pub(super) fn opnd_iter_mut(&mut self) -> InsnOpndMutIterator { + InsnOpndMutIterator::new(self) + } + + /// Returns a string that describes which operation this instruction is + /// performing. This is used for debugging. + fn op(&self) -> &'static str { + match self { + Insn::Add { .. } => "Add", + Insn::And { .. } => "And", + Insn::BakeString(_) => "BakeString", + Insn::Breakpoint => "Breakpoint", + Insn::Comment(_) => "Comment", + Insn::Cmp { .. } => "Cmp", + Insn::CPop { .. } => "CPop", + Insn::CPopAll => "CPopAll", + Insn::CPopInto(_) => "CPopInto", + Insn::CPush(_) => "CPush", + Insn::CPushAll => "CPushAll", + Insn::CCall { .. } => "CCall", + Insn::CRet(_) => "CRet", + Insn::CSelE { .. } => "CSelE", + Insn::CSelG { .. } => "CSelG", + Insn::CSelGE { .. } => "CSelGE", + Insn::CSelL { .. } => "CSelL", + Insn::CSelLE { .. } => "CSelLE", + Insn::CSelNE { .. } => "CSelNE", + Insn::CSelNZ { .. 
} => "CSelNZ", + Insn::CSelZ { .. } => "CSelZ", + Insn::FrameSetup => "FrameSetup", + Insn::FrameTeardown => "FrameTeardown", + Insn::IncrCounter { .. } => "IncrCounter", + Insn::Jbe(_) => "Jbe", + Insn::Je(_) => "Je", + Insn::Jl(_) => "Jl", + Insn::Jmp(_) => "Jmp", + Insn::JmpOpnd(_) => "JmpOpnd", + Insn::Jne(_) => "Jne", + Insn::Jnz(_) => "Jnz", + Insn::Jo(_) => "Jo", + Insn::Jz(_) => "Jz", + Insn::Label(_) => "Label", + Insn::LeaLabel { .. } => "LeaLabel", + Insn::Lea { .. } => "Lea", + Insn::LiveReg { .. } => "LiveReg", + Insn::Load { .. } => "Load", + Insn::LoadSExt { .. } => "LoadSExt", + Insn::LShift { .. } => "LShift", + Insn::Mov { .. } => "Mov", + Insn::Not { .. } => "Not", + Insn::Or { .. } => "Or", + Insn::PadEntryExit => "PadEntryExit", + Insn::PosMarker(_) => "PosMarker", + Insn::RShift { .. } => "RShift", + Insn::Store { .. } => "Store", + Insn::Sub { .. } => "Sub", + Insn::Test { .. } => "Test", + Insn::URShift { .. } => "URShift", + Insn::Xor { .. } => "Xor" + } + } + + /// Return a non-mutable reference to the out operand for this instruction + /// if it has one. + pub fn out_opnd(&self) -> Option<&Opnd> { + match self { + Insn::Add { out, .. } | + Insn::And { out, .. } | + Insn::CCall { out, .. } | + Insn::CPop { out, .. } | + Insn::CSelE { out, .. } | + Insn::CSelG { out, .. } | + Insn::CSelGE { out, .. } | + Insn::CSelL { out, .. } | + Insn::CSelLE { out, .. } | + Insn::CSelNE { out, .. } | + Insn::CSelNZ { out, .. } | + Insn::CSelZ { out, .. } | + Insn::Lea { out, .. } | + Insn::LeaLabel { out, .. } | + Insn::LiveReg { out, .. } | + Insn::Load { out, .. } | + Insn::LoadSExt { out, .. } | + Insn::LShift { out, .. } | + Insn::Not { out, .. } | + Insn::Or { out, .. } | + Insn::RShift { out, .. } | + Insn::Sub { out, .. } | + Insn::URShift { out, .. } | + Insn::Xor { out, .. } => Some(out), + _ => None + } + } + + /// Return a mutable reference to the out operand for this instruction if it + /// has one. 
+ pub fn out_opnd_mut(&mut self) -> Option<&mut Opnd> { + match self { + Insn::Add { out, .. } | + Insn::And { out, .. } | + Insn::CCall { out, .. } | + Insn::CPop { out, .. } | + Insn::CSelE { out, .. } | + Insn::CSelG { out, .. } | + Insn::CSelGE { out, .. } | + Insn::CSelL { out, .. } | + Insn::CSelLE { out, .. } | + Insn::CSelNE { out, .. } | + Insn::CSelNZ { out, .. } | + Insn::CSelZ { out, .. } | + Insn::Lea { out, .. } | + Insn::LeaLabel { out, .. } | + Insn::LiveReg { out, .. } | + Insn::Load { out, .. } | + Insn::LoadSExt { out, .. } | + Insn::LShift { out, .. } | + Insn::Not { out, .. } | + Insn::Or { out, .. } | + Insn::RShift { out, .. } | + Insn::Sub { out, .. } | + Insn::URShift { out, .. } | + Insn::Xor { out, .. } => Some(out), + _ => None + } + } + + /// Returns the target for this instruction if there is one. + pub fn target(&self) -> Option<&Target> { + match self { + Insn::Jbe(target) | + Insn::Je(target) | + Insn::Jl(target) | + Insn::Jmp(target) | + Insn::Jne(target) | + Insn::Jnz(target) | + Insn::Jo(target) | + Insn::Jz(target) | + Insn::LeaLabel { target, .. } => Some(target), + _ => None + } + } + + /// Returns the text associated with this instruction if there is some. + pub fn text(&self) -> Option<&String> { + match self { + Insn::BakeString(text) | + Insn::Comment(text) => Some(text), + _ => None + } + } +} + +/// An iterator that will yield a non-mutable reference to each operand in turn +/// for the given instruction. +pub(super) struct InsnOpndIterator<'a> { + insn: &'a Insn, + idx: usize, +} + +impl<'a> InsnOpndIterator<'a> { + fn new(insn: &'a Insn) -> Self { + Self { insn, idx: 0 } + } +} + +impl<'a> Iterator for InsnOpndIterator<'a> { + type Item = &'a Opnd; + + fn next(&mut self) -> Option { + match self.insn { + Insn::BakeString(_) | + Insn::Breakpoint | + Insn::Comment(_) | + Insn::CPop { .. 
} | + Insn::CPopAll | + Insn::CPushAll | + Insn::FrameSetup | + Insn::FrameTeardown | + Insn::Jbe(_) | + Insn::Je(_) | + Insn::Jl(_) | + Insn::Jmp(_) | + Insn::Jne(_) | + Insn::Jnz(_) | + Insn::Jo(_) | + Insn::Jz(_) | + Insn::Label(_) | + Insn::LeaLabel { .. } | + Insn::PadEntryExit | + Insn::PosMarker(_) => None, + Insn::CPopInto(opnd) | + Insn::CPush(opnd) | + Insn::CRet(opnd) | + Insn::JmpOpnd(opnd) | + Insn::Lea { opnd, .. } | + Insn::LiveReg { opnd, .. } | + Insn::Load { opnd, .. } | + Insn::LoadSExt { opnd, .. } | + Insn::Not { opnd, .. } => { + match self.idx { + 0 => { + self.idx += 1; + Some(&opnd) + }, + _ => None + } + }, + Insn::Add { left: opnd0, right: opnd1, .. } | + Insn::And { left: opnd0, right: opnd1, .. } | + Insn::Cmp { left: opnd0, right: opnd1 } | + Insn::CSelE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelG { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelGE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelL { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelLE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelNE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelNZ { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelZ { truthy: opnd0, falsy: opnd1, .. } | + Insn::IncrCounter { mem: opnd0, value: opnd1, .. } | + Insn::LShift { opnd: opnd0, shift: opnd1, .. } | + Insn::Mov { dest: opnd0, src: opnd1 } | + Insn::Or { left: opnd0, right: opnd1, .. } | + Insn::RShift { opnd: opnd0, shift: opnd1, .. } | + Insn::Store { dest: opnd0, src: opnd1 } | + Insn::Sub { left: opnd0, right: opnd1, .. } | + Insn::Test { left: opnd0, right: opnd1 } | + Insn::URShift { opnd: opnd0, shift: opnd1, .. } | + Insn::Xor { left: opnd0, right: opnd1, .. } => { + match self.idx { + 0 => { + self.idx += 1; + Some(&opnd0) + } + 1 => { + self.idx += 1; + Some(&opnd1) + } + _ => None + } + }, + Insn::CCall { opnds, .. 
} => { + if self.idx < opnds.len() { + let opnd = &opnds[self.idx]; + self.idx += 1; + Some(opnd) + } else { + None + } + } + } + } +} + +/// An iterator that will yield each operand in turn for the given instruction. +pub(super) struct InsnOpndMutIterator<'a> { + insn: &'a mut Insn, + idx: usize, +} + +impl<'a> InsnOpndMutIterator<'a> { + fn new(insn: &'a mut Insn) -> Self { + Self { insn, idx: 0 } + } + + pub(super) fn next(&mut self) -> Option<&mut Opnd> { + match self.insn { + Insn::BakeString(_) | + Insn::Breakpoint | + Insn::Comment(_) | + Insn::CPop { .. } | + Insn::CPopAll | + Insn::CPushAll | + Insn::FrameSetup | + Insn::FrameTeardown | + Insn::Jbe(_) | + Insn::Je(_) | + Insn::Jl(_) | + Insn::Jmp(_) | + Insn::Jne(_) | + Insn::Jnz(_) | + Insn::Jo(_) | + Insn::Jz(_) | + Insn::Label(_) | + Insn::LeaLabel { .. } | + Insn::PadEntryExit | + Insn::PosMarker(_) => None, + Insn::CPopInto(opnd) | + Insn::CPush(opnd) | + Insn::CRet(opnd) | + Insn::JmpOpnd(opnd) | + Insn::Lea { opnd, .. } | + Insn::LiveReg { opnd, .. } | + Insn::Load { opnd, .. } | + Insn::LoadSExt { opnd, .. } | + Insn::Not { opnd, .. } => { + match self.idx { + 0 => { + self.idx += 1; + Some(opnd) + }, + _ => None + } + }, + Insn::Add { left: opnd0, right: opnd1, .. } | + Insn::And { left: opnd0, right: opnd1, .. } | + Insn::Cmp { left: opnd0, right: opnd1 } | + Insn::CSelE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelG { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelGE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelL { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelLE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelNE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelNZ { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelZ { truthy: opnd0, falsy: opnd1, .. } | + Insn::IncrCounter { mem: opnd0, value: opnd1, .. } | + Insn::LShift { opnd: opnd0, shift: opnd1, .. } | + Insn::Mov { dest: opnd0, src: opnd1 } | + Insn::Or { left: opnd0, right: opnd1, .. 
} | + Insn::RShift { opnd: opnd0, shift: opnd1, .. } | + Insn::Store { dest: opnd0, src: opnd1 } | + Insn::Sub { left: opnd0, right: opnd1, .. } | + Insn::Test { left: opnd0, right: opnd1 } | + Insn::URShift { opnd: opnd0, shift: opnd1, .. } | + Insn::Xor { left: opnd0, right: opnd1, .. } => { + match self.idx { + 0 => { + self.idx += 1; + Some(opnd0) + } + 1 => { + self.idx += 1; + Some(opnd1) + } + _ => None + } + }, + Insn::CCall { opnds, .. } => { + if self.idx < opnds.len() { + let opnd = &mut opnds[self.idx]; + self.idx += 1; + Some(opnd) + } else { + None + } + } + } + } +} + +impl fmt::Debug for Insn { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{}(", self.op())?; + + // Print list of operands + let mut opnd_iter = self.opnd_iter(); + if let Some(first_opnd) = opnd_iter.next() { + write!(fmt, "{first_opnd:?}")?; + } + for opnd in opnd_iter { + write!(fmt, ", {opnd:?}")?; + } + write!(fmt, ")")?; + + // Print text, target, and pos if they are present + if let Some(text) = self.text() { + write!(fmt, " {text:?}")? + } + if let Some(target) = self.target() { + write!(fmt, " target={target:?}")?; + } + + write!(fmt, " -> {:?}", self.out_opnd().unwrap_or(&Opnd::None)) + } +} + +/// Object into which we assemble instructions to be +/// optimized and lowered +pub struct Assembler +{ + pub(super) insns: Vec, + + /// Parallel vec with insns + /// Index of the last insn using the output of this insn + pub(super) live_ranges: Vec, + + /// Names of labels + pub(super) label_names: Vec, +} + +impl Assembler +{ + pub fn new() -> Self { + Self::new_with_label_names(Vec::default()) + } + + pub fn new_with_label_names(label_names: Vec) -> Self { + Self { + insns: Vec::default(), + live_ranges: Vec::default(), + label_names + } + } + + /// Build an Opnd::InsnOut from the current index of the assembler and the + /// given number of bits. 
+ pub(super) fn next_opnd_out(&self, num_bits: u8) -> Opnd { + Opnd::InsnOut { idx: self.insns.len(), num_bits } + } + + /// Append an instruction onto the current list of instructions and update + /// the live ranges of any instructions whose outputs are being used as + /// operands to this instruction. + pub(super) fn push_insn(&mut self, insn: Insn) { + // Index of this instruction + let insn_idx = self.insns.len(); + + // If we find any InsnOut from previous instructions, we're going to + // update the live range of the previous instruction to point to this + // one. + for opnd in insn.opnd_iter() { + match opnd { + Opnd::InsnOut { idx, .. } => { + assert!(*idx < self.insns.len()); + self.live_ranges[*idx] = insn_idx; + } + Opnd::Mem(Mem { base: MemBase::InsnOut(idx), .. }) => { + assert!(*idx < self.insns.len()); + self.live_ranges[*idx] = insn_idx; + } + _ => {} + } + } + + self.insns.push(insn); + self.live_ranges.push(insn_idx); + } + + /// Create a new label instance that we can jump to + pub fn new_label(&mut self, name: &str) -> Target + { + assert!(!name.contains(" "), "use underscores in label names, not spaces"); + + let label_idx = self.label_names.len(); + self.label_names.push(name.to_string()); + Target::Label(label_idx) + } + + /// Sets the out field on the various instructions that require allocated + /// registers because their output is used as the operand on a subsequent + /// instruction. This is our implementation of the linear scan algorithm. + pub(super) fn alloc_regs(mut self, regs: Vec<Reg>) -> Assembler + { + //dbg!(&self); + + // First, create the pool of registers. + let mut pool: u32 = 0; + + // Mutate the pool bitmap to indicate that the register at that index + // has been allocated and is live.
+ fn alloc_reg(pool: &mut u32, regs: &Vec<Reg>) -> Reg { + for (index, reg) in regs.iter().enumerate() { + if (*pool & (1 << index)) == 0 { + *pool |= 1 << index; + return *reg; + } + } + + unreachable!("Register spill not supported"); + } + + // Allocate a specific register + fn take_reg(pool: &mut u32, regs: &Vec<Reg>, reg: &Reg) -> Reg { + let reg_index = regs.iter().position(|elem| elem.reg_no == reg.reg_no); + + if let Some(reg_index) = reg_index { + assert_eq!(*pool & (1 << reg_index), 0, "register already allocated"); + *pool |= 1 << reg_index; + } + + return *reg; + } + + // Mutate the pool bitmap to indicate that the given register is being + // returned as it is no longer used by the instruction that previously + // held it. + fn dealloc_reg(pool: &mut u32, regs: &Vec<Reg>, reg: &Reg) { + let reg_index = regs.iter().position(|elem| elem.reg_no == reg.reg_no); + + if let Some(reg_index) = reg_index { + *pool &= !(1 << reg_index); + } + } + + let live_ranges: Vec<usize> = take(&mut self.live_ranges); + let mut asm = Assembler::new_with_label_names(take(&mut self.label_names)); + let mut iterator = self.into_draining_iter(); + + while let Some((index, mut insn)) = iterator.next_unmapped() { + // Check if this is the last instruction that uses an operand that + // spans more than one instruction. In that case, return the + // allocated register to the pool. + for opnd in insn.opnd_iter() { + match opnd { + Opnd::InsnOut { idx, .. } | + Opnd::Mem(Mem { base: MemBase::InsnOut(idx), .. }) => { + // Since we have an InsnOut, we know it spans more than one + // instruction. + let start_index = *idx; + assert!(start_index < index); + + // We're going to check if this is the last instruction that + // uses this operand. If it is, we can return the allocated + // register to the pool.
+ if live_ranges[start_index] == index { + if let Some(Opnd::Reg(reg)) = asm.insns[start_index].out_opnd() { + dealloc_reg(&mut pool, &regs, reg); + } else { + unreachable!("no register allocated for insn {:?}", insn); + } + } + } + _ => {} + } + } + + // No allocated register may still be live when we reach a C function call + if matches!(insn, Insn::CCall { .. }) { + assert_eq!(pool, 0, "register lives past C function call"); + } + + // If this instruction is used by another instruction, + // we need to allocate a register to it + if live_ranges[index] != index { + // If we get to this point where the end of the live range is + // not equal to the index of the instruction, then it must be + // true that we set an output operand for this instruction. If + // it's not true, something has gone wrong. + assert!( + !matches!(insn.out_opnd(), None), + "Instruction output reused but no output operand set" + ); + + // This is going to be the output operand that we will set on + // the instruction. + let mut out_reg: Option<Reg> = None; + + // C return values need to be mapped to the C return register + if matches!(insn, Insn::CCall { .. }) { + out_reg = Some(take_reg(&mut pool, &regs, &C_RET_REG)); + } + + // If this instruction's first operand maps to a register and + // this is the last use of the register, reuse the register + // We do this to improve register allocation on x86 + // e.g. out = add(reg0, reg1) + // reg0 = add(reg0, reg1) + if out_reg.is_none() { + let mut opnd_iter = insn.opnd_iter(); + + if let Some(Opnd::InsnOut{ idx, .. }) = opnd_iter.next() { + if live_ranges[*idx] == index { + if let Some(Opnd::Reg(reg)) = asm.insns[*idx].out_opnd() { + out_reg = Some(take_reg(&mut pool, &regs, reg)); + } + } + } + } + + // Allocate a new register for this instruction if one is not + // already allocated. + if out_reg.is_none() { + out_reg = match &insn { + Insn::LiveReg { opnd, ..
} => { + // Allocate a specific register + let reg = opnd.unwrap_reg(); + Some(take_reg(&mut pool, &regs, &reg)) + }, + _ => { + Some(alloc_reg(&mut pool, &regs)) + } + }; + } + + // Set the output operand on the instruction + let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter()); + + // If we have gotten to this point, then we're sure we have an + // output operand on this instruction because the live range + // extends beyond the index of the instruction. + let out = insn.out_opnd_mut().unwrap(); + *out = Opnd::Reg(out_reg.unwrap().sub_reg(out_num_bits)); + } + + // Replace InsnOut operands by their corresponding register + let mut opnd_iter = insn.opnd_iter_mut(); + while let Some(opnd) = opnd_iter.next() { + match *opnd { + Opnd::InsnOut { idx, .. } => { + *opnd = *asm.insns[idx].out_opnd().unwrap(); + }, + Opnd::Mem(Mem { base: MemBase::InsnOut(idx), disp, num_bits }) => { + let base = MemBase::Reg(asm.insns[idx].out_opnd().unwrap().unwrap_reg().reg_no); + *opnd = Opnd::Mem(Mem { base, disp, num_bits }); + } + _ => {}, + } + } + + asm.push_insn(insn); + } + + assert_eq!(pool, 0, "Expected all registers to be returned to the pool"); + asm + } + + /// Compile the instructions down to machine code + /// NOTE: should compile return a list of block labels to enable + /// compiling multiple blocks at a time? + pub fn compile(self, cb: &mut CodeBlock) -> Vec<u32> + { + #[cfg(feature = "disasm")] + let start_addr = cb.get_write_ptr().raw_ptr(); + + let alloc_regs = Self::get_alloc_regs(); + let gc_offsets = self.compile_with_regs(cb, alloc_regs); + + #[cfg(feature = "disasm")] + if get_option!(dump_disasm) && !cb.outlined { + use crate::disasm::disasm_addr_range; + let last_ptr = cb.get_write_ptr(); + let disasm = disasm_addr_range(cb, start_addr, last_ptr.raw_ptr() as usize - start_addr as usize); + if disasm.len() > 0 { + println!("{disasm}"); + } + } + gc_offsets + } + + /// Compile with a limited number of registers. Used only for unit tests.
+ pub fn compile_with_num_regs(self, cb: &mut CodeBlock, num_regs: usize) -> Vec + { + let mut alloc_regs = Self::get_alloc_regs(); + let alloc_regs = alloc_regs.drain(0..num_regs).collect(); + self.compile_with_regs(cb, alloc_regs) + } + + /// Consume the assembler by creating a new draining iterator. + pub fn into_draining_iter(self) -> AssemblerDrainingIterator { + AssemblerDrainingIterator::new(self) + } + + /// Consume the assembler by creating a new lookback iterator. + pub fn into_lookback_iter(self) -> AssemblerLookbackIterator { + AssemblerLookbackIterator::new(self) + } +} + +/// A struct that allows iterating through an assembler's instructions and +/// consuming them as it iterates. +pub struct AssemblerDrainingIterator { + insns: std::vec::IntoIter, + index: usize, + indices: Vec +} + +impl AssemblerDrainingIterator { + fn new(asm: Assembler) -> Self { + Self { + insns: asm.insns.into_iter(), + index: 0, + indices: Vec::default() + } + } + + /// When you're working with two lists of instructions, you need to make + /// sure you do some bookkeeping to align the indices contained within the + /// operands of the two lists. + /// + /// This function accepts the assembler that is being built and tracks the + /// end of the current list of instructions in order to maintain that + /// alignment. + pub fn map_insn_index(&mut self, asm: &mut Assembler) { + self.indices.push(asm.insns.len() - 1); + } + + /// Map an operand by using this iterator's list of mapped indices. + pub fn map_opnd(&self, opnd: Opnd) -> Opnd { + opnd.map_index(&self.indices) + } + + /// Returns the next instruction in the list with the indices corresponding + /// to the next list of instructions. 
+ pub fn next_mapped(&mut self) -> Option<(usize, Insn)> { + self.next_unmapped().map(|(index, mut insn)| { + let mut opnd_iter = insn.opnd_iter_mut(); + while let Some(opnd) = opnd_iter.next() { + *opnd = opnd.map_index(&self.indices); + } + + (index, insn) + }) + } + + /// Returns the next instruction in the list with the indices corresponding + /// to the previous list of instructions. + pub fn next_unmapped(&mut self) -> Option<(usize, Insn)> { + let index = self.index; + self.index += 1; + self.insns.next().map(|insn| (index, insn)) + } +} + +/// A struct that allows iterating through references to an assembler's +/// instructions without consuming them. +pub struct AssemblerLookbackIterator { + asm: Assembler, + index: Cell +} + +impl AssemblerLookbackIterator { + fn new(asm: Assembler) -> Self { + Self { asm, index: Cell::new(0) } + } + + /// Fetches a reference to an instruction at a specific index. + pub fn get(&self, index: usize) -> Option<&Insn> { + self.asm.insns.get(index) + } + + /// Fetches a reference to an instruction in the list relative to the + /// current cursor location of this iterator. + pub fn get_relative(&self, difference: i32) -> Option<&Insn> { + let index: Result = self.index.get().try_into(); + let relative: Result = index.and_then(|value| (value + difference).try_into()); + relative.ok().and_then(|value| self.asm.insns.get(value)) + } + + /// Fetches the previous instruction relative to the current cursor location + /// of this iterator. + pub fn get_previous(&self) -> Option<&Insn> { + self.get_relative(-1) + } + + /// Fetches the next instruction relative to the current cursor location of + /// this iterator. + pub fn get_next(&self) -> Option<&Insn> { + self.get_relative(1) + } + + /// Returns the next instruction in the list with the indices corresponding + /// to the previous list of instructions. 
pub fn next_unmapped(&self) -> Option<(usize, &Insn)> {
    // Advance the cursor first; the position handed back to the caller is the
    // one the cursor pointed at on entry.
    let index = self.index.get();
    self.index.set(index + 1);
    self.asm.insns.get(index).map(|insn| (index, insn))
}
} // closes the enclosing `impl AssemblerLookbackIterator` block

impl fmt::Debug for Assembler {
    /// Print a header line followed by one line per instruction, each
    /// prefixed with its zero-padded index.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(fmt, "Assembler\n")?;

        for (idx, insn) in self.insns.iter().enumerate() {
            write!(fmt, " {idx:03} {insn:?}\n")?;
        }

        Ok(())
    }
}

// Instruction builders. Each one pushes a single instruction onto the
// assembler; builders that produce a value return the operand that refers to
// the pushed instruction's output.
impl Assembler {
    /// Push an `Insn::Add` and return its output operand.
    #[must_use]
    pub fn add(&mut self, left: Opnd, right: Opnd) -> Opnd {
        let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right]));
        self.push_insn(Insn::Add { left, right, out });
        out
    }

    /// Push an `Insn::And` and return its output operand.
    #[must_use]
    pub fn and(&mut self, left: Opnd, right: Opnd) -> Opnd {
        let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right]));
        self.push_insn(Insn::And { left, right, out });
        out
    }

    /// Push an `Insn::BakeString` holding an owned copy of `text`.
    pub fn bake_string(&mut self, text: &str) {
        self.push_insn(Insn::BakeString(text.to_string()));
    }

    /// Push an `Insn::Breakpoint`.
    pub fn breakpoint(&mut self) {
        self.push_insn(Insn::Breakpoint);
    }

    /// Push an `Insn::CCall` targeting `fptr` with the given argument
    /// operands and return the operand referring to the call's output.
    pub fn ccall(&mut self, fptr: *const u8, opnds: Vec<Opnd>) -> Opnd {
        let out = self.next_opnd_out(Opnd::match_num_bits(&opnds));
        self.push_insn(Insn::CCall { target: Target::FunPtr(fptr), opnds, out });
        out
    }

    /// Push an `Insn::Cmp` of `left` against `right`.
    pub fn cmp(&mut self, left: Opnd, right: Opnd) {
        self.push_insn(Insn::Cmp { left, right });
    }

    /// Push an `Insn::Comment` holding an owned copy of `text`.
    pub fn comment(&mut self, text: &str) {
        self.push_insn(Insn::Comment(text.to_string()));
    }

    /// Push an `Insn::CPop` and return its output operand.
    #[must_use]
    pub fn cpop(&mut self) -> Opnd {
        let out = self.next_opnd_out(Opnd::DEFAULT_NUM_BITS);
        self.push_insn(Insn::CPop { out });
        out
    }

    /// Push an `Insn::CPopAll`.
    pub fn cpop_all(&mut self) {
        self.push_insn(Insn::CPopAll);
    }

    /// Push an `Insn::CPopInto` for `opnd`.
    pub fn cpop_into(&mut self, opnd: Opnd) {
        self.push_insn(Insn::CPopInto(opnd));
    }

    /// Push an `Insn::CPush` for `opnd`.
    pub fn cpush(&mut self, opnd: Opnd) {
        self.push_insn(Insn::CPush(opnd));
    }

    /// Push an `Insn::CPushAll`.
    pub fn cpush_all(&mut self) {
        self.push_insn(Insn::CPushAll);
    }

    /// Push an `Insn::CRet` for `opnd`.
    pub fn cret(&mut self, opnd: Opnd) {
        self.push_insn(Insn::CRet(opnd));
    }

+ #[must_use] + pub fn csel_e(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); + self.push_insn(Insn::CSelE { truthy, falsy, out }); + out + } + + #[must_use] + pub fn csel_g(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); + self.push_insn(Insn::CSelG { truthy, falsy, out }); + out + } + + #[must_use] + pub fn csel_ge(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); + self.push_insn(Insn::CSelGE { truthy, falsy, out }); + out + } + + #[must_use] + pub fn csel_l(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); + self.push_insn(Insn::CSelL { truthy, falsy, out }); + out + } + + #[must_use] + pub fn csel_le(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); + self.push_insn(Insn::CSelLE { truthy, falsy, out }); + out + } + + #[must_use] + pub fn csel_ne(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); + self.push_insn(Insn::CSelNE { truthy, falsy, out }); + out + } + + #[must_use] + pub fn csel_nz(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); + self.push_insn(Insn::CSelNZ { truthy, falsy, out }); + out + } + + #[must_use] + pub fn csel_z(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); + self.push_insn(Insn::CSelZ { truthy, falsy, out }); + out + } + + pub fn frame_setup(&mut self) { + self.push_insn(Insn::FrameSetup); + } + + pub fn frame_teardown(&mut self) { + self.push_insn(Insn::FrameTeardown); + } + + pub fn incr_counter(&mut self, mem: Opnd, value: Opnd) { + self.push_insn(Insn::IncrCounter { mem, 
value }); + } + + pub fn jbe(&mut self, target: Target) { + self.push_insn(Insn::Jbe(target)); + } + + pub fn je(&mut self, target: Target) { + self.push_insn(Insn::Je(target)); + } + + pub fn jl(&mut self, target: Target) { + self.push_insn(Insn::Jl(target)); + } + + pub fn jmp(&mut self, target: Target) { + self.push_insn(Insn::Jmp(target)); + } + + pub fn jmp_opnd(&mut self, opnd: Opnd) { + self.push_insn(Insn::JmpOpnd(opnd)); + } + + pub fn jne(&mut self, target: Target) { + self.push_insn(Insn::Jne(target)); + } + + pub fn jnz(&mut self, target: Target) { + self.push_insn(Insn::Jnz(target)); + } + + pub fn jo(&mut self, target: Target) { + self.push_insn(Insn::Jo(target)); + } + + pub fn jz(&mut self, target: Target) { + self.push_insn(Insn::Jz(target)); + } + + #[must_use] + pub fn lea(&mut self, opnd: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd])); + self.push_insn(Insn::Lea { opnd, out }); + out + } + + #[must_use] + pub fn lea_label(&mut self, target: Target) -> Opnd { + let out = self.next_opnd_out(Opnd::DEFAULT_NUM_BITS); + self.push_insn(Insn::LeaLabel { target, out }); + out + } + + #[must_use] + pub fn live_reg_opnd(&mut self, opnd: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd])); + self.push_insn(Insn::LiveReg { opnd, out }); + out + } + + #[must_use] + pub fn load(&mut self, opnd: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd])); + self.push_insn(Insn::Load { opnd, out }); + out + } + + #[must_use] + pub fn load_sext(&mut self, opnd: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd])); + self.push_insn(Insn::LoadSExt { opnd, out }); + out + } + + #[must_use] + pub fn lshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd, shift])); + self.push_insn(Insn::LShift { opnd, shift, out }); + out + } + + pub fn mov(&mut self, dest: Opnd, src: Opnd) { + self.push_insn(Insn::Mov { dest, 
src }); + } + + #[must_use] + pub fn not(&mut self, opnd: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd])); + self.push_insn(Insn::Not { opnd, out }); + out + } + + #[must_use] + pub fn or(&mut self, left: Opnd, right: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right])); + self.push_insn(Insn::Or { left, right, out }); + out + } + + pub fn pad_entry_exit(&mut self) { + self.push_insn(Insn::PadEntryExit); + } + + //pub fn pos_marker(&mut self, marker_fn: F) + pub fn pos_marker(&mut self, marker_fn: impl Fn(CodePtr) + 'static) { + self.push_insn(Insn::PosMarker(Box::new(marker_fn))); + } + + #[must_use] + pub fn rshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd, shift])); + self.push_insn(Insn::RShift { opnd, shift, out }); + out + } + + pub fn store(&mut self, dest: Opnd, src: Opnd) { + self.push_insn(Insn::Store { dest, src }); + } + + #[must_use] + pub fn sub(&mut self, left: Opnd, right: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right])); + self.push_insn(Insn::Sub { left, right, out }); + out + } + + pub fn test(&mut self, left: Opnd, right: Opnd) { + self.push_insn(Insn::Test { left, right }); + } + + #[must_use] + pub fn urshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd, shift])); + self.push_insn(Insn::URShift { opnd, shift, out }); + out + } + + /// Add a label at the current position + pub fn write_label(&mut self, target: Target) { + assert!(target.unwrap_label_idx() < self.label_names.len()); + self.push_insn(Insn::Label(target)); + } + + #[must_use] + pub fn xor(&mut self, left: Opnd, right: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right])); + self.push_insn(Insn::Xor { left, right, out }); + out + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_opnd_iter() { + let insn = 
Insn::Add { left: Opnd::None, right: Opnd::None, out: Opnd::None }; + + let mut opnd_iter = insn.opnd_iter(); + assert!(matches!(opnd_iter.next(), Some(Opnd::None))); + assert!(matches!(opnd_iter.next(), Some(Opnd::None))); + + assert!(matches!(opnd_iter.next(), None)); + } + + #[test] + fn test_opnd_iter_mut() { + let mut insn = Insn::Add { left: Opnd::None, right: Opnd::None, out: Opnd::None }; + + let mut opnd_iter = insn.opnd_iter_mut(); + assert!(matches!(opnd_iter.next(), Some(Opnd::None))); + assert!(matches!(opnd_iter.next(), Some(Opnd::None))); + + assert!(matches!(opnd_iter.next(), None)); + } +} diff --git a/yjit/src/backend/mod.rs b/yjit/src/backend/mod.rs new file mode 100644 index 00000000000000..47946950946438 --- /dev/null +++ b/yjit/src/backend/mod.rs @@ -0,0 +1,8 @@ +#[cfg(target_arch = "x86_64")] +pub mod x86_64; + +#[cfg(target_arch = "aarch64")] +pub mod arm64; + +pub mod ir; +mod tests; diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs new file mode 100644 index 00000000000000..08e8849b4d5acd --- /dev/null +++ b/yjit/src/backend/tests.rs @@ -0,0 +1,343 @@ +#![cfg(test)] + +use crate::asm::{CodeBlock}; +use crate::virtualmem::{CodePtr}; +use crate::backend::ir::*; +use crate::cruby::*; +use crate::core::*; +use crate::utils::c_callable; +use InsnOpnd::*; + +// Test that this function type checks +fn gen_dup( + ctx: &mut Context, + asm: &mut Assembler, +) { + let dup_val = ctx.stack_pop(0); + let (mapping, tmp_type) = ctx.get_opnd_mapping(StackOpnd(0)); + + let loc0 = ctx.stack_push_mapping((mapping, tmp_type)); + asm.mov(loc0, dup_val); +} + +fn guard_object_is_heap( + asm: &mut Assembler, + object_opnd: Opnd, + ctx: &mut Context, + side_exit: CodePtr, +) { + asm.comment("guard object is heap"); + + // Test that the object is not an immediate + asm.test(object_opnd.clone(), Opnd::UImm(RUBY_IMMEDIATE_MASK as u64)); + asm.jnz(Target::CodePtr(side_exit)); + + // Test that the object is not false or nil + 
asm.cmp(object_opnd.clone(), Opnd::UImm(Qnil.into())); + asm.jbe(Target::CodePtr(side_exit)); +} + +#[test] +fn test_add() { + let mut asm = Assembler::new(); + let out = asm.add(SP, Opnd::UImm(1)); + asm.add(out, Opnd::UImm(2)); +} + +#[test] +fn test_alloc_regs() { + let mut asm = Assembler::new(); + + // Get the first output that we're going to reuse later. + let out1 = asm.add(EC, Opnd::UImm(1)); + + // Pad some instructions in to make sure it can handle that. + asm.add(EC, Opnd::UImm(2)); + + // Get the second output we're going to reuse. + let out2 = asm.add(EC, Opnd::UImm(3)); + + // Pad another instruction. + asm.add(EC, Opnd::UImm(4)); + + // Reuse both the previously captured outputs. + asm.add(out1, out2); + + // Now get a third output to make sure that the pool has registers to + // allocate now that the previous ones have been returned. + let out3 = asm.add(EC, Opnd::UImm(5)); + asm.add(out3, Opnd::UImm(6)); + + // Here we're going to allocate the registers. + let result = asm.alloc_regs(Assembler::get_alloc_regs()); + + // Now we're going to verify that the out field has been appropriately + // updated for each of the instructions that needs it. 
+ let regs = Assembler::get_alloc_regs(); + let reg0 = regs[0]; + let reg1 = regs[1]; + + assert!(matches!(result.insns[0].out_opnd(), Some(Opnd::Reg(reg0)))); + assert!(matches!(result.insns[2].out_opnd(), Some(Opnd::Reg(reg1)))); + assert!(matches!(result.insns[5].out_opnd(), Some(Opnd::Reg(reg0)))); +} + +fn setup_asm() -> (Assembler, CodeBlock) { + return ( + Assembler::new(), + CodeBlock::new_dummy(1024) + ); +} + +// Test full codegen pipeline +#[test] +fn test_compile() +{ + let (mut asm, mut cb) = setup_asm(); + let regs = Assembler::get_alloc_regs(); + + let out = asm.add(Opnd::Reg(regs[0]), Opnd::UImm(2)); + let out2 = asm.add(out, Opnd::UImm(2)); + asm.store(Opnd::mem(64, SP, 0), out2); + + asm.compile_with_num_regs(&mut cb, 1); +} + +// Test memory-to-memory move +#[test] +fn test_mov_mem2mem() +{ + let (mut asm, mut cb) = setup_asm(); + + asm.comment("check that comments work too"); + asm.mov(Opnd::mem(64, SP, 0), Opnd::mem(64, SP, 8)); + + asm.compile_with_num_regs(&mut cb, 1); +} + +// Test load of register into new register +#[test] +fn test_load_reg() +{ + let (mut asm, mut cb) = setup_asm(); + + let out = asm.load(SP); + asm.mov(Opnd::mem(64, SP, 0), out); + + asm.compile_with_num_regs(&mut cb, 1); +} + +// Test load of a GC'd value +#[test] +fn test_load_value() +{ + let (mut asm, mut cb) = setup_asm(); + + let gcd_value = VALUE(0xFFFFFFFFFFFF00); + assert!(!gcd_value.special_const_p()); + + let out = asm.load(Opnd::Value(gcd_value)); + asm.mov(Opnd::mem(64, SP, 0), out); + + asm.compile_with_num_regs(&mut cb, 1); +} + +// Multiple registers needed and register reuse +#[test] +fn test_reuse_reg() +{ + let (mut asm, mut cb) = setup_asm(); + + let v0 = asm.add(Opnd::mem(64, SP, 0), Opnd::UImm(1)); + let v1 = asm.add(Opnd::mem(64, SP, 8), Opnd::UImm(1)); + + let v2 = asm.add(v1, Opnd::UImm(1)); // Reuse v1 register + let v3 = asm.add(v0, v2); + + asm.store(Opnd::mem(64, SP, 0), v2); + asm.store(Opnd::mem(64, SP, 8), v3); + + 
asm.compile_with_num_regs(&mut cb, 2); +} + +// 64-bit values can't be written directly to memory, +// need to be split into one or more register movs first +#[test] +fn test_store_u64() +{ + let (mut asm, mut cb) = setup_asm(); + asm.store(Opnd::mem(64, SP, 0), u64::MAX.into()); + + asm.compile_with_num_regs(&mut cb, 1); +} + +// Use instruction output as base register for memory operand +#[test] +fn test_base_insn_out() +{ + let (mut asm, mut cb) = setup_asm(); + + // Forced register to be reused + // This also causes the insn sequence to change length + asm.mov( + Opnd::mem(64, SP, 8), + Opnd::mem(64, SP, 0) + ); + + // Load the pointer into a register + let ptr_reg = asm.load(Opnd::const_ptr(4351776248 as *const u8)); + let counter_opnd = Opnd::mem(64, ptr_reg, 0); + + // Increment and store the updated value + asm.incr_counter(counter_opnd, 1.into()); + + asm.compile_with_num_regs(&mut cb, 2); +} + +#[test] +fn test_c_call() +{ + c_callable! { + fn dummy_c_fun(v0: usize, v1: usize) {} + } + + let (mut asm, mut cb) = setup_asm(); + + let ret_val = asm.ccall( + dummy_c_fun as *const u8, + vec![Opnd::mem(64, SP, 0), Opnd::UImm(1)] + ); + + // Make sure that the call's return value is usable + asm.mov(Opnd::mem(64, SP, 0), ret_val); + + asm.compile_with_num_regs(&mut cb, 1); +} + +#[test] +fn test_alloc_ccall_regs() { + let mut asm = Assembler::new(); + let out1 = asm.ccall(0 as *const u8, vec![]); + let out2 = asm.ccall(0 as *const u8, vec![out1]); + asm.mov(EC, out2); + let mut cb = CodeBlock::new_dummy(1024); + asm.compile_with_regs(&mut cb, Assembler::get_alloc_regs()); +} + +#[test] +fn test_lea_ret() +{ + let (mut asm, mut cb) = setup_asm(); + + let addr = asm.lea(Opnd::mem(64, SP, 0)); + asm.cret(addr); + + asm.compile_with_num_regs(&mut cb, 1); +} + +#[test] +fn test_jcc_label() +{ + let (mut asm, mut cb) = setup_asm(); + + let label = asm.new_label("foo"); + asm.cmp(EC, EC); + asm.je(label); + asm.write_label(label); + + asm.compile_with_num_regs(&mut cb, 
1); +} + +#[test] +fn test_jcc_ptr() +{ + let (mut asm, mut cb) = setup_asm(); + + let side_exit = Target::CodePtr((5 as *mut u8).into()); + let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK)); + asm.test( + Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG), + not_mask, + ); + asm.jnz(side_exit); + + asm.compile_with_num_regs(&mut cb, 2); +} + +/// Direct jump to a stub e.g. for deferred compilation +#[test] +fn test_jmp_ptr() +{ + let (mut asm, mut cb) = setup_asm(); + + let stub = Target::CodePtr((5 as *mut u8).into()); + asm.jmp(stub); + + asm.compile_with_num_regs(&mut cb, 0); +} + +#[test] +fn test_jo() +{ + let (mut asm, mut cb) = setup_asm(); + + let side_exit = Target::CodePtr((5 as *mut u8).into()); + + let arg1 = Opnd::mem(64, SP, 0); + let arg0 = Opnd::mem(64, SP, 8); + + let arg0_untag = asm.sub(arg0, Opnd::Imm(1)); + let out_val = asm.add(arg0_untag, arg1); + asm.jo(side_exit); + + asm.mov(Opnd::mem(64, SP, 0), out_val); + + asm.compile_with_num_regs(&mut cb, 2); +} + +#[test] +fn test_bake_string() { + let (mut asm, mut cb) = setup_asm(); + + asm.bake_string("Hello, world!"); + asm.compile_with_num_regs(&mut cb, 0); +} + +#[test] +fn test_draining_iterator() { + let mut asm = Assembler::new(); + + asm.load(Opnd::None); + asm.store(Opnd::None, Opnd::None); + asm.add(Opnd::None, Opnd::None); + + let mut iter = asm.into_draining_iter(); + + while let Some((index, insn)) = iter.next_unmapped() { + match index { + 0 => assert!(matches!(insn, Insn::Load { .. })), + 1 => assert!(matches!(insn, Insn::Store { .. })), + 2 => assert!(matches!(insn, Insn::Add { .. 
})), + _ => panic!("Unexpected instruction index"), + }; + } +} + +#[test] +fn test_lookback_iterator() { + let mut asm = Assembler::new(); + + asm.load(Opnd::None); + asm.store(Opnd::None, Opnd::None); + asm.store(Opnd::None, Opnd::None); + + let mut iter = asm.into_lookback_iter(); + + while let Some((index, insn)) = iter.next_unmapped() { + if index > 0 { + let opnd_iter = iter.get_previous().unwrap().opnd_iter(); + assert_eq!(opnd_iter.take(1).next(), Some(&Opnd::None)); + assert!(matches!(insn, Insn::Store { .. })); + } + } +} diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs new file mode 100644 index 00000000000000..bda7dc4c066e96 --- /dev/null +++ b/yjit/src/backend/x86_64/mod.rs @@ -0,0 +1,662 @@ +#![allow(dead_code)] +#![allow(unused_variables)] +#![allow(unused_imports)] + +use std::mem::take; + +use crate::asm::*; +use crate::asm::x86_64::*; +use crate::codegen::{JITState}; +use crate::cruby::*; +use crate::backend::ir::*; + +// Use the x86 register type for this platform +pub type Reg = X86Reg; + +// Callee-saved registers +pub const _CFP: Opnd = Opnd::Reg(R13_REG); +pub const _EC: Opnd = Opnd::Reg(R12_REG); +pub const _SP: Opnd = Opnd::Reg(RBX_REG); + +// C argument registers on this platform +pub const _C_ARG_OPNDS: [Opnd; 6] = [ + Opnd::Reg(RDI_REG), + Opnd::Reg(RSI_REG), + Opnd::Reg(RDX_REG), + Opnd::Reg(RCX_REG), + Opnd::Reg(R8_REG), + Opnd::Reg(R9_REG) +]; + +// C return value register on this platform +pub const C_RET_REG: Reg = RAX_REG; +pub const _C_RET_OPND: Opnd = Opnd::Reg(RAX_REG); + +/// Map Opnd to X86Opnd +impl From for X86Opnd { + fn from(opnd: Opnd) -> Self { + match opnd { + // NOTE: these operand types need to be lowered first + //Value(VALUE), // Immediate Ruby value, may be GC'd, movable + //InsnOut(usize), // Output of a preceding instruction in this block + + Opnd::InsnOut{..} => panic!("InsnOut operand made it past register allocation"), + + Opnd::UImm(val) => uimm_opnd(val), + Opnd::Imm(val) => 
imm_opnd(val), + Opnd::Value(VALUE(uimm)) => uimm_opnd(uimm as u64), + + // General-purpose register + Opnd::Reg(reg) => X86Opnd::Reg(reg), + + // Memory operand with displacement + Opnd::Mem(Mem{ base: MemBase::Reg(reg_no), num_bits, disp }) => { + let reg = X86Reg { + reg_no, + num_bits: 64, + reg_type: RegType::GP + }; + + mem_opnd(num_bits, X86Opnd::Reg(reg), disp) + } + + Opnd::None => panic!( + "Attempted to lower an Opnd::None. This often happens when an out operand was not allocated for an instruction because the output of the instruction was not used. Please ensure you are using the output." + ), + + _ => panic!("unsupported x86 operand type") + } + } +} + +/// Also implement going from a reference to an operand for convenience. +impl From<&Opnd> for X86Opnd { + fn from(opnd: &Opnd) -> Self { + X86Opnd::from(*opnd) + } +} + +impl Assembler +{ + // A special scratch register for intermediate processing. + // Note: right now this is only used by LeaLabel because label_ref accepts + // a closure and we don't want it to have to capture anything. 
+ const SCRATCH0: X86Opnd = X86Opnd::Reg(R11_REG); + + /// Get the list of registers from which we can allocate on this platform + pub fn get_alloc_regs() -> Vec + { + vec![ + RAX_REG, + RCX_REG, + ] + } + + /// Get a list of all of the caller-save registers + pub fn get_caller_save_regs() -> Vec { + vec![RAX_REG, RCX_REG, RDX_REG, RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG, R11_REG] + } + + // These are the callee-saved registers in the x86-64 SysV ABI + // RBX, RSP, RBP, and R12–R15 + + /// Split IR instructions for the x86 platform + fn x86_split(mut self) -> Assembler + { + fn split_arithmetic_opnds(asm: &mut Assembler, live_ranges: &Vec, index: usize, unmapped_opnds: &Vec, left: &Opnd, right: &Opnd) -> (Opnd, Opnd) { + match (unmapped_opnds[0], unmapped_opnds[1]) { + (Opnd::Mem(_), Opnd::Mem(_)) => { + (asm.load(*left), asm.load(*right)) + }, + (Opnd::Mem(_), Opnd::UImm(value)) => { + // 32-bit values will be sign-extended + if imm_num_bits(value as i64) > 32 { + (asm.load(*left), asm.load(*right)) + } else { + (asm.load(*left), *right) + } + }, + (Opnd::Mem(_), Opnd::Imm(value)) => { + if imm_num_bits(value) > 32 { + (asm.load(*left), asm.load(*right)) + } else { + (asm.load(*left), *right) + } + }, + // Instruction output whose live range spans beyond this instruction + (Opnd::InsnOut { idx, .. 
}, _) => { + if live_ranges[idx] > index { + (asm.load(*left), *right) + } else { + (*left, *right) + } + }, + // We have to load memory operands to avoid corrupting them + (Opnd::Mem(_) | Opnd::Reg(_), _) => { + (asm.load(*left), *right) + }, + _ => (*left, *right) + } + } + + let live_ranges: Vec = take(&mut self.live_ranges); + let mut asm = Assembler::new_with_label_names(take(&mut self.label_names)); + let mut iterator = self.into_draining_iter(); + + while let Some((index, mut insn)) = iterator.next_unmapped() { + // When we're iterating through the instructions with x86_split, we + // need to know the previous live ranges in order to tell if a + // register lasts beyond the current instruction. So instead of + // using next_mapped, we call next_unmapped. When you're using the + // next_unmapped API, you need to make sure that you map each + // operand that could reference an old index, which means both + // Opnd::InsnOut operands and Opnd::Mem operands with a base of + // MemBase::InsnOut. + // + // You need to ensure that you only map it _once_, because otherwise + // you'll end up mapping an incorrect index which could end up being + // out of bounds of the old set of indices. + // + // We handle all of that mapping here to ensure that it's only + // mapped once. We also handle loading Opnd::Value operands into + // registers here so that all mapping happens in one place. We load + // Opnd::Value operands into registers here because: + // + // - Most instructions can't be encoded with 64-bit immediates. + // - We look for Op::Load specifically when emiting to keep GC'ed + // VALUEs alive. This is a sort of canonicalization. + let mut unmapped_opnds: Vec = vec![]; + + let is_load = matches!(insn, Insn::Load { .. 
}); + let mut opnd_iter = insn.opnd_iter_mut(); + + while let Some(opnd) = opnd_iter.next() { + unmapped_opnds.push(*opnd); + + *opnd = if is_load { + iterator.map_opnd(*opnd) + } else if let Opnd::Value(value) = opnd { + // Since mov(mem64, imm32) sign extends, as_i64() makes sure + // we split when the extended value is different. + if !value.special_const_p() || imm_num_bits(value.as_i64()) > 32 { + asm.load(iterator.map_opnd(*opnd)) + } else { + iterator.map_opnd(*opnd) + } + } else { + iterator.map_opnd(*opnd) + } + } + + match &mut insn { + Insn::Add { left, right, out } | + Insn::Sub { left, right, out } | + Insn::And { left, right, out } | + Insn::Or { left, right, out } | + Insn::Xor { left, right, out } => { + let (split_left, split_right) = split_arithmetic_opnds(&mut asm, &live_ranges, index, &unmapped_opnds, left, right); + + *left = split_left; + *right = split_right; + *out = asm.next_opnd_out(Opnd::match_num_bits(&[*left, *right])); + + asm.push_insn(insn); + }, + Insn::Cmp { left, right } | + Insn::Test { left, right } => { + let (split_left, split_right) = split_arithmetic_opnds(&mut asm, &live_ranges, index, &unmapped_opnds, left, right); + + *left = split_left; + *right = split_right; + + asm.push_insn(insn); + }, + // These instructions modify their input operand in-place, so we + // may need to load the input value to preserve it + Insn::LShift { opnd, shift, out } | + Insn::RShift { opnd, shift, out } | + Insn::URShift { opnd, shift, out } => { + match (&unmapped_opnds[0], &unmapped_opnds[1]) { + // Instruction output whose live range spans beyond this instruction + (Opnd::InsnOut { idx, .. 
}, _) => { + if live_ranges[*idx] > index { + *opnd = asm.load(*opnd); + } + }, + // We have to load memory operands to avoid corrupting them + (Opnd::Mem(_) | Opnd::Reg(_), _) => { + *opnd = asm.load(*opnd); + }, + _ => {} + }; + + *out = asm.next_opnd_out(Opnd::match_num_bits(&[*opnd, *shift])); + asm.push_insn(insn); + }, + Insn::CSelZ { truthy, falsy, out } | + Insn::CSelNZ { truthy, falsy, out } | + Insn::CSelE { truthy, falsy, out } | + Insn::CSelNE { truthy, falsy, out } | + Insn::CSelL { truthy, falsy, out } | + Insn::CSelLE { truthy, falsy, out } | + Insn::CSelG { truthy, falsy, out } | + Insn::CSelGE { truthy, falsy, out } => { + match truthy { + Opnd::Reg(_) | Opnd::InsnOut { .. } => {}, + _ => { + *truthy = asm.load(*truthy); + } + }; + + match falsy { + Opnd::Reg(_) | Opnd::InsnOut { .. } => {}, + _ => { + *falsy = asm.load(*falsy); + } + }; + + *out = asm.next_opnd_out(Opnd::match_num_bits(&[*truthy, *falsy])); + asm.push_insn(insn); + }, + Insn::Mov { dest, src } => { + match (&dest, &src) { + (Opnd::Mem(_), Opnd::Mem(_)) => { + // We load opnd1 because for mov, opnd0 is the output + let opnd1 = asm.load(*src); + asm.mov(*dest, opnd1); + }, + (Opnd::Mem(_), Opnd::UImm(value)) => { + // 32-bit values will be sign-extended + if imm_num_bits(*value as i64) > 32 { + let opnd1 = asm.load(*src); + asm.mov(*dest, opnd1); + } else { + asm.mov(*dest, *src); + } + }, + (Opnd::Mem(_), Opnd::Imm(value)) => { + if imm_num_bits(*value) > 32 { + let opnd1 = asm.load(*src); + asm.mov(*dest, opnd1); + } else { + asm.mov(*dest, *src); + } + }, + _ => { + asm.mov(*dest, *src); + } + } + }, + Insn::Not { opnd, .. } => { + let opnd0 = match unmapped_opnds[0] { + // If we have an instruction output whose live range + // spans beyond this instruction, we have to load it. + Opnd::InsnOut { idx, .. } => { + if live_ranges[idx] > index { + asm.load(*opnd) + } else { + *opnd + } + }, + // We have to load memory and register operands to avoid + // corrupting them. 
+ Opnd::Mem(_) | Opnd::Reg(_) => { + asm.load(*opnd) + }, + // Otherwise we can just reuse the existing operand. + _ => *opnd + }; + + asm.not(opnd0); + }, + _ => { + if insn.out_opnd().is_some() { + let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter()); + let out = insn.out_opnd_mut().unwrap(); + *out = asm.next_opnd_out(out_num_bits); + } + + asm.push_insn(insn); + } + }; + + iterator.map_insn_index(&mut asm); + } + + asm + } + + /// Emit platform-specific machine code + pub fn x86_emit(&mut self, cb: &mut CodeBlock) -> Vec + { + //dbg!(&self.insns); + + // List of GC offsets + let mut gc_offsets: Vec = Vec::new(); + + // For each instruction + let start_write_pos = cb.get_write_pos(); + for insn in &self.insns { + match insn { + Insn::Comment(text) => { + if cfg!(feature = "asm_comments") { + cb.add_comment(text); + } + }, + + // Write the label at the current position + Insn::Label(target) => { + cb.write_label(target.unwrap_label_idx()); + }, + + // Report back the current position in the generated code + Insn::PosMarker(pos_marker) => { + pos_marker(cb.get_write_ptr()); + }, + + Insn::BakeString(text) => { + for byte in text.as_bytes() { + cb.write_byte(*byte); + } + + // Add a null-terminator byte for safety (in case we pass + // this to C code) + cb.write_byte(0); + }, + + Insn::Add { left, right, .. } => { + add(cb, left.into(), right.into()) + }, + + Insn::FrameSetup => {}, + Insn::FrameTeardown => {}, + + Insn::Sub { left, right, .. } => { + sub(cb, left.into(), right.into()) + }, + + Insn::And { left, right, .. } => { + and(cb, left.into(), right.into()) + }, + + Insn::Or { left, right, .. } => { + or(cb, left.into(), right.into()); + }, + + Insn::Xor { left, right, .. } => { + xor(cb, left.into(), right.into()); + }, + + Insn::Not { opnd, .. 
} => { + not(cb, opnd.into()); + }, + + Insn::LShift { opnd, shift , ..} => { + shl(cb, opnd.into(), shift.into()) + }, + + Insn::RShift { opnd, shift , ..} => { + sar(cb, opnd.into(), shift.into()) + }, + + Insn::URShift { opnd, shift, .. } => { + shr(cb, opnd.into(), shift.into()) + }, + + Insn::Store { dest, src } => { + mov(cb, dest.into(), src.into()); + }, + + // This assumes only load instructions can contain references to GC'd Value operands + Insn::Load { opnd, out } => { + mov(cb, out.into(), opnd.into()); + + // If the value being loaded is a heap object + if let Opnd::Value(val) = opnd { + if !val.special_const_p() { + // The pointer immediate is encoded as the last part of the mov written out + let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); + gc_offsets.push(ptr_offset); + } + } + }, + + Insn::LoadSExt { opnd, out } => { + movsx(cb, out.into(), opnd.into()); + }, + + Insn::Mov { dest, src } => { + mov(cb, dest.into(), src.into()); + }, + + // Load effective address + Insn::Lea { opnd, out } => { + lea(cb, out.into(), opnd.into()); + }, + + // Load relative address + Insn::LeaLabel { target, out } => { + let label_idx = target.unwrap_label_idx(); + + cb.label_ref(label_idx, 7, |cb, src_addr, dst_addr| { + let disp = dst_addr - src_addr; + lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap())); + }); + + mov(cb, out.into(), Self::SCRATCH0); + }, + + // Push and pop to/from the C stack + Insn::CPush(opnd) => { + push(cb, opnd.into()); + }, + Insn::CPop { out } => { + pop(cb, out.into()); + }, + Insn::CPopInto(opnd) => { + pop(cb, opnd.into()); + }, + + // Push and pop to the C stack all caller-save registers and the + // flags + Insn::CPushAll => { + let regs = Assembler::get_caller_save_regs(); + + for reg in regs { + push(cb, X86Opnd::Reg(reg)); + } + pushfq(cb); + }, + Insn::CPopAll => { + let regs = Assembler::get_caller_save_regs(); + + popfq(cb); + for reg in regs.into_iter().rev() { + pop(cb, 
X86Opnd::Reg(reg)); + } + }, + + // C function call + Insn::CCall { opnds, target, .. } => { + // Temporary + assert!(opnds.len() <= _C_ARG_OPNDS.len()); + + // For each operand + for (idx, opnd) in opnds.iter().enumerate() { + mov(cb, X86Opnd::Reg(_C_ARG_OPNDS[idx].unwrap_reg()), opnds[idx].into()); + } + + let ptr = target.unwrap_fun_ptr(); + call_ptr(cb, RAX, ptr); + }, + + Insn::CRet(opnd) => { + // TODO: bias allocation towards return register + if *opnd != Opnd::Reg(C_RET_REG) { + mov(cb, RAX, opnd.into()); + } + + ret(cb); + }, + + // Compare + Insn::Cmp { left, right } => { + cmp(cb, left.into(), right.into()); + } + + // Test and set flags + Insn::Test { left, right } => { + test(cb, left.into(), right.into()); + } + + Insn::JmpOpnd(opnd) => { + jmp_rm(cb, opnd.into()); + } + + // Conditional jump to a label + Insn::Jmp(target) => { + match *target { + Target::CodePtr(code_ptr) => jmp_ptr(cb, code_ptr), + Target::Label(label_idx) => jmp_label(cb, label_idx), + _ => unreachable!() + } + } + + Insn::Je(target) => { + match *target { + Target::CodePtr(code_ptr) => je_ptr(cb, code_ptr), + Target::Label(label_idx) => je_label(cb, label_idx), + _ => unreachable!() + } + } + + Insn::Jne(target) => { + match *target { + Target::CodePtr(code_ptr) => jne_ptr(cb, code_ptr), + Target::Label(label_idx) => jne_label(cb, label_idx), + _ => unreachable!() + } + } + + Insn::Jl(target) => { + match *target { + Target::CodePtr(code_ptr) => jl_ptr(cb, code_ptr), + Target::Label(label_idx) => jl_label(cb, label_idx), + _ => unreachable!() + } + }, + + Insn::Jbe(target) => { + match *target { + Target::CodePtr(code_ptr) => jbe_ptr(cb, code_ptr), + Target::Label(label_idx) => jbe_label(cb, label_idx), + _ => unreachable!() + } + }, + + Insn::Jz(target) => { + match *target { + Target::CodePtr(code_ptr) => jz_ptr(cb, code_ptr), + Target::Label(label_idx) => jz_label(cb, label_idx), + _ => unreachable!() + } + } + + Insn::Jnz(target) => { + match *target { + 
Target::CodePtr(code_ptr) => jnz_ptr(cb, code_ptr), + Target::Label(label_idx) => jnz_label(cb, label_idx), + _ => unreachable!() + } + } + + Insn::Jo(target) => { + match *target { + Target::CodePtr(code_ptr) => jo_ptr(cb, code_ptr), + Target::Label(label_idx) => jo_label(cb, label_idx), + _ => unreachable!() + } + } + + // Atomically increment a counter at a given memory location + Insn::IncrCounter { mem, value } => { + assert!(matches!(mem, Opnd::Mem(_))); + assert!(matches!(value, Opnd::UImm(_) | Opnd::Imm(_) ) ); + write_lock_prefix(cb); + add(cb, mem.into(), value.into()); + }, + + Insn::Breakpoint => int3(cb), + + Insn::CSelZ { truthy, falsy, out } => { + mov(cb, out.into(), truthy.into()); + cmovnz(cb, out.into(), falsy.into()); + }, + Insn::CSelNZ { truthy, falsy, out } => { + mov(cb, out.into(), truthy.into()); + cmovz(cb, out.into(), falsy.into()); + }, + Insn::CSelE { truthy, falsy, out } => { + mov(cb, out.into(), truthy.into()); + cmovne(cb, out.into(), falsy.into()); + }, + Insn::CSelNE { truthy, falsy, out } => { + mov(cb, out.into(), truthy.into()); + cmove(cb, out.into(), falsy.into()); + }, + Insn::CSelL { truthy, falsy, out } => { + mov(cb, out.into(), truthy.into()); + cmovge(cb, out.into(), falsy.into()); + }, + Insn::CSelLE { truthy, falsy, out } => { + mov(cb, out.into(), truthy.into()); + cmovg(cb, out.into(), falsy.into()); + }, + Insn::CSelG { truthy, falsy, out } => { + mov(cb, out.into(), truthy.into()); + cmovle(cb, out.into(), falsy.into()); + }, + Insn::CSelGE { truthy, falsy, out } => { + mov(cb, out.into(), truthy.into()); + cmovl(cb, out.into(), falsy.into()); + } + Insn::LiveReg { .. 
} => (), // just a reg alloc signal, no code + Insn::PadEntryExit => { + // We assume that our Op::Jmp usage that gets invalidated is <= 5 + let code_size: u32 = (cb.get_write_pos() - start_write_pos).try_into().unwrap(); + if code_size < 5 { + nop(cb, 5 - code_size); + } + } + + // We want to keep the panic here because some instructions that + // we feed to the backend could get lowered into other + // instructions. So it's possible that some of our backend + // instructions can never make it to the emit stage. + _ => panic!("unsupported instruction passed to x86 backend: {:?}", insn) + }; + } + + gc_offsets + } + + /// Optimize and compile the stored instructions + pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec) -> Vec + { + let mut asm = self.x86_split().alloc_regs(regs); + + // Create label instances in the code block + for (idx, name) in asm.label_names.iter().enumerate() { + let label_idx = cb.new_label(name.to_string()); + assert!(label_idx == idx); + } + + let gc_offsets = asm.x86_emit(cb); + + if !cb.has_dropped_bytes() { + cb.link_labels(); + } + + gc_offsets + } +} diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 0acd1972c39df1..744495eb298eac 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1,8 +1,9 @@ // We use the YARV bytecode constants which have a CRuby-style name #![allow(non_upper_case_globals)] -use crate::asm::x86_64::*; +//use crate::asm::x86_64::*; use crate::asm::*; +use crate::backend::ir::*; use crate::core::*; use crate::cruby::*; use crate::invariants::*; @@ -23,38 +24,26 @@ use std::slice; pub use crate::virtualmem::CodePtr; -// Callee-saved registers -pub const REG_CFP: X86Opnd = R13; -pub const REG_EC: X86Opnd = R12; -pub const REG_SP: X86Opnd = RBX; - -// Scratch registers used by YJIT -pub const REG0: X86Opnd = RAX; -pub const REG0_32: X86Opnd = EAX; -pub const REG0_8: X86Opnd = AL; -pub const REG1: X86Opnd = RCX; -// pub const REG1_32: X86Opnd = ECX; - // A block that can be invalidated 
needs space to write a jump. // We'll reserve a minimum size for any block that could // be invalidated. In this case the JMP takes 5 bytes, but // gen_send_general will always MOV the receiving object // into place, so 2 bytes are always written automatically. -pub const JUMP_SIZE_IN_BYTES:usize = 3; +//pub const JUMP_SIZE_IN_BYTES: usize = 3; /// Status returned by code generation functions #[derive(PartialEq, Debug)] enum CodegenStatus { - EndBlock, KeepCompiling, CantCompile, + EndBlock, } /// Code generation function signature type InsnGenFn = fn( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus; @@ -118,13 +107,6 @@ impl JITState { self.opcode } - pub fn add_gc_object_offset(self: &mut JITState, ptr_offset: u32) { - let mut gc_obj_vec: RefMut<_> = self.block.borrow_mut(); - gc_obj_vec.add_gc_object_offset(ptr_offset); - - incr_counter!(num_gc_obj_refs); - } - pub fn get_pc(self: &JITState) -> *mut VALUE { self.pc } @@ -149,22 +131,6 @@ pub fn jit_get_arg(jit: &JITState, arg_idx: isize) -> VALUE { unsafe { *(jit.pc.offset(arg_idx + 1)) } } -// Load a VALUE into a register and keep track of the reference if it is on the GC heap. 
-pub fn jit_mov_gc_ptr(jit: &mut JITState, cb: &mut CodeBlock, reg: X86Opnd, ptr: VALUE) { - assert!(matches!(reg, X86Opnd::Reg(_))); - assert!(reg.num_bits() == 64); - - // Load the pointer constant into the specified register - mov(cb, reg, const_ptr_opnd(ptr.as_ptr())); - - // The pointer immediate is encoded as the last part of the mov written out - let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); - - if !ptr.special_const_p() { - jit.add_gc_object_offset(ptr_offset); - } -} - // Get the index of the next instruction fn jit_next_insn_idx(jit: &JITState) -> u32 { jit.insn_idx + insn_len(jit.get_opcode()) @@ -233,19 +199,21 @@ fn add_comment(cb: &mut CodeBlock, comment_str: &str) { /// Increment a profiling counter with counter_name #[cfg(not(feature = "stats"))] macro_rules! gen_counter_incr { - ($cb:tt, $counter_name:ident) => {}; + ($asm:tt, $counter_name:ident) => {}; } #[cfg(feature = "stats")] macro_rules! gen_counter_incr { - ($cb:tt, $counter_name:ident) => { + ($asm:tt, $counter_name:ident) => { if (get_option!(gen_stats)) { // Get a pointer to the counter variable let ptr = ptr_to_counter!($counter_name); - // Use REG1 because there might be return value in REG0 - mov($cb, REG1, const_ptr_opnd(ptr as *const u8)); - write_lock_prefix($cb); // for ractors. - add($cb, mem_opnd(64, REG1, 0), imm_opnd(1)); + // Load the pointer into a register + let ptr_reg = $asm.load(Opnd::const_ptr(ptr as *const u8)); + let counter_opnd = Opnd::mem(64, ptr_reg, 0); + + // Increment and store the updated value + $asm.incr_counter(counter_opnd, Opnd::UImm(1)); } }; } @@ -268,11 +236,14 @@ macro_rules! 
counted_exit { let ocb = $ocb.unwrap(); let code_ptr = ocb.get_write_ptr(); + let mut ocb_asm = Assembler::new(); + // Increment the counter - gen_counter_incr!(ocb, $counter_name); + gen_counter_incr!(ocb_asm, $counter_name); // Jump to the existing side exit - jmp_ptr(ocb, $existing_side_exit); + ocb_asm.jmp($existing_side_exit.into()); + ocb_asm.compile(ocb); // Pointer to the side-exit code code_ptr @@ -282,26 +253,27 @@ macro_rules! counted_exit { // Save the incremented PC on the CFP // This is necessary when callees can raise or allocate -fn jit_save_pc(jit: &JITState, cb: &mut CodeBlock, scratch_reg: X86Opnd) { +fn jit_save_pc(jit: &JITState, asm: &mut Assembler) { let pc: *mut VALUE = jit.get_pc(); let ptr: *mut VALUE = unsafe { let cur_insn_len = insn_len(jit.get_opcode()) as isize; pc.offset(cur_insn_len) }; - mov(cb, scratch_reg, const_ptr_opnd(ptr as *const u8)); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC), scratch_reg); + + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), Opnd::const_ptr(ptr as *const u8)); } /// Save the current SP on the CFP /// This realigns the interpreter SP with the JIT SP /// Note: this will change the current value of REG_SP, /// which could invalidate memory operands -fn gen_save_sp(cb: &mut CodeBlock, ctx: &mut Context) { +fn gen_save_sp(jit: &JITState, asm: &mut Assembler, ctx: &mut Context) { if ctx.get_sp_offset() != 0 { let stack_pointer = ctx.sp_opnd(0); - lea(cb, REG_SP, stack_pointer); - let cfp_sp_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP); - mov(cb, cfp_sp_opnd, REG_SP); + let sp_addr = asm.lea(stack_pointer); + asm.mov(SP, sp_addr); + let cfp_sp_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP); + asm.mov(cfp_sp_opnd, SP); ctx.set_sp_offset(0); } } @@ -314,12 +286,11 @@ fn gen_save_sp(cb: &mut CodeBlock, ctx: &mut Context) { fn jit_prepare_routine_call( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, - scratch_reg: X86Opnd, + asm: &mut Assembler ) { jit.record_boundary_patch_point = true; - 
jit_save_pc(jit, cb, scratch_reg); - gen_save_sp(cb, ctx); + jit_save_pc(jit, asm); + gen_save_sp(jit, asm, ctx); // In case the routine calls Ruby methods, it can set local variables // through Kernel#binding and other means. @@ -328,8 +299,10 @@ fn jit_prepare_routine_call( /// Record the current codeblock write position for rewriting into a jump into /// the outlined block later. Used to implement global code invalidation. -fn record_global_inval_patch(cb: &mut CodeBlock, outline_block_target_pos: CodePtr) { - CodegenGlobals::push_global_inval_patch(cb.get_write_ptr(), outline_block_target_pos); +fn record_global_inval_patch(asm: &mut Assembler, outline_block_target_pos: CodePtr) { + asm.pos_marker(move |code_ptr| { + CodegenGlobals::push_global_inval_patch(code_ptr, outline_block_target_pos); + }); } /// Verify the ctx's types and mappings against the compile-time stack, self, @@ -414,66 +387,88 @@ fn verify_ctx(jit: &JITState, ctx: &Context) { } } -/// Generate an exit to return to the interpreter -fn gen_exit(exit_pc: *mut VALUE, ctx: &Context, cb: &mut CodeBlock) -> CodePtr { - let code_ptr = cb.get_write_ptr(); +// Fill code_for_exit_from_stub. This is used by branch_stub_hit() to exit +// to the interpreter when it cannot service a stub by generating new code. +// Before coming here, branch_stub_hit() takes care of fully reconstructing +// interpreter state. 
+fn gen_code_for_exit_from_stub(ocb: &mut OutlinedCb) -> CodePtr { + let ocb = ocb.unwrap(); + let code_ptr = ocb.get_write_ptr(); + let mut asm = Assembler::new(); + + gen_counter_incr!(asm, exit_from_branch_stub); + + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); + + asm.frame_teardown(); - add_comment(cb, "exit to interpreter"); + asm.cret(Qundef.into()); + + asm.compile(ocb); + + code_ptr +} + +/// Generate an exit to return to the interpreter +fn gen_exit(exit_pc: *mut VALUE, ctx: &Context, asm: &mut Assembler) { + asm.comment("exit to interpreter"); // Generate the code to exit to the interpreters // Write the adjusted SP back into the CFP if ctx.get_sp_offset() != 0 { - let stack_pointer = ctx.sp_opnd(0); - lea(cb, REG_SP, stack_pointer); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP), REG_SP); + let sp_opnd = asm.lea(ctx.sp_opnd(0)); + asm.mov( + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), + sp_opnd + ); } // Update CFP->PC - mov(cb, RAX, const_ptr_opnd(exit_pc as *const u8)); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC), RAX); + asm.mov( + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), + Opnd::const_ptr(exit_pc as *const u8) + ); // Accumulate stats about interpreter exits #[cfg(feature = "stats")] if get_option!(gen_stats) { - mov(cb, RDI, const_ptr_opnd(exit_pc as *const u8)); - call_ptr(cb, RSI, rb_yjit_count_side_exit_op as *const u8); + asm.ccall( + rb_yjit_count_side_exit_op as *const u8, + vec![Opnd::const_ptr(exit_pc as *const u8)] + ); // If --yjit-trace-exits option is enabled, record the exit stack // while recording the side exits. 
if get_option!(gen_trace_exits) { - mov(cb, C_ARG_REGS[0], const_ptr_opnd(exit_pc as *const u8)); - call_ptr(cb, REG0, rb_yjit_record_exit_stack as *const u8); + asm.ccall( + rb_yjit_record_exit_stack as *const u8, + vec![Opnd::const_ptr(exit_pc as *const u8)] + ); } } - pop(cb, REG_SP); - pop(cb, REG_EC); - pop(cb, REG_CFP); + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); - mov(cb, RAX, uimm_opnd(Qundef.into())); - ret(cb); + asm.frame_teardown(); - return code_ptr; + asm.cret(Qundef.into()); } -// Fill code_for_exit_from_stub. This is used by branch_stub_hit() to exit -// to the interpreter when it cannot service a stub by generating new code. -// Before coming here, branch_stub_hit() takes care of fully reconstructing -// interpreter state. -fn gen_code_for_exit_from_stub(ocb: &mut OutlinedCb) -> CodePtr { - let ocb = ocb.unwrap(); - let code_ptr = ocb.get_write_ptr(); - - gen_counter_incr!(ocb, exit_from_branch_stub); +/// Generate an exit to the interpreter in the outlined code block +fn gen_outlined_exit(exit_pc: *mut VALUE, ctx: &Context, ocb: &mut OutlinedCb) -> CodePtr { + let mut cb = ocb.unwrap(); + let exit_code = cb.get_write_ptr(); + let mut asm = Assembler::new(); - pop(ocb, REG_SP); - pop(ocb, REG_EC); - pop(ocb, REG_CFP); + gen_exit(exit_pc, ctx, &mut asm); - mov(ocb, RAX, uimm_opnd(Qundef.into())); - ret(ocb); + asm.compile(&mut cb); - return code_ptr; + exit_code } // :side-exit: @@ -491,7 +486,7 @@ fn gen_code_for_exit_from_stub(ocb: &mut OutlinedCb) -> CodePtr { fn get_side_exit(jit: &mut JITState, ocb: &mut OutlinedCb, ctx: &Context) -> CodePtr { match jit.side_exit_for_pc { None => { - let exit_code = gen_exit(jit.pc, ctx, ocb.unwrap()); + let exit_code = gen_outlined_exit(jit.pc, ctx, ocb); jit.side_exit_for_pc = Some(exit_code); exit_code } @@ -511,72 +506,44 @@ pub fn jit_ensure_block_entry_exit(jit: &mut JITState, ocb: &mut OutlinedCb) { return; } + // If we're compiling the first instruction in the block. 
if jit.insn_idx == blockid.idx { - // We are compiling the first instruction in the block. // Generate the exit with the cache in jitstate. block.entry_exit = Some(get_side_exit(jit, ocb, &block_ctx)); } else { let pc = unsafe { rb_iseq_pc_at_idx(blockid.iseq, blockid.idx) }; - block.entry_exit = Some(gen_exit(pc, &block_ctx, ocb.unwrap())); + block.entry_exit = Some(gen_outlined_exit(jit.pc, &block_ctx, ocb)); } } -// Generate a runtime guard that ensures the PC is at the expected -// instruction index in the iseq, otherwise takes a side-exit. -// This is to handle the situation of optional parameters. -// When a function with optional parameters is called, the entry -// PC for the method isn't necessarily 0. -fn gen_pc_guard(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) { - //RUBY_ASSERT(cb != NULL); - - let pc_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC); - let expected_pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) }; - let expected_pc_opnd = const_ptr_opnd(expected_pc as *const u8); - mov(cb, REG0, pc_opnd); - mov(cb, REG1, expected_pc_opnd); - cmp(cb, REG0, REG1); - - let pc_match = cb.new_label("pc_match".to_string()); - je_label(cb, pc_match); - - // We're not starting at the first PC, so we need to exit. - gen_counter_incr!(cb, leave_start_pc_non_zero); - - pop(cb, REG_SP); - pop(cb, REG_EC); - pop(cb, REG_CFP); - - mov(cb, RAX, imm_opnd(Qundef.into())); - ret(cb); - - // PC should match the expected insn_idx - cb.write_label(pc_match); - cb.link_labels(); -} - // Landing code for when c_return tracing is enabled. See full_cfunc_return(). fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> CodePtr { - let cb = ocb.unwrap(); - let code_ptr = cb.get_write_ptr(); + let ocb = ocb.unwrap(); + let code_ptr = ocb.get_write_ptr(); + let mut asm = Assembler::new(); - // This chunk of code expect REG_EC to be filled properly and + // This chunk of code expects REG_EC to be filled properly and // RAX to contain the return value of the C method. 
// Call full_cfunc_return() - mov(cb, C_ARG_REGS[0], REG_EC); - mov(cb, C_ARG_REGS[1], RAX); - call_ptr(cb, REG0, rb_full_cfunc_return as *const u8); + asm.ccall( + rb_full_cfunc_return as *const u8, + vec![EC, C_RET_OPND] + ); // Count the exit - gen_counter_incr!(cb, traced_cfunc_return); + gen_counter_incr!(asm, traced_cfunc_return); // Return to the interpreter - pop(cb, REG_SP); - pop(cb, REG_EC); - pop(cb, REG_CFP); + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); + + asm.frame_teardown(); - mov(cb, RAX, uimm_opnd(Qundef.into())); - ret(cb); + asm.cret(Qundef.into()); + + asm.compile(ocb); return code_ptr; } @@ -586,22 +553,58 @@ fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> CodePtr { fn gen_leave_exit(ocb: &mut OutlinedCb) -> CodePtr { let ocb = ocb.unwrap(); let code_ptr = ocb.get_write_ptr(); + let mut asm = Assembler::new(); - // Note, gen_leave() fully reconstructs interpreter state and leaves the - // return value in RAX before coming here. + // gen_leave() fully reconstructs interpreter state and leaves the + // return value in C_RET_OPND before coming here. + let ret_opnd = asm.live_reg_opnd(C_RET_OPND); // Every exit to the interpreter should be counted - gen_counter_incr!(ocb, leave_interp_return); + gen_counter_incr!(asm, leave_interp_return); + + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); + + asm.frame_teardown(); - pop(ocb, REG_SP); - pop(ocb, REG_EC); - pop(ocb, REG_CFP); + asm.cret(ret_opnd); - ret(ocb); + asm.compile(ocb); return code_ptr; } +// Generate a runtime guard that ensures the PC is at the expected +// instruction index in the iseq, otherwise takes a side-exit. +// This is to handle the situation of optional parameters. +// When a function with optional parameters is called, the entry +// PC for the method isn't necessarily 0. 
+fn gen_pc_guard(asm: &mut Assembler, iseq: IseqPtr, insn_idx: u32) { + let pc_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC); + let expected_pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) }; + let expected_pc_opnd = Opnd::const_ptr(expected_pc as *const u8); + + asm.cmp(pc_opnd, expected_pc_opnd); + + let pc_match = asm.new_label("pc_match"); + asm.je(pc_match); + + // We're not starting at the first PC, so we need to exit. + gen_counter_incr!(asm, leave_start_pc_non_zero); + + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); + + asm.frame_teardown(); + + asm.cret(Qundef.into()); + + // PC should match the expected insn_idx + asm.write_label(pc_match); +} + /// Compile an interpreter entry block to be inserted into an iseq /// Returns None if compilation fails. pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> Option { @@ -614,30 +617,38 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O let old_write_pos = cb.get_write_pos(); + // TODO: figure out if this is actually beneficial for performance // Align the current write position to cache line boundaries cb.align_pos(64); let code_ptr = cb.get_write_ptr(); - add_comment(cb, "yjit entry"); - push(cb, REG_CFP); - push(cb, REG_EC); - push(cb, REG_SP); + let mut asm = Assembler::new(); + if get_option!(dump_disasm) { + asm.comment(&format!("YJIT entry: {}", iseq_get_location(iseq))); + } else { + asm.comment("YJIT entry"); + } + + asm.frame_setup(); + + // Save the CFP, EC, SP registers to the C stack + asm.cpush(CFP); + asm.cpush(EC); + asm.cpush(SP); - // We are passed EC and CFP - mov(cb, REG_EC, C_ARG_REGS[0]); - mov(cb, REG_CFP, C_ARG_REGS[1]); + // We are passed EC and CFP as arguments + asm.mov(EC, C_ARG_OPNDS[0]); + asm.mov(CFP, C_ARG_OPNDS[1]); // Load the current SP from the CFP into REG_SP - mov(cb, REG_SP, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP)); + asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); // Setup cfp->jit_return - 
mov( - cb, - REG0, - code_ptr_opnd(CodegenGlobals::get_leave_exit_code()), + asm.mov( + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_JIT_RETURN), + Opnd::const_ptr(CodegenGlobals::get_leave_exit_code().raw_ptr()), ); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_JIT_RETURN), REG0); // We're compiling iseqs that we *expect* to start at `insn_idx`. But in // the case of optional parameters, the interpreter can set the pc to a @@ -646,9 +657,11 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O // compiled for is the same PC that the interpreter wants us to run with. // If they don't match, then we'll take a side exit. if unsafe { get_iseq_flags_has_opt(iseq) } { - gen_pc_guard(cb, iseq, insn_idx); + gen_pc_guard(&mut asm, iseq, insn_idx); } + asm.compile(cb); + // Verify MAX_PROLOGUE_SIZE assert!(cb.get_write_pos() - old_write_pos <= MAX_PROLOGUE_SIZE); @@ -657,22 +670,19 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O // Generate code to check for interrupts and take a side-exit. // Warning: this function clobbers REG0 -fn gen_check_ints(cb: &mut CodeBlock, side_exit: CodePtr) { +fn gen_check_ints(asm: &mut Assembler, side_exit: CodePtr) { // Check for interrupts // see RUBY_VM_CHECK_INTS(ec) macro - add_comment(cb, "RUBY_VM_CHECK_INTS(ec)"); - mov( - cb, - REG0_32, - mem_opnd(32, REG_EC, RUBY_OFFSET_EC_INTERRUPT_MASK), - ); - not(cb, REG0_32); - test( - cb, - mem_opnd(32, REG_EC, RUBY_OFFSET_EC_INTERRUPT_FLAG), - REG0_32, + asm.comment("RUBY_VM_CHECK_INTS(ec)"); + + let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK)); + + asm.test( + Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG), + not_mask, ); - jnz_ptr(cb, side_exit); + + asm.jnz(Target::CodePtr(side_exit)); } // Generate a stubbed unconditional jump to the next bytecode instruction. 
@@ -680,7 +690,7 @@ fn gen_check_ints(cb: &mut CodeBlock, side_exit: CodePtr) { fn jump_to_next_insn( jit: &mut JITState, current_context: &Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) { // Reset the depth since in current usages we only ever jump to to @@ -695,14 +705,14 @@ fn jump_to_next_insn( // We are at the end of the current instruction. Record the boundary. if jit.record_boundary_patch_point { - let next_insn = unsafe { jit.pc.offset(insn_len(jit.opcode).try_into().unwrap()) }; - let exit_pos = gen_exit(next_insn, &reset_depth, ocb.unwrap()); - record_global_inval_patch(cb, exit_pos); + let exit_pc = unsafe { jit.pc.offset(insn_len(jit.opcode).try_into().unwrap()) }; + let exit_pos = gen_outlined_exit(exit_pc, &reset_depth, ocb); + record_global_inval_patch(asm, exit_pos); jit.record_boundary_patch_point = false; } // Generate the jump instruction - gen_direct_jump(jit, &reset_depth, jump_block, cb); + gen_direct_jump(jit, &reset_depth, jump_block, asm); } // Compile a sequence of bytecode instructions for a given basic block version. @@ -739,6 +749,14 @@ pub fn gen_single_block( // Mark the start position of the block blockref.borrow_mut().set_start_addr(cb.get_write_ptr()); + // Create a backend assembler instance + let mut asm = Assembler::new(); + + #[cfg(feature = "disasm")] + if get_option!(dump_disasm) { + asm.comment(&format!("Block: {} (ISEQ offset: {})", iseq_get_location(blockid.iseq), blockid.idx)); + } + // For each instruction to compile // NOTE: could rewrite this loop with a std::iter::Iterator while insn_idx < iseq_size { @@ -752,7 +770,7 @@ pub fn gen_single_block( // opt_getinlinecache wants to be in a block all on its own. Cut the block short // if we run into it. See gen_opt_getinlinecache() for details. 
if opcode == YARVINSN_opt_getinlinecache.as_usize() && insn_idx > starting_insn_idx { - jump_to_next_insn(&mut jit, &ctx, cb, ocb); + jump_to_next_insn(&mut jit, &ctx, &mut asm, ocb); break; } @@ -765,8 +783,8 @@ pub fn gen_single_block( // If previous instruction requested to record the boundary if jit.record_boundary_patch_point { // Generate an exit to this instruction and record it - let exit_pos = gen_exit(jit.pc, &ctx, ocb.unwrap()); - record_global_inval_patch(cb, exit_pos); + let exit_pos = gen_outlined_exit(jit.pc, &ctx, ocb); + record_global_inval_patch(&mut asm, exit_pos); jit.record_boundary_patch_point = false; } @@ -781,35 +799,39 @@ pub fn gen_single_block( // :count-placement: // Count bytecode instructions that execute in generated code. // Note that the increment happens even when the output takes side exit. - gen_counter_incr!(cb, exec_instruction); + gen_counter_incr!(asm, exec_instruction); // Add a comment for the name of the YARV instruction - add_comment(cb, &insn_name(opcode)); + asm.comment(&insn_name(opcode)); // If requested, dump instructions for debugging if get_option!(dump_insns) { println!("compiling {}", insn_name(opcode)); - print_str(cb, &format!("executing {}", insn_name(opcode))); + print_str(&mut asm, &format!("executing {}", insn_name(opcode))); } // Call the code generation function - status = gen_fn(&mut jit, &mut ctx, cb, ocb); + status = gen_fn(&mut jit, &mut ctx, &mut asm, ocb); } // If we can't compile this instruction // exit to the interpreter and stop compiling if status == CantCompile { + if get_option!(dump_insns) { + println!("can't compile {}", insn_name(opcode)); + } + let mut block = jit.block.borrow_mut(); // TODO: if the codegen function makes changes to ctx and then return YJIT_CANT_COMPILE, // the exit this generates would be wrong. We could save a copy of the entry context // and assert that ctx is the same here. 
- let exit = gen_exit(jit.pc, &ctx, cb); + gen_exit(jit.pc, &ctx, &mut asm); // If this is the first instruction in the block, then we can use // the exit for block->entry_exit. if insn_idx == block.get_blockid().idx { - block.entry_exit = Some(exit); + block.entry_exit = block.get_start_addr(); } break; @@ -831,6 +853,17 @@ pub fn gen_single_block( // Finish filling out the block { let mut block = jit.block.borrow_mut(); + if block.entry_exit.is_some() { + asm.pad_entry_exit(); + } + + // Compile code into the code block + let gc_offsets = asm.compile(cb); + + // Add the GC offsets to the block + for offset in gc_offsets { + block.add_gc_obj_offset(offset) + } // Mark the end position of the block block.set_end_addr(cb.get_write_ptr()); @@ -841,26 +874,13 @@ pub fn gen_single_block( // We currently can't handle cases where the request is for a block that // doesn't go to the next instruction. - //assert!(!jit.record_boundary_patch_point); + assert!(!jit.record_boundary_patch_point); // If code for the block doesn't fit, fail if cb.has_dropped_bytes() || ocb.unwrap().has_dropped_bytes() { return Err(()); } - // TODO: we may want a feature for this called dump_insns? 
Can leave commented for now - /* - if (YJIT_DUMP_MODE >= 2) { - // Dump list of compiled instrutions - fprintf(stderr, "Compiled the following for iseq=%p:\n", (void *)iseq); - for (uint32_t idx = block->blockid.idx; idx < insn_idx; ) { - int opcode = yjit_opcode_at_pc(iseq, yjit_iseq_pc_at_idx(iseq, idx)); - fprintf(stderr, " %04d %s\n", idx, insn_name(opcode)); - idx += insn_len(opcode); - } - } - */ - // Block compiled successfully Ok(blockref) } @@ -868,7 +888,7 @@ pub fn gen_single_block( fn gen_nop( _jit: &mut JITState, _ctx: &mut Context, - _cb: &mut CodeBlock, + _asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { // Do nothing @@ -878,7 +898,7 @@ fn gen_nop( fn gen_pop( _jit: &mut JITState, ctx: &mut Context, - _cb: &mut CodeBlock, + _asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { // Decrement SP @@ -887,17 +907,17 @@ fn gen_pop( } fn gen_dup( - _jit: &mut JITState, + jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { + let dup_val = ctx.stack_pop(0); let (mapping, tmp_type) = ctx.get_opnd_mapping(StackOpnd(0)); let loc0 = ctx.stack_push_mapping((mapping, tmp_type)); - mov(cb, REG0, dup_val); - mov(cb, loc0, REG0); + asm.mov(loc0, dup_val); KeepCompiling } @@ -906,9 +926,10 @@ fn gen_dup( fn gen_dupn( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { + let nval: VALUE = jit_get_arg(jit, 0); let VALUE(n) = nval; @@ -917,52 +938,49 @@ fn gen_dupn( return CantCompile; } - let opnd1: X86Opnd = ctx.stack_opnd(1); - let opnd0: X86Opnd = ctx.stack_opnd(0); + let opnd1: Opnd = ctx.stack_opnd(1); + let opnd0: Opnd = ctx.stack_opnd(0); let mapping1 = ctx.get_opnd_mapping(StackOpnd(1)); let mapping0 = ctx.get_opnd_mapping(StackOpnd(0)); - let dst1: X86Opnd = ctx.stack_push_mapping(mapping1); - mov(cb, REG0, opnd1); - mov(cb, dst1, REG0); + let dst1: Opnd = 
ctx.stack_push_mapping(mapping1); + asm.mov(dst1, opnd1); - let dst0: X86Opnd = ctx.stack_push_mapping(mapping0); - mov(cb, REG0, opnd0); - mov(cb, dst0, REG0); + let dst0: Opnd = ctx.stack_push_mapping(mapping0); + asm.mov(dst0, opnd0); KeepCompiling } // Swap top 2 stack entries fn gen_swap( - _jit: &mut JITState, + jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { - stack_swap(ctx, cb, 0, 1, REG0, REG1); + stack_swap(jit, ctx, asm, 0, 1); KeepCompiling } fn stack_swap( + jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, offset0: u16, offset1: u16, - _reg0: X86Opnd, - _reg1: X86Opnd, ) { - let opnd0 = ctx.stack_opnd(offset0 as i32); - let opnd1 = ctx.stack_opnd(offset1 as i32); + let stack0_mem = ctx.stack_opnd(offset0 as i32); + let stack1_mem = ctx.stack_opnd(offset1 as i32); let mapping0 = ctx.get_opnd_mapping(StackOpnd(offset0)); let mapping1 = ctx.get_opnd_mapping(StackOpnd(offset1)); - mov(cb, REG0, opnd0); - mov(cb, REG1, opnd1); - mov(cb, opnd0, REG1); - mov(cb, opnd1, REG0); + let stack0_reg = asm.load(stack0_mem); + let stack1_reg = asm.load(stack1_mem); + asm.mov(stack0_mem, stack1_reg); + asm.mov(stack1_mem, stack0_reg); ctx.set_opnd_mapping(StackOpnd(offset0), mapping1); ctx.set_opnd_mapping(StackOpnd(offset1), mapping0); @@ -971,43 +989,23 @@ fn stack_swap( fn gen_putnil( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { - jit_putobject(jit, ctx, cb, Qnil); + jit_putobject(jit, ctx, asm, Qnil); KeepCompiling } -fn jit_putobject(jit: &mut JITState, ctx: &mut Context, cb: &mut CodeBlock, arg: VALUE) { +fn jit_putobject(jit: &mut JITState, ctx: &mut Context, asm: &mut Assembler, arg: VALUE) { let val_type: Type = Type::from(arg); let stack_top = ctx.stack_push(val_type); - - if arg.special_const_p() { - // Immediates will not move and do not need to be tracked for 
GC - // Thanks to this we can mov directly to memory when possible. - let imm = imm_opnd(arg.as_i64()); - - // 64-bit immediates can't be directly written to memory - if imm.num_bits() <= 32 { - mov(cb, stack_top, imm); - } else { - mov(cb, REG0, imm); - mov(cb, stack_top, REG0); - } - } else { - // Load the value to push into REG0 - // Note that this value may get moved by the GC - jit_mov_gc_ptr(jit, cb, REG0, arg); - - // Write argument at SP - mov(cb, stack_top, REG0); - } + asm.mov(stack_top, arg.into()); } fn gen_putobject_int2fix( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let opcode = jit.opcode; @@ -1017,35 +1015,35 @@ fn gen_putobject_int2fix( 1 }; - jit_putobject(jit, ctx, cb, VALUE::fixnum_from_usize(cst_val)); + jit_putobject(jit, ctx, asm, VALUE::fixnum_from_usize(cst_val)); KeepCompiling } fn gen_putobject( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let arg: VALUE = jit_get_arg(jit, 0); - jit_putobject(jit, ctx, cb, arg); + jit_putobject(jit, ctx, asm, arg); KeepCompiling } fn gen_putself( _jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { - // Load self from CFP - let cf_opnd = mem_opnd((8 * SIZEOF_VALUE) as u8, REG_CFP, RUBY_OFFSET_CFP_SELF); - mov(cb, REG0, cf_opnd); // Write it on the stack - let stack_top: X86Opnd = ctx.stack_push_self(); - mov(cb, stack_top, REG0); + let stack_top = ctx.stack_push_self(); + asm.mov( + stack_top, + Opnd::mem((8 * SIZEOF_VALUE) as u8, CFP, RUBY_OFFSET_CFP_SELF) + ); KeepCompiling } @@ -1053,15 +1051,15 @@ fn gen_putself( fn gen_putspecialobject( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let object_type = jit_get_arg(jit, 0); if object_type == VALUE(VM_SPECIAL_OBJECT_VMCORE.as_usize()) 
{ - let stack_top: X86Opnd = ctx.stack_push(Type::UnknownHeap); - jit_mov_gc_ptr(jit, cb, REG0, unsafe { rb_mRubyVMFrozenCore }); - mov(cb, stack_top, REG0); + let stack_top = ctx.stack_push(Type::UnknownHeap); + let frozen_core = unsafe { rb_mRubyVMFrozenCore }; + asm.mov(stack_top, frozen_core.into()); KeepCompiling } else { // TODO: implement for VM_SPECIAL_OBJECT_CBASE and @@ -1074,19 +1072,20 @@ fn gen_putspecialobject( fn gen_setn( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { - let nval: VALUE = jit_get_arg(jit, 0); - let VALUE(n) = nval; + let n: VALUE = jit_get_arg(jit, 0); - let top_val: X86Opnd = ctx.stack_pop(0); - let dst_opnd: X86Opnd = ctx.stack_opnd(n.try_into().unwrap()); - mov(cb, REG0, top_val); - mov(cb, dst_opnd, REG0); + let top_val = ctx.stack_pop(0); + let dst_opnd = ctx.stack_opnd(n.into()); + asm.mov( + dst_opnd, + top_val + ); let mapping = ctx.get_opnd_mapping(StackOpnd(0)); - ctx.set_opnd_mapping(StackOpnd(n.try_into().unwrap()), mapping); + ctx.set_opnd_mapping(StackOpnd(n.into()), mapping); KeepCompiling } @@ -1095,18 +1094,15 @@ fn gen_setn( fn gen_topn( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { - let nval: VALUE = jit_get_arg(jit, 0); - let VALUE(n) = nval; - - let top_n_val = ctx.stack_opnd(n.try_into().unwrap()); - let mapping = ctx.get_opnd_mapping(StackOpnd(n.try_into().unwrap())); + let nval = jit_get_arg(jit, 0); + let top_n_val = ctx.stack_opnd(nval.into()); + let mapping = ctx.get_opnd_mapping(StackOpnd(nval.into())); let loc0 = ctx.stack_push_mapping(mapping); - mov(cb, REG0, top_n_val); - mov(cb, loc0, REG0); + asm.mov(loc0, top_n_val); KeepCompiling } @@ -1115,12 +1111,11 @@ fn gen_topn( fn gen_adjuststack( jit: &mut JITState, ctx: &mut Context, - _cb: &mut CodeBlock, + _cb: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let nval: VALUE = 
jit_get_arg(jit, 0); let VALUE(n) = nval; - ctx.stack_pop(n); KeepCompiling } @@ -1128,11 +1123,11 @@ fn gen_adjuststack( fn gen_opt_plus( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -1149,25 +1144,24 @@ fn gen_opt_plus( } // Check that both operands are fixnums - guard_two_fixnums(ctx, cb, side_exit); + guard_two_fixnums(ctx, asm, side_exit); - // Get the operands and destination from the stack + // Get the operands from the stack let arg1 = ctx.stack_pop(1); let arg0 = ctx.stack_pop(1); // Add arg0 + arg1 and test for overflow - mov(cb, REG0, arg0); - sub(cb, REG0, imm_opnd(1)); - add(cb, REG0, arg1); - jo_ptr(cb, side_exit); + let arg0_untag = asm.sub(arg0, Opnd::Imm(1)); + let out_val = asm.add(arg0_untag, arg1); + asm.jo(side_exit.into()); // Push the output on the stack let dst = ctx.stack_push(Type::Fixnum); - mov(cb, dst, REG0); + asm.mov(dst, out_val); KeepCompiling } else { - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } } @@ -1175,26 +1169,36 @@ fn gen_opt_plus( fn gen_newarray( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let n = jit_get_arg(jit, 0).as_u32(); // Save the PC and SP because we are allocating - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); - let offset_magnitude = SIZEOF_VALUE as u32 * n; - let values_ptr = ctx.sp_opnd(-(offset_magnitude as isize)); + // If n is 0, then elts is never going to be read, so we can just pass null + let values_ptr = if n == 0 { + Opnd::UImm(0) + } else { + let offset_magnitude = SIZEOF_VALUE as u32 * n; + let values_opnd = ctx.sp_opnd(-(offset_magnitude as isize)); + asm.lea(values_opnd) + }; // call 
rb_ec_ary_new_from_values(struct rb_execution_context_struct *ec, long n, const VALUE *elts); - mov(cb, C_ARG_REGS[0], REG_EC); - mov(cb, C_ARG_REGS[1], imm_opnd(n.into())); - lea(cb, C_ARG_REGS[2], values_ptr); - call_ptr(cb, REG0, rb_ec_ary_new_from_values as *const u8); + let new_ary = asm.ccall( + rb_ec_ary_new_from_values as *const u8, + vec![ + EC, + Opnd::UImm(n.into()), + values_ptr + ] + ); ctx.stack_pop(n.as_usize()); let stack_ret = ctx.stack_push(Type::Array); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, new_ary); KeepCompiling } @@ -1203,20 +1207,22 @@ fn gen_newarray( fn gen_duparray( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let ary = jit_get_arg(jit, 0); // Save the PC and SP because we are allocating - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // call rb_ary_resurrect(VALUE ary); - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], ary); - call_ptr(cb, REG0, rb_ary_resurrect as *const u8); + let new_ary = asm.ccall( + rb_ary_resurrect as *const u8, + vec![ary.into()], + ); let stack_ret = ctx.stack_push(Type::Array); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, new_ary); KeepCompiling } @@ -1225,20 +1231,19 @@ fn gen_duparray( fn gen_duphash( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let hash = jit_get_arg(jit, 0); // Save the PC and SP because we are allocating - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // call rb_hash_resurrect(VALUE hash); - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], hash); - call_ptr(cb, REG0, rb_hash_resurrect as *const u8); + let hash = asm.ccall(rb_hash_resurrect as *const u8, vec![hash.into()]); let stack_ret = ctx.stack_push(Type::Hash); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, hash); KeepCompiling } @@ -1247,25 +1252,47 @@ fn gen_duphash( fn gen_splatarray( jit: 
&mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let flag = jit_get_arg(jit, 0); // Save the PC and SP because the callee may allocate // Note that this modifies REG_SP, which is why we do it first - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // Get the operands from the stack let ary_opnd = ctx.stack_pop(1); // Call rb_vm_splat_array(flag, ary) - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], flag); - mov(cb, C_ARG_REGS[1], ary_opnd); - call_ptr(cb, REG1, rb_vm_splat_array as *const u8); + let ary = asm.ccall(rb_vm_splat_array as *const u8, vec![flag.into(), ary_opnd]); let stack_ret = ctx.stack_push(Type::Array); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, ary); + + KeepCompiling +} + +// concat two arrays +fn gen_concatarray( + jit: &mut JITState, + ctx: &mut Context, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> CodegenStatus { + // Save the PC and SP because the callee may allocate + // Note that this modifies REG_SP, which is why we do it first + jit_prepare_routine_call(jit, ctx, asm); + + // Get the operands from the stack + let ary2st_opnd = ctx.stack_pop(1); + let ary1_opnd = ctx.stack_pop(1); + + // Call rb_vm_concat_array(ary1, ary2st) + let ary = asm.ccall(rb_vm_concat_array as *const u8, vec![ary1_opnd, ary2st_opnd]); + + let stack_ret = ctx.stack_push(Type::Array); + asm.mov(stack_ret, ary); KeepCompiling } @@ -1274,75 +1301,94 @@ fn gen_splatarray( fn gen_newrange( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let flag = jit_get_arg(jit, 0); // rb_range_new() allocates and can raise - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // val = rb_range_new(low, high, (int)flag); - mov(cb, C_ARG_REGS[0], ctx.stack_opnd(1)); - mov(cb, C_ARG_REGS[1], ctx.stack_opnd(0)); - mov(cb, C_ARG_REGS[2], 
uimm_opnd(flag.into())); - call_ptr(cb, REG0, rb_range_new as *const u8); + let range_opnd = asm.ccall( + rb_range_new as *const u8, + vec![ + ctx.stack_opnd(1), + ctx.stack_opnd(0), + flag.into() + ] + ); ctx.stack_pop(2); let stack_ret = ctx.stack_push(Type::UnknownHeap); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, range_opnd); KeepCompiling } fn guard_object_is_heap( - cb: &mut CodeBlock, - object_opnd: X86Opnd, - _ctx: &mut Context, + asm: &mut Assembler, + object_opnd: Opnd, side_exit: CodePtr, ) { - add_comment(cb, "guard object is heap"); + asm.comment("guard object is heap"); // Test that the object is not an immediate - test(cb, object_opnd, uimm_opnd(RUBY_IMMEDIATE_MASK as u64)); - jnz_ptr(cb, side_exit); + asm.test(object_opnd, (RUBY_IMMEDIATE_MASK as u64).into()); + asm.jnz(side_exit.into()); // Test that the object is not false or nil - cmp(cb, object_opnd, uimm_opnd(Qnil.into())); - jbe_ptr(cb, side_exit); + asm.cmp(object_opnd, Qnil.into()); + asm.jbe(side_exit.into()); } fn guard_object_is_array( - cb: &mut CodeBlock, - object_opnd: X86Opnd, - flags_opnd: X86Opnd, - _ctx: &mut Context, + asm: &mut Assembler, + object_opnd: Opnd, side_exit: CodePtr, ) { - add_comment(cb, "guard object is array"); + asm.comment("guard object is array"); // Pull out the type mask - mov( - cb, - flags_opnd, - mem_opnd( + let flags_opnd = Opnd::mem( + 8 * SIZEOF_VALUE as u8, + object_opnd, + RUBY_OFFSET_RBASIC_FLAGS, + ); + let flags_opnd = asm.and(flags_opnd, (RUBY_T_MASK as u64).into()); + + // Compare the result with T_ARRAY + asm.cmp(flags_opnd, (RUBY_T_ARRAY as u64).into()); + asm.jne(side_exit.into()); +} + +fn guard_object_is_string( + asm: &mut Assembler, + object_reg: Opnd, + side_exit: CodePtr, +) { + asm.comment("guard object is string"); + + // Pull out the type mask + let flags_reg = asm.load( + Opnd::mem( 8 * SIZEOF_VALUE as u8, - object_opnd, + object_reg, RUBY_OFFSET_RBASIC_FLAGS, ), ); - and(cb, flags_opnd, uimm_opnd(RUBY_T_MASK as u64)); + let 
flags_reg = asm.and(flags_reg, Opnd::UImm(RUBY_T_MASK as u64)); - // Compare the result with T_ARRAY - cmp(cb, flags_opnd, uimm_opnd(RUBY_T_ARRAY as u64)); - jne_ptr(cb, side_exit); + // Compare the result with T_STRING + asm.cmp(flags_reg, Opnd::UImm(RUBY_T_STRING as u64)); + asm.jne(side_exit.into()); } // push enough nils onto the stack to fill out an array fn gen_expandarray( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let flag = jit_get_arg(jit, 1); @@ -1350,13 +1396,13 @@ fn gen_expandarray( // If this instruction has the splat flag, then bail out. if flag_value & 0x01 != 0 { - incr_counter!(expandarray_splat); + gen_counter_incr!(asm, expandarray_splat); return CantCompile; } // If this instruction has the postarg flag, then bail out. if flag_value & 0x02 != 0 { - incr_counter!(expandarray_postarg); + gen_counter_incr!(asm, expandarray_postarg); return CantCompile; } @@ -1373,24 +1419,21 @@ fn gen_expandarray( // push N nils onto the stack for _i in 0..(num.into()) { let push_opnd = ctx.stack_push(Type::Nil); - mov(cb, push_opnd, uimm_opnd(Qnil.into())); + asm.mov(push_opnd, Qnil.into()); } return KeepCompiling; } // Move the array from the stack into REG0 and check that it's an array. - mov(cb, REG0, array_opnd); + let array_reg = asm.load(array_opnd); guard_object_is_heap( - cb, - REG0, - ctx, + asm, + array_reg, counted_exit!(ocb, side_exit, expandarray_not_array), ); guard_object_is_array( - cb, - REG0, - REG1, - ctx, + asm, + array_reg, counted_exit!(ocb, side_exit, expandarray_not_array), ); @@ -1400,47 +1443,47 @@ fn gen_expandarray( } // Pull out the embed flag to check if it's an embedded array. - let flags_opnd = mem_opnd((8 * SIZEOF_VALUE) as u8, REG0, RUBY_OFFSET_RBASIC_FLAGS); - mov(cb, REG1, flags_opnd); + let flags_opnd = Opnd::mem((8 * SIZEOF_VALUE) as u8, array_reg, RUBY_OFFSET_RBASIC_FLAGS); // Move the length of the embedded array into REG1. 
- and(cb, REG1, uimm_opnd(RARRAY_EMBED_LEN_MASK as u64)); - shr(cb, REG1, uimm_opnd(RARRAY_EMBED_LEN_SHIFT as u64)); + let emb_len_opnd = asm.and(flags_opnd, (RARRAY_EMBED_LEN_MASK as u64).into()); + let emb_len_opnd = asm.rshift(emb_len_opnd, (RARRAY_EMBED_LEN_SHIFT as u64).into()); // Conditionally move the length of the heap array into REG1. - test(cb, flags_opnd, uimm_opnd(RARRAY_EMBED_FLAG as u64)); - let array_len_opnd = mem_opnd( + let flags_opnd = Opnd::mem((8 * SIZEOF_VALUE) as u8, array_reg, RUBY_OFFSET_RBASIC_FLAGS); + asm.test(flags_opnd, (RARRAY_EMBED_FLAG as u64).into()); + let array_len_opnd = Opnd::mem( (8 * size_of::()) as u8, - REG0, + asm.load(array_opnd), RUBY_OFFSET_RARRAY_AS_HEAP_LEN, ); - cmovz(cb, REG1, array_len_opnd); + let array_len_opnd = asm.csel_nz(emb_len_opnd, array_len_opnd); // Only handle the case where the number of values in the array is greater // than or equal to the number of values requested. - cmp(cb, REG1, uimm_opnd(num.into())); - jl_ptr(cb, counted_exit!(ocb, side_exit, expandarray_rhs_too_small)); + asm.cmp(array_len_opnd, num.into()); + asm.jl(counted_exit!(ocb, side_exit, expandarray_rhs_too_small).into()); // Load the address of the embedded array into REG1. // (struct RArray *)(obj)->as.ary - let ary_opnd = mem_opnd((8 * SIZEOF_VALUE) as u8, REG0, RUBY_OFFSET_RARRAY_AS_ARY); - lea(cb, REG1, ary_opnd); + let array_reg = asm.load(array_opnd); + let ary_opnd = asm.lea(Opnd::mem((8 * SIZEOF_VALUE) as u8, array_reg, RUBY_OFFSET_RARRAY_AS_ARY)); // Conditionally load the address of the heap array into REG1. 
// (struct RArray *)(obj)->as.heap.ptr - test(cb, flags_opnd, uimm_opnd(RARRAY_EMBED_FLAG as u64)); - let heap_ptr_opnd = mem_opnd( + let flags_opnd = Opnd::mem((8 * SIZEOF_VALUE) as u8, array_reg, RUBY_OFFSET_RBASIC_FLAGS); + asm.test(flags_opnd, Opnd::UImm(RARRAY_EMBED_FLAG as u64)); + let heap_ptr_opnd = Opnd::mem( (8 * size_of::()) as u8, - REG0, + asm.load(array_opnd), RUBY_OFFSET_RARRAY_AS_HEAP_PTR, ); - cmovz(cb, REG1, heap_ptr_opnd); + let ary_opnd = asm.csel_nz(ary_opnd, heap_ptr_opnd); // Loop backward through the array and push each element onto the stack. for i in (0..(num.as_i32())).rev() { let top = ctx.stack_push(Type::Unknown); - mov(cb, REG0, mem_opnd(64, REG1, i * (SIZEOF_VALUE as i32))); - mov(cb, top, REG0); + asm.mov(top, Opnd::mem(64, ary_opnd, i * (SIZEOF_VALUE as i32))); } KeepCompiling @@ -1449,7 +1492,7 @@ fn gen_expandarray( fn gen_getlocal_wc0( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { // Compute the offset from BP to the local @@ -1458,14 +1501,14 @@ fn gen_getlocal_wc0( let local_idx = slot_to_local_idx(jit.get_iseq(), slot_idx); // Load environment pointer EP (level 0) from CFP - gen_get_ep(cb, REG0, 0); + let ep_opnd = gen_get_ep(asm, 0); // Load the local from the EP - mov(cb, REG0, mem_opnd(64, REG0, offs)); + let local_opnd = Opnd::mem(64, ep_opnd, offs); // Write the local at SP let stack_top = ctx.stack_push_local(local_idx.as_usize()); - mov(cb, stack_top, REG0); + asm.mov(stack_top, local_opnd); KeepCompiling } @@ -1497,37 +1540,41 @@ fn slot_to_local_idx(iseq: IseqPtr, slot_idx: i32) -> u32 { } // Get EP at level from CFP -fn gen_get_ep(cb: &mut CodeBlock, reg: X86Opnd, level: u32) { - // Load environment pointer EP from CFP - let ep_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP); - mov(cb, reg, ep_opnd); +fn gen_get_ep(asm: &mut Assembler, level: u32) -> Opnd { + // Load environment pointer EP from CFP into a register + let ep_opnd = 
Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP); + let mut ep_opnd = asm.load(ep_opnd); for _ in (0..level).rev() { // Get the previous EP from the current EP // See GET_PREV_EP(ep) macro // VALUE *prev_ep = ((VALUE *)((ep)[VM_ENV_DATA_INDEX_SPECVAL] & ~0x03)) let offs = (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32); - mov(cb, reg, mem_opnd(64, reg, offs)); - and(cb, reg, imm_opnd(!0x03)); + ep_opnd = asm.load(Opnd::mem(64, ep_opnd, offs)); + ep_opnd = asm.and(ep_opnd, Opnd::Imm(!0x03)); } + + ep_opnd } fn gen_getlocal_generic( ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, local_idx: u32, level: u32, ) -> CodegenStatus { - gen_get_ep(cb, REG0, level); + // Load environment pointer EP (level 0) from CFP + let ep_opnd = gen_get_ep(asm, level); // Load the local from the block // val = *(vm_get_ep(GET_EP(), level) - idx); let offs = -(SIZEOF_VALUE as i32 * local_idx as i32); - mov(cb, REG0, mem_opnd(64, REG0, offs)); + let local_opnd = Opnd::mem(64, ep_opnd, offs); // Write the local at SP let stack_top = ctx.stack_push(Type::Unknown); - mov(cb, stack_top, REG0); + + asm.mov(stack_top, local_opnd); KeepCompiling } @@ -1535,28 +1582,28 @@ fn gen_getlocal_generic( fn gen_getlocal( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let idx = jit_get_arg(jit, 0); let level = jit_get_arg(jit, 1); - gen_getlocal_generic(ctx, cb, idx.as_u32(), level.as_u32()) + gen_getlocal_generic(ctx, asm, idx.as_u32(), level.as_u32()) } fn gen_getlocal_wc1( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let idx = jit_get_arg(jit, 0); - gen_getlocal_generic(ctx, cb, idx.as_u32(), 1) + gen_getlocal_generic(ctx, asm, idx.as_u32(), 1) } fn gen_setlocal_wc0( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { /* @@ -1577,25 +1624,25 @@ fn 
gen_setlocal_wc0( let value_type = ctx.get_opnd_type(StackOpnd(0)); // Load environment pointer EP (level 0) from CFP - gen_get_ep(cb, REG0, 0); + let ep_opnd = gen_get_ep(asm, 0); // Write barriers may be required when VM_ENV_FLAG_WB_REQUIRED is set, however write barriers // only affect heap objects being written. If we know an immediate value is being written we // can skip this check. if !value_type.is_imm() { // flags & VM_ENV_FLAG_WB_REQUIRED - let flags_opnd = mem_opnd( + let flags_opnd = Opnd::mem( 64, - REG0, + ep_opnd, SIZEOF_VALUE as i32 * VM_ENV_DATA_INDEX_FLAGS as i32, ); - test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED as i64)); + asm.test(flags_opnd, VM_ENV_FLAG_WB_REQUIRED.into()); // Create a side-exit to fall back to the interpreter let side_exit = get_side_exit(jit, ocb, ctx); // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0 - jnz_ptr(cb, side_exit); + asm.jnz(side_exit.into()); } // Set the type of the local variable in the context @@ -1603,11 +1650,10 @@ fn gen_setlocal_wc0( // Pop the value to write from the stack let stack_top = ctx.stack_pop(1); - mov(cb, REG1, stack_top); // Write the value at the environment pointer let offs: i32 = -8 * slot_idx; - mov(cb, mem_opnd(64, REG0, offs), REG1); + asm.mov(Opnd::mem(64, ep_opnd, offs), stack_top); KeepCompiling } @@ -1615,7 +1661,7 @@ fn gen_setlocal_wc0( fn gen_setlocal_generic( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, local_idx: i32, level: u32, @@ -1623,34 +1669,33 @@ fn gen_setlocal_generic( let value_type = ctx.get_opnd_type(StackOpnd(0)); // Load environment pointer EP at level - gen_get_ep(cb, REG0, level); + let ep_opnd = gen_get_ep(asm, level); // Write barriers may be required when VM_ENV_FLAG_WB_REQUIRED is set, however write barriers // only affect heap objects being written. If we know an immediate value is being written we // can skip this check. 
if !value_type.is_imm() { // flags & VM_ENV_FLAG_WB_REQUIRED - let flags_opnd = mem_opnd( + let flags_opnd = Opnd::mem( 64, - REG0, + ep_opnd, SIZEOF_VALUE as i32 * VM_ENV_DATA_INDEX_FLAGS as i32, ); - test(cb, flags_opnd, uimm_opnd(VM_ENV_FLAG_WB_REQUIRED.into())); + asm.test(flags_opnd, VM_ENV_FLAG_WB_REQUIRED.into()); // Create a side-exit to fall back to the interpreter let side_exit = get_side_exit(jit, ocb, ctx); // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0 - jnz_ptr(cb, side_exit); + asm.jnz(side_exit.into()); } // Pop the value to write from the stack let stack_top = ctx.stack_pop(1); - mov(cb, REG1, stack_top); // Write the value at the environment pointer let offs = -(SIZEOF_VALUE as i32 * local_idx); - mov(cb, mem_opnd(64, REG0, offs), REG1); + asm.mov(Opnd::mem(64, ep_opnd, offs), stack_top); KeepCompiling } @@ -1658,67 +1703,71 @@ fn gen_setlocal_generic( fn gen_setlocal( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let idx = jit_get_arg(jit, 0).as_i32(); let level = jit_get_arg(jit, 1).as_u32(); - gen_setlocal_generic(jit, ctx, cb, ocb, idx, level) + gen_setlocal_generic(jit, ctx, asm, ocb, idx, level) } fn gen_setlocal_wc1( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let idx = jit_get_arg(jit, 0).as_i32(); - gen_setlocal_generic(jit, ctx, cb, ocb, idx, 1) + gen_setlocal_generic(jit, ctx, asm, ocb, idx, 1) } // new hash initialized from top N values fn gen_newhash( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { - let num: i64 = jit_get_arg(jit, 0).as_i64(); + let num: u64 = jit_get_arg(jit, 0).as_u64(); // Save the PC and SP because we are allocating - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); if num != 0 { // val = rb_hash_new_with_size(num / 2); - mov(cb, 
C_ARG_REGS[0], imm_opnd(num / 2)); - call_ptr(cb, REG0, rb_hash_new_with_size as *const u8); + let new_hash = asm.ccall( + rb_hash_new_with_size as *const u8, + vec![Opnd::UImm(num / 2)] + ); - // save the allocated hash as we want to push it after insertion - push(cb, RAX); - push(cb, RAX); // alignment + // Save the allocated hash as we want to push it after insertion + asm.cpush(new_hash); + asm.cpush(new_hash); // x86 alignment + + // Get a pointer to the values to insert into the hash + let stack_addr_from_top = asm.lea(ctx.stack_opnd((num - 1) as i32)); // rb_hash_bulk_insert(num, STACK_ADDR_FROM_TOP(num), val); - mov(cb, C_ARG_REGS[0], imm_opnd(num)); - lea( - cb, - C_ARG_REGS[1], - ctx.stack_opnd((num - 1).try_into().unwrap()), + asm.ccall( + rb_hash_bulk_insert as *const u8, + vec![ + Opnd::UImm(num), + stack_addr_from_top, + new_hash + ] ); - mov(cb, C_ARG_REGS[2], RAX); - call_ptr(cb, REG0, rb_hash_bulk_insert as *const u8); - pop(cb, RAX); // alignment - pop(cb, RAX); + let new_hash = asm.cpop(); + asm.cpop_into(new_hash); // x86 alignment ctx.stack_pop(num.try_into().unwrap()); let stack_ret = ctx.stack_push(Type::Hash); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, new_hash); } else { // val = rb_hash_new(); - call_ptr(cb, REG0, rb_hash_new as *const u8); - + let new_hash = asm.ccall(rb_hash_new as *const u8, vec![]); let stack_ret = ctx.stack_push(Type::Hash); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, new_hash); } KeepCompiling @@ -1727,20 +1776,21 @@ fn gen_newhash( fn gen_putstring( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let put_val = jit_get_arg(jit, 0); // Save the PC and SP because the callee will allocate - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); - mov(cb, C_ARG_REGS[0], REG_EC); - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], put_val); - call_ptr(cb, REG0, rb_ec_str_resurrect as *const u8); + let str_opnd = 
asm.ccall( + rb_ec_str_resurrect as *const u8, + vec![EC, put_val.into()] + ); let stack_top = ctx.stack_push(Type::CString); - mov(cb, stack_top, RAX); + asm.mov(stack_top, str_opnd); KeepCompiling } @@ -1750,7 +1800,7 @@ fn gen_putstring( fn gen_checkkeyword( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { // When a keyword is unspecified past index 32, a hash will be used @@ -1766,70 +1816,68 @@ fn gen_checkkeyword( let index: i64 = jit_get_arg(jit, 1).as_i64(); // Load environment pointer EP - gen_get_ep(cb, REG0, 0); + let ep_opnd = gen_get_ep(asm, 0); // VALUE kw_bits = *(ep - bits); - let bits_opnd = mem_opnd(64, REG0, (SIZEOF_VALUE as i32) * -bits_offset); + let bits_opnd = Opnd::mem(64, ep_opnd, (SIZEOF_VALUE as i32) * -bits_offset); // unsigned int b = (unsigned int)FIX2ULONG(kw_bits); // if ((b & (0x01 << idx))) { // // We can skip the FIX2ULONG conversion by shifting the bit we test let bit_test: i64 = 0x01 << (index + 1); - test(cb, bits_opnd, imm_opnd(bit_test)); - mov(cb, REG0, uimm_opnd(Qfalse.into())); - mov(cb, REG1, uimm_opnd(Qtrue.into())); - cmovz(cb, REG0, REG1); + asm.test(bits_opnd, Opnd::Imm(bit_test)); + let ret_opnd = asm.csel_z(Qtrue.into(), Qfalse.into()); let stack_ret = ctx.stack_push(Type::UnknownImm); - mov(cb, stack_ret, REG0); + asm.mov(stack_ret, ret_opnd); KeepCompiling } fn gen_jnz_to_target0( - cb: &mut CodeBlock, + asm: &mut Assembler, target0: CodePtr, _target1: Option, shape: BranchShape, ) { match shape { BranchShape::Next0 | BranchShape::Next1 => unreachable!(), - BranchShape::Default => jnz_ptr(cb, target0), + BranchShape::Default => asm.jnz(target0.into()), } } fn gen_jz_to_target0( - cb: &mut CodeBlock, + asm: &mut Assembler, target0: CodePtr, _target1: Option, shape: BranchShape, ) { match shape { BranchShape::Next0 | BranchShape::Next1 => unreachable!(), - BranchShape::Default => jz_ptr(cb, target0), + BranchShape::Default => 
asm.jz(Target::CodePtr(target0)), } } fn gen_jbe_to_target0( - cb: &mut CodeBlock, + asm: &mut Assembler, target0: CodePtr, _target1: Option, shape: BranchShape, ) { match shape { BranchShape::Next0 | BranchShape::Next1 => unreachable!(), - BranchShape::Default => jbe_ptr(cb, target0), + BranchShape::Default => asm.jbe(Target::CodePtr(target0)), } } // Generate a jump to a stub that recompiles the current YARV instruction on failure. -// When depth_limitk is exceeded, generate a jump to a side exit. +// When depth_limit is exceeded, generate a jump to a side exit. fn jit_chain_guard( jcc: JCCKinds, jit: &JITState, ctx: &Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, depth_limit: i32, side_exit: CodePtr, @@ -1848,9 +1896,9 @@ fn jit_chain_guard( idx: jit.insn_idx, }; - gen_branch(jit, ctx, cb, ocb, bid, &deeper, None, None, target0_gen_fn); + gen_branch(jit, ctx, asm, ocb, bid, &deeper, None, None, target0_gen_fn); } else { - target0_gen_fn(cb, side_exit, None, BranchShape::Default); + target0_gen_fn(asm, side_exit, None, BranchShape::Default); } } @@ -1871,13 +1919,13 @@ pub const SEND_MAX_DEPTH: i32 = 5; fn gen_set_ivar( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, recv: VALUE, ivar_name: ID, ) -> CodegenStatus { // Save the PC and SP because the callee may allocate // Note that this modifies REG_SP, which is why we do it first - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // Get the operands from the stack let val_opnd = ctx.stack_pop(1); @@ -1886,36 +1934,48 @@ fn gen_set_ivar( let ivar_index: u32 = unsafe { rb_obj_ensure_iv_index_mapping(recv, ivar_name) }; // Call rb_vm_set_ivar_idx with the receiver, the index of the ivar, and the value - mov(cb, C_ARG_REGS[0], recv_opnd); - mov(cb, C_ARG_REGS[1], imm_opnd(ivar_index.into())); - mov(cb, C_ARG_REGS[2], val_opnd); - call_ptr(cb, REG0, rb_vm_set_ivar_idx as *const u8); + let val = asm.ccall( + 
rb_vm_set_ivar_idx as *const u8, + vec![ + recv_opnd, + Opnd::Imm(ivar_index.into()), + val_opnd, + ], + ); let out_opnd = ctx.stack_push(Type::Unknown); - mov(cb, out_opnd, RAX); + asm.mov(out_opnd, val); KeepCompiling } + + // Codegen for getting an instance variable. // Preconditions: -// - receiver is in REG0 // - receiver has the same class as CLASS_OF(comptime_receiver) // - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled fn gen_get_ivar( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, max_chain_depth: i32, comptime_receiver: VALUE, ivar_name: ID, - reg0_opnd: InsnOpnd, + recv: Opnd, + recv_opnd: InsnOpnd, side_exit: CodePtr, ) -> CodegenStatus { let comptime_val_klass = comptime_receiver.class_of(); let starting_context = *ctx; // make a copy for use with jit_chain_guard + // If recv isn't already a register, load it. + let recv = match recv { + Opnd::Reg(_) => recv, + _ => asm.load(recv), + }; + // Check if the comptime class uses a custom allocator let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) }; let uses_custom_allocator = match custom_allocator { @@ -1936,46 +1996,26 @@ fn gen_get_ivar( if !receiver_t_object || uses_custom_allocator { // General case. Call rb_ivar_get(). // VALUE rb_ivar_get(VALUE obj, ID id) - add_comment(cb, "call rb_ivar_get()"); + asm.comment("call rb_ivar_get()"); // The function could raise exceptions. 
- jit_prepare_routine_call(jit, ctx, cb, REG1); + jit_prepare_routine_call(jit, ctx, asm); - mov(cb, C_ARG_REGS[0], REG0); - mov(cb, C_ARG_REGS[1], uimm_opnd(ivar_name)); - call_ptr(cb, REG1, rb_ivar_get as *const u8); + let ivar_val = asm.ccall(rb_ivar_get as *const u8, vec![recv, Opnd::UImm(ivar_name)]); - if reg0_opnd != SelfOpnd { + if recv_opnd != SelfOpnd { ctx.stack_pop(1); } + // Push the ivar on the stack let out_opnd = ctx.stack_push(Type::Unknown); - mov(cb, out_opnd, RAX); + asm.mov(out_opnd, ivar_val); // Jump to next instruction. This allows guard chains to share the same successor. - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); return EndBlock; } - /* - // FIXME: - // This check was added because of a failure in a test involving the - // Nokogiri Document class where we see a T_DATA that still has the default - // allocator. - // Aaron Patterson argues that this is a bug in the C extension, because - // people could call .allocate() on the class and still get a T_OBJECT - // For now I added an extra dynamic check that the receiver is T_OBJECT - // so we can safely pass all the tests in Shopify Core. - // - // Guard that the receiver is T_OBJECT - // #define RB_BUILTIN_TYPE(x) (int)(((struct RBasic*)(x))->flags & RUBY_T_MASK) - add_comment(cb, "guard receiver is T_OBJECT"); - mov(cb, REG1, member_opnd(REG0, struct RBasic, flags)); - and(cb, REG1, imm_opnd(RUBY_T_MASK)); - cmp(cb, REG1, imm_opnd(T_OBJECT)); - jit_chain_guard(JCC_JNE, jit, &starting_context, cb, ocb, max_chain_depth, side_exit); - */ - // FIXME: Mapping the index could fail when there is too many ivar names. If we're // compiling for a branch stub that can cause the exception to be thrown from the // wrong PC. 
@@ -1983,16 +2023,16 @@ fn gen_get_ivar( unsafe { rb_obj_ensure_iv_index_mapping(comptime_receiver, ivar_name) }.as_usize(); // Pop receiver if it's on the temp stack - if reg0_opnd != SelfOpnd { + if recv_opnd != SelfOpnd { ctx.stack_pop(1); } if USE_RVARGC != 0 { // Check that the ivar table is big enough // Check that the slot is inside the ivar table (num_slots > index) - let num_slots = mem_opnd(32, REG0, ROBJECT_OFFSET_NUMIV); - cmp(cb, num_slots, uimm_opnd(ivar_index as u64)); - jle_ptr(cb, counted_exit!(ocb, side_exit, getivar_idx_out_of_range)); + let num_slots = Opnd::mem(32, recv, ROBJECT_OFFSET_NUMIV); + asm.cmp(num_slots, Opnd::UImm(ivar_index as u64)); + asm.jbe(counted_exit!(ocb, side_exit, getivar_idx_out_of_range).into()); } // Compile time self is embedded and the ivar index lands within the object @@ -2002,15 +2042,15 @@ fn gen_get_ivar( // Guard that self is embedded // TODO: BT and JC is shorter - add_comment(cb, "guard embedded getivar"); - let flags_opnd = mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS); - test(cb, flags_opnd, uimm_opnd(ROBJECT_EMBED as u64)); + asm.comment("guard embedded getivar"); + let flags_opnd = Opnd::mem(64, recv, RUBY_OFFSET_RBASIC_FLAGS); + asm.test(flags_opnd, Opnd::UImm(ROBJECT_EMBED as u64)); let side_exit = counted_exit!(ocb, side_exit, getivar_megamorphic); jit_chain_guard( JCC_JZ, jit, &starting_context, - cb, + asm, ocb, max_chain_depth, side_exit, @@ -2018,76 +2058,71 @@ fn gen_get_ivar( // Load the variable let offs = ROBJECT_OFFSET_AS_ARY + (ivar_index * SIZEOF_VALUE) as i32; - let ivar_opnd = mem_opnd(64, REG0, offs); - mov(cb, REG1, ivar_opnd); + let ivar_opnd = Opnd::mem(64, recv, offs); // Guard that the variable is not Qundef - cmp(cb, REG1, uimm_opnd(Qundef.into())); - mov(cb, REG0, uimm_opnd(Qnil.into())); - cmove(cb, REG1, REG0); + asm.cmp(ivar_opnd, Qundef.into()); + let out_val = asm.csel_e(Qnil.into(), ivar_opnd); // Push the ivar on the stack let out_opnd = ctx.stack_push(Type::Unknown); - mov(cb, 
out_opnd, REG1); + asm.mov(out_opnd, out_val); } else { // Compile time value is *not* embedded. // Guard that value is *not* embedded // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h - add_comment(cb, "guard extended getivar"); - let flags_opnd = mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS); - test(cb, flags_opnd, uimm_opnd(ROBJECT_EMBED as u64)); - let side_exit = counted_exit!(ocb, side_exit, getivar_megamorphic); + asm.comment("guard extended getivar"); + let flags_opnd = Opnd::mem(64, recv, RUBY_OFFSET_RBASIC_FLAGS); + asm.test(flags_opnd, Opnd::UImm(ROBJECT_EMBED as u64)); + let megamorphic_side_exit = counted_exit!(ocb, side_exit, getivar_megamorphic); jit_chain_guard( JCC_JNZ, jit, &starting_context, - cb, + asm, ocb, max_chain_depth, - side_exit, + megamorphic_side_exit, ); if USE_RVARGC == 0 { // Check that the extended table is big enough // Check that the slot is inside the extended table (num_slots > index) - let num_slots = mem_opnd(32, REG0, ROBJECT_OFFSET_NUMIV); - cmp(cb, num_slots, uimm_opnd(ivar_index as u64)); - jle_ptr(cb, counted_exit!(ocb, side_exit, getivar_idx_out_of_range)); + let num_slots = Opnd::mem(32, recv, ROBJECT_OFFSET_NUMIV); + asm.cmp(num_slots, Opnd::UImm(ivar_index as u64)); + asm.jbe(counted_exit!(ocb, side_exit, getivar_idx_out_of_range).into()); } // Get a pointer to the extended table - let tbl_opnd = mem_opnd(64, REG0, ROBJECT_OFFSET_AS_HEAP_IVPTR); - mov(cb, REG0, tbl_opnd); + let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_IVPTR)); // Read the ivar from the extended table - let ivar_opnd = mem_opnd(64, REG0, (SIZEOF_VALUE * ivar_index) as i32); - mov(cb, REG0, ivar_opnd); + let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32); // Check that the ivar is not Qundef - cmp(cb, REG0, uimm_opnd(Qundef.into())); - mov(cb, REG1, uimm_opnd(Qnil.into())); - cmove(cb, REG0, REG1); + asm.cmp(ivar_opnd, Qundef.into()); + let out_val = asm.csel_ne(ivar_opnd, Qnil.into()); // 
Push the ivar on the stack let out_opnd = ctx.stack_push(Type::Unknown); - mov(cb, out_opnd, REG0); + asm.mov(out_opnd, out_val); } // Jump to next instruction. This allows guard chains to share the same successor. - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); EndBlock } fn gen_getinstancevariable( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Defer compilation so we can specialize on a runtime `self` if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -2100,14 +2135,14 @@ fn gen_getinstancevariable( let side_exit = get_side_exit(jit, ocb, ctx); // Guard that the receiver has the same class as the one from compile time. - mov(cb, REG0, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF)); - + let self_asm_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF); jit_guard_known_klass( jit, ctx, - cb, + asm, ocb, comptime_val_klass, + self_asm_opnd, SelfOpnd, comptime_val, GET_IVAR_MAX_DEPTH, @@ -2117,11 +2152,12 @@ fn gen_getinstancevariable( gen_get_ivar( jit, ctx, - cb, + asm, ocb, GET_IVAR_MAX_DEPTH, comptime_val, ivar_name, + self_asm_opnd, SelfOpnd, side_exit, ) @@ -2130,7 +2166,7 @@ fn gen_getinstancevariable( fn gen_setinstancevariable( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let id = jit_get_arg(jit, 0); @@ -2138,23 +2174,22 @@ fn gen_setinstancevariable( // Save the PC and SP because the callee may allocate // Note that this modifies REG_SP, which is why we do it first - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // Get the operands from the stack let val_opnd = ctx.stack_pop(1); // Call rb_vm_setinstancevariable(iseq, obj, id, val, ic); - mov( - cb, - C_ARG_REGS[1], - mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF), + asm.ccall( + 
rb_vm_setinstancevariable as *const u8, + vec![ + Opnd::const_ptr(jit.iseq as *const u8), + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF), + Opnd::UImm(id.into()), + val_opnd, + Opnd::const_ptr(ic as *const u8), + ] ); - mov(cb, C_ARG_REGS[3], val_opnd); - mov(cb, C_ARG_REGS[2], uimm_opnd(id.into())); - mov(cb, C_ARG_REGS[4], const_ptr_opnd(ic as *const u8)); - let iseq = VALUE(jit.iseq as usize); - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], iseq); - call_ptr(cb, REG0, rb_vm_setinstancevariable as *const u8); KeepCompiling } @@ -2162,35 +2197,28 @@ fn gen_setinstancevariable( fn gen_defined( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { - let op_type = jit_get_arg(jit, 0); + let op_type = jit_get_arg(jit, 0).as_u64(); let obj = jit_get_arg(jit, 1); let pushval = jit_get_arg(jit, 2); // Save the PC and SP because the callee may allocate // Note that this modifies REG_SP, which is why we do it first - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // Get the operands from the stack let v_opnd = ctx.stack_pop(1); // Call vm_defined(ec, reg_cfp, op_type, obj, v) - mov(cb, C_ARG_REGS[0], REG_EC); - mov(cb, C_ARG_REGS[1], REG_CFP); - mov(cb, C_ARG_REGS[2], uimm_opnd(op_type.into())); - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[3], obj); - mov(cb, C_ARG_REGS[4], v_opnd); - call_ptr(cb, REG0, rb_vm_defined as *const u8); + let def_result = asm.ccall(rb_vm_defined as *const u8, vec![EC, CFP, op_type.into(), obj.into(), v_opnd]); // if (vm_defined(ec, GET_CFP(), op_type, obj, v)) { // val = pushval; // } - jit_mov_gc_ptr(jit, cb, REG1, pushval); - cmp(cb, AL, imm_opnd(0)); - mov(cb, RAX, uimm_opnd(Qnil.into())); - cmovnz(cb, RAX, REG1); + asm.test(def_result, Opnd::UImm(255)); + let out_value = asm.csel_nz(pushval.into(), Qnil.into()); // Push the return value onto the stack let out_type = if pushval.special_const_p() { @@ -2199,7 +2227,7 @@ fn gen_defined( 
Type::Unknown }; let stack_ret = ctx.stack_push(out_type); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, out_value); KeepCompiling } @@ -2207,7 +2235,7 @@ fn gen_defined( fn gen_checktype( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let type_val = jit_get_arg(jit, 0).as_u32(); @@ -2215,54 +2243,43 @@ fn gen_checktype( // Only three types are emitted by compile.c at the moment if let RUBY_T_STRING | RUBY_T_ARRAY | RUBY_T_HASH = type_val { let val_type = ctx.get_opnd_type(StackOpnd(0)); - let val = ctx.stack_pop(1); + let val = asm.load(ctx.stack_pop(1)); // Check if we know from type information - match (type_val, val_type) { - (RUBY_T_STRING, Type::TString) - | (RUBY_T_STRING, Type::CString) - | (RUBY_T_ARRAY, Type::Array) - | (RUBY_T_HASH, Type::Hash) => { - // guaranteed type match - let stack_ret = ctx.stack_push(Type::True); - mov(cb, stack_ret, uimm_opnd(Qtrue.as_u64())); - return KeepCompiling; - } - _ if val_type.is_imm() || val_type.is_specific() => { - // guaranteed not to match T_STRING/T_ARRAY/T_HASH - let stack_ret = ctx.stack_push(Type::False); - mov(cb, stack_ret, uimm_opnd(Qfalse.as_u64())); - return KeepCompiling; - } + match val_type.known_value_type() { + Some(value_type) => { + if value_type == type_val { + jit_putobject(jit, ctx, asm, Qtrue); + return KeepCompiling; + } else { + jit_putobject(jit, ctx, asm, Qfalse); + return KeepCompiling; + } + }, _ => (), } - mov(cb, REG0, val); - mov(cb, REG1, uimm_opnd(Qfalse.as_u64())); - - let ret = cb.new_label("ret".to_string()); + let ret = asm.new_label("ret"); if !val_type.is_heap() { // if (SPECIAL_CONST_P(val)) { // Return Qfalse via REG1 if not on heap - test(cb, REG0, uimm_opnd(RUBY_IMMEDIATE_MASK as u64)); - jnz_label(cb, ret); - cmp(cb, REG0, uimm_opnd(Qnil.as_u64())); - jbe_label(cb, ret); + asm.test(val, Opnd::UImm(RUBY_IMMEDIATE_MASK as u64)); + asm.jnz(ret); + asm.cmp(val, Opnd::UImm(Qnil.into())); + 
asm.jbe(ret); } // Check type on object - mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS)); - and(cb, REG0, uimm_opnd(RUBY_T_MASK as u64)); - cmp(cb, REG0, uimm_opnd(type_val as u64)); - mov(cb, REG0, uimm_opnd(Qtrue.as_u64())); - // REG1 contains Qfalse from above - cmove(cb, REG1, REG0); - - cb.write_label(ret); + let object_type = asm.and( + Opnd::mem(64, val, RUBY_OFFSET_RBASIC_FLAGS), + Opnd::UImm(RUBY_T_MASK.into())); + asm.cmp(object_type, Opnd::UImm(type_val.into())); + let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into()); + + asm.write_label(ret); let stack_ret = ctx.stack_push(Type::UnknownImm); - mov(cb, stack_ret, REG1); - cb.link_labels(); + asm.mov(stack_ret, ret_opnd); KeepCompiling } else { @@ -2273,48 +2290,49 @@ fn gen_checktype( fn gen_concatstrings( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let n = jit_get_arg(jit, 0); // Save the PC and SP because we are allocating - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); - let values_ptr = ctx.sp_opnd(-((SIZEOF_VALUE as isize) * n.as_isize())); + let values_ptr = asm.lea(ctx.sp_opnd(-((SIZEOF_VALUE as isize) * n.as_isize()))); // call rb_str_concat_literals(long n, const VALUE *strings); - mov(cb, C_ARG_REGS[0], imm_opnd(n.into())); - lea(cb, C_ARG_REGS[1], values_ptr); - call_ptr(cb, REG0, rb_str_concat_literals as *const u8); + let return_value = asm.ccall( + rb_str_concat_literals as *const u8, + vec![Opnd::UImm(n.into()), values_ptr] + ); ctx.stack_pop(n.as_usize()); let stack_ret = ctx.stack_push(Type::CString); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, return_value); KeepCompiling } -fn guard_two_fixnums(ctx: &mut Context, cb: &mut CodeBlock, side_exit: CodePtr) { +fn guard_two_fixnums(ctx: &mut Context, asm: &mut Assembler, side_exit: CodePtr) { // Get the stack operand types let arg1_type = ctx.get_opnd_type(StackOpnd(0)); let arg0_type = 
ctx.get_opnd_type(StackOpnd(1)); if arg0_type.is_heap() || arg1_type.is_heap() { - add_comment(cb, "arg is heap object"); - jmp_ptr(cb, side_exit); + asm.comment("arg is heap object"); + asm.jmp(side_exit.into()); return; } if arg0_type != Type::Fixnum && arg0_type.is_specific() { - add_comment(cb, "arg0 not fixnum"); - jmp_ptr(cb, side_exit); + asm.comment("arg0 not fixnum"); + asm.jmp(side_exit.into()); return; } if arg1_type != Type::Fixnum && arg1_type.is_specific() { - add_comment(cb, "arg1 not fixnum"); - jmp_ptr(cb, side_exit); + asm.comment("arg1 not fixnum"); + asm.jmp(side_exit.into()); return; } @@ -2329,14 +2347,14 @@ fn guard_two_fixnums(ctx: &mut Context, cb: &mut CodeBlock, side_exit: CodePtr) // If not fixnums, fall back if arg0_type != Type::Fixnum { - add_comment(cb, "guard arg0 fixnum"); - test(cb, arg0, uimm_opnd(RUBY_FIXNUM_FLAG as u64)); - jz_ptr(cb, side_exit); + asm.comment("guard arg0 fixnum"); + asm.test(arg0, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + asm.jz(side_exit.into()); } if arg1_type != Type::Fixnum { - add_comment(cb, "guard arg1 fixnum"); - test(cb, arg1, uimm_opnd(RUBY_FIXNUM_FLAG as u64)); - jz_ptr(cb, side_exit); + asm.comment("guard arg1 fixnum"); + asm.test(arg1, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + asm.jz(side_exit.into()); } // Set stack types in context @@ -2345,18 +2363,18 @@ fn guard_two_fixnums(ctx: &mut Context, cb: &mut CodeBlock, side_exit: CodePtr) } // Conditional move operation used by comparison operators -type CmovFn = fn(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) -> (); +type CmovFn = fn(cb: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> Opnd; fn gen_fixnum_cmp( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, cmov_op: CmovFn, ) -> CodegenStatus { // Defer compilation so we can specialize base on a runtime receiver if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ 
-2373,63 +2391,60 @@ fn gen_fixnum_cmp( } // Check that both operands are fixnums - guard_two_fixnums(ctx, cb, side_exit); + guard_two_fixnums(ctx, asm, side_exit); // Get the operands from the stack let arg1 = ctx.stack_pop(1); let arg0 = ctx.stack_pop(1); // Compare the arguments - xor(cb, REG0_32, REG0_32); // REG0 = Qfalse - mov(cb, REG1, arg0); - cmp(cb, REG1, arg1); - mov(cb, REG1, uimm_opnd(Qtrue.into())); - cmov_op(cb, REG0, REG1); + asm.cmp(arg0, arg1); + let bool_opnd = cmov_op(asm, Qtrue.into(), Qfalse.into()); // Push the output on the stack let dst = ctx.stack_push(Type::Unknown); - mov(cb, dst, REG0); + asm.mov(dst, bool_opnd); KeepCompiling } else { - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } } fn gen_opt_lt( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { - gen_fixnum_cmp(jit, ctx, cb, ocb, cmovl) + gen_fixnum_cmp(jit, ctx, asm, ocb, Assembler::csel_l) } fn gen_opt_le( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { - gen_fixnum_cmp(jit, ctx, cb, ocb, cmovle) + gen_fixnum_cmp(jit, ctx, asm, ocb, Assembler::csel_le) } fn gen_opt_ge( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { - gen_fixnum_cmp(jit, ctx, cb, ocb, cmovge) + gen_fixnum_cmp(jit, ctx, asm, ocb, Assembler::csel_ge) } fn gen_opt_gt( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { - gen_fixnum_cmp(jit, ctx, cb, ocb, cmovg) + gen_fixnum_cmp(jit, ctx, asm, ocb, Assembler::csel_g) } // Implements specialized equality for either two fixnum or two strings @@ -2437,7 +2452,7 @@ fn gen_opt_gt( fn gen_equality_specialized( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, 
side_exit: CodePtr, ) -> bool { @@ -2453,19 +2468,16 @@ fn gen_equality_specialized( return false; } - guard_two_fixnums(ctx, cb, side_exit); + guard_two_fixnums(ctx, asm, side_exit); - mov(cb, REG0, a_opnd); - cmp(cb, REG0, b_opnd); + asm.cmp(a_opnd, b_opnd); - mov(cb, REG0, imm_opnd(Qfalse.into())); - mov(cb, REG1, imm_opnd(Qtrue.into())); - cmove(cb, REG0, REG1); + let val = asm.csel_ne(Qfalse.into(), Qtrue.into()); // Push the output on the stack ctx.stack_pop(2); let dst = ctx.stack_push(Type::UnknownImm); - mov(cb, dst, REG0); + asm.mov(dst, val); true } else if unsafe { comptime_a.class_of() == rb_cString && comptime_b.class_of() == rb_cString } @@ -2475,43 +2487,39 @@ fn gen_equality_specialized( return false; } - // Load a and b in preparation for call later - mov(cb, C_ARG_REGS[0], a_opnd); - mov(cb, C_ARG_REGS[1], b_opnd); - // Guard that a is a String - mov(cb, REG0, C_ARG_REGS[0]); jit_guard_known_klass( jit, ctx, - cb, + asm, ocb, unsafe { rb_cString }, + a_opnd, StackOpnd(1), comptime_a, SEND_MAX_DEPTH, side_exit, ); - let ret = cb.new_label("ret".to_string()); + let equal = asm.new_label("equal"); + let ret = asm.new_label("ret"); // If they are equal by identity, return true - cmp(cb, C_ARG_REGS[0], C_ARG_REGS[1]); - mov(cb, RAX, imm_opnd(Qtrue.into())); - je_label(cb, ret); + asm.cmp(a_opnd, b_opnd); + asm.je(equal); // Otherwise guard that b is a T_STRING (from type info) or String (from runtime guard) let btype = ctx.get_opnd_type(StackOpnd(0)); - if btype != Type::TString && btype != Type::CString { - mov(cb, REG0, C_ARG_REGS[1]); + if btype.known_value_type() != Some(RUBY_T_STRING) { // Note: any T_STRING is valid here, but we check for a ::String for simplicity // To pass a mutable static variable (rb_cString) requires an unsafe block jit_guard_known_klass( jit, ctx, - cb, + asm, ocb, unsafe { rb_cString }, + b_opnd, StackOpnd(0), comptime_b, SEND_MAX_DEPTH, @@ -2520,14 +2528,18 @@ fn gen_equality_specialized( } // Call rb_str_eql_internal(a, 
b) - call_ptr(cb, REG0, rb_str_eql_internal as *const u8); + let val = asm.ccall(rb_str_eql_internal as *const u8, vec![a_opnd, b_opnd]); // Push the output on the stack - cb.write_label(ret); ctx.stack_pop(2); let dst = ctx.stack_push(Type::UnknownImm); - mov(cb, dst, RAX); - cb.link_labels(); + asm.mov(dst, val); + asm.jmp(ret); + + asm.write_label(equal); + asm.mov(dst, Qtrue.into()); + + asm.write_label(ret); true } else { @@ -2538,42 +2550,42 @@ fn gen_equality_specialized( fn gen_opt_eq( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Defer compilation so we can specialize base on a runtime receiver if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } // Create a side-exit to fall back to the interpreter let side_exit = get_side_exit(jit, ocb, ctx); - if gen_equality_specialized(jit, ctx, cb, ocb, side_exit) { - jump_to_next_insn(jit, ctx, cb, ocb); + if gen_equality_specialized(jit, ctx, asm, ocb, side_exit) { + jump_to_next_insn(jit, ctx, asm, ocb); EndBlock } else { - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } } fn gen_opt_neq( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // opt_neq is passed two rb_call_data as arguments: // first for ==, second for != let cd = jit_get_arg(jit, 1).as_ptr(); - return gen_send_general(jit, ctx, cb, ocb, cd, None); + return gen_send_general(jit, ctx, asm, ocb, cd, None); } fn gen_opt_aref( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let cd: *const rb_call_data = jit_get_arg(jit, 0).as_ptr(); @@ -2581,13 +2593,13 @@ fn gen_opt_aref( // Only JIT one arg calls like `ary[6]` if argc != 1 { - gen_counter_incr!(cb, oaref_argc_not_one); + 
gen_counter_incr!(asm, oaref_argc_not_one); return CantCompile; } // Defer compilation so we can specialize base on a runtime receiver if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -2609,109 +2621,103 @@ fn gen_opt_aref( // Pop the stack operands let idx_opnd = ctx.stack_pop(1); let recv_opnd = ctx.stack_pop(1); - mov(cb, REG0, recv_opnd); + let recv_reg = asm.load(recv_opnd); // if (SPECIAL_CONST_P(recv)) { // Bail if receiver is not a heap object - test(cb, REG0, uimm_opnd(RUBY_IMMEDIATE_MASK as u64)); - jnz_ptr(cb, side_exit); - cmp(cb, REG0, uimm_opnd(Qfalse.into())); - je_ptr(cb, side_exit); - cmp(cb, REG0, uimm_opnd(Qnil.into())); - je_ptr(cb, side_exit); + asm.test(recv_reg, (RUBY_IMMEDIATE_MASK as u64).into()); + asm.jnz(side_exit.into()); + asm.cmp(recv_reg, Qfalse.into()); + asm.je(side_exit.into()); + asm.cmp(recv_reg, Qnil.into()); + asm.je(side_exit.into()); // Bail if recv has a class other than ::Array. // BOP_AREF check above is only good for ::Array. - mov(cb, REG1, mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_KLASS)); - mov(cb, REG0, uimm_opnd(unsafe { rb_cArray }.into())); - cmp(cb, REG0, REG1); + asm.cmp(unsafe { rb_cArray }.into(), Opnd::mem(64, recv_reg, RUBY_OFFSET_RBASIC_KLASS)); jit_chain_guard( JCC_JNE, jit, &starting_context, - cb, + asm, ocb, OPT_AREF_MAX_CHAIN_DEPTH, side_exit, ); // Bail if idx is not a FIXNUM - mov(cb, REG1, idx_opnd); - test(cb, REG1, uimm_opnd(RUBY_FIXNUM_FLAG as u64)); - jz_ptr(cb, counted_exit!(ocb, side_exit, oaref_arg_not_fixnum)); + let idx_reg = asm.load(idx_opnd); + asm.test(idx_reg, (RUBY_FIXNUM_FLAG as u64).into()); + asm.jz(counted_exit!(ocb, side_exit, oaref_arg_not_fixnum).into()); // Call VALUE rb_ary_entry_internal(VALUE ary, long offset). // It never raises or allocates, so we don't need to write to cfp->pc. 
{ - mov(cb, RDI, recv_opnd); - sar(cb, REG1, uimm_opnd(1)); // Convert fixnum to int - mov(cb, RSI, REG1); - call_ptr(cb, REG0, rb_ary_entry_internal as *const u8); + let idx_reg = asm.rshift(idx_reg, Opnd::UImm(1)); // Convert fixnum to int + let val = asm.ccall(rb_ary_entry_internal as *const u8, vec![recv_opnd, idx_reg]); // Push the return value onto the stack let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, val); } // Jump to next instruction. This allows guard chains to share the same successor. - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); return EndBlock; } else if comptime_recv.class_of() == unsafe { rb_cHash } { if !assume_bop_not_redefined(jit, ocb, HASH_REDEFINED_OP_FLAG, BOP_AREF) { return CantCompile; } - let key_opnd = ctx.stack_opnd(0); let recv_opnd = ctx.stack_opnd(1); // Guard that the receiver is a hash - mov(cb, REG0, recv_opnd); jit_guard_known_klass( jit, ctx, - cb, + asm, ocb, unsafe { rb_cHash }, + recv_opnd, StackOpnd(1), comptime_recv, OPT_AREF_MAX_CHAIN_DEPTH, side_exit, ); - // Setup arguments for rb_hash_aref(). - mov(cb, C_ARG_REGS[0], REG0); - mov(cb, C_ARG_REGS[1], key_opnd); - // Prepare to call rb_hash_aref(). It might call #hash on the key. - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); - call_ptr(cb, REG0, rb_hash_aref as *const u8); + // Call rb_hash_aref + let key_opnd = ctx.stack_opnd(0); + let recv_opnd = ctx.stack_opnd(1); + let val = asm.ccall(rb_hash_aref as *const u8, vec![recv_opnd, key_opnd]); // Pop the key and the receiver ctx.stack_pop(2); // Push the return value onto the stack let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, val); // Jump to next instruction. This allows guard chains to share the same successor. - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); EndBlock } else { // General case. Call the [] method. 
- gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } } fn gen_opt_aset( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Defer compilation so we can specialize on a runtime `self` if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -2727,13 +2733,13 @@ fn gen_opt_aset( let side_exit = get_side_exit(jit, ocb, ctx); // Guard receiver is an Array - mov(cb, REG0, recv); jit_guard_known_klass( jit, ctx, - cb, + asm, ocb, unsafe { rb_cArray }, + recv, StackOpnd(2), comptime_recv, SEND_MAX_DEPTH, @@ -2741,89 +2747,87 @@ fn gen_opt_aset( ); // Guard key is a fixnum - mov(cb, REG0, key); jit_guard_known_klass( jit, ctx, - cb, + asm, ocb, unsafe { rb_cInteger }, + key, StackOpnd(1), comptime_key, SEND_MAX_DEPTH, side_exit, ); - // Call rb_ary_store - mov(cb, C_ARG_REGS[0], recv); - mov(cb, C_ARG_REGS[1], key); - sar(cb, C_ARG_REGS[1], uimm_opnd(1)); // FIX2LONG(key) - mov(cb, C_ARG_REGS[2], val); - // We might allocate or raise - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); - call_ptr(cb, REG0, rb_ary_store as *const u8); + // Call rb_ary_store + let recv = ctx.stack_opnd(2); + let key = asm.load(ctx.stack_opnd(1)); + let key = asm.rshift(key, Opnd::UImm(1)); // FIX2LONG(key) + let val = ctx.stack_opnd(0); + asm.ccall(rb_ary_store as *const u8, vec![recv, key, val]); // rb_ary_store returns void // stored value should still be on stack - mov(cb, REG0, ctx.stack_opnd(0)); + let val = asm.load(ctx.stack_opnd(0)); // Push the return value onto the stack ctx.stack_pop(3); let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, REG0); + asm.mov(stack_ret, val); - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); return EndBlock; } else if comptime_recv.class_of() == unsafe { rb_cHash } { let 
side_exit = get_side_exit(jit, ocb, ctx); // Guard receiver is a Hash - mov(cb, REG0, recv); jit_guard_known_klass( jit, ctx, - cb, + asm, ocb, unsafe { rb_cHash }, + recv, StackOpnd(2), comptime_recv, SEND_MAX_DEPTH, side_exit, ); - // Call rb_hash_aset - mov(cb, C_ARG_REGS[0], recv); - mov(cb, C_ARG_REGS[1], key); - mov(cb, C_ARG_REGS[2], val); - // We might allocate or raise - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); - call_ptr(cb, REG0, rb_hash_aset as *const u8); + // Call rb_hash_aset + let recv = ctx.stack_opnd(2); + let key = ctx.stack_opnd(1); + let val = ctx.stack_opnd(0); + let ret = asm.ccall(rb_hash_aset as *const u8, vec![recv, key, val]); // Push the return value onto the stack ctx.stack_pop(3); let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, ret); - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); EndBlock } else { - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } } fn gen_opt_and( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Defer compilation so we can specialize on a runtime `self` if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -2840,36 +2844,35 @@ fn gen_opt_and( } // Check that both operands are fixnums - guard_two_fixnums(ctx, cb, side_exit); + guard_two_fixnums(ctx, asm, side_exit); // Get the operands and destination from the stack let arg1 = ctx.stack_pop(1); let arg0 = ctx.stack_pop(1); // Do the bitwise and arg0 & arg1 - mov(cb, REG0, arg0); - and(cb, REG0, arg1); + let val = asm.and(arg0, arg1); // Push the output on the stack let dst = ctx.stack_push(Type::Fixnum); - mov(cb, dst, REG0); + asm.store(dst, val); KeepCompiling } else { // Delegate to send, call the method on the recv - 
gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } } fn gen_opt_or( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Defer compilation so we can specialize on a runtime `self` if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -2886,36 +2889,35 @@ fn gen_opt_or( } // Check that both operands are fixnums - guard_two_fixnums(ctx, cb, side_exit); + guard_two_fixnums(ctx, asm, side_exit); // Get the operands and destination from the stack let arg1 = ctx.stack_pop(1); let arg0 = ctx.stack_pop(1); // Do the bitwise or arg0 | arg1 - mov(cb, REG0, arg0); - or(cb, REG0, arg1); + let val = asm.or(arg0, arg1); // Push the output on the stack let dst = ctx.stack_push(Type::Fixnum); - mov(cb, dst, REG0); + asm.store(dst, val); KeepCompiling } else { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } } fn gen_opt_minus( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Defer compilation so we can specialize on a runtime `self` if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -2932,58 +2934,57 @@ fn gen_opt_minus( } // Check that both operands are fixnums - guard_two_fixnums(ctx, cb, side_exit); + guard_two_fixnums(ctx, asm, side_exit); // Get the operands and destination from the stack let arg1 = ctx.stack_pop(1); let arg0 = ctx.stack_pop(1); // Subtract arg0 - arg1 and test for overflow - mov(cb, REG0, arg0); - sub(cb, REG0, arg1); - jo_ptr(cb, side_exit); - add(cb, REG0, imm_opnd(1)); + let val_untag = asm.sub(arg0, arg1); + asm.jo(side_exit.into()); + let val = asm.add(val_untag, Opnd::Imm(1)); // Push the output 
on the stack let dst = ctx.stack_push(Type::Fixnum); - mov(cb, dst, REG0); + asm.store(dst, val); KeepCompiling } else { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } } fn gen_opt_mult( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } fn gen_opt_div( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } fn gen_opt_mod( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Defer compilation so we can specialize on a runtime `self` if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -3000,77 +3001,75 @@ fn gen_opt_mod( } // Check that both operands are fixnums - guard_two_fixnums(ctx, cb, side_exit); + guard_two_fixnums(ctx, asm, side_exit); // Get the operands and destination from the stack let arg1 = ctx.stack_pop(1); let arg0 = ctx.stack_pop(1); - mov(cb, C_ARG_REGS[0], arg0); - mov(cb, C_ARG_REGS[1], arg1); - // Check for arg0 % 0 - cmp(cb, C_ARG_REGS[1], imm_opnd(VALUE::fixnum_from_usize(0).as_i64())); - je_ptr(cb, side_exit); + asm.cmp(arg1, Opnd::Imm(VALUE::fixnum_from_usize(0).as_i64())); + asm.je(side_exit.into()); // Call rb_fix_mod_fix(VALUE recv, VALUE obj) - call_ptr(cb, REG0, rb_fix_mod_fix as *const u8); + let ret = asm.ccall(rb_fix_mod_fix as *const u8, vec![arg0, arg1]); // Push the return value onto the stack let stack_ret = ctx.stack_push(Type::Unknown); 
- mov(cb, stack_ret, RAX); + asm.mov(stack_ret, ret); KeepCompiling } else { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } } fn gen_opt_ltlt( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } fn gen_opt_nil_p( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } fn gen_opt_empty_p( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } fn gen_opt_succ( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Delegate to send, call the method on the recv - gen_opt_send_without_block(jit, ctx, cb, ocb) + gen_opt_send_without_block(jit, ctx, asm, ocb) } + fn gen_opt_str_freeze( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { if !assume_bop_not_redefined(jit, ocb, STRING_REDEFINED_OP_FLAG, BOP_FREEZE) { @@ -3078,11 +3077,10 @@ fn gen_opt_str_freeze( } let str = jit_get_arg(jit, 0); - jit_mov_gc_ptr(jit, cb, REG0, str); // Push the return value onto the stack let stack_ret = ctx.stack_push(Type::CString); - mov(cb, stack_ret, REG0); + asm.mov(stack_ret, str.into()); KeepCompiling } @@ -3090,7 +3088,7 @@ fn gen_opt_str_freeze( fn gen_opt_str_uminus( jit: &mut 
JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { if !assume_bop_not_redefined(jit, ocb, STRING_REDEFINED_OP_FLAG, BOP_UMINUS) { @@ -3098,11 +3096,10 @@ fn gen_opt_str_uminus( } let str = jit_get_arg(jit, 0); - jit_mov_gc_ptr(jit, cb, REG0, str); // Push the return value onto the stack let stack_ret = ctx.stack_push(Type::CString); - mov(cb, stack_ret, REG0); + asm.mov(stack_ret, str.into()); KeepCompiling } @@ -3110,43 +3107,43 @@ fn gen_opt_str_uminus( fn gen_opt_not( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { - return gen_opt_send_without_block(jit, ctx, cb, ocb); + return gen_opt_send_without_block(jit, ctx, asm, ocb); } fn gen_opt_size( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { - return gen_opt_send_without_block(jit, ctx, cb, ocb); + return gen_opt_send_without_block(jit, ctx, asm, ocb); } fn gen_opt_length( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { - return gen_opt_send_without_block(jit, ctx, cb, ocb); + return gen_opt_send_without_block(jit, ctx, asm, ocb); } fn gen_opt_regexpmatch2( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { - return gen_opt_send_without_block(jit, ctx, cb, ocb); + return gen_opt_send_without_block(jit, ctx, asm, ocb); } fn gen_opt_case_dispatch( _jit: &mut JITState, ctx: &mut Context, - _cb: &mut CodeBlock, + _asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { // Normally this instruction would lookup the key in a hash and jump to an @@ -3163,7 +3160,7 @@ fn gen_opt_case_dispatch( } fn gen_branchif_branch( - cb: &mut CodeBlock, + asm: &mut Assembler, target0: CodePtr, target1: Option, shape: BranchShape, @@ -3171,14 +3168,14 
@@ fn gen_branchif_branch( assert!(target1 != None); match shape { BranchShape::Next0 => { - jz_ptr(cb, target1.unwrap()); + asm.jz(target1.unwrap().into()); } BranchShape::Next1 => { - jnz_ptr(cb, target0); + asm.jnz(target0.into()); } BranchShape::Default => { - jnz_ptr(cb, target0); - jmp_ptr(cb, target1.unwrap()); + asm.jnz(target0.into()); + asm.jmp(target1.unwrap().into()); } } } @@ -3186,7 +3183,7 @@ fn gen_branchif_branch( fn gen_branchif( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let jump_offset = jit_get_arg(jit, 0).as_i32(); @@ -3194,14 +3191,14 @@ fn gen_branchif( // Check for interrupts, but only on backward branches that may create loops if jump_offset < 0 { let side_exit = get_side_exit(jit, ocb, ctx); - gen_check_ints(cb, side_exit); + gen_check_ints(asm, side_exit); } // Test if any bit (outside of the Qnil bit) is on // RUBY_Qfalse /* ...0000 0000 */ // RUBY_Qnil /* ...0000 1000 */ let val_opnd = ctx.stack_pop(1); - test(cb, val_opnd, imm_opnd(!Qnil.as_i64())); + asm.test(val_opnd, Opnd::Imm(!Qnil.as_i64())); // Get the branch target instruction offsets let next_idx = jit_next_insn_idx(jit); @@ -3219,7 +3216,7 @@ fn gen_branchif( gen_branch( jit, ctx, - cb, + asm, ocb, jump_block, ctx, @@ -3232,17 +3229,17 @@ fn gen_branchif( } fn gen_branchunless_branch( - cb: &mut CodeBlock, + asm: &mut Assembler, target0: CodePtr, target1: Option, shape: BranchShape, ) { match shape { - BranchShape::Next0 => jnz_ptr(cb, target1.unwrap()), - BranchShape::Next1 => jz_ptr(cb, target0), + BranchShape::Next0 => asm.jnz(target1.unwrap().into()), + BranchShape::Next1 => asm.jz(target0.into()), BranchShape::Default => { - jz_ptr(cb, target0); - jmp_ptr(cb, target1.unwrap()); + asm.jz(target0.into()); + asm.jmp(target1.unwrap().into()); } } } @@ -3250,7 +3247,7 @@ fn gen_branchunless_branch( fn gen_branchunless( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut 
Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let jump_offset = jit_get_arg(jit, 0).as_i32(); @@ -3258,14 +3255,15 @@ fn gen_branchunless( // Check for interrupts, but only on backward branches that may create loops if jump_offset < 0 { let side_exit = get_side_exit(jit, ocb, ctx); - gen_check_ints(cb, side_exit); + gen_check_ints(asm, side_exit); } // Test if any bit (outside of the Qnil bit) is on // RUBY_Qfalse /* ...0000 0000 */ // RUBY_Qnil /* ...0000 1000 */ let val_opnd = ctx.stack_pop(1); - test(cb, val_opnd, imm_opnd(!Qnil.as_i64())); + let not_qnil = !Qnil.as_i64(); + asm.test(val_opnd, not_qnil.into()); // Get the branch target instruction offsets let next_idx = jit_next_insn_idx(jit) as i32; @@ -3283,7 +3281,7 @@ fn gen_branchunless( gen_branch( jit, ctx, - cb, + asm, ocb, jump_block, ctx, @@ -3296,17 +3294,17 @@ fn gen_branchunless( } fn gen_branchnil_branch( - cb: &mut CodeBlock, + asm: &mut Assembler, target0: CodePtr, target1: Option, shape: BranchShape, ) { match shape { - BranchShape::Next0 => jne_ptr(cb, target1.unwrap()), - BranchShape::Next1 => je_ptr(cb, target0), + BranchShape::Next0 => asm.jne(target1.unwrap().into()), + BranchShape::Next1 => asm.je(target0.into()), BranchShape::Default => { - je_ptr(cb, target0); - jmp_ptr(cb, target1.unwrap()); + asm.je(target0.into()); + asm.jmp(target1.unwrap().into()); } } } @@ -3314,7 +3312,7 @@ fn gen_branchnil_branch( fn gen_branchnil( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let jump_offset = jit_get_arg(jit, 0).as_i32(); @@ -3322,13 +3320,13 @@ fn gen_branchnil( // Check for interrupts, but only on backward branches that may create loops if jump_offset < 0 { let side_exit = get_side_exit(jit, ocb, ctx); - gen_check_ints(cb, side_exit); + gen_check_ints(asm, side_exit); } // Test if the value is Qnil // RUBY_Qnil /* ...0000 1000 */ let val_opnd = ctx.stack_pop(1); - cmp(cb, val_opnd, 
uimm_opnd(Qnil.into())); + asm.cmp(val_opnd, Opnd::UImm(Qnil.into())); // Get the branch target instruction offsets let next_idx = jit_next_insn_idx(jit) as i32; @@ -3346,7 +3344,7 @@ fn gen_branchnil( gen_branch( jit, ctx, - cb, + asm, ocb, jump_block, ctx, @@ -3361,7 +3359,7 @@ fn gen_branchnil( fn gen_jump( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let jump_offset = jit_get_arg(jit, 0).as_i32(); @@ -3369,7 +3367,7 @@ fn gen_jump( // Check for interrupts, but only on backward branches that may create loops if jump_offset < 0 { let side_exit = get_side_exit(jit, ocb, ctx); - gen_check_ints(cb, side_exit); + gen_check_ints(asm, side_exit); } // Get the branch target instruction offsets @@ -3380,7 +3378,7 @@ fn gen_jump( }; // Generate the jump instruction - gen_direct_jump(jit, ctx, jump_block, cb); + gen_direct_jump(jit, ctx, jump_block, asm); EndBlock } @@ -3391,13 +3389,13 @@ fn gen_jump( /// the guard generated for one will fail for the other. /// /// Recompile as contingency if possible, or take side exit a last resort. 
- fn jit_guard_known_klass( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, known_klass: VALUE, + obj_opnd: Opnd, insn_opnd: InsnOpnd, sample_instance: VALUE, max_chain_depth: i32, @@ -3405,52 +3403,48 @@ fn jit_guard_known_klass( ) { let val_type = ctx.get_opnd_type(insn_opnd); + if val_type.known_class() == Some(known_klass) { + // We already know from type information that this is a match + return; + } + if unsafe { known_klass == rb_cNilClass } { assert!(!val_type.is_heap()); - if val_type != Type::Nil { - assert!(val_type.is_unknown()); + assert!(val_type.is_unknown()); - add_comment(cb, "guard object is nil"); - cmp(cb, REG0, imm_opnd(Qnil.into())); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); + asm.comment("guard object is nil"); + asm.cmp(obj_opnd, Qnil.into()); + jit_chain_guard(JCC_JNE, jit, ctx, asm, ocb, max_chain_depth, side_exit); - ctx.upgrade_opnd_type(insn_opnd, Type::Nil); - } + ctx.upgrade_opnd_type(insn_opnd, Type::Nil); } else if unsafe { known_klass == rb_cTrueClass } { assert!(!val_type.is_heap()); - if val_type != Type::True { - assert!(val_type.is_unknown()); + assert!(val_type.is_unknown()); - add_comment(cb, "guard object is true"); - cmp(cb, REG0, imm_opnd(Qtrue.into())); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); + asm.comment("guard object is true"); + asm.cmp(obj_opnd, Qtrue.into()); + jit_chain_guard(JCC_JNE, jit, ctx, asm, ocb, max_chain_depth, side_exit); - ctx.upgrade_opnd_type(insn_opnd, Type::True); - } + ctx.upgrade_opnd_type(insn_opnd, Type::True); } else if unsafe { known_klass == rb_cFalseClass } { assert!(!val_type.is_heap()); - if val_type != Type::False { - assert!(val_type.is_unknown()); + assert!(val_type.is_unknown()); - add_comment(cb, "guard object is false"); - assert!(Qfalse.as_i32() == 0); - test(cb, REG0, REG0); - jit_chain_guard(JCC_JNZ, jit, ctx, cb, ocb, max_chain_depth, side_exit); + 
asm.comment("guard object is false"); + assert!(Qfalse.as_i32() == 0); + asm.test(obj_opnd, obj_opnd); + jit_chain_guard(JCC_JNZ, jit, ctx, asm, ocb, max_chain_depth, side_exit); - ctx.upgrade_opnd_type(insn_opnd, Type::False); - } + ctx.upgrade_opnd_type(insn_opnd, Type::False); } else if unsafe { known_klass == rb_cInteger } && sample_instance.fixnum_p() { - assert!(!val_type.is_heap()); // We will guard fixnum and bignum as though they were separate classes // BIGNUM can be handled by the general else case below - if val_type != Type::Fixnum || !val_type.is_imm() { - assert!(val_type.is_unknown()); + assert!(val_type.is_unknown()); - add_comment(cb, "guard object is fixnum"); - test(cb, REG0, imm_opnd(RUBY_FIXNUM_FLAG as i64)); - jit_chain_guard(JCC_JZ, jit, ctx, cb, ocb, max_chain_depth, side_exit); - ctx.upgrade_opnd_type(insn_opnd, Type::Fixnum); - } + asm.comment("guard object is fixnum"); + asm.test(obj_opnd, Opnd::Imm(RUBY_FIXNUM_FLAG as i64)); + jit_chain_guard(JCC_JZ, jit, ctx, asm, ocb, max_chain_depth, side_exit); + ctx.upgrade_opnd_type(insn_opnd, Type::Fixnum); } else if unsafe { known_klass == rb_cSymbol } && sample_instance.static_sym_p() { assert!(!val_type.is_heap()); // We will guard STATIC vs DYNAMIC as though they were separate classes @@ -3458,10 +3452,11 @@ fn jit_guard_known_klass( if val_type != Type::ImmSymbol || !val_type.is_imm() { assert!(val_type.is_unknown()); - add_comment(cb, "guard object is static symbol"); + asm.comment("guard object is static symbol"); assert!(RUBY_SPECIAL_SHIFT == 8); - cmp(cb, REG0_8, uimm_opnd(RUBY_SYMBOL_FLAG as u64)); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); + let flag_bits = asm.and(obj_opnd, Opnd::UImm(0xf)); + asm.cmp(flag_bits, Opnd::UImm(RUBY_SYMBOL_FLAG as u64)); + jit_chain_guard(JCC_JNE, jit, ctx, asm, ocb, max_chain_depth, side_exit); ctx.upgrade_opnd_type(insn_opnd, Type::ImmSymbol); } } else if unsafe { known_klass == rb_cFloat } && sample_instance.flonum_p() { 
@@ -3470,11 +3465,10 @@ fn jit_guard_known_klass( assert!(val_type.is_unknown()); // We will guard flonum vs heap float as though they were separate classes - add_comment(cb, "guard object is flonum"); - mov(cb, REG1, REG0); - and(cb, REG1, uimm_opnd(RUBY_FLONUM_MASK as u64)); - cmp(cb, REG1, uimm_opnd(RUBY_FLONUM_FLAG as u64)); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); + asm.comment("guard object is flonum"); + let flag_bits = asm.and(obj_opnd, Opnd::UImm(RUBY_FLONUM_MASK as u64)); + asm.cmp(flag_bits, Opnd::UImm(RUBY_FLONUM_FLAG as u64)); + jit_chain_guard(JCC_JNE, jit, ctx, asm, ocb, max_chain_depth, side_exit); ctx.upgrade_opnd_type(insn_opnd, Type::Flonum); } } else if unsafe { @@ -3491,11 +3485,9 @@ fn jit_guard_known_klass( // that its singleton class is empty, so we can't avoid the memory // access. As an example, `Object.new.singleton_class` is an object in // this situation. - add_comment(cb, "guard known object with singleton class"); - // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the object. - jit_mov_gc_ptr(jit, cb, REG1, sample_instance); - cmp(cb, REG0, REG1); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); + asm.comment("guard known object with singleton class"); + asm.cmp(obj_opnd, sample_instance.into()); + jit_chain_guard(JCC_JNE, jit, ctx, asm, ocb, max_chain_depth, side_exit); } else if val_type == Type::CString && unsafe { known_klass == rb_cString } { // guard elided because the context says we've already checked unsafe { @@ -3507,24 +3499,28 @@ fn jit_guard_known_klass( // Check that the receiver is a heap object // Note: if we get here, the class doesn't have immediate instances. 
if !val_type.is_heap() { - add_comment(cb, "guard not immediate"); + asm.comment("guard not immediate"); assert!(Qfalse.as_i32() < Qnil.as_i32()); - test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK as i64)); - jit_chain_guard(JCC_JNZ, jit, ctx, cb, ocb, max_chain_depth, side_exit); - cmp(cb, REG0, imm_opnd(Qnil.into())); - jit_chain_guard(JCC_JBE, jit, ctx, cb, ocb, max_chain_depth, side_exit); + asm.test(obj_opnd, Opnd::Imm(RUBY_IMMEDIATE_MASK as i64)); + jit_chain_guard(JCC_JNZ, jit, ctx, asm, ocb, max_chain_depth, side_exit); + asm.cmp(obj_opnd, Qnil.into()); + jit_chain_guard(JCC_JBE, jit, ctx, asm, ocb, max_chain_depth, side_exit); ctx.upgrade_opnd_type(insn_opnd, Type::UnknownHeap); } - let klass_opnd = mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_KLASS); + // If obj_opnd isn't already a register, load it. + let obj_opnd = match obj_opnd { + Opnd::Reg(_) => obj_opnd, + _ => asm.load(obj_opnd), + }; + let klass_opnd = Opnd::mem(64, obj_opnd, RUBY_OFFSET_RBASIC_KLASS); // Bail if receiver class is different from known_klass // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the class. - add_comment(cb, "guard known class"); - jit_mov_gc_ptr(jit, cb, REG1, known_klass); - cmp(cb, klass_opnd, REG1); - jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit); + asm.comment("guard known class"); + asm.cmp(klass_opnd, known_klass.into()); + jit_chain_guard(JCC_JNE, jit, ctx, asm, ocb, max_chain_depth, side_exit); if known_klass == unsafe { rb_cString } { ctx.upgrade_opnd_type(insn_opnd, Type::CString); @@ -3536,28 +3532,25 @@ fn jit_guard_known_klass( // Calls to protected callees only go through when self.is_a?(klass_that_defines_the_callee). fn jit_protected_callee_ancestry_guard( jit: &mut JITState, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, cme: *const rb_callable_method_entry_t, side_exit: CodePtr, ) { // See vm_call_method(). 
- mov( - cb, - C_ARG_REGS[0], - mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF), - ); let def_class = unsafe { (*cme).defined_class }; - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], def_class); // Note: PC isn't written to current control frame as rb_is_kind_of() shouldn't raise. // VALUE rb_obj_is_kind_of(VALUE obj, VALUE klass); - call_ptr(cb, REG0, rb_obj_is_kind_of as *mut u8); - test(cb, RAX, RAX); - jz_ptr( - cb, - counted_exit!(ocb, side_exit, send_se_protected_check_failed), + let val = asm.ccall( + rb_obj_is_kind_of as *mut u8, + vec![ + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF), + def_class.into(), + ], ); + asm.test(val, val); + asm.jz(counted_exit!(ocb, side_exit, send_se_protected_check_failed).into()) } // Codegen for rb_obj_not(). @@ -3566,7 +3559,7 @@ fn jit_protected_callee_ancestry_guard( fn jit_rb_obj_not( _jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, @@ -3576,23 +3569,25 @@ fn jit_rb_obj_not( ) -> bool { let recv_opnd = ctx.get_opnd_type(StackOpnd(0)); - if recv_opnd == Type::Nil || recv_opnd == Type::False { - add_comment(cb, "rb_obj_not(nil_or_false)"); - ctx.stack_pop(1); - let out_opnd = ctx.stack_push(Type::True); - mov(cb, out_opnd, uimm_opnd(Qtrue.into())); - } else if recv_opnd.is_heap() || recv_opnd.is_specific() { - // Note: recv_opnd != Type::Nil && recv_opnd != Type::False. - add_comment(cb, "rb_obj_not(truthy)"); - ctx.stack_pop(1); - let out_opnd = ctx.stack_push(Type::False); - mov(cb, out_opnd, uimm_opnd(Qfalse.into())); - } else { - // jit_guard_known_klass() already ran on the receiver which should - // have deduced deduced the type of the receiver. This case should be - // rare if not unreachable. 
- return false; + match recv_opnd.known_truthy() { + Some(false) => { + asm.comment("rb_obj_not(nil_or_false)"); + ctx.stack_pop(1); + let out_opnd = ctx.stack_push(Type::True); + asm.mov(out_opnd, Qtrue.into()); + }, + Some(true) => { + // Note: recv_opnd != Type::Nil && recv_opnd != Type::False. + asm.comment("rb_obj_not(truthy)"); + ctx.stack_pop(1); + let out_opnd = ctx.stack_push(Type::False); + asm.mov(out_opnd, Qfalse.into()); + }, + _ => { + return false; + }, } + true } @@ -3600,7 +3595,7 @@ fn jit_rb_obj_not( fn jit_rb_true( _jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, @@ -3608,10 +3603,10 @@ fn jit_rb_true( _argc: i32, _known_recv_class: *const VALUE, ) -> bool { - add_comment(cb, "nil? == true"); + asm.comment("nil? == true"); ctx.stack_pop(1); let stack_ret = ctx.stack_push(Type::True); - mov(cb, stack_ret, uimm_opnd(Qtrue.into())); + asm.mov(stack_ret, Qtrue.into()); true } @@ -3619,7 +3614,7 @@ fn jit_rb_true( fn jit_rb_false( _jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, @@ -3627,10 +3622,10 @@ fn jit_rb_false( _argc: i32, _known_recv_class: *const VALUE, ) -> bool { - add_comment(cb, "nil? == false"); + asm.comment("nil? 
== false"); ctx.stack_pop(1); let stack_ret = ctx.stack_push(Type::False); - mov(cb, stack_ret, uimm_opnd(Qfalse.into())); + asm.mov(stack_ret, Qfalse.into()); true } @@ -3639,7 +3634,7 @@ fn jit_rb_false( fn jit_rb_obj_equal( _jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, @@ -3647,18 +3642,15 @@ fn jit_rb_obj_equal( _argc: i32, _known_recv_class: *const VALUE, ) -> bool { - add_comment(cb, "equal?"); + asm.comment("equal?"); let obj1 = ctx.stack_pop(1); let obj2 = ctx.stack_pop(1); - mov(cb, REG0, obj1); - cmp(cb, REG0, obj2); - mov(cb, REG0, uimm_opnd(Qtrue.into())); - mov(cb, REG1, uimm_opnd(Qfalse.into())); - cmovne(cb, REG0, REG1); + asm.cmp(obj1, obj2); + let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into()); let stack_ret = ctx.stack_push(Type::UnknownImm); - mov(cb, stack_ret, REG0); + asm.mov(stack_ret, ret_opnd); true } @@ -3666,7 +3658,7 @@ fn jit_rb_obj_equal( fn jit_rb_str_uplus( _jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, @@ -3675,35 +3667,33 @@ fn jit_rb_str_uplus( _known_recv_class: *const VALUE, ) -> bool { - let recv = ctx.stack_pop(1); + asm.comment("Unary plus on string"); + let recv_opnd = asm.load(ctx.stack_pop(1)); + let flags_opnd = asm.load(Opnd::mem(64, recv_opnd, RUBY_OFFSET_RBASIC_FLAGS)); + asm.test(flags_opnd, Opnd::Imm(RUBY_FL_FREEZE as i64)); - add_comment(cb, "Unary plus on string"); - mov(cb, REG0, recv); - mov(cb, REG1, mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS)); - test(cb, REG1, imm_opnd(RUBY_FL_FREEZE as i64)); + let ret_label = asm.new_label("stack_ret"); - let ret_label = cb.new_label("stack_ret".to_string()); - // If the string isn't frozen, we just return it. It's already in REG0. 
- jz_label(cb, ret_label); + // We guard for the receiver being a ::String, so the return value is too + let stack_ret = ctx.stack_push(Type::CString); - // Str is frozen - duplicate - mov(cb, C_ARG_REGS[0], REG0); - call_ptr(cb, REG0, rb_str_dup as *const u8); - // Return value is in REG0, drop through and return it. + // If the string isn't frozen, we just return it. + asm.mov(stack_ret, recv_opnd); + asm.jz(ret_label); - cb.write_label(ret_label); - // We guard for an exact-class match on the receiver of rb_cString - let stack_ret = ctx.stack_push(Type::CString); - mov(cb, stack_ret, REG0); + // Str is frozen - duplicate it + let ret_opnd = asm.ccall(rb_str_dup as *const u8, vec![recv_opnd]); + asm.mov(stack_ret, ret_opnd); + + asm.write_label(ret_label); - cb.link_labels(); true } fn jit_rb_str_bytesize( _jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, @@ -3711,14 +3701,13 @@ fn jit_rb_str_bytesize( _argc: i32, _known_recv_class: *const VALUE, ) -> bool { - add_comment(cb, "String#bytesize"); + asm.comment("String#bytesize"); let recv = ctx.stack_pop(1); - mov(cb, C_ARG_REGS[0], recv); - call_ptr(cb, REG0, rb_str_bytesize as *const u8); + let ret_opnd = asm.ccall(rb_str_bytesize as *const u8, vec![recv]); let out_opnd = ctx.stack_push(Type::Fixnum); - mov(cb, out_opnd, RAX); + asm.mov(out_opnd, ret_opnd); true } @@ -3730,7 +3719,7 @@ fn jit_rb_str_bytesize( fn jit_rb_str_to_s( _jit: &mut JITState, _ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, @@ -3739,7 +3728,7 @@ fn jit_rb_str_to_s( known_recv_class: *const VALUE, ) -> bool { if !known_recv_class.is_null() && unsafe { *known_recv_class == rb_cString } { - add_comment(cb, "to_s on plain string"); + asm.comment("to_s on plain string"); // The method returns the receiver, which is 
already on the stack. // No stack movement. return true; @@ -3747,13 +3736,13 @@ fn jit_rb_str_to_s( false } -// Codegen for rb_str_concat() +// Codegen for rb_str_concat() -- *not* String#concat // Frequently strings are concatenated using "out_str << next_str". // This is common in Erb and similar templating languages. fn jit_rb_str_concat( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, @@ -3761,14 +3750,12 @@ fn jit_rb_str_concat( _argc: i32, _known_recv_class: *const VALUE, ) -> bool { + // The << operator can accept integer codepoints for characters + // as the argument. We only specially optimise string arguments. + // If the peeked-at compile time argument is something other than + // a string, assume it won't be a string later either. let comptime_arg = jit_peek_at_stack(jit, ctx, 0); - let comptime_arg_type = ctx.get_opnd_type(StackOpnd(0)); - - // String#<< can take an integer codepoint as an argument, but we don't optimise that. - // Also, a non-string argument would have to call .to_str on itself before being treated - // as a string, and that would require saving pc/sp, which we don't do here. - // TODO: figure out how we should optimise a string-subtype argument here - if comptime_arg_type != Type::CString && comptime_arg.class_of() != unsafe { rb_cString } { + if ! unsafe { RB_TYPE_P(comptime_arg, RUBY_T_STRING) } { return false; } @@ -3776,65 +3763,66 @@ fn jit_rb_str_concat( let side_exit = get_side_exit(jit, ocb, ctx); // Guard that the argument is of class String at runtime. 
- let arg_opnd = ctx.stack_opnd(0); - mov(cb, REG0, arg_opnd); - jit_guard_known_klass( - jit, - ctx, - cb, - ocb, - unsafe { rb_cString }, - StackOpnd(0), - comptime_arg, - SEND_MAX_DEPTH, - side_exit, - ); + let arg_type = ctx.get_opnd_type(StackOpnd(0)); let concat_arg = ctx.stack_pop(1); let recv = ctx.stack_pop(1); + // If we're not compile-time certain that this will always be a string, guard at runtime + if arg_type != Type::CString && arg_type != Type::TString { + let arg_opnd = asm.load(concat_arg); + if !arg_type.is_heap() { + asm.comment("guard arg not immediate"); + asm.test(arg_opnd, Opnd::UImm(RUBY_IMMEDIATE_MASK as u64)); + asm.jnz(side_exit.into()); + asm.cmp(arg_opnd, Qnil.into()); + asm.jbe(side_exit.into()); + } + guard_object_is_string(asm, arg_opnd, side_exit); + } + // Test if string encodings differ. If different, use rb_str_append. If the same, // use rb_yjit_str_simple_append, which calls rb_str_cat. - add_comment(cb, "<< on strings"); - - // Both rb_str_append and rb_yjit_str_simple_append take identical args - mov(cb, C_ARG_REGS[0], recv); - mov(cb, C_ARG_REGS[1], concat_arg); + asm.comment("<< on strings"); // Take receiver's object flags XOR arg's flags. If any // string-encoding flags are different between the two, // the encodings don't match. - mov(cb, REG0, recv); - mov(cb, REG1, concat_arg); - mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS)); - xor(cb, REG0, mem_opnd(64, REG1, RUBY_OFFSET_RBASIC_FLAGS)); - test(cb, REG0, uimm_opnd(RUBY_ENCODING_MASK as u64)); + let recv_reg = asm.load(recv); + let concat_arg_reg = asm.load(concat_arg); + let flags_xor = asm.xor( + Opnd::mem(64, recv_reg, RUBY_OFFSET_RBASIC_FLAGS), + Opnd::mem(64, concat_arg_reg, RUBY_OFFSET_RBASIC_FLAGS) + ); + asm.test(flags_xor, Opnd::UImm(RUBY_ENCODING_MASK as u64)); - let enc_mismatch = cb.new_label("enc_mismatch".to_string()); - jne_label(cb, enc_mismatch); + // Push once, use the resulting operand in both branches below. 
+ let stack_ret = ctx.stack_push(Type::CString); + + let enc_mismatch = asm.new_label("enc_mismatch"); + asm.jnz(enc_mismatch); // If encodings match, call the simple append function and jump to return - call_ptr(cb, REG0, rb_yjit_str_simple_append as *const u8); - let ret_label: usize = cb.new_label("stack_return".to_string()); - jmp_label(cb, ret_label); + let ret_opnd = asm.ccall(rb_yjit_str_simple_append as *const u8, vec![recv, concat_arg]); + let ret_label = asm.new_label("func_return"); + asm.mov(stack_ret, ret_opnd); + asm.jmp(ret_label); // If encodings are different, use a slower encoding-aware concatenate - cb.write_label(enc_mismatch); - call_ptr(cb, REG0, rb_str_buf_append as *const u8); + asm.write_label(enc_mismatch); + let ret_opnd = asm.ccall(rb_str_buf_append as *const u8, vec![recv, concat_arg]); + asm.mov(stack_ret, ret_opnd); // Drop through to return - cb.write_label(ret_label); - let stack_ret = ctx.stack_push(Type::CString); - mov(cb, stack_ret, RAX); + asm.write_label(ret_label); - cb.link_labels(); true } fn jit_thread_s_current( _jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, _ci: *const rb_callinfo, _cme: *const rb_callable_method_entry_t, @@ -3842,19 +3830,17 @@ fn jit_thread_s_current( _argc: i32, _known_recv_class: *const VALUE, ) -> bool { - add_comment(cb, "Thread.current"); + asm.comment("Thread.current"); ctx.stack_pop(1); // ec->thread_ptr - let ec_thread_ptr = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_THREAD_PTR); - mov(cb, REG0, ec_thread_ptr); + let ec_thread_opnd = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_THREAD_PTR)); // thread->self - let thread_self = mem_opnd(64, REG0, RUBY_OFFSET_THREAD_SELF); - mov(cb, REG0, thread_self); + let thread_self = Opnd::mem(64, ec_thread_opnd, RUBY_OFFSET_THREAD_SELF); let stack_ret = ctx.stack_push(Type::UnknownHeap); - mov(cb, stack_ret, REG0); + asm.mov(stack_ret, thread_self); true } @@ -3893,7 +3879,7 @@ unsafe extern "C" fn 
build_kwhash(ci: *const rb_callinfo, sp: *const VALUE) -> V fn gen_send_cfunc( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ci: *const rb_callinfo, cme: *const rb_callable_method_entry_t, @@ -3906,7 +3892,7 @@ fn gen_send_cfunc( // If the function expects a Ruby array of arguments if cfunc_argc < 0 && cfunc_argc != -1 { - gen_counter_incr!(cb, send_cfunc_ruby_array_varg); + gen_counter_incr!(asm, send_cfunc_ruby_array_varg); return CantCompile; } @@ -3927,19 +3913,19 @@ fn gen_send_cfunc( // If the argument count doesn't match if cfunc_argc >= 0 && cfunc_argc != passed_argc { - gen_counter_incr!(cb, send_cfunc_argc_mismatch); + gen_counter_incr!(asm, send_cfunc_argc_mismatch); return CantCompile; } // Don't JIT functions that need C stack arguments for now - if cfunc_argc >= 0 && passed_argc + 1 > (C_ARG_REGS.len() as i32) { - gen_counter_incr!(cb, send_cfunc_toomany_args); + if cfunc_argc >= 0 && passed_argc + 1 > (C_ARG_OPNDS.len() as i32) { + gen_counter_incr!(asm, send_cfunc_toomany_args); return CantCompile; } if c_method_tracing_currently_enabled(jit) { // Don't JIT if tracing c_call or c_return - gen_counter_incr!(cb, send_cfunc_tracing); + gen_counter_incr!(asm, send_cfunc_tracing); return CantCompile; } @@ -3947,16 +3933,10 @@ fn gen_send_cfunc( if kw_arg.is_null() { let codegen_p = lookup_cfunc_codegen(unsafe { (*cme).def }); if let Some(known_cfunc_codegen) = codegen_p { - let start_pos = cb.get_write_ptr().raw_ptr() as usize; - if known_cfunc_codegen(jit, ctx, cb, ocb, ci, cme, block, argc, recv_known_klass) { - let written_bytes = cb.get_write_ptr().raw_ptr() as usize - start_pos; - if written_bytes < JUMP_SIZE_IN_BYTES { - add_comment(cb, "Writing NOPs to leave room for later invalidation code"); - nop(cb, (JUMP_SIZE_IN_BYTES - written_bytes) as u32); - } + if known_cfunc_codegen(jit, ctx, asm, ocb, ci, cme, block, argc, recv_known_klass) { // cfunc codegen generated code. 
Terminate the block so // there isn't multiple calls in the same block. - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); return EndBlock; } } @@ -3966,57 +3946,49 @@ fn gen_send_cfunc( let side_exit = get_side_exit(jit, ocb, ctx); // Check for interrupts - gen_check_ints(cb, side_exit); + gen_check_ints(asm, side_exit); // Stack overflow check // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin) // REG_CFP <= REG_SP + 4 * SIZEOF_VALUE + sizeof(rb_control_frame_t) - add_comment(cb, "stack overflow check"); - lea( - cb, - REG0, - ctx.sp_opnd((SIZEOF_VALUE * 4 + 2 * RUBY_SIZEOF_CONTROL_FRAME) as isize), - ); - cmp(cb, REG_CFP, REG0); - jle_ptr(cb, counted_exit!(ocb, side_exit, send_se_cf_overflow)); + asm.comment("stack overflow check"); + let stack_limit = asm.lea(ctx.sp_opnd((SIZEOF_VALUE * 4 + 2 * RUBY_SIZEOF_CONTROL_FRAME) as isize)); + asm.cmp(CFP, stack_limit); + asm.jbe(counted_exit!(ocb, side_exit, send_se_cf_overflow).into()); // Points to the receiver operand on the stack let recv = ctx.stack_opnd(argc); // Store incremented PC into current control frame in case callee raises. - jit_save_pc(jit, cb, REG0); + jit_save_pc(jit, asm); if let Some(block_iseq) = block { // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block(). // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases // with cfp->block_code. - jit_mov_gc_ptr(jit, cb, REG0, VALUE(block_iseq as usize)); - let block_code_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BLOCK_CODE); - mov(cb, block_code_opnd, REG0); + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE::from(block_iseq).into()); } // Increment the stack pointer by 3 (in the callee) // sp += 3 - lea(cb, REG0, ctx.sp_opnd((SIZEOF_VALUE as isize) * 3)); + let sp = asm.lea(ctx.sp_opnd((SIZEOF_VALUE as isize) * 3)); // Write method entry at sp[-3] // sp[-3] = me; // Put compile time cme into REG1. 
It's assumed to be valid because we are notified when // any cme we depend on become outdated. See yjit_method_lookup_change(). - jit_mov_gc_ptr(jit, cb, REG1, VALUE(cme as usize)); - mov(cb, mem_opnd(64, REG0, 8 * -3), REG1); + asm.mov(Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -3), Opnd::UImm(cme as u64)); // Write block handler at sp[-2] // sp[-2] = block_handler; if let Some(_block_iseq) = block { // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp)); - let cfp_self = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF); - lea(cb, REG1, cfp_self); - or(cb, REG1, imm_opnd(1)); - mov(cb, mem_opnd(64, REG0, 8 * -2), REG1); + let cfp_self = asm.lea(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)); + let block_handler = asm.or(cfp_self, Opnd::Imm(1)); + asm.store(Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -2), block_handler); } else { - let dst_opnd = mem_opnd(64, REG0, 8 * -2); - mov(cb, dst_opnd, uimm_opnd(VM_BLOCK_HANDLER_NONE.into())); + let dst_opnd = Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -2); + asm.store(dst_opnd, Opnd::UImm(VM_BLOCK_HANDLER_NONE.into())); } // Write env flags at sp[-1] @@ -4025,11 +3997,12 @@ fn gen_send_cfunc( if !kw_arg.is_null() { frame_type |= VM_FRAME_FLAG_CFRAME_KW } - mov(cb, mem_opnd(64, REG0, 8 * -1), uimm_opnd(frame_type.into())); + asm.store(Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -1), Opnd::UImm(frame_type.into())); // Allocate a new CFP (ec->cfp--) - let ec_cfp_opnd = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP); - sub(cb, ec_cfp_opnd, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64)); + let ec_cfp_opnd = Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP); + let new_cfp = asm.sub(ec_cfp_opnd, Opnd::UImm(RUBY_SIZEOF_CONTROL_FRAME as u64)); + asm.mov(ec_cfp_opnd, new_cfp); // Setup the new frame // *cfp = (const struct rb_control_frame_struct) { @@ -4043,22 +4016,15 @@ fn gen_send_cfunc( // }; // Can we re-use ec_cfp_opnd from above? 
- let ec_cfp_opnd = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP); - mov(cb, REG1, ec_cfp_opnd); - mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_PC), imm_opnd(0)); - - mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_SP), REG0); - mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_ISEQ), imm_opnd(0)); - mov( - cb, - mem_opnd(64, REG1, RUBY_OFFSET_CFP_BLOCK_CODE), - imm_opnd(0), - ); - mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_BP), REG0); - sub(cb, REG0, uimm_opnd(SIZEOF_VALUE as u64)); - mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_EP), REG0); - mov(cb, REG0, recv); - mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_SELF), REG0); + let ec_cfp_opnd = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP)); + asm.mov(Opnd::mem(64, ec_cfp_opnd, RUBY_OFFSET_CFP_PC), Opnd::Imm(0)); + asm.mov(Opnd::mem(64, ec_cfp_opnd, RUBY_OFFSET_CFP_SP), sp); + asm.mov(Opnd::mem(64, ec_cfp_opnd, RUBY_OFFSET_CFP_ISEQ), Opnd::Imm(0)); + asm.mov(Opnd::mem(64, ec_cfp_opnd, RUBY_OFFSET_CFP_BLOCK_CODE), Opnd::Imm(0)); + asm.mov(Opnd::mem(64, ec_cfp_opnd, RUBY_OFFSET_CFP_BP), sp); + let ep = asm.sub(sp, Opnd::UImm(SIZEOF_VALUE as u64)); + asm.mov(Opnd::mem(64, ec_cfp_opnd, RUBY_OFFSET_CFP_EP), ep); + asm.mov(Opnd::mem(64, ec_cfp_opnd, RUBY_OFFSET_CFP_SELF), recv); /* // Verify that we are calling the right function @@ -4074,71 +4040,69 @@ fn gen_send_cfunc( if !kw_arg.is_null() { // Build a hash from all kwargs passed - jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], VALUE(ci as usize)); - lea(cb, C_ARG_REGS[1], ctx.sp_opnd(0)); - call_ptr(cb, REG0, build_kwhash as *const u8); + asm.comment("build_kwhash"); + let imemo_ci = VALUE(ci as usize); + assert_ne!(0, unsafe { rb_IMEMO_TYPE_P(imemo_ci, imemo_callinfo) }, + "we assume all callinfos with kwargs are on the GC heap"); + let sp = asm.lea(ctx.sp_opnd(0)); + let kwargs = asm.ccall(build_kwhash as *const u8, vec![imemo_ci.into(), sp]); // Replace the stack location at the start of kwargs with the new hash let stack_opnd = ctx.stack_opnd(argc - passed_argc); - mov(cb, stack_opnd, RAX); + 
asm.mov(stack_opnd, kwargs); } - // Copy SP into RAX because REG_SP will get overwritten - lea(cb, RAX, ctx.sp_opnd(0)); + // Copy SP because REG_SP will get overwritten + let sp = asm.lea(ctx.sp_opnd(0)); // Pop the C function arguments from the stack (in the caller) ctx.stack_pop((argc + 1).try_into().unwrap()); // Write interpreter SP into CFP. // Needed in case the callee yields to the block. - gen_save_sp(cb, ctx); + gen_save_sp(jit, asm, ctx); // Non-variadic method - if cfunc_argc >= 0 { + let args = if cfunc_argc >= 0 { // Copy the arguments from the stack to the C argument registers // self is the 0th argument and is at index argc from the stack top - for i in 0..=passed_argc as usize { - let stack_opnd = mem_opnd(64, RAX, -(argc + 1 - (i as i32)) * SIZEOF_VALUE_I32); - let c_arg_reg = C_ARG_REGS[i]; - mov(cb, c_arg_reg, stack_opnd); - } + (0..=passed_argc).map(|i| + Opnd::mem(64, sp, -(argc + 1 - (i as i32)) * SIZEOF_VALUE_I32) + ).collect() } - // Variadic method - if cfunc_argc == -1 { + else if cfunc_argc == -1 { // The method gets a pointer to the first argument // rb_f_puts(int argc, VALUE *argv, VALUE recv) - mov(cb, C_ARG_REGS[0], imm_opnd(passed_argc.into())); - lea( - cb, - C_ARG_REGS[1], - mem_opnd(64, RAX, -(argc) * SIZEOF_VALUE_I32), - ); - mov( - cb, - C_ARG_REGS[2], - mem_opnd(64, RAX, -(argc + 1) * SIZEOF_VALUE_I32), - ); - } + vec![ + Opnd::Imm(passed_argc.into()), + asm.lea(Opnd::mem(64, sp, -(argc) * SIZEOF_VALUE_I32)), + Opnd::mem(64, sp, -(argc + 1) * SIZEOF_VALUE_I32), + ] + } + else { + panic!("unexpected cfunc_args: {}", cfunc_argc) + }; // Call the C function // VALUE ret = (cfunc->func)(recv, argv[0], argv[1]); // cfunc comes from compile-time cme->def, which we assume to be stable. 
// Invalidation logic is in yjit_method_lookup_change() - add_comment(cb, "call C function"); - call_ptr(cb, REG0, unsafe { get_mct_func(cfunc) }); + asm.comment("call C function"); + let ret = asm.ccall(unsafe { get_mct_func(cfunc) }.cast(), args); // Record code position for TracePoint patching. See full_cfunc_return(). - record_global_inval_patch(cb, CodegenGlobals::get_outline_full_cfunc_return_pos()); + record_global_inval_patch(asm, CodegenGlobals::get_outline_full_cfunc_return_pos()); // Push the return value on the Ruby stack let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, ret); // Pop the stack frame (ec->cfp++) // Can we reuse ec_cfp_opnd from above? - let ec_cfp_opnd = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP); - add(cb, ec_cfp_opnd, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64)); + let ec_cfp_opnd = Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP); + let new_cfp = asm.add(ec_cfp_opnd, Opnd::UImm(RUBY_SIZEOF_CONTROL_FRAME as u64)); + asm.store(ec_cfp_opnd, new_cfp); // cfunc calls may corrupt types ctx.clear_local_types(); @@ -4148,12 +4112,12 @@ fn gen_send_cfunc( // Jump (fall through) to the call continuation block // We do this to end the current block after the call - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); EndBlock } fn gen_return_branch( - cb: &mut CodeBlock, + asm: &mut Assembler, target0: CodePtr, _target1: Option, shape: BranchShape, @@ -4161,8 +4125,7 @@ fn gen_return_branch( match shape { BranchShape::Next0 | BranchShape::Next1 => unreachable!(), BranchShape::Default => { - mov(cb, REG0, code_ptr_opnd(target0)); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_JIT_RETURN), REG0); + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_JIT_RETURN), Opnd::const_ptr(target0.raw_ptr())); } } } @@ -4170,7 +4133,7 @@ fn gen_return_branch( fn gen_send_iseq( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ci: *const rb_callinfo, cme: 
*const rb_callable_method_entry_t, @@ -4190,7 +4153,7 @@ fn gen_send_iseq( if unsafe { vm_ci_flag(ci) } & VM_CALL_TAILCALL != 0 { // We can't handle tailcalls - gen_counter_incr!(cb, send_iseq_tailcall); + gen_counter_incr!(asm, send_iseq_tailcall); return CantCompile; } @@ -4201,7 +4164,7 @@ fn gen_send_iseq( || get_iseq_flags_has_post(iseq) || get_iseq_flags_has_kwrest(iseq) } { - gen_counter_incr!(cb, send_iseq_complex_callee); + gen_counter_incr!(asm, send_iseq_complex_callee); return CantCompile; } @@ -4209,14 +4172,14 @@ fn gen_send_iseq( // positionals, then we need to allocate a hash. For now we're going to // call that too complex and bail. if supplying_kws && !unsafe { get_iseq_flags_has_kw(iseq) } { - gen_counter_incr!(cb, send_iseq_complex_callee); + gen_counter_incr!(asm, send_iseq_complex_callee); return CantCompile; } // If we have a method accepting no kwargs (**nil), exit if we have passed // it any kwargs. if supplying_kws && unsafe { get_iseq_flags_has_accepts_no_kwarg(iseq) } { - gen_counter_incr!(cb, send_iseq_complex_callee); + gen_counter_incr!(asm, send_iseq_complex_callee); return CantCompile; } @@ -4231,7 +4194,7 @@ fn gen_send_iseq( // In this case (param.flags.has_block && local_iseq != iseq), // the block argument is setup as a local variable and requires // materialization (allocation). Bail. - gen_counter_incr!(cb, send_iseq_complex_callee); + gen_counter_incr!(asm, send_iseq_complex_callee); return CantCompile; } } @@ -4254,7 +4217,7 @@ fn gen_send_iseq( let opts_missing: i32 = opt_num - opts_filled; if opts_filled < 0 || opts_filled > opt_num { - gen_counter_incr!(cb, send_iseq_arity_error); + gen_counter_incr!(asm, send_iseq_arity_error); return CantCompile; } @@ -4262,7 +4225,7 @@ fn gen_send_iseq( // would need to move adjust the arguments location to account for that. // For now we aren't handling this case. 
if doing_kw_call && opts_missing > 0 { - gen_counter_incr!(cb, send_iseq_complex_callee); + gen_counter_incr!(asm, send_iseq_complex_callee); return CantCompile; } @@ -4290,7 +4253,7 @@ fn gen_send_iseq( // We have so many keywords that (1 << num) encoded as a FIXNUM // (which shifts it left one more) no longer fits inside a 32-bit // immediate. - gen_counter_incr!(cb, send_iseq_complex_callee); + gen_counter_incr!(asm, send_iseq_complex_callee); return CantCompile; } @@ -4328,7 +4291,7 @@ fn gen_send_iseq( // If the keyword was never found, then we know we have a // mismatch in the names of the keyword arguments, so we need to // bail. - gen_counter_incr!(cb, send_iseq_kwargs_mismatch); + gen_counter_incr!(asm, send_iseq_kwargs_mismatch); return CantCompile; } Some((callee_idx, _)) if callee_idx < keyword_required_num => { @@ -4341,7 +4304,7 @@ fn gen_send_iseq( } assert!(required_kwargs_filled <= keyword_required_num); if required_kwargs_filled != keyword_required_num { - gen_counter_incr!(cb, send_iseq_kwargs_mismatch); + gen_counter_incr!(asm, send_iseq_kwargs_mismatch); return CantCompile; } } @@ -4353,7 +4316,7 @@ fn gen_send_iseq( let side_exit = get_side_exit(jit, ocb, ctx); // Check for interrupts - gen_check_ints(cb, side_exit); + gen_check_ints(asm, side_exit); let leaf_builtin_raw = unsafe { rb_leaf_builtin_function(iseq) }; let leaf_builtin: Option<*const rb_builtin_function> = if leaf_builtin_raw.is_null() { @@ -4363,26 +4326,23 @@ fn gen_send_iseq( }; if let (None, Some(builtin_info)) = (block, leaf_builtin) { let builtin_argc = unsafe { (*builtin_info).argc }; - if builtin_argc + 1 /* for self */ + 1 /* for ec */ <= (C_ARG_REGS.len() as i32) { - add_comment(cb, "inlined leaf builtin"); + if builtin_argc + 1 /* for self */ + 1 /* for ec */ <= (C_ARG_OPNDS.len() as i32) { + asm.comment("inlined leaf builtin"); // Call the builtin func (ec, recv, arg1, arg2, ...) 
- mov(cb, C_ARG_REGS[0], REG_EC); + let mut args = vec![EC]; // Copy self and arguments for i in 0..=builtin_argc { let stack_opnd = ctx.stack_opnd(builtin_argc - i); - let idx: usize = (i + 1).try_into().unwrap(); - let c_arg_reg = C_ARG_REGS[idx]; - mov(cb, c_arg_reg, stack_opnd); + args.push(stack_opnd); } ctx.stack_pop((builtin_argc + 1).try_into().unwrap()); - let builtin_func_ptr = unsafe { (*builtin_info).func_ptr as *const u8 }; - call_ptr(cb, REG0, builtin_func_ptr); + let val = asm.ccall(unsafe { (*builtin_info).func_ptr as *const u8 }, args); // Push the return value let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, val); // Note: assuming that the leaf builtin doesn't change local variables here. // Seems like a safe assumption. @@ -4394,13 +4354,13 @@ fn gen_send_iseq( // Stack overflow check // Note that vm_push_frame checks it against a decremented cfp, hence the multiply by 2. // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin) - add_comment(cb, "stack overflow check"); + asm.comment("stack overflow check"); let stack_max: i32 = unsafe { get_iseq_body_stack_max(iseq) }.try_into().unwrap(); let locals_offs = (SIZEOF_VALUE as i32) * (num_locals + stack_max) + 2 * (RUBY_SIZEOF_CONTROL_FRAME as i32); - lea(cb, REG0, ctx.sp_opnd(locals_offs as isize)); - cmp(cb, REG_CFP, REG0); - jle_ptr(cb, counted_exit!(ocb, side_exit, send_se_cf_overflow)); + let stack_limit = asm.lea(ctx.sp_opnd(locals_offs as isize)); + asm.cmp(CFP, stack_limit); + asm.jbe(counted_exit!(ocb, side_exit, send_se_cf_overflow).into()); if doing_kw_call { // Here we're calling a method with keyword arguments and specifying @@ -4425,7 +4385,7 @@ fn gen_send_iseq( // keyword parameters. let keyword = unsafe { get_iseq_body_param_keyword(iseq) }; - add_comment(cb, "keyword args"); + asm.comment("keyword args"); // This is the list of keyword arguments that the callee specified // in its initial declaration. 
@@ -4482,8 +4442,7 @@ fn gen_send_iseq( default_value = Qnil; } - jit_mov_gc_ptr(jit, cb, REG0, default_value); - mov(cb, default_arg, REG0); + asm.mov(default_arg, default_value.into()); caller_kwargs[kwarg_idx] = callee_kwarg; kwarg_idx += 1; @@ -4521,7 +4480,7 @@ fn gen_send_iseq( let offset1: u16 = (argc - 1 - kwarg_idx_i32 - args_before_kw) .try_into() .unwrap(); - stack_swap(ctx, cb, offset0, offset1, REG1, REG0); + stack_swap(jit, ctx, asm, offset0, offset1); // Next we're going to do some bookkeeping on our end so // that we know the order that the arguments are @@ -4536,80 +4495,70 @@ fn gen_send_iseq( // Keyword arguments cause a special extra local variable to be // pushed onto the stack that represents the parameters that weren't // explicitly given a value and have a non-constant default. - let unspec_opnd = uimm_opnd(VALUE::fixnum_from_usize(unspecified_bits).as_u64()); - mov(cb, ctx.stack_opnd(-1), unspec_opnd); + let unspec_opnd = VALUE::fixnum_from_usize(unspecified_bits).as_u64(); + asm.mov(ctx.stack_opnd(-1), unspec_opnd.into()); } // Points to the receiver operand on the stack let recv = ctx.stack_opnd(argc); // Store the updated SP on the current frame (pop arguments and receiver) - add_comment(cb, "store caller sp"); - lea( - cb, - REG0, - ctx.sp_opnd((SIZEOF_VALUE as isize) * -((argc as isize) + 1)), - ); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP), REG0); + asm.comment("store caller sp"); + let caller_sp = asm.lea(ctx.sp_opnd((SIZEOF_VALUE as isize) * -((argc as isize) + 1))); + asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), caller_sp); // Store the next PC in the current frame - jit_save_pc(jit, cb, REG0); + jit_save_pc(jit, asm); if let Some(block_val) = block { // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block(). // VM_CFP_TO_CAPTURED_BLCOK does &cfp->self, rb_captured_block->code.iseq aliases // with cfp->block_code. 
- let gc_ptr = VALUE(block_val as usize); - jit_mov_gc_ptr(jit, cb, REG0, gc_ptr); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BLOCK_CODE), REG0); + asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE(block_val as usize).into()); } // Adjust the callee's stack pointer let offs = (SIZEOF_VALUE as isize) * (3 + (num_locals as isize) + if doing_kw_call { 1 } else { 0 }); - lea(cb, REG0, ctx.sp_opnd(offs)); + let callee_sp = asm.lea(ctx.sp_opnd(offs)); // Initialize local variables to Qnil for i in 0..num_locals { let offs = (SIZEOF_VALUE as i32) * (i - num_locals - 3); - mov(cb, mem_opnd(64, REG0, offs), uimm_opnd(Qnil.into())); + asm.store(Opnd::mem(64, callee_sp, offs), Qnil.into()); } - add_comment(cb, "push env"); - // Put compile time cme into REG1. It's assumed to be valid because we are notified when + // Write the callee CME on the stack. It's assumed to be valid because we are notified when // any cme we depend on become outdated. See yjit_method_lookup_change(). - jit_mov_gc_ptr(jit, cb, REG1, VALUE(cme as usize)); // Write method entry at sp[-3] // sp[-3] = me; - mov(cb, mem_opnd(64, REG0, 8 * -3), REG1); + asm.comment("push cme, block handler, frame type"); + asm.store(Opnd::mem(64, callee_sp, SIZEOF_VALUE_I32 * -3), VALUE(cme as usize).into()); // Write block handler at sp[-2] // sp[-2] = block_handler; match block { Some(_) => { // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp)); - lea(cb, REG1, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF)); - or(cb, REG1, imm_opnd(1)); - mov(cb, mem_opnd(64, REG0, 8 * -2), REG1); + let block_handler = asm.lea(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)); + let block_handler = asm.or(block_handler, 1.into()); + asm.store(Opnd::mem(64, callee_sp, SIZEOF_VALUE_I32 * -2), block_handler); } None => { - mov( - cb, - mem_opnd(64, REG0, 8 * -2), - uimm_opnd(VM_BLOCK_HANDLER_NONE.into()), - ); + asm.store(Opnd::mem(64, callee_sp, SIZEOF_VALUE_I32 * -2), VM_BLOCK_HANDLER_NONE.into()); } } // Write env 
flags at sp[-1] // sp[-1] = frame_type; let frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL; - mov(cb, mem_opnd(64, REG0, 8 * -1), uimm_opnd(frame_type.into())); + asm.store(Opnd::mem(64, callee_sp, SIZEOF_VALUE_I32 * -1), frame_type.into()); - add_comment(cb, "push callee CFP"); + asm.comment("push callee control frame"); // Allocate a new CFP (ec->cfp--) - sub(cb, REG_CFP, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64)); - mov(cb, mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP), REG_CFP); + let new_cfp = asm.sub(CFP, (RUBY_SIZEOF_CONTROL_FRAME as u64).into()); + asm.mov(CFP, new_cfp); + asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); // Setup the new frame // *cfp = (const struct rb_control_frame_struct) { @@ -4621,20 +4570,14 @@ fn gen_send_iseq( // .block_code = 0, // .__bp__ = sp, // }; - mov(cb, REG1, recv); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF), REG1); - mov(cb, REG_SP, REG0); // Switch to the callee's REG_SP - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP), REG0); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BP), REG0); - sub(cb, REG0, uimm_opnd(SIZEOF_VALUE as u64)); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP), REG0); - jit_mov_gc_ptr(jit, cb, REG0, VALUE(iseq as usize)); - mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_ISEQ), REG0); - mov( - cb, - mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BLOCK_CODE), - imm_opnd(0), - ); + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF), recv); + asm.mov(SP, callee_sp); + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), callee_sp); + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BP), callee_sp); + let callee_ep = asm.sub(callee_sp, (SIZEOF_VALUE as u64).into()); + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP), callee_ep); + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ), VALUE(iseq as usize).into()); + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), 0.into()); // No need to set cfp->pc since the callee sets it whenever calling into routines // that could look at it through 
jit_save_pc(). @@ -4676,7 +4619,7 @@ fn gen_send_iseq( gen_branch( jit, ctx, - cb, + asm, ocb, return_block, &return_ctx, @@ -4696,7 +4639,7 @@ fn gen_send_iseq( iseq: iseq, idx: start_pc_offset, }, - cb, + asm, ); EndBlock @@ -4705,7 +4648,7 @@ fn gen_send_iseq( fn gen_struct_aref( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ci: *const rb_callinfo, cme: *const rb_callable_method_entry_t, @@ -4739,32 +4682,28 @@ fn gen_struct_aref( // true of the converse. let embedded = unsafe { FL_TEST_RAW(comptime_recv, VALUE(RSTRUCT_EMBED_LEN_MASK)) }; - add_comment(cb, "struct aref"); - - let recv = ctx.stack_pop(1); + asm.comment("struct aref"); - mov(cb, REG0, recv); + let recv = asm.load(ctx.stack_pop(1)); - if embedded != VALUE(0) { - let ary_elt = mem_opnd(64, REG0, RUBY_OFFSET_RSTRUCT_AS_ARY + (8 * off)); - mov(cb, REG0, ary_elt); + let val = if embedded != VALUE(0) { + Opnd::mem(64, recv, RUBY_OFFSET_RSTRUCT_AS_ARY + ((SIZEOF_VALUE as i32) * off)) } else { - let rstruct_ptr = mem_opnd(64, REG0, RUBY_OFFSET_RSTRUCT_AS_HEAP_PTR); - mov(cb, REG0, rstruct_ptr); - mov(cb, REG0, mem_opnd(64, REG0, (SIZEOF_VALUE as i32) * off)); - } + let rstruct_ptr = asm.load(Opnd::mem(64, recv, RUBY_OFFSET_RSTRUCT_AS_HEAP_PTR)); + Opnd::mem(64, rstruct_ptr, (SIZEOF_VALUE as i32) * off) + }; let ret = ctx.stack_push(Type::Unknown); - mov(cb, ret, REG0); + asm.mov(ret, val); - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); EndBlock } fn gen_struct_aset( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ci: *const rb_callinfo, cme: *const rb_callable_method_entry_t, @@ -4783,27 +4722,24 @@ fn gen_struct_aset( assert!(unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRUCT) }); assert!((off as i64) < unsafe { RSTRUCT_LEN(comptime_recv) }); - add_comment(cb, "struct aset"); + asm.comment("struct aset"); let val = ctx.stack_pop(1); let recv = 
ctx.stack_pop(1); - mov(cb, C_ARG_REGS[0], recv); - mov(cb, C_ARG_REGS[1], imm_opnd(off as i64)); - mov(cb, C_ARG_REGS[2], val); - call_ptr(cb, REG0, RSTRUCT_SET as *const u8); + let val = asm.ccall(RSTRUCT_SET as *const u8, vec![recv, (off as i64).into(), val]); let ret = ctx.stack_push(Type::Unknown); - mov(cb, ret, RAX); + asm.mov(ret, val); - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); EndBlock } fn gen_send_general( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, cd: *const rb_call_data, block: Option, @@ -4819,30 +4755,30 @@ fn gen_send_general( // see vm_call_method(). let ci = unsafe { get_call_data_ci(cd) }; // info about the call site - let argc = unsafe { vm_ci_argc(ci) }; + let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap(); let mid = unsafe { vm_ci_mid(ci) }; let flags = unsafe { vm_ci_flag(ci) }; // Don't JIT calls with keyword splat if flags & VM_CALL_KW_SPLAT != 0 { - gen_counter_incr!(cb, send_kw_splat); + gen_counter_incr!(asm, send_kw_splat); return CantCompile; } // Don't JIT calls that aren't simple // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block. 
if flags & VM_CALL_ARGS_SPLAT != 0 { - gen_counter_incr!(cb, send_args_splat); + gen_counter_incr!(asm, send_args_splat); return CantCompile; } if flags & VM_CALL_ARGS_BLOCKARG != 0 { - gen_counter_incr!(cb, send_block_arg); + gen_counter_incr!(asm, send_block_arg); return CantCompile; } // Defer compilation so we can specialize on class of receiver if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -4855,13 +4791,13 @@ fn gen_send_general( // Points to the receiver operand on the stack let recv = ctx.stack_opnd(argc); let recv_opnd = StackOpnd(argc.try_into().unwrap()); - mov(cb, REG0, recv); jit_guard_known_klass( jit, ctx, - cb, + asm, ocb, comptime_recv_klass, + recv, recv_opnd, comptime_recv, SEND_MAX_DEPTH, @@ -4892,7 +4828,7 @@ fn gen_send_general( if flags & VM_CALL_FCALL == 0 { // otherwise we need an ancestry check to ensure the receiver is vaild to be called // as protected - jit_protected_callee_ancestry_guard(jit, cb, ocb, cme, side_exit); + jit_protected_callee_ancestry_guard(jit, asm, ocb, cme, side_exit); } } _ => { @@ -4909,13 +4845,13 @@ fn gen_send_general( let def_type = unsafe { get_cme_def_type(cme) }; match def_type { VM_METHOD_TYPE_ISEQ => { - return gen_send_iseq(jit, ctx, cb, ocb, ci, cme, block, argc); + return gen_send_iseq(jit, ctx, asm, ocb, ci, cme, block, argc); } VM_METHOD_TYPE_CFUNC => { return gen_send_cfunc( jit, ctx, - cb, + asm, ocb, ci, cme, @@ -4927,7 +4863,7 @@ fn gen_send_general( VM_METHOD_TYPE_IVAR => { if argc != 0 { // Argument count mismatch. Getters take no arguments. - gen_counter_incr!(cb, send_getter_arity); + gen_counter_incr!(asm, send_getter_arity); return CantCompile; } @@ -4941,49 +4877,49 @@ fn gen_send_general( // attr_accessor is invalidated and we exit at the closest // instruction boundary which is always outside of the body of // the attr_accessor code. 
- gen_counter_incr!(cb, send_cfunc_tracing); + gen_counter_incr!(asm, send_cfunc_tracing); return CantCompile; } - mov(cb, REG0, recv); let ivar_name = unsafe { get_cme_def_body_attr_id(cme) }; return gen_get_ivar( jit, ctx, - cb, + asm, ocb, SEND_MAX_DEPTH, comptime_recv, ivar_name, + recv, recv_opnd, side_exit, ); } VM_METHOD_TYPE_ATTRSET => { if flags & VM_CALL_KWARG != 0 { - gen_counter_incr!(cb, send_attrset_kwargs); + gen_counter_incr!(asm, send_attrset_kwargs); return CantCompile; } else if argc != 1 || unsafe { !RB_TYPE_P(comptime_recv, RUBY_T_OBJECT) } { - gen_counter_incr!(cb, send_ivar_set_method); + gen_counter_incr!(asm, send_ivar_set_method); return CantCompile; } else if c_method_tracing_currently_enabled(jit) { // Can't generate code for firing c_call and c_return events // See :attr-tracing: - gen_counter_incr!(cb, send_cfunc_tracing); + gen_counter_incr!(asm, send_cfunc_tracing); return CantCompile; } else { let ivar_name = unsafe { get_cme_def_body_attr_id(cme) }; - return gen_set_ivar(jit, ctx, cb, comptime_recv, ivar_name); + return gen_set_ivar(jit, ctx, asm, comptime_recv, ivar_name); } } // Block method, e.g. define_method(:foo) { :my_block } VM_METHOD_TYPE_BMETHOD => { - gen_counter_incr!(cb, send_bmethod); + gen_counter_incr!(asm, send_bmethod); return CantCompile; } VM_METHOD_TYPE_ZSUPER => { - gen_counter_incr!(cb, send_zsuper_method); + gen_counter_incr!(asm, send_zsuper_method); return CantCompile; } VM_METHOD_TYPE_ALIAS => { @@ -4992,11 +4928,11 @@ fn gen_send_general( continue; } VM_METHOD_TYPE_UNDEF => { - gen_counter_incr!(cb, send_undef_method); + gen_counter_incr!(asm, send_undef_method); return CantCompile; } VM_METHOD_TYPE_NOTIMPLEMENTED => { - gen_counter_incr!(cb, send_not_implemented_method); + gen_counter_incr!(asm, send_not_implemented_method); return CantCompile; } // Send family of methods, e.g. 
call/apply @@ -5004,22 +4940,22 @@ fn gen_send_general( let opt_type = unsafe { get_cme_def_body_optimized_type(cme) }; match opt_type { OPTIMIZED_METHOD_TYPE_SEND => { - gen_counter_incr!(cb, send_optimized_method_send); + gen_counter_incr!(asm, send_optimized_method_send); return CantCompile; } OPTIMIZED_METHOD_TYPE_CALL => { - gen_counter_incr!(cb, send_optimized_method_call); + gen_counter_incr!(asm, send_optimized_method_call); return CantCompile; } OPTIMIZED_METHOD_TYPE_BLOCK_CALL => { - gen_counter_incr!(cb, send_optimized_method_block_call); + gen_counter_incr!(asm, send_optimized_method_block_call); return CantCompile; } OPTIMIZED_METHOD_TYPE_STRUCT_AREF => { return gen_struct_aref( jit, ctx, - cb, + asm, ocb, ci, cme, @@ -5031,7 +4967,7 @@ fn gen_send_general( return gen_struct_aset( jit, ctx, - cb, + asm, ocb, ci, cme, @@ -5045,11 +4981,11 @@ fn gen_send_general( } } VM_METHOD_TYPE_MISSING => { - gen_counter_incr!(cb, send_missing_method); + gen_counter_incr!(asm, send_missing_method); return CantCompile; } VM_METHOD_TYPE_REFINED => { - gen_counter_incr!(cb, send_refined_method); + gen_counter_incr!(asm, send_refined_method); return CantCompile; } _ => { @@ -5062,29 +4998,29 @@ fn gen_send_general( fn gen_opt_send_without_block( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let cd = jit_get_arg(jit, 0).as_ptr(); - gen_send_general(jit, ctx, cb, ocb, cd, None) + gen_send_general(jit, ctx, asm, ocb, cd, None) } fn gen_send( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let cd = jit_get_arg(jit, 0).as_ptr(); let block = jit_get_arg(jit, 1).as_optional_ptr(); - return gen_send_general(jit, ctx, cb, ocb, cd, block); + return gen_send_general(jit, ctx, asm, ocb, cd, block); } fn gen_invokesuper( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) 
-> CodegenStatus { let cd: *const rb_call_data = jit_get_arg(jit, 0).as_ptr(); @@ -5092,7 +5028,7 @@ fn gen_invokesuper( // Defer compilation so we can specialize on class of receiver if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -5123,26 +5059,26 @@ fn gen_invokesuper( unsafe { rb_class_get_superclass(RCLASS_ORIGIN(current_defined_class)) }; let ci = unsafe { get_call_data_ci(cd) }; - let argc = unsafe { vm_ci_argc(ci) }; + let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap(); let ci_flags = unsafe { vm_ci_flag(ci) }; // Don't JIT calls that aren't simple // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block. if ci_flags & VM_CALL_ARGS_SPLAT != 0 { - gen_counter_incr!(cb, send_args_splat); + gen_counter_incr!(asm, send_args_splat); return CantCompile; } if ci_flags & VM_CALL_KWARG != 0 { - gen_counter_incr!(cb, send_keywords); + gen_counter_incr!(asm, send_keywords); return CantCompile; } if ci_flags & VM_CALL_KW_SPLAT != 0 { - gen_counter_incr!(cb, send_kw_splat); + gen_counter_incr!(asm, send_kw_splat); return CantCompile; } if ci_flags & VM_CALL_ARGS_BLOCKARG != 0 { - gen_counter_incr!(cb, send_block_arg); + gen_counter_incr!(asm, send_block_arg); return CantCompile; } @@ -5182,16 +5118,15 @@ fn gen_invokesuper( return CantCompile; } - add_comment(cb, "guard known me"); - mov(cb, REG0, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP)); - let ep_me_opnd = mem_opnd( + asm.comment("guard known me"); + let ep_opnd = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP)); + let ep_me_opnd = Opnd::mem( 64, - REG0, + ep_opnd, (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_ME_CREF as i32), ); - jit_mov_gc_ptr(jit, cb, REG1, me_as_value); - cmp(cb, ep_me_opnd, REG1); - jne_ptr(cb, counted_exit!(ocb, side_exit, invokesuper_me_changed)); + asm.cmp(ep_me_opnd, me_as_value.into()); + asm.jne(counted_exit!(ocb, side_exit, invokesuper_me_changed).into()); if block.is_none() { // 
Guard no block passed @@ -5200,21 +5135,18 @@ fn gen_invokesuper( // // TODO: this could properly forward the current block handler, but // would require changes to gen_send_* - add_comment(cb, "guard no block given"); + asm.comment("guard no block given"); // EP is in REG0 from above - let ep_specval_opnd = mem_opnd( + let ep_opnd = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP)); + let ep_specval_opnd = Opnd::mem( 64, - REG0, + ep_opnd, (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32), ); - cmp(cb, ep_specval_opnd, uimm_opnd(VM_BLOCK_HANDLER_NONE.into())); - jne_ptr(cb, counted_exit!(ocb, side_exit, invokesuper_block)); + asm.cmp(ep_specval_opnd, VM_BLOCK_HANDLER_NONE.into()); + asm.jne(counted_exit!(ocb, side_exit, invokesuper_block).into()); } - // Points to the receiver operand on the stack - let recv = ctx.stack_opnd(argc); - mov(cb, REG0, recv); - // We need to assume that both our current method entry and the super // method entry we invoke remain stable assume_method_lookup_stable(jit, ocb, current_defined_class, me); @@ -5224,9 +5156,9 @@ fn gen_invokesuper( ctx.clear_local_types(); match cme_def_type { - VM_METHOD_TYPE_ISEQ => gen_send_iseq(jit, ctx, cb, ocb, ci, cme, block, argc), + VM_METHOD_TYPE_ISEQ => gen_send_iseq(jit, ctx, asm, ocb, ci, cme, block, argc), VM_METHOD_TYPE_CFUNC => { - gen_send_cfunc(jit, ctx, cb, ocb, ci, cme, block, argc, ptr::null()) + gen_send_cfunc(jit, ctx, asm, ocb, ci, cme, block, argc, ptr::null()) } _ => unreachable!(), } @@ -5235,7 +5167,7 @@ fn gen_invokesuper( fn gen_leave( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // Only the return value should be on the stack @@ -5243,32 +5175,34 @@ fn gen_leave( // Create a side-exit to fall back to the interpreter let side_exit = get_side_exit(jit, ocb, ctx); - - // Load environment pointer EP from CFP - mov(cb, REG1, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP)); + let mut ocb_asm = 
Assembler::new(); // Check for interrupts - add_comment(cb, "check for interrupts"); - gen_check_ints(cb, counted_exit!(ocb, side_exit, leave_se_interrupt)); - - // Load the return value - mov(cb, REG0, ctx.stack_pop(1)); + gen_check_ints(asm, counted_exit!(ocb, side_exit, leave_se_interrupt)); + ocb_asm.compile(ocb.unwrap()); // Pop the current frame (ec->cfp++) // Note: the return PC is already in the previous CFP - add_comment(cb, "pop stack frame"); - add(cb, REG_CFP, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64)); - mov(cb, mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP), REG_CFP); + asm.comment("pop stack frame"); + let incr_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); + asm.mov(CFP, incr_cfp); + asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), incr_cfp); + + // Load the return value + let retval_opnd = ctx.stack_pop(1); + + // Move the return value into the C return register for gen_leave_exit() + asm.mov(C_RET_OPND, retval_opnd); // Reload REG_SP for the caller and write the return value. // Top of the stack is REG_SP[0] since the caller has sp_offset=1. 
- mov(cb, REG_SP, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP)); - mov(cb, mem_opnd(64, REG_SP, 0), REG0); + asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); + asm.mov(Opnd::mem(64, SP, 0), C_RET_OPND); // Jump to the JIT return address on the frame that was just popped let offset_to_jit_return = -(RUBY_SIZEOF_CONTROL_FRAME as i32) + (RUBY_OFFSET_CFP_JIT_RETURN as i32); - jmp_rm(cb, mem_opnd(64, REG_CFP, offset_to_jit_return)); + asm.jmp_opnd(Opnd::mem(64, CFP, offset_to_jit_return)); EndBlock } @@ -5276,20 +5210,21 @@ fn gen_leave( fn gen_getglobal( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let gid = jit_get_arg(jit, 0); // Save the PC and SP because we might make a Ruby call for warning - jit_prepare_routine_call(jit, ctx, cb, REG0); - - mov(cb, C_ARG_REGS[0], imm_opnd(gid.as_i64())); + jit_prepare_routine_call(jit, ctx, asm); - call_ptr(cb, REG0, rb_gvar_get as *const u8); + let val_opnd = asm.ccall( + rb_gvar_get as *const u8, + vec![ gid.into() ] + ); let top = ctx.stack_push(Type::Unknown); - mov(cb, top, RAX); + asm.mov(top, val_opnd); KeepCompiling } @@ -5297,22 +5232,22 @@ fn gen_getglobal( fn gen_setglobal( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let gid = jit_get_arg(jit, 0); // Save the PC and SP because we might make a Ruby call for // Kernel#set_trace_var - jit_prepare_routine_call(jit, ctx, cb, REG0); - - mov(cb, C_ARG_REGS[0], imm_opnd(gid.as_i64())); - - let val = ctx.stack_pop(1); - - mov(cb, C_ARG_REGS[1], val); - - call_ptr(cb, REG0, rb_gvar_set as *const u8); + jit_prepare_routine_call(jit, ctx, asm); + + asm.ccall( + rb_gvar_set as *const u8, + vec![ + gid.into(), + ctx.stack_pop(1), + ], + ); KeepCompiling } @@ -5320,23 +5255,20 @@ fn gen_setglobal( fn gen_anytostring( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut 
OutlinedCb, ) -> CodegenStatus { // Save the PC and SP since we might call #to_s - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); let str = ctx.stack_pop(1); let val = ctx.stack_pop(1); - mov(cb, C_ARG_REGS[0], str); - mov(cb, C_ARG_REGS[1], val); - - call_ptr(cb, REG0, rb_obj_as_string_result as *const u8); + let val = asm.ccall(rb_obj_as_string_result as *const u8, vec![str, val]); // Push the return value let stack_ret = ctx.stack_push(Type::TString); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, val); KeepCompiling } @@ -5344,11 +5276,11 @@ fn gen_anytostring( fn gen_objtostring( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -5358,13 +5290,13 @@ fn gen_objtostring( if unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRING) } { let side_exit = get_side_exit(jit, ocb, ctx); - mov(cb, REG0, recv); jit_guard_known_klass( jit, ctx, - cb, + asm, ocb, comptime_recv.class_of(), + recv, StackOpnd(0), comptime_recv, SEND_MAX_DEPTH, @@ -5374,28 +5306,25 @@ fn gen_objtostring( KeepCompiling } else { let cd = jit_get_arg(jit, 0).as_ptr(); - gen_send_general(jit, ctx, cb, ocb, cd, None) + gen_send_general(jit, ctx, asm, ocb, cd, None) } } fn gen_intern( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { // Save the PC and SP because we might allocate - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); let str = ctx.stack_pop(1); - - mov(cb, C_ARG_REGS[0], str); - - call_ptr(cb, REG0, rb_str_intern as *const u8); + let sym = asm.ccall(rb_str_intern as *const u8, vec![str]); // Push the return value let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, sym); KeepCompiling } 
@@ -5403,7 +5332,7 @@ fn gen_intern( fn gen_toregexp( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let opt = jit_get_arg(jit, 0).as_i64(); @@ -5411,34 +5340,43 @@ fn gen_toregexp( // Save the PC and SP because this allocates an object and could // raise an exception. - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); - let values_ptr = ctx.sp_opnd(-((SIZEOF_VALUE as isize) * (cnt as isize))); + let values_ptr = asm.lea(ctx.sp_opnd(-((SIZEOF_VALUE as isize) * (cnt as isize)))); ctx.stack_pop(cnt); - mov(cb, C_ARG_REGS[0], imm_opnd(0)); - mov(cb, C_ARG_REGS[1], imm_opnd(cnt.try_into().unwrap())); - lea(cb, C_ARG_REGS[2], values_ptr); - call_ptr(cb, REG0, rb_ary_tmp_new_from_values as *const u8); + let ary = asm.ccall( + rb_ary_tmp_new_from_values as *const u8, + vec![ + Opnd::Imm(0), + Opnd::UImm(jit_get_arg(jit, 1).as_u64()), + values_ptr, + ] + ); // Save the array so we can clear it later - push(cb, RAX); - push(cb, RAX); // Alignment - mov(cb, C_ARG_REGS[0], RAX); - mov(cb, C_ARG_REGS[1], imm_opnd(opt)); - call_ptr(cb, REG0, rb_reg_new_ary as *const u8); + asm.cpush(ary); + asm.cpush(ary); // Alignment + + let val = asm.ccall( + rb_reg_new_ary as *const u8, + vec![ + ary, + Opnd::Imm(opt), + ] + ); // The actual regex is in RAX now. Pop the temp array from // rb_ary_tmp_new_from_values into C arg regs so we can clear it - pop(cb, REG1); // Alignment - pop(cb, C_ARG_REGS[0]); + let ary = asm.cpop(); // Alignment + asm.cpop_into(ary); // The value we want to push on the stack is in RAX right now let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, val); // Clear the temp array. 
- call_ptr(cb, REG0, rb_ary_clear as *const u8); + asm.ccall(rb_ary_clear as *const u8, vec![ary]); KeepCompiling } @@ -5446,7 +5384,7 @@ fn gen_toregexp( fn gen_getspecial( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { // This takes two arguments, key and type @@ -5462,60 +5400,59 @@ fn gen_getspecial( // Fetch a "special" backref based on a char encoded by shifting by 1 // Can raise if matchdata uninitialized - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // call rb_backref_get() - add_comment(cb, "rb_backref_get"); - call_ptr(cb, REG0, rb_backref_get as *const u8); - mov(cb, C_ARG_REGS[0], RAX); + asm.comment("rb_backref_get"); + let backref = asm.ccall(rb_backref_get as *const u8, vec![]); let rt_u8: u8 = (rtype >> 1).try_into().unwrap(); - match rt_u8.into() { + let val = match rt_u8.into() { '&' => { - add_comment(cb, "rb_reg_last_match"); - call_ptr(cb, REG0, rb_reg_last_match as *const u8); + asm.comment("rb_reg_last_match"); + asm.ccall(rb_reg_last_match as *const u8, vec![backref]) } '`' => { - add_comment(cb, "rb_reg_match_pre"); - call_ptr(cb, REG0, rb_reg_match_pre as *const u8); + asm.comment("rb_reg_match_pre"); + asm.ccall(rb_reg_match_pre as *const u8, vec![backref]) } '\'' => { - add_comment(cb, "rb_reg_match_post"); - call_ptr(cb, REG0, rb_reg_match_post as *const u8); + asm.comment("rb_reg_match_post"); + asm.ccall(rb_reg_match_post as *const u8, vec![backref]) } '+' => { - add_comment(cb, "rb_reg_match_last"); - call_ptr(cb, REG0, rb_reg_match_last as *const u8); + asm.comment("rb_reg_match_last"); + asm.ccall(rb_reg_match_last as *const u8, vec![backref]) } _ => panic!("invalid back-ref"), - } + }; let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, val); KeepCompiling } else { // Fetch the N-th match from the last backref based on type shifted by 1 // Can raise if matchdata 
uninitialized - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // call rb_backref_get() - add_comment(cb, "rb_backref_get"); - call_ptr(cb, REG0, rb_backref_get as *const u8); + asm.comment("rb_backref_get"); + let backref = asm.ccall(rb_backref_get as *const u8, vec![]); // rb_reg_nth_match((int)(type >> 1), backref); - add_comment(cb, "rb_reg_nth_match"); - mov( - cb, - C_ARG_REGS[0], - imm_opnd((rtype >> 1).try_into().unwrap()), + asm.comment("rb_reg_nth_match"); + let val = asm.ccall( + rb_reg_nth_match as *const u8, + vec![ + Opnd::Imm((rtype >> 1).try_into().unwrap()), + backref, + ] ); - mov(cb, C_ARG_REGS[1], RAX); - call_ptr(cb, REG0, rb_reg_nth_match as *const u8); let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, val); KeepCompiling } @@ -5524,22 +5461,24 @@ fn gen_getspecial( fn gen_getclassvariable( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { // rb_vm_getclassvariable can raise exceptions. 
- jit_prepare_routine_call(jit, ctx, cb, REG0); - - let cfp_iseq_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_ISEQ); - mov(cb, C_ARG_REGS[0], cfp_iseq_opnd); - mov(cb, C_ARG_REGS[1], REG_CFP); - mov(cb, C_ARG_REGS[2], uimm_opnd(jit_get_arg(jit, 0).as_u64())); - mov(cb, C_ARG_REGS[3], uimm_opnd(jit_get_arg(jit, 1).as_u64())); - - call_ptr(cb, REG0, rb_vm_getclassvariable as *const u8); + jit_prepare_routine_call(jit, ctx, asm); + + let val_opnd = asm.ccall( + rb_vm_getclassvariable as *const u8, + vec![ + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ), + CFP, + Opnd::UImm(jit_get_arg(jit, 0).as_u64()), + Opnd::UImm(jit_get_arg(jit, 1).as_u64()), + ], + ); - let stack_top = ctx.stack_push(Type::Unknown); - mov(cb, stack_top, RAX); + let top = ctx.stack_push(Type::Unknown); + asm.mov(top, val_opnd); KeepCompiling } @@ -5547,20 +5486,22 @@ fn gen_getclassvariable( fn gen_setclassvariable( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { // rb_vm_setclassvariable can raise exceptions. 
- jit_prepare_routine_call(jit, ctx, cb, REG0); - - let cfp_iseq_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_ISEQ); - mov(cb, C_ARG_REGS[0], cfp_iseq_opnd); - mov(cb, C_ARG_REGS[1], REG_CFP); - mov(cb, C_ARG_REGS[2], uimm_opnd(jit_get_arg(jit, 0).as_u64())); - mov(cb, C_ARG_REGS[3], ctx.stack_pop(1)); - mov(cb, C_ARG_REGS[4], uimm_opnd(jit_get_arg(jit, 1).as_u64())); - - call_ptr(cb, REG0, rb_vm_setclassvariable as *const u8); + jit_prepare_routine_call(jit, ctx, asm); + + asm.ccall( + rb_vm_setclassvariable as *const u8, + vec![ + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ), + CFP, + Opnd::UImm(jit_get_arg(jit, 0).as_u64()), + ctx.stack_pop(1), + Opnd::UImm(jit_get_arg(jit, 1).as_u64()), + ], + ); KeepCompiling } @@ -5568,7 +5509,7 @@ fn gen_setclassvariable( fn gen_opt_getinlinecache( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { let jump_offset = jit_get_arg(jit, 0); @@ -5591,21 +5532,36 @@ fn gen_opt_getinlinecache( // Cache is keyed on a certain lexical scope. Use the interpreter's cache. let side_exit = get_side_exit(jit, ocb, ctx); + let inline_cache = asm.load(Opnd::const_ptr(ic as *const u8)); + // Call function to verify the cache. It doesn't allocate or call methods. - mov(cb, C_ARG_REGS[0], const_ptr_opnd(ic as *const u8)); - mov(cb, C_ARG_REGS[1], mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP)); - call_ptr(cb, REG0, rb_vm_ic_hit_p as *const u8); + let ret_val = asm.ccall( + rb_vm_ic_hit_p as *const u8, + vec![inline_cache, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP)] + ); + + // Check the result. SysV only specifies one byte for _Bool return values, + // so it's important we only check one bit to ignore the higher bits in the register. + asm.test(ret_val, 1.into()); + asm.jz(counted_exit!(ocb, side_exit, opt_getinlinecache_miss).into()); - // Check the result. _Bool is one byte in SysV. 
- test(cb, AL, AL); - jz_ptr(cb, counted_exit!(ocb, side_exit, opt_getinlinecache_miss)); + let inline_cache = asm.load(Opnd::const_ptr(ic as *const u8)); + + let ic_entry = asm.load(Opnd::mem( + 64, + inline_cache, + RUBY_OFFSET_IC_ENTRY + )); + + let ic_entry_val = asm.load(Opnd::mem( + 64, + ic_entry, + RUBY_OFFSET_ICE_VALUE + )); // Push ic->entry->value - mov(cb, REG0, const_ptr_opnd(ic as *mut u8)); - mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_IC_ENTRY)); let stack_top = ctx.stack_push(Type::Unknown); - mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_ICE_VALUE)); - mov(cb, stack_top, REG0); + asm.store(stack_top, ic_entry_val); } else { // Optimize for single ractor mode. // FIXME: This leaks when st_insert raises NoMemoryError @@ -5617,7 +5573,7 @@ fn gen_opt_getinlinecache( // constants referenced within the current block. assume_stable_constant_names(jit, ocb); - jit_putobject(jit, ctx, cb, unsafe { (*ice).value }); + jit_putobject(jit, ctx, asm, unsafe { (*ice).value }); } // Jump over the code for filling the cache @@ -5629,7 +5585,7 @@ fn gen_opt_getinlinecache( iseq: jit.iseq, idx: jump_idx, }, - cb, + asm, ); EndBlock } @@ -5640,11 +5596,11 @@ fn gen_opt_getinlinecache( fn gen_getblockparamproxy( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { if !jit_at_current_insn(jit) { - defer_compilation(jit, ctx, cb, ocb); + defer_compilation(jit, ctx, asm, ocb); return EndBlock; } @@ -5667,79 +5623,64 @@ fn gen_getblockparamproxy( } // Load environment pointer EP from CFP - gen_get_ep(cb, REG0, level); + let ep_opnd = gen_get_ep(asm, level); // Bail when VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM) is non zero - let flag_check = mem_opnd( + let flag_check = Opnd::mem( 64, - REG0, + ep_opnd, (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_FLAGS as i32), ); - test( - cb, - flag_check, - uimm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()), - ); - jnz_ptr(cb, counted_exit!(ocb, 
side_exit, gbpp_block_param_modified)); + asm.test(flag_check, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()); + asm.jnz(counted_exit!(ocb, side_exit, gbpp_block_param_modified).into()); // Load the block handler for the current frame // note, VM_ASSERT(VM_ENV_LOCAL_P(ep)) - mov( - cb, - REG0, - mem_opnd( - 64, - REG0, - (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32), - ), + let block_handler = asm.load( + Opnd::mem(64, ep_opnd, (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32)) ); // Specialize compilation for the case where no block handler is present if comptime_handler.as_u64() == 0 { // Bail if there is a block handler - cmp(cb, REG0, uimm_opnd(0)); + asm.cmp(block_handler, Opnd::UImm(0)); jit_chain_guard( JCC_JNZ, jit, &starting_context, - cb, + asm, ocb, SEND_MAX_DEPTH, side_exit, ); - jit_putobject(jit, ctx, cb, Qnil); + jit_putobject(jit, ctx, asm, Qnil); } else { // Block handler is a tagged pointer. Look at the tag. 0x03 is from VM_BH_ISEQ_BLOCK_P(). - and(cb, REG0_8, imm_opnd(0x3)); + let block_handler = asm.and(block_handler, 0x3.into()); // Bail unless VM_BH_ISEQ_BLOCK_P(bh). This also checks for null. - cmp(cb, REG0_8, imm_opnd(0x1)); + asm.cmp(block_handler, 0x1.into()); jit_chain_guard( JCC_JNZ, jit, &starting_context, - cb, + asm, ocb, SEND_MAX_DEPTH, side_exit, ); // Push rb_block_param_proxy. It's a root, so no need to use jit_mov_gc_ptr. 
- mov( - cb, - REG0, - const_ptr_opnd(unsafe { rb_block_param_proxy }.as_ptr()), - ); assert!(!unsafe { rb_block_param_proxy }.special_const_p()); let top = ctx.stack_push(Type::Unknown); - mov(cb, top, REG0); + asm.mov(top, Opnd::const_ptr(unsafe { rb_block_param_proxy }.as_ptr())); } - jump_to_next_insn(jit, ctx, cb, ocb); + jump_to_next_insn(jit, ctx, asm, ocb); EndBlock } @@ -5747,91 +5688,82 @@ fn gen_getblockparamproxy( fn gen_getblockparam( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) -> CodegenStatus { // EP level let level = jit_get_arg(jit, 1).as_u32(); // Save the PC and SP because we might allocate - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // A mirror of the interpreter code. Checking for the case // where it's pushing rb_block_param_proxy. let side_exit = get_side_exit(jit, ocb, ctx); // Load environment pointer EP from CFP - gen_get_ep(cb, REG1, level); + let ep_opnd = gen_get_ep(asm, level); // Bail when VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM) is non zero - let flag_check = mem_opnd( - 64, - REG1, - (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_FLAGS as i32), - ); + let flag_check = Opnd::mem(64, ep_opnd, (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_FLAGS as i32)); // FIXME: This is testing bits in the same place that the WB check is testing. // We should combine these at some point - test( - cb, - flag_check, - uimm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()), - ); + asm.test(flag_check, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()); // If the frame flag has been modified, then the actual proc value is // already in the EP and we should just use the value. - let frame_flag_modified = cb.new_label("frame_flag_modified".to_string()); - jnz_label(cb, frame_flag_modified); + let frame_flag_modified = asm.new_label("frame_flag_modified"); + asm.jnz(frame_flag_modified); // This instruction writes the block handler to the EP. 
If we need to // fire a write barrier for the write, then exit (we'll let the // interpreter handle it so it can fire the write barrier). // flags & VM_ENV_FLAG_WB_REQUIRED - let flags_opnd = mem_opnd( + let flags_opnd = Opnd::mem( 64, - REG1, + ep_opnd, SIZEOF_VALUE as i32 * VM_ENV_DATA_INDEX_FLAGS as i32, ); - test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED.into())); + asm.test(flags_opnd, VM_ENV_FLAG_WB_REQUIRED.into()); // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0 - jnz_ptr(cb, side_exit); - - // Load the block handler for the current frame - // note, VM_ASSERT(VM_ENV_LOCAL_P(ep)) - mov( - cb, - C_ARG_REGS[1], - mem_opnd( - 64, - REG1, - (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32), - ), - ); + asm.jnz(side_exit.into()); // Convert the block handler in to a proc // call rb_vm_bh_to_procval(const rb_execution_context_t *ec, VALUE block_handler) - mov(cb, C_ARG_REGS[0], REG_EC); - call_ptr(cb, REG0, rb_vm_bh_to_procval as *const u8); + let proc = asm.ccall( + rb_vm_bh_to_procval as *const u8, + vec![ + EC, + // The block handler for the current frame + // note, VM_ASSERT(VM_ENV_LOCAL_P(ep)) + Opnd::mem( + 64, + ep_opnd, + (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32), + ), + ] + ); // Load environment pointer EP from CFP (again) - gen_get_ep(cb, REG1, level); - - // Set the frame modified flag - or(cb, flag_check, uimm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into())); + let ep_opnd = gen_get_ep(asm, level); // Write the value at the environment pointer let idx = jit_get_arg(jit, 0).as_i32(); let offs = -(SIZEOF_VALUE as i32 * idx); - mov(cb, mem_opnd(64, REG1, offs), RAX); + asm.mov(Opnd::mem(64, ep_opnd, offs), proc); - cb.write_label(frame_flag_modified); + // Set the frame modified flag + let flag_check = Opnd::mem(64, ep_opnd, (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_FLAGS as i32)); + let modified_flag = asm.or(flag_check, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()); + asm.store(flag_check, modified_flag); + + 
asm.write_label(frame_flag_modified); // Push the proc on the stack let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, RAX, mem_opnd(64, REG1, offs)); - mov(cb, stack_ret, RAX); - - cb.link_labels(); + let ep_opnd = gen_get_ep(asm, level); + asm.mov(stack_ret, Opnd::mem(64, ep_opnd, offs)); KeepCompiling } @@ -5839,41 +5771,35 @@ fn gen_getblockparam( fn gen_invokebuiltin( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let bf: *const rb_builtin_function = jit_get_arg(jit, 0).as_ptr(); let bf_argc: usize = unsafe { (*bf).argc }.try_into().expect("non negative argc"); // ec, self, and arguments - if bf_argc + 2 > C_ARG_REGS.len() { + if bf_argc + 2 > C_ARG_OPNDS.len() { return CantCompile; } // If the calls don't allocate, do they need up to date PC, SP? - jit_prepare_routine_call(jit, ctx, cb, REG0); + jit_prepare_routine_call(jit, ctx, asm); // Call the builtin func (ec, recv, arg1, arg2, ...) - mov(cb, C_ARG_REGS[0], REG_EC); - mov( - cb, - C_ARG_REGS[1], - mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF), - ); + let mut args = vec![EC, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)]; // Copy arguments from locals for i in 0..bf_argc { let stack_opnd = ctx.stack_opnd((bf_argc - i - 1) as i32); - let c_arg_reg = C_ARG_REGS[2 + i]; - mov(cb, c_arg_reg, stack_opnd); + args.push(stack_opnd); } - call_ptr(cb, REG0, unsafe { (*bf).func_ptr } as *const u8); + let val = asm.ccall(unsafe { (*bf).func_ptr } as *const u8, args); // Push the return value ctx.stack_pop(bf_argc); let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, val); KeepCompiling } @@ -5884,7 +5810,7 @@ fn gen_invokebuiltin( fn gen_opt_invokebuiltin_delegate( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, _ocb: &mut OutlinedCb, ) -> CodegenStatus { let bf: *const rb_builtin_function = jit_get_arg(jit, 0).as_ptr(); @@ -5892,40 +5818,33 @@ fn 
gen_opt_invokebuiltin_delegate( let start_index = jit_get_arg(jit, 1).as_i32(); // ec, self, and arguments - if bf_argc + 2 > (C_ARG_REGS.len() as i32) { + if bf_argc + 2 > (C_ARG_OPNDS.len() as i32) { return CantCompile; } // If the calls don't allocate, do they need up to date PC, SP? - jit_prepare_routine_call(jit, ctx, cb, REG0); - - if bf_argc > 0 { - // Load environment pointer EP from CFP - mov(cb, REG0, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP)); - } + jit_prepare_routine_call(jit, ctx, asm); // Call the builtin func (ec, recv, arg1, arg2, ...) - mov(cb, C_ARG_REGS[0], REG_EC); - mov( - cb, - C_ARG_REGS[1], - mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF), - ); + let mut args = vec![EC, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)]; // Copy arguments from locals - for i in 0..bf_argc { - let table_size = unsafe { get_iseq_body_local_table_size(jit.iseq) }; - let offs: i32 = -(table_size as i32) - (VM_ENV_DATA_SIZE as i32) + 1 + start_index + i; - let local_opnd = mem_opnd(64, REG0, offs * (SIZEOF_VALUE as i32)); - let offs: usize = (i + 2) as usize; - let c_arg_reg = C_ARG_REGS[offs]; - mov(cb, c_arg_reg, local_opnd); + if bf_argc > 0 { + // Load environment pointer EP from CFP + let ep = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP)); + + for i in 0..bf_argc { + let table_size = unsafe { get_iseq_body_local_table_size(jit.iseq) }; + let offs: i32 = -(table_size as i32) - (VM_ENV_DATA_SIZE as i32) + 1 + start_index + i; + let local_opnd = Opnd::mem(64, ep, offs * (SIZEOF_VALUE as i32)); + args.push(local_opnd); + } } - call_ptr(cb, REG0, unsafe { (*bf).func_ptr } as *const u8); + let val = asm.ccall(unsafe { (*bf).func_ptr } as *const u8, args); // Push the return value let stack_ret = ctx.stack_push(Type::Unknown); - mov(cb, stack_ret, RAX); + asm.mov(stack_ret, val); KeepCompiling } @@ -5951,6 +5870,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_setn => Some(gen_setn), YARVINSN_topn => Some(gen_topn), YARVINSN_adjuststack => Some(gen_adjuststack), + 
YARVINSN_getlocal => Some(gen_getlocal), YARVINSN_getlocal_WC_0 => Some(gen_getlocal_wc0), YARVINSN_getlocal_WC_1 => Some(gen_getlocal_wc1), @@ -5974,6 +5894,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { YARVINSN_opt_str_freeze => Some(gen_opt_str_freeze), YARVINSN_opt_str_uminus => Some(gen_opt_str_uminus), YARVINSN_splatarray => Some(gen_splatarray), + YARVINSN_concatarray => Some(gen_concatarray), YARVINSN_newrange => Some(gen_newrange), YARVINSN_putstring => Some(gen_putstring), YARVINSN_expandarray => Some(gen_expandarray), @@ -6035,7 +5956,7 @@ fn get_gen_fn(opcode: VALUE) -> Option { type MethodGenFn = fn( jit: &mut JITState, ctx: &mut Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ci: *const rb_callinfo, cme: *const rb_callable_method_entry_t, @@ -6072,7 +5993,7 @@ pub struct CodegenGlobals { inline_frozen_bytes: usize, // Methods for generating code for hardcoded (usually C) methods - method_codegen_table: HashMap, + method_codegen_table: HashMap, } /// For implementing global code invalidation. 
A position in the inline @@ -6138,8 +6059,8 @@ impl CodegenGlobals { half_size ); - let cb = CodeBlock::new(first_half); - let ocb = OutlinedCb::wrap(CodeBlock::new(second_half)); + let cb = CodeBlock::new(first_half, false); + let ocb = OutlinedCb::wrap(CodeBlock::new(second_half, true)); (cb, ocb) }; @@ -6165,7 +6086,7 @@ impl CodegenGlobals { let mut codegen_globals = CodegenGlobals { inline_cb: cb, outlined_cb: ocb, - leave_exit_code: leave_exit_code, + leave_exit_code, stub_exit_code: stub_exit_code, outline_full_cfunc_return_pos: cfunc_exit_code, global_inval_patches: Vec::new(), @@ -6291,7 +6212,7 @@ impl CodegenGlobals { CodegenGlobals::get_instance().outline_full_cfunc_return_pos } - pub fn look_up_codegen_method(method_serial: u64) -> Option { + pub fn look_up_codegen_method(method_serial: usize) -> Option { let table = &CodegenGlobals::get_instance().method_codegen_table; let option_ref = table.get(&method_serial); @@ -6306,7 +6227,7 @@ impl CodegenGlobals { mod tests { use super::*; - fn setup_codegen() -> (JITState, Context, CodeBlock, OutlinedCb) { + fn setup_codegen() -> (JITState, Context, Assembler, CodeBlock, OutlinedCb) { let blockid = BlockId { iseq: ptr::null(), idx: 0, @@ -6316,6 +6237,7 @@ mod tests { return ( JITState::new(&block), Context::new(), + Assembler::new(), CodeBlock::new_dummy(256 * 1024), OutlinedCb::wrap(CodeBlock::new_dummy(256 * 1024)), ); @@ -6330,29 +6252,31 @@ mod tests { #[test] fn test_gen_exit() { - let (_, ctx, mut cb, _) = setup_codegen(); - gen_exit(0 as *mut VALUE, &ctx, &mut cb); + let (_, ctx, mut asm, mut cb, _) = setup_codegen(); + gen_exit(0 as *mut VALUE, &ctx, &mut asm); + asm.compile(&mut cb); assert!(cb.get_write_pos() > 0); } #[test] fn test_get_side_exit() { - let (mut jit, ctx, _, mut ocb) = setup_codegen(); - get_side_exit(&mut jit, &mut ocb, &ctx); + let (mut jit, ctx, _, _, mut ocb) = setup_codegen(); + get_side_exit(&mut jit, &mut ocb, &ctx); assert!(ocb.unwrap().get_write_pos() > 0); } #[test] fn 
test_gen_check_ints() { - let (_, _ctx, mut cb, mut ocb) = setup_codegen(); + let (_, _ctx, mut asm, mut cb, mut ocb) = setup_codegen(); let side_exit = ocb.unwrap().get_write_ptr(); - gen_check_ints(&mut cb, side_exit); + gen_check_ints(&mut asm, side_exit); } #[test] fn test_gen_nop() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); - let status = gen_nop(&mut jit, &mut context, &mut cb, &mut ocb); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); + let status = gen_nop(&mut jit, &mut context, &mut asm, &mut ocb); + asm.compile(&mut cb); assert_eq!(status, KeepCompiling); assert_eq!(context.diff(&Context::new()), 0); @@ -6361,9 +6285,9 @@ mod tests { #[test] fn test_gen_pop() { - let (mut jit, _, mut cb, mut ocb) = setup_codegen(); + let (mut jit, _, mut asm, mut cb, mut ocb) = setup_codegen(); let mut context = Context::new_with_stack_size(1); - let status = gen_pop(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_pop(&mut jit, &mut context, &mut asm, &mut ocb); assert_eq!(status, KeepCompiling); assert_eq!(context.diff(&Context::new()), 0); @@ -6371,9 +6295,9 @@ mod tests { #[test] fn test_gen_dup() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); context.stack_push(Type::Fixnum); - let status = gen_dup(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_dup(&mut jit, &mut context, &mut asm, &mut ocb); assert_eq!(status, KeepCompiling); @@ -6381,12 +6305,13 @@ mod tests { assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(0))); assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(1))); + asm.compile(&mut cb); assert!(cb.get_write_pos() > 0); // Write some movs } #[test] fn test_gen_dupn() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); context.stack_push(Type::Fixnum); context.stack_push(Type::Flonum); 
@@ -6394,7 +6319,7 @@ mod tests { let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; jit.pc = pc; - let status = gen_dupn(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_dupn(&mut jit, &mut context, &mut asm, &mut ocb); assert_eq!(status, KeepCompiling); @@ -6403,16 +6328,18 @@ mod tests { assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(1))); assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(0))); + // TODO: this is writing zero bytes on x86. Why? + asm.compile(&mut cb); assert!(cb.get_write_pos() > 0); // Write some movs } #[test] fn test_gen_swap() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); context.stack_push(Type::Fixnum); context.stack_push(Type::Flonum); - let status = gen_swap(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_swap(&mut jit, &mut context, &mut asm, &mut ocb); let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0)); let (_, tmp_type_next) = context.get_opnd_mapping(StackOpnd(1)); @@ -6424,58 +6351,61 @@ mod tests { #[test] fn test_putnil() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); - let status = gen_putnil(&mut jit, &mut context, &mut cb, &mut ocb); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); + let status = gen_putnil(&mut jit, &mut context, &mut asm, &mut ocb); let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0)); assert_eq!(status, KeepCompiling); assert_eq!(tmp_type_top, Type::Nil); + asm.compile(&mut cb); assert!(cb.get_write_pos() > 0); } #[test] fn test_putobject_qtrue() { // Test gen_putobject with Qtrue - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); let mut value_array: [u64; 2] = [0, Qtrue.into()]; let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; jit.pc = pc; - let status = gen_putobject(&mut jit, &mut 
context, &mut cb, &mut ocb); + let status = gen_putobject(&mut jit, &mut context, &mut asm, &mut ocb); let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0)); assert_eq!(status, KeepCompiling); assert_eq!(tmp_type_top, Type::True); + asm.compile(&mut cb); assert!(cb.get_write_pos() > 0); } #[test] fn test_putobject_fixnum() { // Test gen_putobject with a Fixnum to test another conditional branch - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); // The Fixnum 7 is encoded as 7 * 2 + 1, or 15 let mut value_array: [u64; 2] = [0, 15]; let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; jit.pc = pc; - let status = gen_putobject(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_putobject(&mut jit, &mut context, &mut asm, &mut ocb); let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0)); assert_eq!(status, KeepCompiling); assert_eq!(tmp_type_top, Type::Fixnum); + asm.compile(&mut cb); assert!(cb.get_write_pos() > 0); } #[test] fn test_int2fix() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); jit.opcode = YARVINSN_putobject_INT2FIX_0_.as_usize(); - let status = gen_putobject_int2fix(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_putobject_int2fix(&mut jit, &mut context, &mut asm, &mut ocb); let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0)); @@ -6486,16 +6416,17 @@ mod tests { #[test] fn test_putself() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); - let status = gen_putself(&mut jit, &mut context, &mut cb, &mut ocb); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); + let status = gen_putself(&mut jit, &mut context, &mut asm, &mut ocb); assert_eq!(status, KeepCompiling); + asm.compile(&mut cb); assert!(cb.get_write_pos() > 0); } #[test] fn test_gen_setn() { - let (mut jit, mut 
context, mut cb, mut ocb) = setup_codegen(); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); context.stack_push(Type::Fixnum); context.stack_push(Type::Flonum); context.stack_push(Type::CString); @@ -6504,7 +6435,7 @@ mod tests { let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; jit.pc = pc; - let status = gen_setn(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_setn(&mut jit, &mut context, &mut asm, &mut ocb); assert_eq!(status, KeepCompiling); @@ -6512,12 +6443,13 @@ mod tests { assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(1))); assert_eq!(Type::CString, context.get_opnd_type(StackOpnd(0))); + asm.compile(&mut cb); assert!(cb.get_write_pos() > 0); } #[test] fn test_gen_topn() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); context.stack_push(Type::Flonum); context.stack_push(Type::CString); @@ -6525,7 +6457,7 @@ mod tests { let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; jit.pc = pc; - let status = gen_topn(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_topn(&mut jit, &mut context, &mut asm, &mut ocb); assert_eq!(status, KeepCompiling); @@ -6533,12 +6465,13 @@ mod tests { assert_eq!(Type::CString, context.get_opnd_type(StackOpnd(1))); assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(0))); + asm.compile(&mut cb); assert!(cb.get_write_pos() > 0); // Write some movs } #[test] fn test_gen_adjuststack() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); context.stack_push(Type::Flonum); context.stack_push(Type::CString); context.stack_push(Type::Fixnum); @@ -6547,20 +6480,21 @@ mod tests { let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; jit.pc = pc; - let status = gen_adjuststack(&mut jit, &mut context, &mut cb, &mut ocb); + let status = gen_adjuststack(&mut jit, 
&mut context, &mut asm, &mut ocb); assert_eq!(status, KeepCompiling); assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(0))); + asm.compile(&mut cb); assert!(cb.get_write_pos() == 0); // No instructions written } #[test] fn test_gen_leave() { - let (mut jit, mut context, mut cb, mut ocb) = setup_codegen(); + let (mut jit, mut context, mut asm, mut cb, mut ocb) = setup_codegen(); // Push return value context.stack_push(Type::Fixnum); - gen_leave(&mut jit, &mut context, &mut cb, &mut ocb); + gen_leave(&mut jit, &mut context, &mut asm, &mut ocb); } } diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 8242c9477ea946..fa82dcc30892b0 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -1,11 +1,14 @@ -use crate::asm::x86_64::*; +//use crate::asm::x86_64::*; use crate::asm::*; +use crate::backend::ir::*; use crate::codegen::*; use crate::virtualmem::CodePtr; use crate::cruby::*; use crate::options::*; use crate::stats::*; use crate::utils::*; +#[cfg(feature="disasm")] +use crate::disasm::*; use core::ffi::c_void; use std::cell::*; use std::hash::{Hash, Hasher}; @@ -66,7 +69,7 @@ impl Type { } else if val.flonum_p() { Type::Flonum } else { - unreachable!() + unreachable!("Illegal value: {:?}", val) } } else { // Core.rs can't reference rb_cString because it's linked by Rust-only tests. 
@@ -126,6 +129,60 @@ impl Type { } } + /// Returns an Option with the T_ value type if it is known, otherwise None + pub fn known_value_type(&self) -> Option { + match self { + Type::Nil => Some(RUBY_T_NIL), + Type::True => Some(RUBY_T_TRUE), + Type::False => Some(RUBY_T_FALSE), + Type::Fixnum => Some(RUBY_T_FIXNUM), + Type::Flonum => Some(RUBY_T_FLOAT), + Type::Array => Some(RUBY_T_ARRAY), + Type::Hash => Some(RUBY_T_HASH), + Type::ImmSymbol | Type::HeapSymbol => Some(RUBY_T_SYMBOL), + Type::TString | Type::CString => Some(RUBY_T_STRING), + Type::Unknown | Type::UnknownImm | Type::UnknownHeap => None + } + } + + /// Returns an Option with the class if it is known, otherwise None + pub fn known_class(&self) -> Option { + unsafe { + match self { + Type::Nil => Some(rb_cNilClass), + Type::True => Some(rb_cTrueClass), + Type::False => Some(rb_cFalseClass), + Type::Fixnum => Some(rb_cInteger), + Type::Flonum => Some(rb_cFloat), + Type::ImmSymbol | Type::HeapSymbol => Some(rb_cSymbol), + Type::CString => Some(rb_cString), + _ => None, + } + } + } + + /// Returns an Option with the exact value if it is known, otherwise None + #[allow(unused)] // not yet used + pub fn known_exact_value(&self) -> Option { + match self { + Type::Nil => Some(Qnil), + Type::True => Some(Qtrue), + Type::False => Some(Qfalse), + _ => None, + } + } + + /// Returns an Option with the truthiness of the value if it is known, otherwise None + pub fn known_truthy(&self) -> Option { + match self { + Type::Nil => Some(false), + Type::False => Some(false), + Type::UnknownHeap => Some(true), + Type::Unknown | Type::UnknownImm => None, + _ => Some(true) + } + } + /// Compute a difference between two value types /// Returns 0 if the two are the same /// Returns > 0 if different but compatible @@ -198,7 +255,7 @@ pub enum InsnOpnd { /// Code generation context /// Contains information we can use to specialize/optimize code /// There are a lot of context objects so we try to keep the size small. 
-#[derive(Copy, Clone, Default, Debug)] +#[derive(Copy, Clone, Default, PartialEq, Debug)] pub struct Context { // Number of values currently on the temporary stack stack_size: u16, @@ -244,7 +301,7 @@ pub enum BranchShape { // Branch code generation function signature type BranchGenFn = - fn(cb: &mut CodeBlock, target0: CodePtr, target1: Option, shape: BranchShape) -> (); + fn(cb: &mut Assembler, target0: CodePtr, target1: Option, shape: BranchShape) -> (); /// Store info about an outgoing branch in a code segment /// Note: care must be taken to minimize the size of branch objects @@ -334,7 +391,7 @@ pub struct Block { // FIXME: should these be code pointers instead? // Offsets for GC managed objects in the mainline code block - gc_object_offsets: Vec, + gc_obj_offsets: Vec, // CME dependencies of this block, to help to remove all pointers to this // block in the system. @@ -526,7 +583,7 @@ pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) { } // Walk over references to objects in generated code. - for offset in &block.gc_object_offsets { + for offset in &block.gc_obj_offsets { let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(); // Creating an unaligned pointer is well defined unlike in C. let value_address = value_address as *const VALUE; @@ -584,7 +641,7 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) { } // Walk over references to objects in generated code. - for offset in &block.gc_object_offsets { + for offset in &block.gc_obj_offsets { let offset_to_value = offset.as_usize(); let value_code_ptr = cb.get_ptr(offset_to_value); let value_ptr: *const u8 = value_code_ptr.raw_ptr(); @@ -752,7 +809,7 @@ fn add_block_version(blockref: &BlockRef, cb: &CodeBlock) { } // Run write barriers for all objects in generated code. 
- for offset in &block.gc_object_offsets { + for offset in &block.gc_obj_offsets { let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(); // Creating an unaligned pointer is well defined unlike in C. let value_address: *const VALUE = value_address.cast(); @@ -788,7 +845,7 @@ impl Block { end_addr: None, incoming: Vec::new(), outgoing: Vec::new(), - gc_object_offsets: Vec::new(), + gc_obj_offsets: Vec::new(), cme_dependencies: Vec::new(), entry_exit: None, }; @@ -849,8 +906,9 @@ impl Block { self.end_idx = end_idx; } - pub fn add_gc_object_offset(self: &mut Block, ptr_offset: u32) { - self.gc_object_offsets.push(ptr_offset); + pub fn add_gc_obj_offset(self: &mut Block, ptr_offset: u32) { + self.gc_obj_offsets.push(ptr_offset); + incr_counter!(num_gc_obj_refs); } /// Instantiate a new CmeDependency struct and add it to the list of @@ -914,15 +972,15 @@ impl Context { } /// Get an operand for the adjusted stack pointer address - pub fn sp_opnd(&self, offset_bytes: isize) -> X86Opnd { + pub fn sp_opnd(&self, offset_bytes: isize) -> Opnd { let offset = ((self.sp_offset as isize) * (SIZEOF_VALUE as isize)) + offset_bytes; let offset = offset as i32; - return mem_opnd(64, REG_SP, offset); + return Opnd::mem(64, SP, offset); } /// Push one new value on the temp stack with an explicit mapping /// Return a pointer to the new stack top - pub fn stack_push_mapping(&mut self, (mapping, temp_type): (TempMapping, Type)) -> X86Opnd { + pub fn stack_push_mapping(&mut self, (mapping, temp_type): (TempMapping, Type)) -> Opnd { // If type propagation is disabled, store no types if get_option!(no_type_prop) { return self.stack_push_mapping((mapping, Type::Unknown)); @@ -945,22 +1003,22 @@ impl Context { // SP points just above the topmost value let offset = ((self.sp_offset as i32) - 1) * (SIZEOF_VALUE as i32); - return mem_opnd(64, REG_SP, offset); + return Opnd::mem(64, SP, offset); } /// Push one new value on the temp stack /// Return a pointer to the new stack top 
- pub fn stack_push(&mut self, val_type: Type) -> X86Opnd { + pub fn stack_push(&mut self, val_type: Type) -> Opnd { return self.stack_push_mapping((MapToStack, val_type)); } /// Push the self value on the stack - pub fn stack_push_self(&mut self) -> X86Opnd { + pub fn stack_push_self(&mut self) -> Opnd { return self.stack_push_mapping((MapToSelf, Type::Unknown)); } /// Push a local variable on the stack - pub fn stack_push_local(&mut self, local_idx: usize) -> X86Opnd { + pub fn stack_push_local(&mut self, local_idx: usize) -> Opnd { if local_idx >= MAX_LOCAL_TYPES { return self.stack_push(Type::Unknown); } @@ -970,12 +1028,12 @@ impl Context { // Pop N values off the stack // Return a pointer to the stack top before the pop operation - pub fn stack_pop(&mut self, n: usize) -> X86Opnd { + pub fn stack_pop(&mut self, n: usize) -> Opnd { assert!(n <= self.stack_size.into()); // SP points just above the topmost value let offset = ((self.sp_offset as i32) - 1) * (SIZEOF_VALUE as i32); - let top = mem_opnd(64, REG_SP, offset); + let top = Opnd::mem(64, SP, offset); // Clear the types of the popped values for i in 0..n { @@ -994,10 +1052,10 @@ impl Context { } /// Get an operand pointing to a slot on the temp stack - pub fn stack_opnd(&self, idx: i32) -> X86Opnd { + pub fn stack_opnd(&self, idx: i32) -> Opnd { // SP points just above the topmost value let offset = ((self.sp_offset as i32) - 1 - idx) * (SIZEOF_VALUE as i32); - let opnd = mem_opnd(64, REG_SP, offset); + let opnd = Opnd::mem(64, SP, offset); return opnd; } @@ -1362,9 +1420,6 @@ fn gen_block_series_body( .incoming .push(last_branchref.clone()); - // This block should immediately follow the last branch - assert!(new_blockref.borrow().start_addr == last_branch.end_addr); - // Track the block batch.push(new_blockref.clone()); @@ -1372,6 +1427,20 @@ fn gen_block_series_body( last_blockref = new_blockref; } + #[cfg(feature = "disasm")] + { + // If dump_iseq_disasm is active, see if this iseq's location matches 
the given substring. + // If so, we print the new blocks to the console. + if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() { + let iseq_location = iseq_get_location(blockid.iseq); + if iseq_location.contains(substr) { + let last_block = last_blockref.borrow(); + println!("Compiling {} block(s) for {}, ISEQ offsets [{}, {})", batch.len(), iseq_location, blockid.idx, last_block.end_idx); + println!("{}", disasm_iseq_insn_range(blockid.iseq, blockid.idx, last_block.end_idx)); + } + } + } + Some(first_block) } @@ -1439,12 +1508,19 @@ fn regenerate_branch(cb: &mut CodeBlock, branch: &mut Branch) { // Rewrite the branch assert!(branch.dst_addrs[0].is_some()); cb.set_write_ptr(branch.start_addr.unwrap()); + + let mut asm = Assembler::new(); + asm.comment("regenerate_branch"); + (branch.gen_fn)( - cb, + &mut asm, branch.dst_addrs[0].unwrap(), branch.dst_addrs[1], branch.shape, ); + + asm.compile(cb); + branch.end_addr = Some(cb.get_write_ptr()); // The block may have shrunk after the branch is rewritten @@ -1470,7 +1546,7 @@ fn regenerate_branch(cb: &mut CodeBlock, branch: &mut Branch) { } /// Create a new outgoing branch entry for a block -fn make_branch_entry(block: BlockRef, src_ctx: &Context, gen_fn: BranchGenFn) -> BranchRef { +fn make_branch_entry(block: &BlockRef, src_ctx: &Context, gen_fn: BranchGenFn) -> BranchRef { let branch = Branch { // Block this is attached to block: block.clone(), @@ -1506,19 +1582,25 @@ fn make_branch_entry(block: BlockRef, src_ctx: &Context, gen_fn: BranchGenFn) -> /// Generated code calls this function with the SysV calling convention. /// See [get_branch_target]. -extern "sysv64" fn branch_stub_hit( - branch_ptr: *const c_void, - target_idx: u32, - ec: EcPtr, -) -> *const u8 { - with_vm_lock(src_loc!(), || { - branch_stub_hit_body(branch_ptr, target_idx, ec) - }) +c_callable! 
{ + fn branch_stub_hit( + branch_ptr: *const c_void, + target_idx: u32, + ec: EcPtr, + ) -> *const u8 { + with_vm_lock(src_loc!(), || { + branch_stub_hit_body(branch_ptr, target_idx, ec) + }) + } } /// Called by the generated code when a branch stub is executed /// Triggers compilation of branches and code patching fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -> *const u8 { + if get_option!(dump_insns) { + println!("branch_stub_hit"); + } + assert!(!branch_ptr.is_null()); //branch_ptr is actually: @@ -1696,15 +1778,23 @@ fn get_branch_target( // This means the branch stub owns its own reference to the branch let branch_ptr: *const RefCell = BranchRef::into_raw(branchref.clone()); - // Call branch_stub_hit(branch_idx, target_idx, ec) - mov(ocb, C_ARG_REGS[2], REG_EC); - mov(ocb, C_ARG_REGS[1], uimm_opnd(target_idx as u64)); - mov(ocb, C_ARG_REGS[0], const_ptr_opnd(branch_ptr as *const u8)); - call_ptr(ocb, REG0, branch_stub_hit as *mut u8); + let mut asm = Assembler::new(); + + // Call branch_stub_hit(branch_ptr, target_idx, ec) + let jump_addr = asm.ccall( + branch_stub_hit as *mut u8, + vec![ + Opnd::const_ptr(branch_ptr as *const u8), + Opnd::UImm(target_idx as u64), + EC, + ] + ); // Jump to the address returned by the // branch_stub_hit call - jmp_rm(ocb, RAX); + asm.jmp_opnd(jump_addr); + + asm.compile(ocb); if ocb.has_dropped_bytes() { None // No space @@ -1713,10 +1803,39 @@ fn get_branch_target( } } +impl Assembler +{ + // Mark the start position of a patchable branch in the machine code + fn mark_branch_start(&mut self, branchref: &BranchRef) + { + // We need to create our own branch rc object + // so that we can move it into the closure below + let branchref = branchref.clone(); + + self.pos_marker(move |code_ptr| { + let mut branch = branchref.borrow_mut(); + branch.start_addr = Some(code_ptr); + }); + } + + // Mark the end position of a patchable branch in the machine code + fn mark_branch_end(&mut self, branchref: &BranchRef) 
+ { + // We need to create our own branch rc object + // so that we can move it into the closure below + let branchref = branchref.clone(); + + self.pos_marker(move |code_ptr| { + let mut branch = branchref.borrow_mut(); + branch.end_addr = Some(code_ptr); + }); + } +} + pub fn gen_branch( jit: &JITState, src_ctx: &Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, target0: BlockId, ctx0: &Context, @@ -1724,7 +1843,7 @@ pub fn gen_branch( ctx1: Option<&Context>, gen_fn: BranchGenFn, ) { - let branchref = make_branch_entry(jit.get_block(), src_ctx, gen_fn); + let branchref = make_branch_entry(&jit.get_block(), src_ctx, gen_fn); // Get the branch targets or stubs let dst_addr0 = get_branch_target(target0, ctx0, &branchref, 0, ocb); @@ -1750,12 +1869,13 @@ pub fn gen_branch( }; // Call the branch generation function - branch.start_addr = Some(cb.get_write_ptr()); - regenerate_branch(cb, &mut branch); + asm.mark_branch_start(&branchref); + gen_fn(asm, branch.dst_addrs[0].unwrap(), branch.dst_addrs[1], BranchShape::Default); + asm.mark_branch_end(&branchref); } fn gen_jump_branch( - cb: &mut CodeBlock, + asm: &mut Assembler, target0: CodePtr, _target1: Option, shape: BranchShape, @@ -1765,12 +1885,12 @@ fn gen_jump_branch( } if shape == BranchShape::Default { - jmp_ptr(cb, target0); + asm.jmp(target0.into()); } } -pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, cb: &mut CodeBlock) { - let branchref = make_branch_entry(jit.get_block(), ctx, gen_jump_branch); +pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, asm: &mut Assembler) { + let branchref = make_branch_entry(&jit.get_block(), ctx, gen_jump_branch); let mut branch = branchref.borrow_mut(); branch.targets[0] = Some(target0); @@ -1789,16 +1909,18 @@ pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, cb: &mut branch.shape = BranchShape::Default; // Call the branch generation function - branch.start_addr = Some(cb.get_write_ptr()); - 
gen_jump_branch(cb, branch.dst_addrs[0].unwrap(), None, BranchShape::Default); - branch.end_addr = Some(cb.get_write_ptr()); + asm.mark_branch_start(&branchref); + gen_jump_branch(asm, branch.dst_addrs[0].unwrap(), None, BranchShape::Default); + asm.mark_branch_end(&branchref); } else { // This None target address signals gen_block_series() to compile the // target block right after this one (fallthrough). branch.dst_addrs[0] = None; branch.shape = BranchShape::Next0; - branch.start_addr = Some(cb.get_write_ptr()); - branch.end_addr = Some(cb.get_write_ptr()); + + // The branch is effectively empty (a noop) + asm.mark_branch_start(&branchref); + asm.mark_branch_end(&branchref); } } @@ -1806,7 +1928,7 @@ pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, cb: &mut pub fn defer_compilation( jit: &JITState, cur_ctx: &Context, - cb: &mut CodeBlock, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) { if cur_ctx.chain_depth != 0 { @@ -1821,7 +1943,7 @@ pub fn defer_compilation( next_ctx.chain_depth += 1; let block_rc = jit.get_block(); - let branch_rc = make_branch_entry(jit.get_block(), cur_ctx, gen_jump_branch); + let branch_rc = make_branch_entry(&jit.get_block(), cur_ctx, gen_jump_branch); let mut branch = branch_rc.borrow_mut(); let block = block_rc.borrow(); @@ -1834,9 +1956,9 @@ pub fn defer_compilation( branch.dst_addrs[0] = get_branch_target(blockid, &next_ctx, &branch_rc, 0, ocb); // Call the branch generation function - branch.start_addr = Some(cb.get_write_ptr()); - gen_jump_branch(cb, branch.dst_addrs[0].unwrap(), None, BranchShape::Default); - branch.end_addr = Some(cb.get_write_ptr()); + asm.mark_branch_start(&branch_rc); + gen_jump_branch(asm, branch.dst_addrs[0].unwrap(), None, BranchShape::Default); + asm.mark_branch_end(&branch_rc); } // Remove all references to a block then free it. @@ -1897,11 +2019,22 @@ pub fn invalidate_block_version(blockref: &BlockRef) { // machine code that some other thread is running. 
let block = blockref.borrow(); - let cb = CodegenGlobals::get_inline_cb(); + let mut cb = CodegenGlobals::get_inline_cb(); let ocb = CodegenGlobals::get_outlined_cb(); verify_blockid(block.blockid); + #[cfg(feature = "disasm")] + { + // If dump_iseq_disasm is specified, print to console that blocks for matching ISEQ names were invalidated. + if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() { + let iseq_location = iseq_get_location(block.blockid.iseq); + if iseq_location.contains(substr) { + println!("Invalidating block from {}, ISEQ offsets [{}, {})", iseq_location, block.blockid.idx, block.end_idx); + } + } + } + // Remove this block from the version array remove_block_version(blockref); @@ -1934,12 +2067,19 @@ pub fn invalidate_block_version(blockref: &BlockRef) { // if (block.start_addr >= cb_get_ptr(cb, yjit_codepage_frozen_bytes)) // Don't patch frozen code region // Patch in a jump to block.entry_exit. + let cur_pos = cb.get_write_ptr(); cb.set_write_ptr(block_start); - jmp_ptr(cb, block_entry_exit); + + let mut asm = Assembler::new(); + asm.jmp(block_entry_exit.into()); + asm.compile(&mut cb); + assert!( - cb.get_write_ptr() < block_end, - "invalidation wrote past end of block" + cb.get_write_ptr() <= block_end, + "invalidation wrote past end of block (code_size: {:?}, new_size: {})", + block.code_size(), + cb.get_write_ptr().into_i64() - block_start.into_i64(), ); cb.set_write_ptr(cur_pos); } @@ -1993,17 +2133,14 @@ pub fn invalidate_block_version(blockref: &BlockRef) { } // Rewrite the branch with the new jump target address + let branch_end_addr = branch.end_addr; regenerate_branch(cb, &mut branch); if target_next && branch.end_addr > block.end_addr { - dbg!( - branch.block.borrow().blockid.idx, - block.blockid.idx, - branch.end_addr, - block.end_addr, - block.code_size() - ); - panic!("yjit invalidate rewrote branch past end of invalidated block"); + panic!("yjit invalidate rewrote branch past end of invalidated block: {:?} (code_size: 
{})", branch, block.code_size()); + } + if !target_next && branch.end_addr > branch_end_addr { + panic!("invalidated branch grew in size: {:?}", branch); } } @@ -2016,13 +2153,12 @@ pub fn invalidate_block_version(blockref: &BlockRef) { // change this in the future when we support optional parameters because // they enter the function with a non-zero PC if block.blockid.idx == 0 { + // TODO: + // We could reset the exec counter to zero in rb_iseq_reset_jit_func() + // so that we eventually compile a new entry point when useful unsafe { rb_iseq_reset_jit_func(block.blockid.iseq) }; } - // TODO: - // May want to recompile a new entry point (for interpreter entry blocks) - // This isn't necessary for correctness - // FIXME: // Call continuation addresses on the stack can also be atomically replaced by jumps going to the stub. diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs index 919557817218a1..2f823e1b615354 100644 --- a/yjit/src/cruby.rs +++ b/yjit/src/cruby.rs @@ -106,162 +106,13 @@ pub use autogened::*; // TODO: For #defines that affect memory layout, we need to check for them // on build and fail if they're wrong. e.g. USE_FLONUM *must* be true. -// TODO: -// Temporary, these external bindings will likely be auto-generated -// and textually included in this file +// These are functions we expose from vm_insnhelper.c, not in any header. +// Parsing it would result in a lot of duplicate definitions. +// Use bindgen for functions that are defined in headers or in yjit.c. 
#[cfg_attr(test, allow(unused))] // We don't link against C code when testing extern "C" { - #[link_name = "rb_insn_name"] - pub fn raw_insn_name(insn: VALUE) -> *const c_char; - - #[link_name = "rb_insn_len"] - pub fn raw_insn_len(v: VALUE) -> c_int; - - #[link_name = "rb_yarv_class_of"] - pub fn CLASS_OF(v: VALUE) -> VALUE; - - #[link_name = "rb_get_ec_cfp"] - pub fn get_ec_cfp(ec: EcPtr) -> CfpPtr; - - #[link_name = "rb_get_cfp_pc"] - pub fn get_cfp_pc(cfp: CfpPtr) -> *mut VALUE; - - #[link_name = "rb_get_cfp_sp"] - pub fn get_cfp_sp(cfp: CfpPtr) -> *mut VALUE; - - #[link_name = "rb_get_cfp_self"] - pub fn get_cfp_self(cfp: CfpPtr) -> VALUE; - - #[link_name = "rb_get_cfp_ep"] - pub fn get_cfp_ep(cfp: CfpPtr) -> *mut VALUE; - - #[link_name = "rb_get_cfp_ep_level"] - pub fn get_cfp_ep_level(cfp: CfpPtr, lv: u32) -> *const VALUE; - - #[link_name = "rb_get_cme_def_type"] - pub fn get_cme_def_type(cme: *const rb_callable_method_entry_t) -> rb_method_type_t; - - #[link_name = "rb_get_cme_def_body_attr_id"] - pub fn get_cme_def_body_attr_id(cme: *const rb_callable_method_entry_t) -> ID; - - #[link_name = "rb_get_cme_def_body_optimized_type"] - pub fn get_cme_def_body_optimized_type( - cme: *const rb_callable_method_entry_t, - ) -> method_optimized_type; - - #[link_name = "rb_get_cme_def_body_optimized_index"] - pub fn get_cme_def_body_optimized_index(cme: *const rb_callable_method_entry_t) -> c_uint; - - #[link_name = "rb_get_cme_def_body_cfunc"] - pub fn get_cme_def_body_cfunc(cme: *const rb_callable_method_entry_t) - -> *mut rb_method_cfunc_t; - - #[link_name = "rb_get_def_method_serial"] - /// While this returns a uintptr_t in C, we always use it as a Rust u64 - pub fn get_def_method_serial(def: *const rb_method_definition_t) -> u64; - - #[link_name = "rb_get_def_original_id"] - pub fn get_def_original_id(def: *const rb_method_definition_t) -> ID; - - #[link_name = "rb_get_mct_argc"] - pub fn get_mct_argc(mct: *const rb_method_cfunc_t) -> c_int; - - #[link_name = 
"rb_get_mct_func"] - pub fn get_mct_func(mct: *const rb_method_cfunc_t) -> *const u8; - - #[link_name = "rb_get_def_iseq_ptr"] - pub fn get_def_iseq_ptr(def: *const rb_method_definition_t) -> IseqPtr; - - #[link_name = "rb_iseq_encoded_size"] - pub fn get_iseq_encoded_size(iseq: IseqPtr) -> c_uint; - - #[link_name = "rb_get_iseq_body_local_iseq"] - pub fn get_iseq_body_local_iseq(iseq: IseqPtr) -> IseqPtr; - - #[link_name = "rb_get_iseq_body_iseq_encoded"] - pub fn get_iseq_body_iseq_encoded(iseq: IseqPtr) -> *mut VALUE; - - #[link_name = "rb_get_iseq_body_stack_max"] - pub fn get_iseq_body_stack_max(iseq: IseqPtr) -> c_uint; - - #[link_name = "rb_get_iseq_flags_has_opt"] - pub fn get_iseq_flags_has_opt(iseq: IseqPtr) -> bool; - - #[link_name = "rb_get_iseq_flags_has_kw"] - pub fn get_iseq_flags_has_kw(iseq: IseqPtr) -> bool; - - #[link_name = "rb_get_iseq_flags_has_rest"] - pub fn get_iseq_flags_has_rest(iseq: IseqPtr) -> bool; - - #[link_name = "rb_get_iseq_flags_has_post"] - pub fn get_iseq_flags_has_post(iseq: IseqPtr) -> bool; - - #[link_name = "rb_get_iseq_flags_has_kwrest"] - pub fn get_iseq_flags_has_kwrest(iseq: IseqPtr) -> bool; - - #[link_name = "rb_get_iseq_flags_has_block"] - pub fn get_iseq_flags_has_block(iseq: IseqPtr) -> bool; - - #[link_name = "rb_get_iseq_flags_has_accepts_no_kwarg"] - pub fn get_iseq_flags_has_accepts_no_kwarg(iseq: IseqPtr) -> bool; - - #[link_name = "rb_get_iseq_body_local_table_size"] - pub fn get_iseq_body_local_table_size(iseq: IseqPtr) -> c_uint; - - #[link_name = "rb_get_iseq_body_param_keyword"] - pub fn get_iseq_body_param_keyword(iseq: IseqPtr) -> *const rb_seq_param_keyword_struct; - - #[link_name = "rb_get_iseq_body_param_size"] - pub fn get_iseq_body_param_size(iseq: IseqPtr) -> c_uint; - - #[link_name = "rb_get_iseq_body_param_lead_num"] - pub fn get_iseq_body_param_lead_num(iseq: IseqPtr) -> c_int; - - #[link_name = "rb_get_iseq_body_param_opt_num"] - pub fn get_iseq_body_param_opt_num(iseq: IseqPtr) -> c_int; - - 
#[link_name = "rb_get_iseq_body_param_opt_table"] - pub fn get_iseq_body_param_opt_table(iseq: IseqPtr) -> *const VALUE; - - #[link_name = "rb_get_cikw_keyword_len"] - pub fn get_cikw_keyword_len(cikw: *const rb_callinfo_kwarg) -> c_int; - - #[link_name = "rb_get_cikw_keywords_idx"] - pub fn get_cikw_keywords_idx(cikw: *const rb_callinfo_kwarg, idx: c_int) -> VALUE; - - #[link_name = "rb_get_call_data_ci"] - pub fn get_call_data_ci(cd: *const rb_call_data) -> *const rb_callinfo; - - #[link_name = "rb_yarv_str_eql_internal"] - pub fn rb_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE; - - #[link_name = "rb_yarv_ary_entry_internal"] - pub fn rb_ary_entry_internal(ary: VALUE, offset: c_long) -> VALUE; - - #[link_name = "rb_yarv_fix_mod_fix"] - pub fn rb_fix_mod_fix(recv: VALUE, obj: VALUE) -> VALUE; - - #[link_name = "rb_FL_TEST"] - pub fn FL_TEST(obj: VALUE, flags: VALUE) -> VALUE; - - #[link_name = "rb_FL_TEST_RAW"] - pub fn FL_TEST_RAW(obj: VALUE, flags: VALUE) -> VALUE; - - #[link_name = "rb_RB_TYPE_P"] - pub fn RB_TYPE_P(obj: VALUE, t: ruby_value_type) -> bool; - - #[link_name = "rb_BASIC_OP_UNREDEFINED_P"] - pub fn BASIC_OP_UNREDEFINED_P(bop: ruby_basic_operators, klass: RedefinitionFlag) -> bool; - - #[link_name = "rb_RSTRUCT_LEN"] - pub fn RSTRUCT_LEN(st: VALUE) -> c_long; - - #[link_name = "rb_RSTRUCT_SET"] - pub fn RSTRUCT_SET(st: VALUE, k: c_int, v: VALUE); - - // Ruby only defines these in vm_insnhelper.c, not in any header. - // Parsing it would result in a lot of duplicate definitions. 
pub fn rb_vm_splat_array(flag: VALUE, ary: VALUE) -> VALUE; + pub fn rb_vm_concat_array(ary1: VALUE, ary2st: VALUE) -> VALUE; pub fn rb_vm_defined( ec: EcPtr, reg_cfp: CfpPtr, @@ -283,28 +134,65 @@ extern "C" { ic: ICVARC, ) -> VALUE; pub fn rb_vm_ic_hit_p(ic: IC, reg_ep: *const VALUE) -> bool; - - #[link_name = "rb_vm_ci_argc"] - pub fn vm_ci_argc(ci: *const rb_callinfo) -> c_int; - - #[link_name = "rb_vm_ci_mid"] - pub fn vm_ci_mid(ci: *const rb_callinfo) -> ID; - - #[link_name = "rb_vm_ci_flag"] - pub fn vm_ci_flag(ci: *const rb_callinfo) -> c_uint; - - #[link_name = "rb_vm_ci_kwarg"] - pub fn vm_ci_kwarg(ci: *const rb_callinfo) -> *const rb_callinfo_kwarg; - - #[link_name = "rb_METHOD_ENTRY_VISI"] - pub fn METHOD_ENTRY_VISI(me: *const rb_callable_method_entry_t) -> rb_method_visibility_t; - pub fn rb_str_bytesize(str: VALUE) -> VALUE; - - #[link_name = "rb_RCLASS_ORIGIN"] - pub fn RCLASS_ORIGIN(v: VALUE) -> VALUE; } +// Renames +pub use rb_insn_name as raw_insn_name; +pub use rb_insn_len as raw_insn_len; +pub use rb_yarv_class_of as CLASS_OF; +pub use rb_get_ec_cfp as get_ec_cfp; +pub use rb_get_cfp_pc as get_cfp_pc; +pub use rb_get_cfp_sp as get_cfp_sp; +pub use rb_get_cfp_self as get_cfp_self; +pub use rb_get_cfp_ep as get_cfp_ep; +pub use rb_get_cfp_ep_level as get_cfp_ep_level; +pub use rb_get_cme_def_type as get_cme_def_type; +pub use rb_get_cme_def_body_attr_id as get_cme_def_body_attr_id; +pub use rb_get_cme_def_body_optimized_type as get_cme_def_body_optimized_type; +pub use rb_get_cme_def_body_optimized_index as get_cme_def_body_optimized_index; +pub use rb_get_cme_def_body_cfunc as get_cme_def_body_cfunc; +pub use rb_get_def_method_serial as get_def_method_serial; +pub use rb_get_def_original_id as get_def_original_id; +pub use rb_get_mct_argc as get_mct_argc; +pub use rb_get_mct_func as get_mct_func; +pub use rb_get_def_iseq_ptr as get_def_iseq_ptr; +pub use rb_iseq_encoded_size as get_iseq_encoded_size; +pub use rb_get_iseq_body_local_iseq as 
get_iseq_body_local_iseq; +pub use rb_get_iseq_body_iseq_encoded as get_iseq_body_iseq_encoded; +pub use rb_get_iseq_body_stack_max as get_iseq_body_stack_max; +pub use rb_get_iseq_flags_has_opt as get_iseq_flags_has_opt; +pub use rb_get_iseq_flags_has_kw as get_iseq_flags_has_kw; +pub use rb_get_iseq_flags_has_rest as get_iseq_flags_has_rest; +pub use rb_get_iseq_flags_has_post as get_iseq_flags_has_post; +pub use rb_get_iseq_flags_has_kwrest as get_iseq_flags_has_kwrest; +pub use rb_get_iseq_flags_has_block as get_iseq_flags_has_block; +pub use rb_get_iseq_flags_has_accepts_no_kwarg as get_iseq_flags_has_accepts_no_kwarg; +pub use rb_get_iseq_body_local_table_size as get_iseq_body_local_table_size; +pub use rb_get_iseq_body_param_keyword as get_iseq_body_param_keyword; +pub use rb_get_iseq_body_param_size as get_iseq_body_param_size; +pub use rb_get_iseq_body_param_lead_num as get_iseq_body_param_lead_num; +pub use rb_get_iseq_body_param_opt_num as get_iseq_body_param_opt_num; +pub use rb_get_iseq_body_param_opt_table as get_iseq_body_param_opt_table; +pub use rb_get_cikw_keyword_len as get_cikw_keyword_len; +pub use rb_get_cikw_keywords_idx as get_cikw_keywords_idx; +pub use rb_get_call_data_ci as get_call_data_ci; +pub use rb_yarv_str_eql_internal as rb_str_eql_internal; +pub use rb_yarv_ary_entry_internal as rb_ary_entry_internal; +pub use rb_yarv_fix_mod_fix as rb_fix_mod_fix; +pub use rb_FL_TEST as FL_TEST; +pub use rb_FL_TEST_RAW as FL_TEST_RAW; +pub use rb_RB_TYPE_P as RB_TYPE_P; +pub use rb_BASIC_OP_UNREDEFINED_P as BASIC_OP_UNREDEFINED_P; +pub use rb_RSTRUCT_LEN as RSTRUCT_LEN; +pub use rb_RSTRUCT_SET as RSTRUCT_SET; +pub use rb_vm_ci_argc as vm_ci_argc; +pub use rb_vm_ci_mid as vm_ci_mid; +pub use rb_vm_ci_flag as vm_ci_flag; +pub use rb_vm_ci_kwarg as vm_ci_kwarg; +pub use rb_METHOD_ENTRY_VISI as METHOD_ENTRY_VISI; +pub use rb_RCLASS_ORIGIN as RCLASS_ORIGIN; + /// Helper so we can get a Rust string for insn_name() pub fn insn_name(opcode: usize) -> 
String { use std::ffi::CStr; @@ -596,14 +484,21 @@ impl From for i32 { fn from(value: VALUE) -> Self { let VALUE(uimm) = value; assert!(uimm <= (i32::MAX as usize)); - uimm as i32 + uimm.try_into().unwrap() + } +} + +impl From for u16 { + fn from(value: VALUE) -> Self { + let VALUE(uimm) = value; + uimm.try_into().unwrap() } } /// Produce a Ruby string from a Rust string slice #[cfg(feature = "asm_comments")] pub fn rust_str_to_ruby(str: &str) -> VALUE { - unsafe { rb_utf8_str_new(str.as_ptr() as *const i8, str.len() as i64) } + unsafe { rb_utf8_str_new(str.as_ptr() as *const _, str.len() as i64) } } /// Produce a Ruby symbol from a Rust string slice diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index fed132588cf769..591408e1da644e 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -246,6 +246,20 @@ pub const RUBY_ENCODING_SHIFT: ruby_encoding_consts = 22; pub const RUBY_ENCODING_MASK: ruby_encoding_consts = 532676608; pub const RUBY_ENCODING_MAXNAMELEN: ruby_encoding_consts = 42; pub type ruby_encoding_consts = u32; +pub const RUBY_ENCINDEX_ASCII_8BIT: ruby_preserved_encindex = 0; +pub const RUBY_ENCINDEX_UTF_8: ruby_preserved_encindex = 1; +pub const RUBY_ENCINDEX_US_ASCII: ruby_preserved_encindex = 2; +pub const RUBY_ENCINDEX_UTF_16BE: ruby_preserved_encindex = 3; +pub const RUBY_ENCINDEX_UTF_16LE: ruby_preserved_encindex = 4; +pub const RUBY_ENCINDEX_UTF_32BE: ruby_preserved_encindex = 5; +pub const RUBY_ENCINDEX_UTF_32LE: ruby_preserved_encindex = 6; +pub const RUBY_ENCINDEX_UTF_16: ruby_preserved_encindex = 7; +pub const RUBY_ENCINDEX_UTF_32: ruby_preserved_encindex = 8; +pub const RUBY_ENCINDEX_UTF8_MAC: ruby_preserved_encindex = 9; +pub const RUBY_ENCINDEX_EUC_JP: ruby_preserved_encindex = 10; +pub const RUBY_ENCINDEX_Windows_31J: ruby_preserved_encindex = 11; +pub const RUBY_ENCINDEX_BUILTIN_MAX: ruby_preserved_encindex = 12; +pub type ruby_preserved_encindex = u32; extern "C" { pub fn 
rb_obj_info_dump(obj: VALUE); } @@ -534,6 +548,20 @@ pub const VM_METHOD_TYPE_OPTIMIZED: rb_method_type_t = 9; pub const VM_METHOD_TYPE_MISSING: rb_method_type_t = 10; pub const VM_METHOD_TYPE_REFINED: rb_method_type_t = 11; pub type rb_method_type_t = u32; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct rb_method_cfunc_struct { + pub func: ::std::option::Option VALUE>, + pub invoker: ::std::option::Option< + unsafe extern "C" fn( + recv: VALUE, + argc: ::std::os::raw::c_int, + argv: *const VALUE, + func: ::std::option::Option VALUE>, + ) -> VALUE, + >, + pub argc: ::std::os::raw::c_int, +} pub const OPTIMIZED_METHOD_TYPE_SEND: method_optimized_type = 0; pub const OPTIMIZED_METHOD_TYPE_CALL: method_optimized_type = 1; pub const OPTIMIZED_METHOD_TYPE_BLOCK_CALL: method_optimized_type = 2; @@ -649,6 +677,9 @@ pub const VM_ENV_FLAG_ESCAPED: vm_frame_env_flags = 4; pub const VM_ENV_FLAG_WB_REQUIRED: vm_frame_env_flags = 8; pub const VM_ENV_FLAG_ISOLATED: vm_frame_env_flags = 16; pub type vm_frame_env_flags = u32; +extern "C" { + pub fn rb_iseq_path(iseq: *const rb_iseq_t) -> VALUE; +} extern "C" { pub fn rb_vm_bh_to_procval(ec: *const rb_execution_context_t, block_handler: VALUE) -> VALUE; } @@ -779,171 +810,173 @@ pub const YARVINSN_pop: ruby_vminsn_type = 34; pub const YARVINSN_dup: ruby_vminsn_type = 35; pub const YARVINSN_dupn: ruby_vminsn_type = 36; pub const YARVINSN_swap: ruby_vminsn_type = 37; -pub const YARVINSN_topn: ruby_vminsn_type = 38; -pub const YARVINSN_setn: ruby_vminsn_type = 39; -pub const YARVINSN_adjuststack: ruby_vminsn_type = 40; -pub const YARVINSN_defined: ruby_vminsn_type = 41; -pub const YARVINSN_checkmatch: ruby_vminsn_type = 42; -pub const YARVINSN_checkkeyword: ruby_vminsn_type = 43; -pub const YARVINSN_checktype: ruby_vminsn_type = 44; -pub const YARVINSN_defineclass: ruby_vminsn_type = 45; -pub const YARVINSN_definemethod: ruby_vminsn_type = 46; -pub const YARVINSN_definesmethod: ruby_vminsn_type = 47; -pub const YARVINSN_send: 
ruby_vminsn_type = 48; -pub const YARVINSN_opt_send_without_block: ruby_vminsn_type = 49; -pub const YARVINSN_objtostring: ruby_vminsn_type = 50; -pub const YARVINSN_opt_str_freeze: ruby_vminsn_type = 51; -pub const YARVINSN_opt_nil_p: ruby_vminsn_type = 52; -pub const YARVINSN_opt_str_uminus: ruby_vminsn_type = 53; -pub const YARVINSN_opt_newarray_max: ruby_vminsn_type = 54; -pub const YARVINSN_opt_newarray_min: ruby_vminsn_type = 55; -pub const YARVINSN_invokesuper: ruby_vminsn_type = 56; -pub const YARVINSN_invokeblock: ruby_vminsn_type = 57; -pub const YARVINSN_leave: ruby_vminsn_type = 58; -pub const YARVINSN_throw: ruby_vminsn_type = 59; -pub const YARVINSN_jump: ruby_vminsn_type = 60; -pub const YARVINSN_branchif: ruby_vminsn_type = 61; -pub const YARVINSN_branchunless: ruby_vminsn_type = 62; -pub const YARVINSN_branchnil: ruby_vminsn_type = 63; -pub const YARVINSN_opt_getinlinecache: ruby_vminsn_type = 64; -pub const YARVINSN_opt_setinlinecache: ruby_vminsn_type = 65; -pub const YARVINSN_once: ruby_vminsn_type = 66; -pub const YARVINSN_opt_case_dispatch: ruby_vminsn_type = 67; -pub const YARVINSN_opt_plus: ruby_vminsn_type = 68; -pub const YARVINSN_opt_minus: ruby_vminsn_type = 69; -pub const YARVINSN_opt_mult: ruby_vminsn_type = 70; -pub const YARVINSN_opt_div: ruby_vminsn_type = 71; -pub const YARVINSN_opt_mod: ruby_vminsn_type = 72; -pub const YARVINSN_opt_eq: ruby_vminsn_type = 73; -pub const YARVINSN_opt_neq: ruby_vminsn_type = 74; -pub const YARVINSN_opt_lt: ruby_vminsn_type = 75; -pub const YARVINSN_opt_le: ruby_vminsn_type = 76; -pub const YARVINSN_opt_gt: ruby_vminsn_type = 77; -pub const YARVINSN_opt_ge: ruby_vminsn_type = 78; -pub const YARVINSN_opt_ltlt: ruby_vminsn_type = 79; -pub const YARVINSN_opt_and: ruby_vminsn_type = 80; -pub const YARVINSN_opt_or: ruby_vminsn_type = 81; -pub const YARVINSN_opt_aref: ruby_vminsn_type = 82; -pub const YARVINSN_opt_aset: ruby_vminsn_type = 83; -pub const YARVINSN_opt_aset_with: ruby_vminsn_type = 84; -pub 
const YARVINSN_opt_aref_with: ruby_vminsn_type = 85; -pub const YARVINSN_opt_length: ruby_vminsn_type = 86; -pub const YARVINSN_opt_size: ruby_vminsn_type = 87; -pub const YARVINSN_opt_empty_p: ruby_vminsn_type = 88; -pub const YARVINSN_opt_succ: ruby_vminsn_type = 89; -pub const YARVINSN_opt_not: ruby_vminsn_type = 90; -pub const YARVINSN_opt_regexpmatch2: ruby_vminsn_type = 91; -pub const YARVINSN_invokebuiltin: ruby_vminsn_type = 92; -pub const YARVINSN_opt_invokebuiltin_delegate: ruby_vminsn_type = 93; -pub const YARVINSN_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 94; -pub const YARVINSN_getlocal_WC_0: ruby_vminsn_type = 95; -pub const YARVINSN_getlocal_WC_1: ruby_vminsn_type = 96; -pub const YARVINSN_setlocal_WC_0: ruby_vminsn_type = 97; -pub const YARVINSN_setlocal_WC_1: ruby_vminsn_type = 98; -pub const YARVINSN_putobject_INT2FIX_0_: ruby_vminsn_type = 99; -pub const YARVINSN_putobject_INT2FIX_1_: ruby_vminsn_type = 100; -pub const YARVINSN_trace_nop: ruby_vminsn_type = 101; -pub const YARVINSN_trace_getlocal: ruby_vminsn_type = 102; -pub const YARVINSN_trace_setlocal: ruby_vminsn_type = 103; -pub const YARVINSN_trace_getblockparam: ruby_vminsn_type = 104; -pub const YARVINSN_trace_setblockparam: ruby_vminsn_type = 105; -pub const YARVINSN_trace_getblockparamproxy: ruby_vminsn_type = 106; -pub const YARVINSN_trace_getspecial: ruby_vminsn_type = 107; -pub const YARVINSN_trace_setspecial: ruby_vminsn_type = 108; -pub const YARVINSN_trace_getinstancevariable: ruby_vminsn_type = 109; -pub const YARVINSN_trace_setinstancevariable: ruby_vminsn_type = 110; -pub const YARVINSN_trace_getclassvariable: ruby_vminsn_type = 111; -pub const YARVINSN_trace_setclassvariable: ruby_vminsn_type = 112; -pub const YARVINSN_trace_getconstant: ruby_vminsn_type = 113; -pub const YARVINSN_trace_setconstant: ruby_vminsn_type = 114; -pub const YARVINSN_trace_getglobal: ruby_vminsn_type = 115; -pub const YARVINSN_trace_setglobal: ruby_vminsn_type = 116; -pub const 
YARVINSN_trace_putnil: ruby_vminsn_type = 117; -pub const YARVINSN_trace_putself: ruby_vminsn_type = 118; -pub const YARVINSN_trace_putobject: ruby_vminsn_type = 119; -pub const YARVINSN_trace_putspecialobject: ruby_vminsn_type = 120; -pub const YARVINSN_trace_putstring: ruby_vminsn_type = 121; -pub const YARVINSN_trace_concatstrings: ruby_vminsn_type = 122; -pub const YARVINSN_trace_anytostring: ruby_vminsn_type = 123; -pub const YARVINSN_trace_toregexp: ruby_vminsn_type = 124; -pub const YARVINSN_trace_intern: ruby_vminsn_type = 125; -pub const YARVINSN_trace_newarray: ruby_vminsn_type = 126; -pub const YARVINSN_trace_newarraykwsplat: ruby_vminsn_type = 127; -pub const YARVINSN_trace_duparray: ruby_vminsn_type = 128; -pub const YARVINSN_trace_duphash: ruby_vminsn_type = 129; -pub const YARVINSN_trace_expandarray: ruby_vminsn_type = 130; -pub const YARVINSN_trace_concatarray: ruby_vminsn_type = 131; -pub const YARVINSN_trace_splatarray: ruby_vminsn_type = 132; -pub const YARVINSN_trace_newhash: ruby_vminsn_type = 133; -pub const YARVINSN_trace_newrange: ruby_vminsn_type = 134; -pub const YARVINSN_trace_pop: ruby_vminsn_type = 135; -pub const YARVINSN_trace_dup: ruby_vminsn_type = 136; -pub const YARVINSN_trace_dupn: ruby_vminsn_type = 137; -pub const YARVINSN_trace_swap: ruby_vminsn_type = 138; -pub const YARVINSN_trace_topn: ruby_vminsn_type = 139; -pub const YARVINSN_trace_setn: ruby_vminsn_type = 140; -pub const YARVINSN_trace_adjuststack: ruby_vminsn_type = 141; -pub const YARVINSN_trace_defined: ruby_vminsn_type = 142; -pub const YARVINSN_trace_checkmatch: ruby_vminsn_type = 143; -pub const YARVINSN_trace_checkkeyword: ruby_vminsn_type = 144; -pub const YARVINSN_trace_checktype: ruby_vminsn_type = 145; -pub const YARVINSN_trace_defineclass: ruby_vminsn_type = 146; -pub const YARVINSN_trace_definemethod: ruby_vminsn_type = 147; -pub const YARVINSN_trace_definesmethod: ruby_vminsn_type = 148; -pub const YARVINSN_trace_send: ruby_vminsn_type = 149; -pub const 
YARVINSN_trace_opt_send_without_block: ruby_vminsn_type = 150; -pub const YARVINSN_trace_objtostring: ruby_vminsn_type = 151; -pub const YARVINSN_trace_opt_str_freeze: ruby_vminsn_type = 152; -pub const YARVINSN_trace_opt_nil_p: ruby_vminsn_type = 153; -pub const YARVINSN_trace_opt_str_uminus: ruby_vminsn_type = 154; -pub const YARVINSN_trace_opt_newarray_max: ruby_vminsn_type = 155; -pub const YARVINSN_trace_opt_newarray_min: ruby_vminsn_type = 156; -pub const YARVINSN_trace_invokesuper: ruby_vminsn_type = 157; -pub const YARVINSN_trace_invokeblock: ruby_vminsn_type = 158; -pub const YARVINSN_trace_leave: ruby_vminsn_type = 159; -pub const YARVINSN_trace_throw: ruby_vminsn_type = 160; -pub const YARVINSN_trace_jump: ruby_vminsn_type = 161; -pub const YARVINSN_trace_branchif: ruby_vminsn_type = 162; -pub const YARVINSN_trace_branchunless: ruby_vminsn_type = 163; -pub const YARVINSN_trace_branchnil: ruby_vminsn_type = 164; -pub const YARVINSN_trace_opt_getinlinecache: ruby_vminsn_type = 165; -pub const YARVINSN_trace_opt_setinlinecache: ruby_vminsn_type = 166; -pub const YARVINSN_trace_once: ruby_vminsn_type = 167; -pub const YARVINSN_trace_opt_case_dispatch: ruby_vminsn_type = 168; -pub const YARVINSN_trace_opt_plus: ruby_vminsn_type = 169; -pub const YARVINSN_trace_opt_minus: ruby_vminsn_type = 170; -pub const YARVINSN_trace_opt_mult: ruby_vminsn_type = 171; -pub const YARVINSN_trace_opt_div: ruby_vminsn_type = 172; -pub const YARVINSN_trace_opt_mod: ruby_vminsn_type = 173; -pub const YARVINSN_trace_opt_eq: ruby_vminsn_type = 174; -pub const YARVINSN_trace_opt_neq: ruby_vminsn_type = 175; -pub const YARVINSN_trace_opt_lt: ruby_vminsn_type = 176; -pub const YARVINSN_trace_opt_le: ruby_vminsn_type = 177; -pub const YARVINSN_trace_opt_gt: ruby_vminsn_type = 178; -pub const YARVINSN_trace_opt_ge: ruby_vminsn_type = 179; -pub const YARVINSN_trace_opt_ltlt: ruby_vminsn_type = 180; -pub const YARVINSN_trace_opt_and: ruby_vminsn_type = 181; -pub const 
YARVINSN_trace_opt_or: ruby_vminsn_type = 182; -pub const YARVINSN_trace_opt_aref: ruby_vminsn_type = 183; -pub const YARVINSN_trace_opt_aset: ruby_vminsn_type = 184; -pub const YARVINSN_trace_opt_aset_with: ruby_vminsn_type = 185; -pub const YARVINSN_trace_opt_aref_with: ruby_vminsn_type = 186; -pub const YARVINSN_trace_opt_length: ruby_vminsn_type = 187; -pub const YARVINSN_trace_opt_size: ruby_vminsn_type = 188; -pub const YARVINSN_trace_opt_empty_p: ruby_vminsn_type = 189; -pub const YARVINSN_trace_opt_succ: ruby_vminsn_type = 190; -pub const YARVINSN_trace_opt_not: ruby_vminsn_type = 191; -pub const YARVINSN_trace_opt_regexpmatch2: ruby_vminsn_type = 192; -pub const YARVINSN_trace_invokebuiltin: ruby_vminsn_type = 193; -pub const YARVINSN_trace_opt_invokebuiltin_delegate: ruby_vminsn_type = 194; -pub const YARVINSN_trace_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 195; -pub const YARVINSN_trace_getlocal_WC_0: ruby_vminsn_type = 196; -pub const YARVINSN_trace_getlocal_WC_1: ruby_vminsn_type = 197; -pub const YARVINSN_trace_setlocal_WC_0: ruby_vminsn_type = 198; -pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 199; -pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 200; -pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 201; -pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 202; +pub const YARVINSN_opt_reverse: ruby_vminsn_type = 38; +pub const YARVINSN_topn: ruby_vminsn_type = 39; +pub const YARVINSN_setn: ruby_vminsn_type = 40; +pub const YARVINSN_adjuststack: ruby_vminsn_type = 41; +pub const YARVINSN_defined: ruby_vminsn_type = 42; +pub const YARVINSN_checkmatch: ruby_vminsn_type = 43; +pub const YARVINSN_checkkeyword: ruby_vminsn_type = 44; +pub const YARVINSN_checktype: ruby_vminsn_type = 45; +pub const YARVINSN_defineclass: ruby_vminsn_type = 46; +pub const YARVINSN_definemethod: ruby_vminsn_type = 47; +pub const YARVINSN_definesmethod: ruby_vminsn_type = 48; +pub const YARVINSN_send: ruby_vminsn_type = 
49; +pub const YARVINSN_opt_send_without_block: ruby_vminsn_type = 50; +pub const YARVINSN_objtostring: ruby_vminsn_type = 51; +pub const YARVINSN_opt_str_freeze: ruby_vminsn_type = 52; +pub const YARVINSN_opt_nil_p: ruby_vminsn_type = 53; +pub const YARVINSN_opt_str_uminus: ruby_vminsn_type = 54; +pub const YARVINSN_opt_newarray_max: ruby_vminsn_type = 55; +pub const YARVINSN_opt_newarray_min: ruby_vminsn_type = 56; +pub const YARVINSN_invokesuper: ruby_vminsn_type = 57; +pub const YARVINSN_invokeblock: ruby_vminsn_type = 58; +pub const YARVINSN_leave: ruby_vminsn_type = 59; +pub const YARVINSN_throw: ruby_vminsn_type = 60; +pub const YARVINSN_jump: ruby_vminsn_type = 61; +pub const YARVINSN_branchif: ruby_vminsn_type = 62; +pub const YARVINSN_branchunless: ruby_vminsn_type = 63; +pub const YARVINSN_branchnil: ruby_vminsn_type = 64; +pub const YARVINSN_opt_getinlinecache: ruby_vminsn_type = 65; +pub const YARVINSN_opt_setinlinecache: ruby_vminsn_type = 66; +pub const YARVINSN_once: ruby_vminsn_type = 67; +pub const YARVINSN_opt_case_dispatch: ruby_vminsn_type = 68; +pub const YARVINSN_opt_plus: ruby_vminsn_type = 69; +pub const YARVINSN_opt_minus: ruby_vminsn_type = 70; +pub const YARVINSN_opt_mult: ruby_vminsn_type = 71; +pub const YARVINSN_opt_div: ruby_vminsn_type = 72; +pub const YARVINSN_opt_mod: ruby_vminsn_type = 73; +pub const YARVINSN_opt_eq: ruby_vminsn_type = 74; +pub const YARVINSN_opt_neq: ruby_vminsn_type = 75; +pub const YARVINSN_opt_lt: ruby_vminsn_type = 76; +pub const YARVINSN_opt_le: ruby_vminsn_type = 77; +pub const YARVINSN_opt_gt: ruby_vminsn_type = 78; +pub const YARVINSN_opt_ge: ruby_vminsn_type = 79; +pub const YARVINSN_opt_ltlt: ruby_vminsn_type = 80; +pub const YARVINSN_opt_and: ruby_vminsn_type = 81; +pub const YARVINSN_opt_or: ruby_vminsn_type = 82; +pub const YARVINSN_opt_aref: ruby_vminsn_type = 83; +pub const YARVINSN_opt_aset: ruby_vminsn_type = 84; +pub const YARVINSN_opt_aset_with: ruby_vminsn_type = 85; +pub const 
YARVINSN_opt_aref_with: ruby_vminsn_type = 86; +pub const YARVINSN_opt_length: ruby_vminsn_type = 87; +pub const YARVINSN_opt_size: ruby_vminsn_type = 88; +pub const YARVINSN_opt_empty_p: ruby_vminsn_type = 89; +pub const YARVINSN_opt_succ: ruby_vminsn_type = 90; +pub const YARVINSN_opt_not: ruby_vminsn_type = 91; +pub const YARVINSN_opt_regexpmatch2: ruby_vminsn_type = 92; +pub const YARVINSN_invokebuiltin: ruby_vminsn_type = 93; +pub const YARVINSN_opt_invokebuiltin_delegate: ruby_vminsn_type = 94; +pub const YARVINSN_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 95; +pub const YARVINSN_getlocal_WC_0: ruby_vminsn_type = 96; +pub const YARVINSN_getlocal_WC_1: ruby_vminsn_type = 97; +pub const YARVINSN_setlocal_WC_0: ruby_vminsn_type = 98; +pub const YARVINSN_setlocal_WC_1: ruby_vminsn_type = 99; +pub const YARVINSN_putobject_INT2FIX_0_: ruby_vminsn_type = 100; +pub const YARVINSN_putobject_INT2FIX_1_: ruby_vminsn_type = 101; +pub const YARVINSN_trace_nop: ruby_vminsn_type = 102; +pub const YARVINSN_trace_getlocal: ruby_vminsn_type = 103; +pub const YARVINSN_trace_setlocal: ruby_vminsn_type = 104; +pub const YARVINSN_trace_getblockparam: ruby_vminsn_type = 105; +pub const YARVINSN_trace_setblockparam: ruby_vminsn_type = 106; +pub const YARVINSN_trace_getblockparamproxy: ruby_vminsn_type = 107; +pub const YARVINSN_trace_getspecial: ruby_vminsn_type = 108; +pub const YARVINSN_trace_setspecial: ruby_vminsn_type = 109; +pub const YARVINSN_trace_getinstancevariable: ruby_vminsn_type = 110; +pub const YARVINSN_trace_setinstancevariable: ruby_vminsn_type = 111; +pub const YARVINSN_trace_getclassvariable: ruby_vminsn_type = 112; +pub const YARVINSN_trace_setclassvariable: ruby_vminsn_type = 113; +pub const YARVINSN_trace_getconstant: ruby_vminsn_type = 114; +pub const YARVINSN_trace_setconstant: ruby_vminsn_type = 115; +pub const YARVINSN_trace_getglobal: ruby_vminsn_type = 116; +pub const YARVINSN_trace_setglobal: ruby_vminsn_type = 117; +pub const 
YARVINSN_trace_putnil: ruby_vminsn_type = 118; +pub const YARVINSN_trace_putself: ruby_vminsn_type = 119; +pub const YARVINSN_trace_putobject: ruby_vminsn_type = 120; +pub const YARVINSN_trace_putspecialobject: ruby_vminsn_type = 121; +pub const YARVINSN_trace_putstring: ruby_vminsn_type = 122; +pub const YARVINSN_trace_concatstrings: ruby_vminsn_type = 123; +pub const YARVINSN_trace_anytostring: ruby_vminsn_type = 124; +pub const YARVINSN_trace_toregexp: ruby_vminsn_type = 125; +pub const YARVINSN_trace_intern: ruby_vminsn_type = 126; +pub const YARVINSN_trace_newarray: ruby_vminsn_type = 127; +pub const YARVINSN_trace_newarraykwsplat: ruby_vminsn_type = 128; +pub const YARVINSN_trace_duparray: ruby_vminsn_type = 129; +pub const YARVINSN_trace_duphash: ruby_vminsn_type = 130; +pub const YARVINSN_trace_expandarray: ruby_vminsn_type = 131; +pub const YARVINSN_trace_concatarray: ruby_vminsn_type = 132; +pub const YARVINSN_trace_splatarray: ruby_vminsn_type = 133; +pub const YARVINSN_trace_newhash: ruby_vminsn_type = 134; +pub const YARVINSN_trace_newrange: ruby_vminsn_type = 135; +pub const YARVINSN_trace_pop: ruby_vminsn_type = 136; +pub const YARVINSN_trace_dup: ruby_vminsn_type = 137; +pub const YARVINSN_trace_dupn: ruby_vminsn_type = 138; +pub const YARVINSN_trace_swap: ruby_vminsn_type = 139; +pub const YARVINSN_trace_opt_reverse: ruby_vminsn_type = 140; +pub const YARVINSN_trace_topn: ruby_vminsn_type = 141; +pub const YARVINSN_trace_setn: ruby_vminsn_type = 142; +pub const YARVINSN_trace_adjuststack: ruby_vminsn_type = 143; +pub const YARVINSN_trace_defined: ruby_vminsn_type = 144; +pub const YARVINSN_trace_checkmatch: ruby_vminsn_type = 145; +pub const YARVINSN_trace_checkkeyword: ruby_vminsn_type = 146; +pub const YARVINSN_trace_checktype: ruby_vminsn_type = 147; +pub const YARVINSN_trace_defineclass: ruby_vminsn_type = 148; +pub const YARVINSN_trace_definemethod: ruby_vminsn_type = 149; +pub const YARVINSN_trace_definesmethod: ruby_vminsn_type = 150; +pub 
const YARVINSN_trace_send: ruby_vminsn_type = 151; +pub const YARVINSN_trace_opt_send_without_block: ruby_vminsn_type = 152; +pub const YARVINSN_trace_objtostring: ruby_vminsn_type = 153; +pub const YARVINSN_trace_opt_str_freeze: ruby_vminsn_type = 154; +pub const YARVINSN_trace_opt_nil_p: ruby_vminsn_type = 155; +pub const YARVINSN_trace_opt_str_uminus: ruby_vminsn_type = 156; +pub const YARVINSN_trace_opt_newarray_max: ruby_vminsn_type = 157; +pub const YARVINSN_trace_opt_newarray_min: ruby_vminsn_type = 158; +pub const YARVINSN_trace_invokesuper: ruby_vminsn_type = 159; +pub const YARVINSN_trace_invokeblock: ruby_vminsn_type = 160; +pub const YARVINSN_trace_leave: ruby_vminsn_type = 161; +pub const YARVINSN_trace_throw: ruby_vminsn_type = 162; +pub const YARVINSN_trace_jump: ruby_vminsn_type = 163; +pub const YARVINSN_trace_branchif: ruby_vminsn_type = 164; +pub const YARVINSN_trace_branchunless: ruby_vminsn_type = 165; +pub const YARVINSN_trace_branchnil: ruby_vminsn_type = 166; +pub const YARVINSN_trace_opt_getinlinecache: ruby_vminsn_type = 167; +pub const YARVINSN_trace_opt_setinlinecache: ruby_vminsn_type = 168; +pub const YARVINSN_trace_once: ruby_vminsn_type = 169; +pub const YARVINSN_trace_opt_case_dispatch: ruby_vminsn_type = 170; +pub const YARVINSN_trace_opt_plus: ruby_vminsn_type = 171; +pub const YARVINSN_trace_opt_minus: ruby_vminsn_type = 172; +pub const YARVINSN_trace_opt_mult: ruby_vminsn_type = 173; +pub const YARVINSN_trace_opt_div: ruby_vminsn_type = 174; +pub const YARVINSN_trace_opt_mod: ruby_vminsn_type = 175; +pub const YARVINSN_trace_opt_eq: ruby_vminsn_type = 176; +pub const YARVINSN_trace_opt_neq: ruby_vminsn_type = 177; +pub const YARVINSN_trace_opt_lt: ruby_vminsn_type = 178; +pub const YARVINSN_trace_opt_le: ruby_vminsn_type = 179; +pub const YARVINSN_trace_opt_gt: ruby_vminsn_type = 180; +pub const YARVINSN_trace_opt_ge: ruby_vminsn_type = 181; +pub const YARVINSN_trace_opt_ltlt: ruby_vminsn_type = 182; +pub const 
YARVINSN_trace_opt_and: ruby_vminsn_type = 183; +pub const YARVINSN_trace_opt_or: ruby_vminsn_type = 184; +pub const YARVINSN_trace_opt_aref: ruby_vminsn_type = 185; +pub const YARVINSN_trace_opt_aset: ruby_vminsn_type = 186; +pub const YARVINSN_trace_opt_aset_with: ruby_vminsn_type = 187; +pub const YARVINSN_trace_opt_aref_with: ruby_vminsn_type = 188; +pub const YARVINSN_trace_opt_length: ruby_vminsn_type = 189; +pub const YARVINSN_trace_opt_size: ruby_vminsn_type = 190; +pub const YARVINSN_trace_opt_empty_p: ruby_vminsn_type = 191; +pub const YARVINSN_trace_opt_succ: ruby_vminsn_type = 192; +pub const YARVINSN_trace_opt_not: ruby_vminsn_type = 193; +pub const YARVINSN_trace_opt_regexpmatch2: ruby_vminsn_type = 194; +pub const YARVINSN_trace_invokebuiltin: ruby_vminsn_type = 195; +pub const YARVINSN_trace_opt_invokebuiltin_delegate: ruby_vminsn_type = 196; +pub const YARVINSN_trace_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 197; +pub const YARVINSN_trace_getlocal_WC_0: ruby_vminsn_type = 198; +pub const YARVINSN_trace_getlocal_WC_1: ruby_vminsn_type = 199; +pub const YARVINSN_trace_setlocal_WC_0: ruby_vminsn_type = 200; +pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 201; +pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 202; +pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 203; +pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 204; pub type ruby_vminsn_type = u32; extern "C" { pub fn rb_vm_insn_addr2opcode(addr: *const ::std::os::raw::c_void) -> ::std::os::raw::c_int; @@ -967,6 +1000,9 @@ extern "C" { extern "C" { pub fn rb_iseqw_to_iseq(iseqw: VALUE) -> *const rb_iseq_t; } +extern "C" { + pub fn rb_iseq_method_name(iseq: *const rb_iseq_t) -> VALUE; +} extern "C" { pub fn rb_vm_barrier(); } @@ -984,6 +1020,12 @@ extern "C" { extern "C" { pub fn rb_yjit_mark_executable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32); } +extern "C" { + pub fn rb_yjit_icache_invalidate( + start: *mut 
::std::os::raw::c_void, + end: *mut ::std::os::raw::c_void, + ); +} extern "C" { pub fn rb_yjit_exit_locations_dict( yjit_raw_samples: *mut VALUE, @@ -1003,6 +1045,9 @@ extern "C" { extern "C" { pub fn rb_full_cfunc_return(ec: *mut rb_execution_context_t, return_value: VALUE); } +extern "C" { + pub fn rb_iseq_encoded_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; +} extern "C" { pub fn rb_iseq_get_yjit_payload(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_void; } @@ -1018,7 +1063,129 @@ extern "C" { extern "C" { pub fn rb_iseq_opcode_at_pc(iseq: *const rb_iseq_t, pc: *const VALUE) -> ::std::os::raw::c_int; } +extern "C" { + pub fn rb_RSTRING_LEN(str_: VALUE) -> ::std::os::raw::c_ulong; +} +extern "C" { + pub fn rb_RSTRING_PTR(str_: VALUE) -> *mut ::std::os::raw::c_char; +} pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword; +extern "C" { + pub fn rb_insn_name(insn: VALUE) -> *const ::std::os::raw::c_char; +} +extern "C" { + pub fn rb_insn_len(insn: VALUE) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn rb_vm_ci_argc(ci: *const rb_callinfo) -> ::std::os::raw::c_uint; +} +extern "C" { + pub fn rb_vm_ci_mid(ci: *const rb_callinfo) -> ID; +} +extern "C" { + pub fn rb_vm_ci_flag(ci: *const rb_callinfo) -> ::std::os::raw::c_uint; +} +extern "C" { + pub fn rb_vm_ci_kwarg(ci: *const rb_callinfo) -> *const rb_callinfo_kwarg; +} +extern "C" { + pub fn rb_get_cikw_keyword_len(cikw: *const rb_callinfo_kwarg) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn rb_get_cikw_keywords_idx( + cikw: *const rb_callinfo_kwarg, + idx: ::std::os::raw::c_int, + ) -> VALUE; +} +extern "C" { + pub fn rb_METHOD_ENTRY_VISI(me: *const rb_callable_method_entry_t) -> rb_method_visibility_t; +} +extern "C" { + pub fn rb_get_cme_def_type(cme: *const rb_callable_method_entry_t) -> rb_method_type_t; +} +extern "C" { + pub fn rb_get_cme_def_body_attr_id(cme: *const rb_callable_method_entry_t) -> ID; +} +extern "C" { + pub fn 
rb_get_cme_def_body_optimized_type( + cme: *const rb_callable_method_entry_t, + ) -> method_optimized_type; +} +extern "C" { + pub fn rb_get_cme_def_body_optimized_index( + cme: *const rb_callable_method_entry_t, + ) -> ::std::os::raw::c_uint; +} +extern "C" { + pub fn rb_get_cme_def_body_cfunc( + cme: *const rb_callable_method_entry_t, + ) -> *mut rb_method_cfunc_t; +} +extern "C" { + pub fn rb_get_def_method_serial(def: *const rb_method_definition_t) -> usize; +} +extern "C" { + pub fn rb_get_def_original_id(def: *const rb_method_definition_t) -> ID; +} +extern "C" { + pub fn rb_get_mct_argc(mct: *const rb_method_cfunc_t) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn rb_get_mct_func(mct: *const rb_method_cfunc_t) -> *mut ::std::os::raw::c_void; +} +extern "C" { + pub fn rb_get_def_iseq_ptr(def: *mut rb_method_definition_t) -> *const rb_iseq_t; +} +extern "C" { + pub fn rb_get_iseq_body_local_iseq(iseq: *const rb_iseq_t) -> *const rb_iseq_t; +} +extern "C" { + pub fn rb_get_iseq_body_local_table_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; +} +extern "C" { + pub fn rb_get_iseq_body_iseq_encoded(iseq: *const rb_iseq_t) -> *mut VALUE; +} +extern "C" { + pub fn rb_get_iseq_body_stack_max(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; +} +extern "C" { + pub fn rb_get_iseq_flags_has_opt(iseq: *const rb_iseq_t) -> bool; +} +extern "C" { + pub fn rb_get_iseq_flags_has_kw(iseq: *const rb_iseq_t) -> bool; +} +extern "C" { + pub fn rb_get_iseq_flags_has_post(iseq: *const rb_iseq_t) -> bool; +} +extern "C" { + pub fn rb_get_iseq_flags_has_kwrest(iseq: *const rb_iseq_t) -> bool; +} +extern "C" { + pub fn rb_get_iseq_flags_has_rest(iseq: *const rb_iseq_t) -> bool; +} +extern "C" { + pub fn rb_get_iseq_flags_has_block(iseq: *const rb_iseq_t) -> bool; +} +extern "C" { + pub fn rb_get_iseq_flags_has_accepts_no_kwarg(iseq: *const rb_iseq_t) -> bool; +} +extern "C" { + pub fn rb_get_iseq_body_param_keyword( + iseq: *const rb_iseq_t, + ) -> *const 
rb_seq_param_keyword_struct; +} +extern "C" { + pub fn rb_get_iseq_body_param_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; +} +extern "C" { + pub fn rb_get_iseq_body_param_lead_num(iseq: *const rb_iseq_t) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn rb_get_iseq_body_param_opt_num(iseq: *const rb_iseq_t) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn rb_get_iseq_body_param_opt_table(iseq: *const rb_iseq_t) -> *const VALUE; +} extern "C" { pub fn rb_leaf_invokebuiltin_iseq_p(iseq: *const rb_iseq_t) -> bool; } @@ -1028,6 +1195,15 @@ extern "C" { extern "C" { pub fn rb_yjit_str_simple_append(str1: VALUE, str2: VALUE) -> VALUE; } +extern "C" { + pub fn rb_get_ec_cfp(ec: *const rb_execution_context_t) -> *mut rb_control_frame_struct; +} +extern "C" { + pub fn rb_get_cfp_pc(cfp: *mut rb_control_frame_struct) -> *mut VALUE; +} +extern "C" { + pub fn rb_get_cfp_sp(cfp: *mut rb_control_frame_struct) -> *mut VALUE; +} extern "C" { pub fn rb_set_cfp_pc(cfp: *mut rb_control_frame_struct, pc: *const VALUE); } @@ -1037,9 +1213,54 @@ extern "C" { extern "C" { pub fn rb_cfp_get_iseq(cfp: *mut rb_control_frame_struct) -> *mut rb_iseq_t; } +extern "C" { + pub fn rb_get_cfp_self(cfp: *mut rb_control_frame_struct) -> VALUE; +} +extern "C" { + pub fn rb_get_cfp_ep(cfp: *mut rb_control_frame_struct) -> *mut VALUE; +} +extern "C" { + pub fn rb_get_cfp_ep_level(cfp: *mut rb_control_frame_struct, lv: u32) -> *const VALUE; +} +extern "C" { + pub fn rb_yarv_class_of(obj: VALUE) -> VALUE; +} +extern "C" { + pub fn rb_yarv_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE; +} +extern "C" { + pub fn rb_yarv_ary_entry_internal(ary: VALUE, offset: ::std::os::raw::c_long) -> VALUE; +} +extern "C" { + pub fn rb_yarv_fix_mod_fix(recv: VALUE, obj: VALUE) -> VALUE; +} extern "C" { pub fn rb_yjit_dump_iseq_loc(iseq: *const rb_iseq_t, insn_idx: u32); } +extern "C" { + pub fn rb_FL_TEST(obj: VALUE, flags: VALUE) -> VALUE; +} +extern "C" { + pub fn rb_FL_TEST_RAW(obj: VALUE, flags: 
VALUE) -> VALUE; +} +extern "C" { + pub fn rb_RB_TYPE_P(obj: VALUE, t: ruby_value_type) -> bool; +} +extern "C" { + pub fn rb_RSTRUCT_LEN(st: VALUE) -> ::std::os::raw::c_long; +} +extern "C" { + pub fn rb_RSTRUCT_SET(st: VALUE, k: ::std::os::raw::c_int, v: VALUE); +} +extern "C" { + pub fn rb_get_call_data_ci(cd: *const rb_call_data) -> *const rb_callinfo; +} +extern "C" { + pub fn rb_BASIC_OP_UNREDEFINED_P(bop: ruby_basic_operators, klass: u32) -> bool; +} +extern "C" { + pub fn rb_RCLASS_ORIGIN(c: VALUE) -> VALUE; +} extern "C" { pub fn rb_ENCODING_GET(obj: VALUE) -> ::std::os::raw::c_int; } diff --git a/yjit/src/disasm.rs b/yjit/src/disasm.rs index 2082648c4a32ca..3d1c5b33fd9093 100644 --- a/yjit/src/disasm.rs +++ b/yjit/src/disasm.rs @@ -1,6 +1,9 @@ use crate::core::*; use crate::cruby::*; use crate::yjit::yjit_enabled_p; +use crate::asm::CodeBlock; +use crate::codegen::CodePtr; +use std::fmt::Write; /// Primitive called in yjit.rb /// Produce a string representing the disassembly for an ISEQ @@ -26,14 +29,16 @@ pub extern "C" fn rb_yjit_disasm_iseq(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALU // Get the iseq pointer from the wrapper let iseq = unsafe { rb_iseqw_to_iseq(iseqw) }; - let out_string = disasm_iseq(iseq); + // This will truncate disassembly of methods with 10k+ bytecodes. + // That's a good thing - this prints to console. 
+ let out_string = disasm_iseq_insn_range(iseq, 0, 9999); return rust_str_to_ruby(&out_string); } } #[cfg(feature = "disasm")] -fn disasm_iseq(iseq: IseqPtr) -> String { +pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u32, end_idx: u32) -> String { let mut out = String::from(""); // Get a list of block versions generated for this iseq @@ -65,15 +70,6 @@ fn disasm_iseq(iseq: IseqPtr) -> String { total_code_size += blockref.borrow().code_size(); } - // Initialize capstone - use capstone::prelude::*; - let cs = Capstone::new() - .x86() - .mode(arch::x86::ArchMode::Mode64) - .syntax(arch::x86::ArchSyntax::Intel) - .build() - .unwrap(); - out.push_str(&format!("NUM BLOCK VERSIONS: {}\n", block_list.len())); out.push_str(&format!( "TOTAL INLINE CODE SIZE: {} bytes\n", @@ -84,49 +80,82 @@ fn disasm_iseq(iseq: IseqPtr) -> String { for block_idx in 0..block_list.len() { let block = block_list[block_idx].borrow(); let blockid = block.get_blockid(); - let end_idx = block.get_end_idx(); - let start_addr = block.get_start_addr().unwrap().raw_ptr(); - let end_addr = block.get_end_addr().unwrap().raw_ptr(); - let code_size = block.code_size(); - - // Write some info about the current block - let block_ident = format!( - "BLOCK {}/{}, ISEQ RANGE [{},{}), {} bytes ", - block_idx + 1, - block_list.len(), - blockid.idx, - end_idx, - code_size - ); - out.push_str(&format!("== {:=<60}\n", block_ident)); - - // Disassemble the instructions - let code_slice = unsafe { std::slice::from_raw_parts(start_addr, code_size) }; - let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap(); - - // For each instruction in this block - for insn in insns.as_ref() { - // Comments for this block - if let Some(comment_list) = global_cb.comments_at(insn.address() as usize) { - for comment in comment_list { - out.push_str(&format!(" \x1b[1m# {}\x1b[0m\n", comment)); + if blockid.idx >= start_idx && blockid.idx < end_idx { + let end_idx = block.get_end_idx(); + let start_addr = 
block.get_start_addr().unwrap().raw_ptr(); + let end_addr = block.get_end_addr().unwrap().raw_ptr(); + let code_size = block.code_size(); + + // Write some info about the current block + let block_ident = format!( + "BLOCK {}/{}, ISEQ RANGE [{},{}), {} bytes ", + block_idx + 1, + block_list.len(), + blockid.idx, + end_idx, + code_size + ); + out.push_str(&format!("== {:=<60}\n", block_ident)); + + // Disassemble the instructions + out.push_str(&disasm_addr_range(global_cb, start_addr, code_size)); + + // If this is not the last block + if block_idx < block_list.len() - 1 { + // Compute the size of the gap between this block and the next + let next_block = block_list[block_idx + 1].borrow(); + let next_start_addr = next_block.get_start_addr().unwrap().raw_ptr(); + let gap_size = (next_start_addr as usize) - (end_addr as usize); + + // Log the size of the gap between the blocks if nonzero + if gap_size > 0 { + out.push_str(&format!("... {} byte gap ...\n", gap_size)); } } - out.push_str(&format!(" {}\n", insn)); } + } + + return out; +} - // If this is not the last block - if block_idx < block_list.len() - 1 { - // Compute the size of the gap between this block and the next - let next_block = block_list[block_idx + 1].borrow(); - let next_start_addr = next_block.get_start_addr().unwrap().raw_ptr(); - let gap_size = (next_start_addr as usize) - (end_addr as usize); - // Log the size of the gap between the blocks if nonzero - if gap_size > 0 { - out.push_str(&format!("... 
{} byte gap ...\n", gap_size)); +#[cfg(feature = "disasm")] +pub fn disasm_addr_range(cb: &CodeBlock, start_addr: *const u8, code_size: usize) -> String { + let mut out = String::from(""); + + // Initialize capstone + use capstone::prelude::*; + + #[cfg(target_arch = "x86_64")] + let mut cs = Capstone::new() + .x86() + .mode(arch::x86::ArchMode::Mode64) + .syntax(arch::x86::ArchSyntax::Intel) + .build() + .unwrap(); + + #[cfg(target_arch = "aarch64")] + let mut cs = Capstone::new() + .arm64() + .mode(arch::arm64::ArchMode::Arm) + .detail(true) + .build() + .unwrap(); + cs.set_skipdata(true); + + // Disassemble the instructions + let code_slice = unsafe { std::slice::from_raw_parts(start_addr, code_size) }; + let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap(); + + // For each instruction in this block + for insn in insns.as_ref() { + // Comments for this block + if let Some(comment_list) = cb.comments_at(insn.address() as usize) { + for comment in comment_list { + write!(&mut out, " \x1b[1m# {}\x1b[0m\n", comment).unwrap(); } } + write!(&mut out, " {}\n", insn).unwrap(); } return out; diff --git a/yjit/src/invariants.rs b/yjit/src/invariants.rs index 6329c70f873981..9cdef0d8bb95d2 100644 --- a/yjit/src/invariants.rs +++ b/yjit/src/invariants.rs @@ -528,8 +528,6 @@ pub extern "C" fn rb_yjit_tracing_invalidate_all() { return; } - use crate::asm::x86_64::jmp_ptr; - // Stop other ractors since we are going to patch machine code. 
with_vm_lock(src_loc!(), || { // Make it so all live block versions are no longer valid branch targets @@ -561,13 +559,18 @@ pub extern "C" fn rb_yjit_tracing_invalidate_all() { // Apply patches let old_pos = cb.get_write_pos(); - let patches = CodegenGlobals::take_global_inval_patches(); + let mut patches = CodegenGlobals::take_global_inval_patches(); + patches.sort_by_cached_key(|patch| patch.inline_patch_pos.raw_ptr()); + let mut last_patch_end = std::ptr::null(); for patch in &patches { - cb.set_write_ptr(patch.inline_patch_pos); - jmp_ptr(cb, patch.outlined_target_pos); + assert!(last_patch_end <= patch.inline_patch_pos.raw_ptr(), "patches should not overlap"); - // FIXME: Can't easily check we actually wrote out the JMP at the moment. - // assert!(!cb.has_dropped_bytes(), "patches should have space and jump offsets should fit in JMP rel32"); + let mut asm = crate::backend::ir::Assembler::new(); + asm.jmp(patch.outlined_target_pos.into()); + + cb.set_write_ptr(patch.inline_patch_pos); + asm.compile(cb); + last_patch_end = cb.get_write_ptr().raw_ptr(); } cb.set_pos(old_pos); diff --git a/yjit/src/lib.rs b/yjit/src/lib.rs index 6772f551a87b99..9b19c7d6a0ad65 100644 --- a/yjit/src/lib.rs +++ b/yjit/src/lib.rs @@ -3,7 +3,12 @@ #![allow(clippy::too_many_arguments)] // :shrug: #![allow(clippy::identity_op)] // Sometimes we do it for style +// Temporary while switching to the new backend +#![allow(dead_code)] +#![allow(unused)] + mod asm; +mod backend; mod codegen; mod core; mod cruby; diff --git a/yjit/src/options.rs b/yjit/src/options.rs index 704c709baeaad8..2e141445f13677 100644 --- a/yjit/src/options.rs +++ b/yjit/src/options.rs @@ -1,7 +1,7 @@ use std::ffi::CStr; // Command-line options -#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[derive(Clone, PartialEq, Eq, Debug)] #[repr(C)] pub struct Options { // Size of the executable memory block to allocate in MiB @@ -30,6 +30,12 @@ pub struct Options { /// Dump compiled and executed instructions for debugging pub 
dump_insns: bool, + /// Dump all compiled instructions in inlined CodeBlock + pub dump_disasm: bool, + + /// Print when specific ISEQ items are compiled or invalidated + pub dump_iseq_disasm: Option, + /// Verify context objects (debug mode only) pub verify_ctx: bool, @@ -50,8 +56,10 @@ pub static mut OPTIONS: Options = Options { gen_stats: false, gen_trace_exits: false, dump_insns: false, + dump_disasm: false, verify_ctx: false, global_constant_state: false, + dump_iseq_disasm: None, }; /// Macro to get an option value by name @@ -64,6 +72,16 @@ macro_rules! get_option { } pub(crate) use get_option; +/// Macro to reference an option value by name; we assume it's a cloneable type like String or an Option of same. +macro_rules! get_option_ref { + // Unsafe is ok here because options are initialized + // once before any Ruby code executes + ($option_name:ident) => { + unsafe { &(OPTIONS.$option_name) } + }; +} +pub(crate) use get_option_ref; + /// Expected to receive what comes after the third dash in "--yjit-*". /// Empty string means user passed only "--yjit". C code rejects when /// they pass exact "--yjit-". 
@@ -105,11 +123,16 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> { } }, + ("dump-iseq-disasm", _) => unsafe { + OPTIONS.dump_iseq_disasm = Some(opt_val.to_string()); + }, + ("greedy-versioning", "") => unsafe { OPTIONS.greedy_versioning = true }, ("no-type-prop", "") => unsafe { OPTIONS.no_type_prop = true }, ("stats", "") => unsafe { OPTIONS.gen_stats = true }, ("trace-exits", "") => unsafe { OPTIONS.gen_trace_exits = true; OPTIONS.gen_stats = true }, ("dump-insns", "") => unsafe { OPTIONS.dump_insns = true }, + ("dump-disasm", "") => unsafe { OPTIONS.dump_disasm = true }, ("verify-ctx", "") => unsafe { OPTIONS.verify_ctx = true }, ("global-constant-state", "") => unsafe { OPTIONS.global_constant_state = true }, diff --git a/yjit/src/utils.rs b/yjit/src/utils.rs index 02fbce47d895e6..bea57e4fc24a5d 100644 --- a/yjit/src/utils.rs +++ b/yjit/src/utils.rs @@ -1,7 +1,6 @@ #![allow(dead_code)] // Some functions for print debugging in here -use crate::asm::x86_64::*; -use crate::asm::*; +use crate::backend::ir::*; use crate::cruby::*; use std::slice; @@ -71,34 +70,39 @@ macro_rules! offset_of { #[allow(unused)] pub(crate) use offset_of; -#[cfg(test)] -mod tests { - #[test] - fn min_max_preserved_after_cast_to_usize() { - use crate::utils::IntoUsize; - - let min: usize = u64::MIN.as_usize(); - assert_eq!(min, u64::MIN.try_into().unwrap()); - let max: usize = u64::MAX.as_usize(); - assert_eq!(max, u64::MAX.try_into().unwrap()); - - let min: usize = u32::MIN.as_usize(); - assert_eq!(min, u32::MIN.try_into().unwrap()); - let max: usize = u32::MAX.as_usize(); - assert_eq!(max, u32::MAX.try_into().unwrap()); - } - - #[test] - fn test_offset_of() { - #[repr(C)] - struct Foo { - a: u8, - b: u64, - } +// Convert a CRuby UTF-8-encoded RSTRING into a Rust string. +// This should work fine on ASCII strings and anything else +// that is considered legal UTF-8, including embedded nulls. 
+fn ruby_str_to_rust(v: VALUE) -> String { + // Make sure the CRuby encoding is UTF-8 compatible + let encoding = unsafe { rb_ENCODING_GET(v) } as u32; + assert!(encoding == RUBY_ENCINDEX_ASCII_8BIT || encoding == RUBY_ENCINDEX_UTF_8 || encoding == RUBY_ENCINDEX_US_ASCII); + + let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8; + let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap(); + let str_slice: &[u8] = unsafe { slice::from_raw_parts(str_ptr, str_len) }; + String::from_utf8(str_slice.to_vec()).unwrap() // does utf8 validation +} - assert_eq!(0, offset_of!(Foo, a), "C99 6.7.2.1p13 says no padding at the front"); - assert_eq!(8, offset_of!(Foo, b), "ABI dependent, but should hold"); +// Location is the file defining the method, colon, method name. +// Filenames are sometimes internal strings supplied to eval, +// so be careful with them. +pub fn iseq_get_location(iseq: IseqPtr) -> String { + let iseq_path = unsafe { rb_iseq_path(iseq) }; + let iseq_method = unsafe { rb_iseq_method_name(iseq) }; + + let mut s = if iseq_path == Qnil { + "None".to_string() + } else { + ruby_str_to_rust(iseq_path) + }; + s.push_str(":"); + if iseq_method == Qnil { + s.push_str("None"); + } else { + s.push_str(& ruby_str_to_rust(iseq_method)); } + s } // TODO: we may want to move this function into yjit.c, maybe add a convenient Rust-side wrapper @@ -116,123 +120,148 @@ yjit_print_iseq(const rb_iseq_t *iseq) } */ -// Save caller-save registers on the stack before a C call -fn push_regs(cb: &mut CodeBlock) { - push(cb, RAX); - push(cb, RCX); - push(cb, RDX); - push(cb, RSI); - push(cb, RDI); - push(cb, R8); - push(cb, R9); - push(cb, R10); - push(cb, R11); - pushfq(cb); -} - -// Restore caller-save registers from the after a C call -fn pop_regs(cb: &mut CodeBlock) { - popfq(cb); - pop(cb, R11); - pop(cb, R10); - pop(cb, R9); - pop(cb, R8); - pop(cb, RDI); - pop(cb, RSI); - pop(cb, RDX); - pop(cb, RCX); - pop(cb, RAX); -} - -pub fn print_int(cb: &mut CodeBlock, opnd: 
X86Opnd) { - extern "sysv64" fn print_int_fn(val: i64) { - println!("{}", val); +#[cfg(target_arch = "aarch64")] +macro_rules! c_callable { + (fn $f:ident $args:tt $(-> $ret:ty)? $body:block) => { extern "C" fn $f $args $(-> $ret)? $body }; +} + +#[cfg(target_arch = "x86_64")] +macro_rules! c_callable { + (fn $f:ident $args:tt $(-> $ret:ty)? $body:block) => { extern "sysv64" fn $f $args $(-> $ret)? $body }; +} +pub(crate) use c_callable; + +pub fn print_int(asm: &mut Assembler, opnd: Opnd) { + c_callable!{ + fn print_int_fn(val: i64) { + println!("{}", val); + } } - push_regs(cb); + asm.cpush_all(); - match opnd { - X86Opnd::Mem(_) | X86Opnd::Reg(_) => { + let argument = match opnd { + Opnd::Mem(_) | Opnd::Reg(_) | Opnd::InsnOut { .. } => { // Sign-extend the value if necessary - if opnd.num_bits() < 64 { - movsx(cb, C_ARG_REGS[0], opnd); + if opnd.rm_num_bits() < 64 { + asm.load_sext(opnd) } else { - mov(cb, C_ARG_REGS[0], opnd); + opnd } - } - X86Opnd::Imm(_) | X86Opnd::UImm(_) => { - mov(cb, C_ARG_REGS[0], opnd); - } + }, + Opnd::Imm(_) | Opnd::UImm(_) => opnd, _ => unreachable!(), - } + }; - mov(cb, RAX, const_ptr_opnd(print_int_fn as *const u8)); - call(cb, RAX); - pop_regs(cb); + asm.ccall(print_int_fn as *const u8, vec![argument]); + asm.cpop_all(); } /// Generate code to print a pointer -pub fn print_ptr(cb: &mut CodeBlock, opnd: X86Opnd) { - extern "sysv64" fn print_ptr_fn(ptr: *const u8) { - println!("{:p}", ptr); +pub fn print_ptr(asm: &mut Assembler, opnd: Opnd) { + c_callable!{ + fn print_ptr_fn(ptr: *const u8) { + println!("{:p}", ptr); + } } - assert!(opnd.num_bits() == 64); + assert!(opnd.rm_num_bits() == 64); - push_regs(cb); - mov(cb, C_ARG_REGS[0], opnd); - mov(cb, RAX, const_ptr_opnd(print_ptr_fn as *const u8)); - call(cb, RAX); - pop_regs(cb); + asm.cpush_all(); + asm.ccall(print_ptr_fn as *const u8, vec![opnd]); + asm.cpop_all(); } /// Generate code to print a value -pub fn print_value(cb: &mut CodeBlock, opnd: X86Opnd) { - extern "sysv64" fn 
print_value_fn(val: VALUE) { - unsafe { rb_obj_info_dump(val) } +pub fn print_value(asm: &mut Assembler, opnd: Opnd) { + c_callable!{ + fn print_value_fn(val: VALUE) { + unsafe { rb_obj_info_dump(val) } + } } - assert!(opnd.num_bits() == 64); - - push_regs(cb); + assert!(matches!(opnd, Opnd::Value(_))); - mov(cb, RDI, opnd); - mov(cb, RAX, const_ptr_opnd(print_value_fn as *const u8)); - call(cb, RAX); - - pop_regs(cb); + asm.cpush_all(); + asm.ccall(print_value_fn as *const u8, vec![opnd]); + asm.cpop_all(); } /// Generate code to print constant string to stdout -pub fn print_str(cb: &mut CodeBlock, str: &str) { - extern "sysv64" fn print_str_cfun(ptr: *const u8, num_bytes: usize) { - unsafe { - let slice = slice::from_raw_parts(ptr, num_bytes); - let str = std::str::from_utf8(slice).unwrap(); - println!("{}", str); +pub fn print_str(asm: &mut Assembler, str: &str) { + c_callable!{ + fn print_str_cfun(ptr: *const u8, num_bytes: usize) { + unsafe { + let slice = slice::from_raw_parts(ptr, num_bytes); + let str = std::str::from_utf8(slice).unwrap(); + println!("{}", str); + } } } - let bytes = str.as_ptr(); - let num_bytes = str.len(); + asm.cpush_all(); + + let string_data = asm.new_label("string_data"); + let after_string = asm.new_label("after_string"); + + asm.jmp(after_string); + asm.write_label(string_data); + asm.bake_string(str); + asm.write_label(after_string); + + let opnd = asm.lea_label(string_data); + asm.ccall(print_str_cfun as *const u8, vec![opnd, Opnd::UImm(str.len() as u64)]); + + asm.cpop_all(); +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::asm::CodeBlock; + + #[test] + fn min_max_preserved_after_cast_to_usize() { + use crate::utils::IntoUsize; + + let min: usize = u64::MIN.as_usize(); + assert_eq!(min, u64::MIN.try_into().unwrap()); + let max: usize = u64::MAX.as_usize(); + assert_eq!(max, u64::MAX.try_into().unwrap()); - push_regs(cb); + let min: usize = u32::MIN.as_usize(); + assert_eq!(min, u32::MIN.try_into().unwrap()); + let 
max: usize = u32::MAX.as_usize(); + assert_eq!(max, u32::MAX.try_into().unwrap()); + } - // Load the string address and jump over the string data - lea(cb, C_ARG_REGS[0], mem_opnd(8, RIP, 5)); - jmp32(cb, num_bytes as i32); + #[test] + fn test_offset_of() { + #[repr(C)] + struct Foo { + a: u8, + b: u64, + } - // Write the string chars and a null terminator - for i in 0..num_bytes { - cb.write_byte(unsafe { *bytes.add(i) }); + assert_eq!(0, offset_of!(Foo, a), "C99 6.7.2.1p13 says no padding at the front"); + assert_eq!(8, offset_of!(Foo, b), "ABI dependent, but should hold"); } - // Pass the string length as an argument - mov(cb, C_ARG_REGS[1], uimm_opnd(num_bytes as u64)); + #[test] + fn test_print_int() { + let mut asm = Assembler::new(); + let mut cb = CodeBlock::new_dummy(1024); - // Call the print function - mov(cb, RAX, const_ptr_opnd(print_str_cfun as *const u8)); - call(cb, RAX); + print_int(&mut asm, Opnd::Imm(42)); + asm.compile(&mut cb); + } + + #[test] + fn test_print_str() { + let mut asm = Assembler::new(); + let mut cb = CodeBlock::new_dummy(1024); - pop_regs(cb); + print_str(&mut asm, "Hello, world!"); + asm.compile(&mut cb); + } } diff --git a/yjit/src/virtualmem.rs b/yjit/src/virtualmem.rs index 6a8e27447e1549..8d34e521b9c5d1 100644 --- a/yjit/src/virtualmem.rs +++ b/yjit/src/virtualmem.rs @@ -192,6 +192,11 @@ impl CodePtr { ptr as i64 } + pub fn into_u64(self) -> u64 { + let CodePtr(ptr) = self; + ptr as u64 + } + pub fn into_usize(self) -> usize { let CodePtr(ptr) = self; ptr as usize diff --git a/yjit/src/yjit.rs b/yjit/src/yjit.rs index 192e9753d976d6..5cd23f066f52d8 100644 --- a/yjit/src/yjit.rs +++ b/yjit/src/yjit.rs @@ -22,7 +22,7 @@ pub extern "C" fn rb_yjit_parse_option(str_ptr: *const raw::c_char) -> bool { } /// Is YJIT on? The interpreter uses this function to decide whether to increment -/// ISEQ call counters. See mjit_exec(). +/// ISEQ call counters. See jit_exec(). 
/// This is used frequently since it's used on every method call in the interpreter. #[no_mangle] pub extern "C" fn rb_yjit_enabled_p() -> raw::c_int { @@ -91,8 +91,8 @@ pub extern "C" fn rb_yjit_simulate_oom_bang(_ec: EcPtr, _ruby_self: VALUE) -> VA if cfg!(debug_assertions) { let cb = CodegenGlobals::get_inline_cb(); let ocb = CodegenGlobals::get_outlined_cb().unwrap(); - cb.set_pos(cb.get_mem_size() - 1); - ocb.set_pos(ocb.get_mem_size() - 1); + cb.set_pos(cb.get_mem_size()); + ocb.set_pos(ocb.get_mem_size()); } return Qnil;